Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

ISSUE-241: Use mypy for type checking #247

Merged
merged 12 commits into from
Oct 25, 2020
Merged
3 changes: 3 additions & 0 deletions .circleci/config.yml
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,9 @@ jobs:
- run:
name: Show installed Python packages
command: pip list
- run:
name: Type checking
command: mypy
ben-fernandes-sw marked this conversation as resolved.
Show resolved Hide resolved
- run:
name: Lint
working_directory: ~/
Expand Down
5 changes: 4 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -5,4 +5,7 @@ __pycache__/
*.egg-info/
*.log
**/tars
**/freq_tsvs
**/freq_tsvs
env/

.idea/
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,7 @@ Unreleased
- Added Shan (`shn`) with custom extraction. (\#229)
- Split Latin (`lat`) into its dialects. (\#233)
- Added support for Python 3.9 (\#236)
ben-fernandes-sw marked this conversation as resolved.
Show resolved Hide resolved
- Added MyPy coverage for `wikipron`, `data` and `tests` directories
ben-fernandes-sw marked this conversation as resolved.
Show resolved Hide resolved

### Changed

Expand Down
7 changes: 4 additions & 3 deletions CONTRIBUTING.md
Original file line number Diff line number Diff line change
Expand Up @@ -74,9 +74,9 @@ To work on a feature or bug fix, here are the development steps:
The `wikipron` repo has continuous integration (CI) turned on,
with autobuilds running pytest and flake8 for the test suite
(in the [`tests/`](tests) directory) and code style checks, respectively.
If an autobuild at a pending pull request fails because of pytest or flake8
errors, then the errors must be fixed by further commits pushed to the branch
by the author.
If an autobuild at a pending pull request fails because of `pytest`, `flake8` or
`mypy` errors, then the errors must be fixed by further commits pushed to the
branch by the author.
ben-fernandes-sw marked this conversation as resolved.
Show resolved Hide resolved

If you would like to help avoid wasting free Internet resources
(every push triggers a new CI autobuild),
Expand All @@ -85,4 +85,5 @@ you can run pytest and flake8 checks locally before pushing commits:
```bash
flake8 setup.py wikipron/ tests/
pytest -vv tests/
mypy
```
1 change: 1 addition & 0 deletions data/requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -2,3 +2,4 @@ regex>=2019.12.9
requests
requests-html
wikipron>=1.0.0
mypy
ben-fernandes-sw marked this conversation as resolved.
Show resolved Hide resolved
10 changes: 8 additions & 2 deletions data/src/codes.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,9 +91,15 @@ def _get_language_sizes(categories: List[str]) -> Dict[str, int]:
).json()
for page in data["query"]["pages"].values():
size = page["categoryinfo"]["size"]
language = re.search(

language_search = re.search(
r"Category:(.+?) terms with IPA pronunciation", page["title"]
).group(1)
)

if not language_search:
continue
ben-fernandes-sw marked this conversation as resolved.
Show resolved Hide resolved

language = language_search.group(1)
language_sizes[language] = size
return language_sizes

Expand Down
3 changes: 3 additions & 0 deletions mypy.ini
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
[mypy]
files=wikipron,data,tests
ignore_missing_imports=true
ben-fernandes-sw marked this conversation as resolved.
Show resolved Hide resolved
3 changes: 2 additions & 1 deletion tests/test_data/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
import shutil

from contextlib import contextmanager
from typing import Iterator

_TESTS_DIR = os.path.dirname(os.getcwd())
_TSV_PATH = f"{_TESTS_DIR}/tsv"
Expand All @@ -21,7 +22,7 @@ def write_dummy_phones_files(key: str, dialect: str) -> None:


@contextmanager
def handle_dummy_files(phones: bool, key: str, dialect: str) -> str:
def handle_dummy_files(phones: bool, key: str, dialect: str) -> Iterator[str]:
"""Creates and removes dummy directories for housing
TSV and phones files."""
os.mkdir(_TSV_PATH)
Expand Down
5 changes: 3 additions & 2 deletions tests/test_data/test_scrape.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import os

from typing import List
from typing import List, Any, Dict
ben-fernandes-sw marked this conversation as resolved.
Show resolved Hide resolved

import pytest

Expand Down Expand Up @@ -46,7 +46,7 @@
],
)
def test_file_creation(
config_settings: object,
config_settings: Dict[str, Any],
dialect_suffix: str,
phones: bool,
expected_file_name: List[str],
Expand All @@ -55,6 +55,7 @@ def test_file_creation(
file names based on presence or absence of dialect specification
or .phones files for a given language.
"""
dummy_tsv_path: str
with handle_dummy_files(
phones, config_settings["key"], dialect_suffix
) as dummy_tsv_path:
Expand Down
8 changes: 5 additions & 3 deletions tests/test_wikipron/__init__.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,14 @@
from typing import Dict

import requests

from wikipron.scrape import HTTP_HEADERS
from wikipron.config import Config
from wikipron.scrape import HTTP_HEADERS


def config_factory(**kwargs) -> Config:
"""Create a Config object for testing."""
config_dict = {"key": "eng"} # The one default; may be overridden.
config_dict: Dict = {"key": "eng"} # The one default; may be overridden.
ben-fernandes-sw marked this conversation as resolved.
Show resolved Hide resolved
config_dict.update(**kwargs)
return Config(**config_dict)

Expand All @@ -17,7 +19,7 @@ def can_connect_to_wiktionary() -> bool:
requests.get(
"https://en.wiktionary.org/wiki/linguistics", headers=HTTP_HEADERS
)
except (requests.ConnectionError, requests.ConnectTimeout):
except requests.ConnectionError:
ben-fernandes-sw marked this conversation as resolved.
Show resolved Hide resolved
return False
else:
return True
5 changes: 2 additions & 3 deletions tests/test_wikipron/test_extract.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,9 @@
import pytest
import requests
import requests_html

from wikipron.extract import EXTRACTION_FUNCTIONS
from wikipron.extract.core import _skip_pron
from wikipron.extract.default import extract_word_pron_default

from . import config_factory


Expand All @@ -14,7 +13,7 @@
def test_extraction_functions_have_the_same_signature(func):
expected_annotations = {
"word": "Word",
"request": requests.Response,
"request": requests_html,
"config": "Config",
"return": "Iterator[WordPronPair]",
}
Expand Down
8 changes: 6 additions & 2 deletions wikipron/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
import logging
import re

from typing import Callable, Optional
from typing import Callable, Optional, cast

import iso639
import segments
Expand Down Expand Up @@ -121,7 +121,11 @@ def _get_cut_off_date(self, cut_off_date: Optional[str]) -> str:
return cut_off_date

def _get_casefold(self, casefold: bool) -> Callable[[Word], Word]:
return str.casefold if casefold else lambda word: word # noqa: E731
default_func: Callable[[Word], Word] = lambda word: word # noqa: E731
return self._casefold_word if casefold else default_func

def _casefold_word(self, word: Word):
ben-fernandes-sw marked this conversation as resolved.
Show resolved Hide resolved
return cast(Word, str.casefold(word))

def _get_process_pron(
self,
Expand Down
7 changes: 3 additions & 4 deletions wikipron/extract/cmn.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,10 @@
import itertools
import typing

import requests
import requests_html

from wikipron.extract.default import yield_pron, IPA_XPATH_SELECTOR


if typing.TYPE_CHECKING:
from wikipron.config import Config
from wikipron.typing import Iterator, Word, Pron, WordPronPair
Expand All @@ -22,14 +21,14 @@


def yield_cmn_pron(
request: requests.Response, config: "Config"
request: requests_html, config: "Config"
) -> "Iterator[Pron]":
for li_container in request.html.xpath(_PRON_XPATH_TEMPLATE):
yield from yield_pron(li_container, IPA_XPATH_SELECTOR, config)


def extract_word_pron_cmn(
word: "Word", request: requests.Response, config: "Config"
word: "Word", request: requests_html, config: "Config"
) -> "Iterator[WordPronPair]":
words = itertools.repeat(word)
prons = yield_cmn_pron(request, config)
Expand Down
7 changes: 3 additions & 4 deletions wikipron/extract/default.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,10 @@
import itertools
import typing

import requests
import requests_html

from wikipron.extract.core import yield_pron


if typing.TYPE_CHECKING:
from wikipron.config import Config
from wikipron.typing import Iterator, Pron, Word, WordPronPair
Expand All @@ -17,14 +16,14 @@


def _yield_phn(
request: requests.Response, config: "Config"
request: requests_html, config: "Config"
) -> "Iterator[Pron]":
for pron_element in request.html.xpath(config.pron_xpath_selector):
yield from yield_pron(pron_element, IPA_XPATH_SELECTOR, config)


def extract_word_pron_default(
word: "Word", request: requests.Response, config: "Config"
word: "Word", request: requests_html, config: "Config"
) -> "Iterator[WordPronPair]":
words = itertools.repeat(word)
prons = _yield_phn(request, config)
Expand Down
8 changes: 4 additions & 4 deletions wikipron/extract/jpn.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
import itertools
import typing

import requests
import requests_html

from wikipron.extract.default import yield_pron, IPA_XPATH_SELECTOR

Expand All @@ -34,7 +34,7 @@


def yield_jpn_pron(
request: requests.Response, config: "Config"
request: requests_html, config: "Config"
) -> "Iterator[Pron]":
# For simplicity, just want to grab the first transcription.
# Will encounter words that have no transcription.
Expand All @@ -44,7 +44,7 @@ def yield_jpn_pron(


def yield_jpn_word(
word: "Word", request: requests.Response
word: "Word", request: requests_html
) -> "Iterator[Word]":
# Again for simplicity, only grabbing first "sub"-word.
word_element = request.html.xpath(_WORD_XPATH_SELECTOR, first=True)
Expand All @@ -56,7 +56,7 @@ def yield_jpn_word(


def extract_word_pron_jpn(
word: "Word", request: requests.Response, config: "Config"
word: "Word", request: requests_html, config: "Config"
) -> "Iterator[WordPronPair]":
# If we can't find a kana alternative, then the headword
# must itself be kana.
Expand Down
4 changes: 2 additions & 2 deletions wikipron/extract/khb.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
import itertools
import typing

import requests
import requests_html

from wikipron.extract.default import yield_pron

Expand All @@ -29,7 +29,7 @@


def extract_word_pron_lu(
word: "Word", request: requests.Response, config: "Config"
word: "Word", request: requests_html, config: "Config"
) -> "Iterator[WordPronPair]":
words = itertools.repeat(word)
prons = yield_pron(request.html, _IPA_XPATH_SELECTOR, config)
Expand Down
5 changes: 2 additions & 3 deletions wikipron/extract/khm.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,10 @@
import itertools
import typing

import requests
import requests_html

from wikipron.extract.default import yield_pron


if typing.TYPE_CHECKING:
from wikipron.config import Config
from wikipron.typing import Iterator, Word, WordPronPair
Expand All @@ -17,7 +16,7 @@


def extract_word_pron_khmer(
word: "Word", request: requests.Response, config: "Config"
word: "Word", request: requests_html, config: "Config"
) -> "Iterator[WordPronPair]":
words = itertools.repeat(word)
prons = yield_pron(request.html, _IPA_XPATH_SELECTOR, config)
Expand Down
15 changes: 6 additions & 9 deletions wikipron/extract/lat.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,14 +43,12 @@

import itertools
import typing
from typing import List

import requests
import requests_html

from wikipron.extract.default import yield_pron, IPA_XPATH_SELECTOR

from typing import List


if typing.TYPE_CHECKING:
from wikipron.config import Config
from wikipron.typing import Iterator, Pron, Word, WordPronPair
Expand Down Expand Up @@ -87,7 +85,7 @@
"""


def _get_tags(request: requests.Response) -> List[str]:
def _get_tags(request: requests_html) -> List[str]:
"""Extract the Latin Etymology ID tags from the table of contents."""
tags = []
for a_element in request.html.xpath(_TOC_ETYMOLOGY_XPATH_SELECTOR):
Expand All @@ -101,7 +99,7 @@ def _get_tags(request: requests.Response) -> List[str]:


def _yield_latin_word(
request: requests.Response, tag: str
request: requests_html, tag: str
) -> "Iterator[Word]":
heading = "h2" if tag == "Latin" else "h3"
word_xpath_selector = _WORD_XPATH_TEMPLATE.format(heading=heading, tag=tag)
Expand All @@ -119,7 +117,7 @@ def _yield_latin_word(


def _yield_latin_pron(
request: requests.Response, config: "Config", tag: str
request: requests_html, config: "Config", tag: str
) -> "Iterator[Pron]":
heading = "h2" if tag == "Latin" else "h3"
if config.dialect:
Expand All @@ -140,12 +138,11 @@ def _yield_latin_pron(


def extract_word_pron_latin(
word: "Word", request: requests.Response, config: "Config"
word: "Word", request: requests_html, config: "Config"
) -> "Iterator[WordPronPair]":
# For Latin, we don't use the title word from the Wiktionary page,
# because it never has macrons (necessary for Latin vowel length).
# We will get the word from each "Etymology" section within the page.
word = None # noqa: F841
tags = _get_tags(request)
for tag in tags:
# The words and prons are extracted from the same request response but
Expand Down
4 changes: 2 additions & 2 deletions wikipron/extract/shn.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
import itertools
import typing

import requests
import requests_html

from wikipron.extract.default import yield_pron

Expand All @@ -25,7 +25,7 @@


def extract_word_pron_shan(
word: "Word", request: requests.Response, config: "Config"
word: "Word", request: requests_html, config: "Config"
) -> "Iterator[WordPronPair]":
words = itertools.repeat(word)
prons = yield_pron(request.html, _IPA_XPATH_SELECTOR, config)
Expand Down
Loading