Skip to content

Commit

Permalink
Use mypy for type checking (#247)
Browse files Browse the repository at this point in the history
* ISSUE-241: Ignoring 'env' and '.idea' directories

* ISSUE-241: Added 'mypy' to 'requirements.txt'

* ISSUE-241: Added 'Type checking' step to CircleCI

* ISSUE-241: Fixed mypy issues

* ISSUE-241: Updated documentation

* ISSUE-241: Added mypy to the correct 'requirements.txt'

* ISSUE-241: Ran Black formatter

Also updated the contribution guidelines to include this as a step

* ISSUE-241: Markups

ISSUE-241: Markup - Alphabetised 'requirements.txt'
ISSUE-241: Markup - Log invalid page title
ISSUE-241: Markup - Alphabetised 'test_scrape.py' imports
ISSUE-241: Markup - Added explanatory comment
ISSUE-241: Markup - Improved 'config_dict' typing
ISSUE-241: Markup - Improved 'scrape.py' typing

* ISSUE-241: Markup - Using logger interpolation

* ISSUE-241: Markups

* ISSUE-241: Markup - Added working dir to Circle CI config
  • Loading branch information
benfernandes committed Oct 25, 2020
1 parent baa00c1 commit 711873d
Show file tree
Hide file tree
Showing 21 changed files with 93 additions and 73 deletions.
4 changes: 4 additions & 0 deletions .circleci/config.yml
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,10 @@ jobs:
- run:
name: Show installed Python packages
command: pip list
- run:
name: Type checking
working_directory: ~/
command: mypy --ignore-missing-imports project/wikipron project/tests project/data
- run:
name: Lint
working_directory: ~/
Expand Down
5 changes: 4 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -5,4 +5,7 @@ __pycache__/
*.egg-info/
*.log
**/tars
**/freq_tsvs
**/freq_tsvs
env/

.idea/
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,7 @@ Unreleased
- Split `ban` into Latin and Balinese scripts. (\#214)
- Split `kir` into Cyrillic and Arabic. (\#216)
- Split Latin (`lat`) into its dialects. (\#233)
- Added mypy type checking for the `wikipron`, `tests` and `data` directories. (\#247)

#### Fixed

Expand Down
18 changes: 9 additions & 9 deletions CONTRIBUTING.md
Original file line number Diff line number Diff line change
Expand Up @@ -74,15 +74,15 @@ To work on a feature or bug fix, here are the development steps:
The `wikipron` repo has continuous integration (CI) turned on,
with autobuilds running pytest, flake8 and mypy for the test suite
(in the [`tests/`](tests) directory), code style checks and type checks, respectively.
If an autobuild at a pending pull request fails because of pytest or flake8
errors, then the errors must be fixed by further commits pushed to the branch
by the author.
If an autobuild at a pending pull request fails because of `pytest`, `flake8` or
`mypy` errors, then the errors must be fixed by further commits pushed to the
branch by the author.

If you would like to help avoid wasting free Internet resources
(every push triggers a new CI autobuild),
you can run pytest and flake8 checks locally before pushing commits:

```bash
flake8 setup.py wikipron/ tests/
pytest -vv tests/
```
you can run the following checks locally before pushing commits:
* `mypy --ignore-missing-imports wikipron/ tests/ data/`
* `flake8 setup.py wikipron/ tests/`
* `black --line-length=79 --check setup.py wikipron/ tests/ data/`
* You can fix any formatting errors that `black` reports by running the same `black` command without `--check`
* `pytest tests/`
13 changes: 11 additions & 2 deletions data/src/codes.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,9 +91,18 @@ def _get_language_sizes(categories: List[str]) -> Dict[str, int]:
).json()
for page in data["query"]["pages"].values():
size = page["categoryinfo"]["size"]
language = re.search(

language_search = re.search(
r"Category:(.+?) terms with IPA pronunciation", page["title"]
).group(1)
)

if not language_search:
logging.warning(
"Could not extract language from title: %s", page["title"]
)
continue

language = language_search.group(1)
language_sizes[language] = size
return language_sizes

Expand Down
3 changes: 2 additions & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
black==20.8b1
flake8==3.8.4
iso639==0.1.4
mypy==0.790
pytest==6.1.1
requests==2.24.0
requests-html==0.10.0
segments==2.1.3
setuptools==50.3.1
black==20.8b1
3 changes: 2 additions & 1 deletion tests/test_data/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
import shutil

from contextlib import contextmanager
from typing import Iterator

_TESTS_DIR = os.path.dirname(os.getcwd())
_TSV_PATH = f"{_TESTS_DIR}/tsv"
Expand All @@ -21,7 +22,7 @@ def write_dummy_phones_files(key: str, dialect: str) -> None:


@contextmanager
def handle_dummy_files(phones: bool, key: str, dialect: str) -> str:
def handle_dummy_files(phones: bool, key: str, dialect: str) -> Iterator[str]:
"""Creates and removes dummy directories for housing
TSV and phones files."""
os.mkdir(_TSV_PATH)
Expand Down
5 changes: 3 additions & 2 deletions tests/test_data/test_scrape.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import os

from typing import List
from typing import Any, Dict, List

import pytest

Expand Down Expand Up @@ -46,7 +46,7 @@
],
)
def test_file_creation(
config_settings: object,
config_settings: Dict[str, Any],
dialect_suffix: str,
phones: bool,
expected_file_name: List[str],
Expand All @@ -55,6 +55,7 @@ def test_file_creation(
file names based on presence or absence of dialect specification
or .phones files for a given language.
"""
dummy_tsv_path: str
with handle_dummy_files(
phones, config_settings["key"], dialect_suffix
) as dummy_tsv_path:
Expand Down
11 changes: 8 additions & 3 deletions tests/test_wikipron/__init__.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,14 @@
from typing import Any, Dict

import requests

from wikipron.scrape import HTTP_HEADERS
from wikipron.config import Config
from wikipron.scrape import HTTP_HEADERS


def config_factory(**kwargs) -> Config:
"""Create a Config object for testing."""
config_dict = {"key": "eng"} # The one default; may be overridden.
config_dict: Dict[str, Any] = {"key": "eng"} # Default; may be overridden.
config_dict.update(**kwargs)
return Config(**config_dict)

Expand All @@ -17,7 +19,10 @@ def can_connect_to_wiktionary() -> bool:
requests.get(
"https://en.wiktionary.org/wiki/linguistics", headers=HTTP_HEADERS
)
except (requests.ConnectionError, requests.ConnectTimeout):
except (
requests.exceptions.ConnectionError,
requests.exceptions.ConnectTimeout,
):
return False
else:
return True
5 changes: 2 additions & 3 deletions tests/test_wikipron/test_extract.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,9 @@
import pytest
import requests
import requests_html

from wikipron.extract import EXTRACTION_FUNCTIONS
from wikipron.extract.core import _skip_pron
from wikipron.extract.default import extract_word_pron_default

from . import config_factory


Expand All @@ -14,7 +13,7 @@
def test_extraction_functions_have_the_same_signature(func):
expected_annotations = {
"word": "Word",
"request": requests.Response,
"request": requests_html,
"config": "Config",
"return": "Iterator[WordPronPair]",
}
Expand Down
9 changes: 7 additions & 2 deletions wikipron/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
import logging
import re

from typing import Callable, Optional
from typing import Callable, Optional, cast

import iso639
import segments
Expand Down Expand Up @@ -121,7 +121,12 @@ def _get_cut_off_date(self, cut_off_date: Optional[str]) -> str:
return cut_off_date

def _get_casefold(self, casefold: bool) -> Callable[[Word], Word]:
return str.casefold if casefold else lambda word: word # noqa: E731
default_func: Callable[[Word], Word] = lambda word: word # noqa: E731
return self._casefold_word if casefold else default_func

def _casefold_word(self, word: Word) -> Word:
# 'str.casefold' returns a 'str' so we need to cast it to a 'Word'
return cast(Word, str.casefold(word))

def _get_process_pron(
self,
Expand Down
7 changes: 3 additions & 4 deletions wikipron/extract/cmn.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,10 @@
import itertools
import typing

import requests
import requests_html

from wikipron.extract.default import yield_pron, IPA_XPATH_SELECTOR


if typing.TYPE_CHECKING:
from wikipron.config import Config
from wikipron.typing import Iterator, Word, Pron, WordPronPair
Expand All @@ -22,14 +21,14 @@


def yield_cmn_pron(
request: requests.Response, config: "Config"
request: requests_html, config: "Config"
) -> "Iterator[Pron]":
for li_container in request.html.xpath(_PRON_XPATH_TEMPLATE):
yield from yield_pron(li_container, IPA_XPATH_SELECTOR, config)


def extract_word_pron_cmn(
word: "Word", request: requests.Response, config: "Config"
word: "Word", request: requests_html, config: "Config"
) -> "Iterator[WordPronPair]":
words = itertools.repeat(word)
prons = yield_cmn_pron(request, config)
Expand Down
9 changes: 3 additions & 6 deletions wikipron/extract/default.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,10 @@
import itertools
import typing

import requests
import requests_html

from wikipron.extract.core import yield_pron


if typing.TYPE_CHECKING:
from wikipron.config import Config
from wikipron.typing import Iterator, Pron, Word, WordPronPair
Expand All @@ -16,15 +15,13 @@
IPA_XPATH_SELECTOR = '//span[@class = "IPA"]'


def _yield_phn(
request: requests.Response, config: "Config"
) -> "Iterator[Pron]":
def _yield_phn(request: requests_html, config: "Config") -> "Iterator[Pron]":
for pron_element in request.html.xpath(config.pron_xpath_selector):
yield from yield_pron(pron_element, IPA_XPATH_SELECTOR, config)


def extract_word_pron_default(
word: "Word", request: requests.Response, config: "Config"
word: "Word", request: requests_html, config: "Config"
) -> "Iterator[WordPronPair]":
words = itertools.repeat(word)
prons = _yield_phn(request, config)
Expand Down
10 changes: 4 additions & 6 deletions wikipron/extract/jpn.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
import itertools
import typing

import requests
import requests_html

from wikipron.extract.default import yield_pron, IPA_XPATH_SELECTOR

Expand All @@ -34,7 +34,7 @@


def yield_jpn_pron(
request: requests.Response, config: "Config"
request: requests_html, config: "Config"
) -> "Iterator[Pron]":
# For simplicity, just want to grab the first transcription.
# Will encounter words that have no transcription.
Expand All @@ -43,9 +43,7 @@ def yield_jpn_pron(
yield from yield_pron(pron_element, IPA_XPATH_SELECTOR, config)


def yield_jpn_word(
word: "Word", request: requests.Response
) -> "Iterator[Word]":
def yield_jpn_word(word: "Word", request: requests_html) -> "Iterator[Word]":
# Again for simplicity, only grabbing first "sub"-word.
word_element = request.html.xpath(_WORD_XPATH_SELECTOR, first=True)
if word_element:
Expand All @@ -56,7 +54,7 @@ def yield_jpn_word(


def extract_word_pron_jpn(
word: "Word", request: requests.Response, config: "Config"
word: "Word", request: requests_html, config: "Config"
) -> "Iterator[WordPronPair]":
# If we can't find a kana alternative, then the headword
# must itself be kana.
Expand Down
4 changes: 2 additions & 2 deletions wikipron/extract/khb.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
import itertools
import typing

import requests
import requests_html

from wikipron.extract.default import yield_pron

Expand All @@ -29,7 +29,7 @@


def extract_word_pron_lu(
word: "Word", request: requests.Response, config: "Config"
word: "Word", request: requests_html, config: "Config"
) -> "Iterator[WordPronPair]":
words = itertools.repeat(word)
prons = yield_pron(request.html, _IPA_XPATH_SELECTOR, config)
Expand Down
5 changes: 2 additions & 3 deletions wikipron/extract/khm.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,10 @@
import itertools
import typing

import requests
import requests_html

from wikipron.extract.default import yield_pron


if typing.TYPE_CHECKING:
from wikipron.config import Config
from wikipron.typing import Iterator, Word, WordPronPair
Expand All @@ -17,7 +16,7 @@


def extract_word_pron_khmer(
word: "Word", request: requests.Response, config: "Config"
word: "Word", request: requests_html, config: "Config"
) -> "Iterator[WordPronPair]":
words = itertools.repeat(word)
prons = yield_pron(request.html, _IPA_XPATH_SELECTOR, config)
Expand Down
Loading

0 comments on commit 711873d

Please sign in to comment.