From e0a1a79286748178ec74503686c2620859a9a1bb Mon Sep 17 00:00:00 2001
From: Ben Fernandes <dev.benfernandes@gmail.com>
Date: Mon, 19 Oct 2020 22:00:34 +0100
Subject: [PATCH 01/11] ISSUE-241: Ignoring 'env' and '.idea' directories

---
 .gitignore | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/.gitignore b/.gitignore
index d69c9bce..718f2b7d 100644
--- a/.gitignore
+++ b/.gitignore
@@ -5,4 +5,7 @@ __pycache__/
 *.egg-info/
 *.log
 **/tars
-**/freq_tsvs
\ No newline at end of file
+**/freq_tsvs
+env/
+
+.idea/

From 5c6a5046af2d8c77cdd10b520aa95e283fd01673 Mon Sep 17 00:00:00 2001
From: Ben Fernandes <dev.benfernandes@gmail.com>
Date: Mon, 19 Oct 2020 22:01:00 +0100
Subject: [PATCH 02/11] ISSUE-241: Added 'mypy' to 'requirements.txt'

---
 data/requirements.txt | 1 +
 1 file changed, 1 insertion(+)

diff --git a/data/requirements.txt b/data/requirements.txt
index f1a1ab28..b4b89509 100644
--- a/data/requirements.txt
+++ b/data/requirements.txt
@@ -2,3 +2,4 @@ regex>=2019.12.9
 requests
 requests-html
 wikipron>=1.0.0
+mypy

From 3fb46ed0ae3f37bbacb438212b8e372c12920cd0 Mon Sep 17 00:00:00 2001
From: Ben Fernandes <dev.benfernandes@gmail.com>
Date: Mon, 19 Oct 2020 22:05:33 +0100
Subject: [PATCH 03/11] ISSUE-241: Added 'Type checking' step to CircleCI

---
 .circleci/config.yml | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/.circleci/config.yml b/.circleci/config.yml
index e942ba88..f32d5985 100644
--- a/.circleci/config.yml
+++ b/.circleci/config.yml
@@ -31,6 +31,9 @@ jobs:
       - run:
           name: Show installed Python packages
           command: pip list
+      - run:
+          name: Type checking
+          command: mypy project/wikipron project/tests project/data
       - run:
           name: Lint
           working_directory: ~/

From eb946c32df0c9a14953547019cbbc6237b0c0181 Mon Sep 17 00:00:00 2001
From: Ben Fernandes <dev.benfernandes@gmail.com>
Date: Mon, 19 Oct 2020 22:36:28 +0100
Subject: [PATCH 04/11] ISSUE-241: Fixed mypy issues

---
 .circleci/config.yml                |  2 +-
 data/src/codes.py                   | 10 ++++++++--
 mypy.ini                            |  3 +++
 tests/test_data/__init__.py         |  3 ++-
 tests/test_data/test_scrape.py      |  5 +++--
 tests/test_wikipron/__init__.py     |  8 +++++---
 tests/test_wikipron/test_extract.py |  5 ++---
 wikipron/config.py                  |  8 ++++++--
 wikipron/extract/cmn.py             |  7 +++----
 wikipron/extract/default.py         |  7 +++----
 wikipron/extract/jpn.py             |  8 ++++----
 wikipron/extract/khb.py             |  4 ++--
 wikipron/extract/khm.py             |  5 ++---
 wikipron/extract/lat.py             | 15 ++++++---------
 wikipron/extract/shn.py             |  4 ++--
 wikipron/extract/tha.py             |  5 ++---
 wikipron/extract/vie.py             |  6 +++---
 wikipron/scrape.py                  |  7 +++++--
 18 files changed, 62 insertions(+), 50 deletions(-)
 create mode 100644 mypy.ini

diff --git a/.circleci/config.yml b/.circleci/config.yml
index f32d5985..e1023341 100644
--- a/.circleci/config.yml
+++ b/.circleci/config.yml
@@ -33,7 +33,7 @@ jobs:
           command: pip list
       - run:
           name: Type checking
-          command: mypy project/wikipron project/tests project/data
+          command: mypy
       - run:
           name: Lint
           working_directory: ~/
diff --git a/data/src/codes.py b/data/src/codes.py
index 3db81f37..6d343f39 100755
--- a/data/src/codes.py
+++ b/data/src/codes.py
@@ -91,9 +91,15 @@ def _get_language_sizes(categories: List[str]) -> Dict[str, int]:
         ).json()
         for page in data["query"]["pages"].values():
             size = page["categoryinfo"]["size"]
-            language = re.search(
+
+            language_search = re.search(
                 r"Category:(.+?) terms with IPA pronunciation", page["title"]
-            ).group(1)
+            )
+
+            if not language_search:
+                continue
+
+            language = language_search.group(1)
             language_sizes[language] = size
     return language_sizes
 
diff --git a/mypy.ini b/mypy.ini
new file mode 100644
index 00000000..2f97cb32
--- /dev/null
+++ b/mypy.ini
@@ -0,0 +1,3 @@
+[mypy]
+files=wikipron,data,tests
+ignore_missing_imports=true
\ No newline at end of file
diff --git a/tests/test_data/__init__.py b/tests/test_data/__init__.py
index 7731788e..5e9aee4b 100644
--- a/tests/test_data/__init__.py
+++ b/tests/test_data/__init__.py
@@ -2,6 +2,7 @@
 import shutil
 
 from contextlib import contextmanager
+from typing import Iterator
 
 _TESTS_DIR = os.path.dirname(os.getcwd())
 _TSV_PATH = f"{_TESTS_DIR}/tsv"
@@ -21,7 +22,7 @@ def write_dummy_phones_files(key: str, dialect: str) -> None:
 
 
 @contextmanager
-def handle_dummy_files(phones: bool, key: str, dialect: str) -> str:
+def handle_dummy_files(phones: bool, key: str, dialect: str) -> Iterator[str]:
     """Creates and removes dummy directories for housing
     TSV and phones files."""
     os.mkdir(_TSV_PATH)
diff --git a/tests/test_data/test_scrape.py b/tests/test_data/test_scrape.py
index f9bd5eba..03e1fa99 100644
--- a/tests/test_data/test_scrape.py
+++ b/tests/test_data/test_scrape.py
@@ -1,6 +1,6 @@
 import os
 
-from typing import List
+from typing import List, Any, Dict
 
 import pytest
 
@@ -46,7 +46,7 @@
     ],
 )
 def test_file_creation(
-    config_settings: object,
+    config_settings: Dict[str, Any],
     dialect_suffix: str,
     phones: bool,
     expected_file_name: List[str],
@@ -55,6 +55,7 @@ def test_file_creation(
     file names based on presence or absence of dialect specification
     or .phones files for a given language.
     """
+    dummy_tsv_path: str
     with handle_dummy_files(
         phones, config_settings["key"], dialect_suffix
     ) as dummy_tsv_path:
diff --git a/tests/test_wikipron/__init__.py b/tests/test_wikipron/__init__.py
index 5968896e..043cf4a5 100644
--- a/tests/test_wikipron/__init__.py
+++ b/tests/test_wikipron/__init__.py
@@ -1,12 +1,14 @@
+from typing import Dict
+
 import requests
 
-from wikipron.scrape import HTTP_HEADERS
 from wikipron.config import Config
+from wikipron.scrape import HTTP_HEADERS
 
 
 def config_factory(**kwargs) -> Config:
     """Create a Config object for testing."""
-    config_dict = {"key": "eng"}  # The one default; may be overridden.
+    config_dict: Dict = {"key": "eng"}  # The one default; may be overridden.
     config_dict.update(**kwargs)
     return Config(**config_dict)
 
@@ -17,7 +19,7 @@ def can_connect_to_wiktionary() -> bool:
         requests.get(
             "https://en.wiktionary.org/wiki/linguistics", headers=HTTP_HEADERS
         )
-    except (requests.ConnectionError, requests.ConnectTimeout):
+    except requests.ConnectionError:
         return False
     else:
         return True
diff --git a/tests/test_wikipron/test_extract.py b/tests/test_wikipron/test_extract.py
index ac0147bf..69129776 100644
--- a/tests/test_wikipron/test_extract.py
+++ b/tests/test_wikipron/test_extract.py
@@ -1,10 +1,9 @@
 import pytest
-import requests
+import requests_html
 
 from wikipron.extract import EXTRACTION_FUNCTIONS
 from wikipron.extract.core import _skip_pron
 from wikipron.extract.default import extract_word_pron_default
-
 from . import config_factory
 
 
@@ -14,7 +13,7 @@
 def test_extraction_functions_have_the_same_signature(func):
     expected_annotations = {
         "word": "Word",
-        "request": requests.Response,
+        "request": requests_html,
         "config": "Config",
         "return": "Iterator[WordPronPair]",
     }
diff --git a/wikipron/config.py b/wikipron/config.py
index 6686ea78..238a1534 100644
--- a/wikipron/config.py
+++ b/wikipron/config.py
@@ -3,7 +3,7 @@
 import logging
 import re
 
-from typing import Callable, Optional
+from typing import Callable, Optional, cast
 
 import iso639
 import segments
@@ -121,7 +121,11 @@ def _get_cut_off_date(self, cut_off_date: Optional[str]) -> str:
         return cut_off_date
 
     def _get_casefold(self, casefold: bool) -> Callable[[Word], Word]:
-        return str.casefold if casefold else lambda word: word  # noqa: E731
+        default_func: Callable[[Word], Word] = lambda word: word  # noqa: E731
+        return self._casefold_word if casefold else default_func
+
+    def _casefold_word(self, word: Word):
+        return cast(Word, str.casefold(word))
 
     def _get_process_pron(
         self,
diff --git a/wikipron/extract/cmn.py b/wikipron/extract/cmn.py
index c55c65db..ee06ba33 100644
--- a/wikipron/extract/cmn.py
+++ b/wikipron/extract/cmn.py
@@ -3,11 +3,10 @@
 import itertools
 import typing
 
-import requests
+import requests_html
 
 from wikipron.extract.default import yield_pron, IPA_XPATH_SELECTOR
 
-
 if typing.TYPE_CHECKING:
     from wikipron.config import Config
     from wikipron.typing import Iterator, Word, Pron, WordPronPair
@@ -22,14 +21,14 @@
 
 
 def yield_cmn_pron(
-    request: requests.Response, config: "Config"
+    request: requests_html, config: "Config"
 ) -> "Iterator[Pron]":
     for li_container in request.html.xpath(_PRON_XPATH_TEMPLATE):
         yield from yield_pron(li_container, IPA_XPATH_SELECTOR, config)
 
 
 def extract_word_pron_cmn(
-    word: "Word", request: requests.Response, config: "Config"
+    word: "Word", request: requests_html, config: "Config"
 ) -> "Iterator[WordPronPair]":
     words = itertools.repeat(word)
     prons = yield_cmn_pron(request, config)
diff --git a/wikipron/extract/default.py b/wikipron/extract/default.py
index f9ef0289..79fd7d1c 100644
--- a/wikipron/extract/default.py
+++ b/wikipron/extract/default.py
@@ -3,11 +3,10 @@
 import itertools
 import typing
 
-import requests
+import requests_html
 
 from wikipron.extract.core import yield_pron
 
-
 if typing.TYPE_CHECKING:
     from wikipron.config import Config
     from wikipron.typing import Iterator, Pron, Word, WordPronPair
@@ -17,14 +16,14 @@
 
 
 def _yield_phn(
-    request: requests.Response, config: "Config"
+    request: requests_html, config: "Config"
 ) -> "Iterator[Pron]":
     for pron_element in request.html.xpath(config.pron_xpath_selector):
         yield from yield_pron(pron_element, IPA_XPATH_SELECTOR, config)
 
 
 def extract_word_pron_default(
-    word: "Word", request: requests.Response, config: "Config"
+    word: "Word", request: requests_html, config: "Config"
 ) -> "Iterator[WordPronPair]":
     words = itertools.repeat(word)
     prons = _yield_phn(request, config)
diff --git a/wikipron/extract/jpn.py b/wikipron/extract/jpn.py
index 2eb092ba..6cb5e232 100644
--- a/wikipron/extract/jpn.py
+++ b/wikipron/extract/jpn.py
@@ -16,7 +16,7 @@
 import itertools
 import typing
 
-import requests
+import requests_html
 
 from wikipron.extract.default import yield_pron, IPA_XPATH_SELECTOR
 
@@ -34,7 +34,7 @@
 
 
 def yield_jpn_pron(
-    request: requests.Response, config: "Config"
+    request: requests_html, config: "Config"
 ) -> "Iterator[Pron]":
     # For simplicity, just want to grab the first transcription.
     # Will encounter words that have no transcription.
@@ -44,7 +44,7 @@ def yield_jpn_pron(
 
 
 def yield_jpn_word(
-    word: "Word", request: requests.Response
+    word: "Word", request: requests_html
 ) -> "Iterator[Word]":
     # Again for simplicity, only grabbing first "sub"-word.
     word_element = request.html.xpath(_WORD_XPATH_SELECTOR, first=True)
@@ -56,7 +56,7 @@ def yield_jpn_word(
 
 
 def extract_word_pron_jpn(
-    word: "Word", request: requests.Response, config: "Config"
+    word: "Word", request: requests_html, config: "Config"
 ) -> "Iterator[WordPronPair]":
     # If we can't find a kana alternative, then the headword
     # must itself be kana.
diff --git a/wikipron/extract/khb.py b/wikipron/extract/khb.py
index 9ebdfb81..eace7170 100644
--- a/wikipron/extract/khb.py
+++ b/wikipron/extract/khb.py
@@ -7,7 +7,7 @@
 import itertools
 import typing
 
-import requests
+import requests_html
 
 from wikipron.extract.default import yield_pron
 
@@ -29,7 +29,7 @@
 
 
 def extract_word_pron_lu(
-    word: "Word", request: requests.Response, config: "Config"
+    word: "Word", request: requests_html, config: "Config"
 ) -> "Iterator[WordPronPair]":
     words = itertools.repeat(word)
     prons = yield_pron(request.html, _IPA_XPATH_SELECTOR, config)
diff --git a/wikipron/extract/khm.py b/wikipron/extract/khm.py
index 9ede84cf..93f14048 100644
--- a/wikipron/extract/khm.py
+++ b/wikipron/extract/khm.py
@@ -3,11 +3,10 @@
 import itertools
 import typing
 
-import requests
+import requests_html
 
 from wikipron.extract.default import yield_pron
 
-
 if typing.TYPE_CHECKING:
     from wikipron.config import Config
     from wikipron.typing import Iterator, Word, WordPronPair
@@ -17,7 +16,7 @@
 
 
 def extract_word_pron_khmer(
-    word: "Word", request: requests.Response, config: "Config"
+    word: "Word", request: requests_html, config: "Config"
 ) -> "Iterator[WordPronPair]":
     words = itertools.repeat(word)
     prons = yield_pron(request.html, _IPA_XPATH_SELECTOR, config)
diff --git a/wikipron/extract/lat.py b/wikipron/extract/lat.py
index 6a800749..d450ab3b 100644
--- a/wikipron/extract/lat.py
+++ b/wikipron/extract/lat.py
@@ -43,14 +43,12 @@
 
 import itertools
 import typing
+from typing import List
 
-import requests
+import requests_html
 
 from wikipron.extract.default import yield_pron, IPA_XPATH_SELECTOR
 
-from typing import List
-
-
 if typing.TYPE_CHECKING:
     from wikipron.config import Config
     from wikipron.typing import Iterator, Pron, Word, WordPronPair
@@ -87,7 +85,7 @@
 """
 
 
-def _get_tags(request: requests.Response) -> List[str]:
+def _get_tags(request: requests_html) -> List[str]:
     """Extract the Latin Etymology ID tags from the table of contents."""
     tags = []
     for a_element in request.html.xpath(_TOC_ETYMOLOGY_XPATH_SELECTOR):
@@ -101,7 +99,7 @@ def _get_tags(request: requests.Response) -> List[str]:
 
 
 def _yield_latin_word(
-    request: requests.Response, tag: str
+    request: requests_html, tag: str
 ) -> "Iterator[Word]":
     heading = "h2" if tag == "Latin" else "h3"
     word_xpath_selector = _WORD_XPATH_TEMPLATE.format(heading=heading, tag=tag)
@@ -119,7 +117,7 @@ def _yield_latin_word(
 
 
 def _yield_latin_pron(
-    request: requests.Response, config: "Config", tag: str
+    request: requests_html, config: "Config", tag: str
 ) -> "Iterator[Pron]":
     heading = "h2" if tag == "Latin" else "h3"
     if config.dialect:
@@ -140,12 +138,11 @@ def _yield_latin_pron(
 
 
 def extract_word_pron_latin(
-    word: "Word", request: requests.Response, config: "Config"
+    word: "Word", request: requests_html, config: "Config"
 ) -> "Iterator[WordPronPair]":
     # For Latin, we don't use the title word from the Wiktionary page,
     # because it never has macrons (necessary for Latin vowel length).
     # We will get the word from each "Etymology" section within the page.
-    word = None  # noqa: F841
     tags = _get_tags(request)
     for tag in tags:
         # The words and prons are extracted from the same request response but
diff --git a/wikipron/extract/shn.py b/wikipron/extract/shn.py
index 48ed4534..bcaea2d3 100644
--- a/wikipron/extract/shn.py
+++ b/wikipron/extract/shn.py
@@ -3,7 +3,7 @@
 import itertools
 import typing
 
-import requests
+import requests_html
 
 from wikipron.extract.default import yield_pron
 
@@ -25,7 +25,7 @@
 
 
 def extract_word_pron_shan(
-    word: "Word", request: requests.Response, config: "Config"
+    word: "Word", request: requests_html, config: "Config"
 ) -> "Iterator[WordPronPair]":
     words = itertools.repeat(word)
     prons = yield_pron(request.html, _IPA_XPATH_SELECTOR, config)
diff --git a/wikipron/extract/tha.py b/wikipron/extract/tha.py
index 3e6b0120..9b36b39c 100644
--- a/wikipron/extract/tha.py
+++ b/wikipron/extract/tha.py
@@ -3,18 +3,17 @@
 import itertools
 import typing
 
-import requests
+import requests_html
 
 from wikipron.extract.default import yield_pron, IPA_XPATH_SELECTOR
 
-
 if typing.TYPE_CHECKING:
     from wikipron.config import Config
     from wikipron.typing import Iterator, Word, WordPronPair
 
 
 def extract_word_pron_thai(
-    word: "Word", request: requests.Response, config: "Config"
+    word: "Word", request: requests_html, config: "Config"
 ) -> "Iterator[WordPronPair]":
     words = itertools.repeat(word)
     prons = yield_pron(request.html, IPA_XPATH_SELECTOR, config)
diff --git a/wikipron/extract/vie.py b/wikipron/extract/vie.py
index 223f66ce..a29d7c50 100644
--- a/wikipron/extract/vie.py
+++ b/wikipron/extract/vie.py
@@ -3,7 +3,7 @@
 import itertools
 import typing
 
-import requests
+import requests_html
 
 from wikipron.extract.default import yield_pron, IPA_XPATH_SELECTOR
 
@@ -32,14 +32,14 @@
 
 
 def extract_pron(
-    request: requests.Response, selector: str, config: "Config"
+    request: requests_html, selector: str, config: "Config"
 ) -> "Iterator[Pron]":
     for pron_element in request.html.xpath(selector):
         yield from yield_pron(pron_element, IPA_XPATH_SELECTOR, config)
 
 
 def extract_word_pron_vie(
-    word: "Word", request: requests.Response, config: "Config"
+    word: "Word", request: requests_html, config: "Config"
 ) -> "Iterator[WordPronPair]":
     if config.dialect:
         dialect_selector = _DIALECT_XPATH_SELECTOR_TEMPLATE.format(
diff --git a/wikipron/scrape.py b/wikipron/scrape.py
index 1590fd31..7811d085 100644
--- a/wikipron/scrape.py
+++ b/wikipron/scrape.py
@@ -1,12 +1,14 @@
 import re
 import unicodedata
+from typing import cast
+
 import pkg_resources
 
 import requests
 import requests_html
 
 from wikipron.config import Config
-from wikipron.typing import Iterator, WordPronPair
+from wikipron.typing import Iterator, WordPronPair, Word, Pron
 
 # Queries for the MediaWiki backend.
 # Documentation here: https://www.mediawiki.org/wiki/API:Categorymembers
@@ -55,7 +57,8 @@ def _scrape_once(data, config: Config) -> Iterator[WordPronPair]:
         for word, pron in config.extract_word_pron(word, request, config):
             # Pronunciation processing is done in NFD-space;
             # we convert back to NFC aftewards.
-            yield word, unicodedata.normalize("NFC", pron)
+            normalized_pron = unicodedata.normalize("NFC", pron)
+            yield cast(Word, word), cast(Pron, normalized_pron)
 
 
 def scrape(config: Config) -> Iterator[WordPronPair]:

From 20e5127c6cd301ef9e2529c47eeae5acbd9d9977 Mon Sep 17 00:00:00 2001
From: Ben Fernandes <dev.benfernandes@gmail.com>
Date: Fri, 23 Oct 2020 16:28:19 +0100
Subject: [PATCH 05/11] ISSUE-241: Updated documentation

---
 CHANGELOG.md    | 1 +
 CONTRIBUTING.md | 7 ++++---
 2 files changed, 5 insertions(+), 3 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index e15363cb..39aaccf3 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -66,6 +66,7 @@ Unreleased
 -   Added Shan (`shn`) with custom extraction. (\#229)
 -   Split Latin (`lat`) into its dialects. (\#233)
 -   Added support for python 3.9 (\236)
+-   Added MyPy coverage for `wikipron`, `data` and `tests` directories
 
 ### Changed
 
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index bd698463..ea523361 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -74,9 +74,9 @@ To work on a feature or bug fix, here are the development steps:
 The `wikipron` repo has continuous integration (CI) turned on,
 with autobuilds running pytest and flake8 for the test suite
 (in the [`tests/`](tests) directory) and code style checks, respectively.
-If an autobuild at a pending pull request fails because of pytest or flake8
-errors, then the errors must be fixed by further commits pushed to the branch
-by the author.
+If an autobuild at a pending pull request fails because of `pytest`, `flake8` or
+`mypy` errors, then the errors must be fixed by further commits pushed to the
+branch by the author.
 
 If you would like to help avoid wasting free Internet resources
 (every push triggers a new CI autobuild),
@@ -85,4 +85,5 @@ you can run pytest and flake8 checks locally before pushing commits:
 ```bash
 flake8 setup.py wikipron/ tests/
 pytest -vv tests/
+mypy
 ```

From d5bd8f9c2e45918af4a5e7c9b43bfe0ba04c22fc Mon Sep 17 00:00:00 2001
From: Ben Fernandes <dev.benfernandes@gmail.com>
Date: Fri, 23 Oct 2020 17:02:53 +0100
Subject: [PATCH 06/11] ISSUE-241: Added mypy to the correct 'requirements.txt'

---
 data/requirements.txt | 1 -
 requirements.txt      | 3 ++-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/data/requirements.txt b/data/requirements.txt
index b4b89509..f1a1ab28 100644
--- a/data/requirements.txt
+++ b/data/requirements.txt
@@ -2,4 +2,3 @@ regex>=2019.12.9
 requests
 requests-html
 wikipron>=1.0.0
-mypy
diff --git a/requirements.txt b/requirements.txt
index a6bac7f8..9f206115 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -5,4 +5,5 @@ requests==2.24.0
 requests-html==0.10.0
 segments==2.1.3
 setuptools==50.3.1
-black==20.8b1
\ No newline at end of file
+black==20.8b1
+mypy==0.790
\ No newline at end of file

From 8ae7400a4ad9ef5201ff245a76cc5a97c7b9f46b Mon Sep 17 00:00:00 2001
From: Ben Fernandes <dev.benfernandes@gmail.com>
Date: Fri, 23 Oct 2020 17:11:49 +0100
Subject: [PATCH 07/11] ISSUE-241: Ran Black formatter

Also updated the contribution guidelines to include this as a step
---
 CONTRIBUTING.md             | 13 ++++++-------
 wikipron/extract/default.py |  4 +---
 wikipron/extract/jpn.py     |  4 +---
 wikipron/extract/lat.py     |  4 +---
 4 files changed, 9 insertions(+), 16 deletions(-)

diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index ea523361..22f67250 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -80,10 +80,9 @@ branch by the author.
 
 If you would like to help avoid wasting free Internet resources
 (every push triggers a new CI autobuild),
-you can run pytest and flake8 checks locally before pushing commits:
-
-```bash
-flake8 setup.py wikipron/ tests/
-pytest -vv tests/
-mypy
-```
+you can run the following checks locally before pushing commits:
+* `mypy`
+* `flake8 setup.py wikipron/ tests/`
+* `black --line-length=79 --check setup.py wikipron tests data`
+    * You can fix any errors by running the same command without `--check`
+* `pytest tests/`
diff --git a/wikipron/extract/default.py b/wikipron/extract/default.py
index 79fd7d1c..2113344a 100644
--- a/wikipron/extract/default.py
+++ b/wikipron/extract/default.py
@@ -15,9 +15,7 @@
 IPA_XPATH_SELECTOR = '//span[@class = "IPA"]'
 
 
-def _yield_phn(
-    request: requests_html, config: "Config"
-) -> "Iterator[Pron]":
+def _yield_phn(request: requests_html, config: "Config") -> "Iterator[Pron]":
     for pron_element in request.html.xpath(config.pron_xpath_selector):
         yield from yield_pron(pron_element, IPA_XPATH_SELECTOR, config)
 
diff --git a/wikipron/extract/jpn.py b/wikipron/extract/jpn.py
index 6cb5e232..44bc3b59 100644
--- a/wikipron/extract/jpn.py
+++ b/wikipron/extract/jpn.py
@@ -43,9 +43,7 @@ def yield_jpn_pron(
         yield from yield_pron(pron_element, IPA_XPATH_SELECTOR, config)
 
 
-def yield_jpn_word(
-    word: "Word", request: requests_html
-) -> "Iterator[Word]":
+def yield_jpn_word(word: "Word", request: requests_html) -> "Iterator[Word]":
     # Again for simplicity, only grabbing first "sub"-word.
     word_element = request.html.xpath(_WORD_XPATH_SELECTOR, first=True)
     if word_element:
diff --git a/wikipron/extract/lat.py b/wikipron/extract/lat.py
index d450ab3b..204d2898 100644
--- a/wikipron/extract/lat.py
+++ b/wikipron/extract/lat.py
@@ -98,9 +98,7 @@ def _get_tags(request: requests_html) -> List[str]:
     return tags
 
 
-def _yield_latin_word(
-    request: requests_html, tag: str
-) -> "Iterator[Word]":
+def _yield_latin_word(request: requests_html, tag: str) -> "Iterator[Word]":
     heading = "h2" if tag == "Latin" else "h3"
     word_xpath_selector = _WORD_XPATH_TEMPLATE.format(heading=heading, tag=tag)
     try:

From 4fd09d2df86a05883af104029b94ddfb54f3a42d Mon Sep 17 00:00:00 2001
From: Ben Fernandes <dev.benfernandes@gmail.com>
Date: Fri, 23 Oct 2020 21:52:17 +0100
Subject: [PATCH 08/11] ISSUE-241: Markups

ISSUE-241: Markup - Alphabetised 'requirements.txt'
ISSUE-241: Markup - Log invalid page title
ISSUE-241: Markup - Alphabetised 'test_scrape.py' imports
ISSUE-241: Markup - Added explanatory comment
ISSUE-241: Markup - Improved 'config_dict' typing
ISSUE-241: Markup - Improved 'scrape.py' typing
---
 data/src/codes.py               |  3 +++
 requirements.txt                |  4 ++--
 tests/test_data/test_scrape.py  |  2 +-
 tests/test_wikipron/__init__.py |  4 ++--
 wikipron/config.py              |  3 ++-
 wikipron/scrape.py              | 20 +++++++++++---------
 6 files changed, 21 insertions(+), 15 deletions(-)

diff --git a/data/src/codes.py b/data/src/codes.py
index 6d343f39..91298416 100755
--- a/data/src/codes.py
+++ b/data/src/codes.py
@@ -97,6 +97,9 @@ def _get_language_sizes(categories: List[str]) -> Dict[str, int]:
             )
 
             if not language_search:
+                logging.warning(
+                    f"Could not extract language from title: {page['title']}"
+                )
                 continue
 
             language = language_search.group(1)
diff --git a/requirements.txt b/requirements.txt
index 9f206115..8be90fab 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,9 +1,9 @@
+black==20.8b1
 flake8==3.8.4
 iso639==0.1.4
+mypy==0.790
 pytest==6.1.1
 requests==2.24.0
 requests-html==0.10.0
 segments==2.1.3
 setuptools==50.3.1
-black==20.8b1
-mypy==0.790
\ No newline at end of file
diff --git a/tests/test_data/test_scrape.py b/tests/test_data/test_scrape.py
index 03e1fa99..3edd59f8 100644
--- a/tests/test_data/test_scrape.py
+++ b/tests/test_data/test_scrape.py
@@ -1,6 +1,6 @@
 import os
 
-from typing import List, Any, Dict
+from typing import Any, Dict, List
 
 import pytest
 
diff --git a/tests/test_wikipron/__init__.py b/tests/test_wikipron/__init__.py
index 043cf4a5..720b4a4f 100644
--- a/tests/test_wikipron/__init__.py
+++ b/tests/test_wikipron/__init__.py
@@ -1,4 +1,4 @@
-from typing import Dict
+from typing import Any, Dict
 
 import requests
 
@@ -8,7 +8,7 @@
 
 def config_factory(**kwargs) -> Config:
     """Create a Config object for testing."""
-    config_dict: Dict = {"key": "eng"}  # The one default; may be overridden.
+    config_dict: Dict[str, Any] = {"key": "eng"}  # Default; may be overridden.
     config_dict.update(**kwargs)
     return Config(**config_dict)
 
diff --git a/wikipron/config.py b/wikipron/config.py
index 0a7032f2..3e8adbf1 100644
--- a/wikipron/config.py
+++ b/wikipron/config.py
@@ -124,7 +124,8 @@ def _get_casefold(self, casefold: bool) -> Callable[[Word], Word]:
         default_func: Callable[[Word], Word] = lambda word: word  # noqa: E731
         return self._casefold_word if casefold else default_func
 
-    def _casefold_word(self, word: Word):
+    def _casefold_word(self, word: Word) -> Word:
+        # 'str.casefold' returns a 'str' so we need to cast it to a 'Word'
         return cast(Word, str.casefold(word))
 
     def _get_process_pron(
diff --git a/wikipron/scrape.py b/wikipron/scrape.py
index 7811d085..bc373657 100644
--- a/wikipron/scrape.py
+++ b/wikipron/scrape.py
@@ -3,12 +3,11 @@
 from typing import cast
 
 import pkg_resources
-
 import requests
 import requests_html
 
 from wikipron.config import Config
-from wikipron.typing import Iterator, WordPronPair, Word, Pron
+from wikipron.typing import Iterator, WordPronPair, Pron
 
 # Queries for the MediaWiki backend.
 # Documentation here: https://www.mediawiki.org/wiki/API:Categorymembers
@@ -45,20 +44,23 @@ def _skip_date(date_from_word: str, cut_off_date: str) -> bool:
 def _scrape_once(data, config: Config) -> Iterator[WordPronPair]:
     session = requests_html.HTMLSession()
     for member in data["query"]["categorymembers"]:
-        word = member["title"]
-        date = member["timestamp"]
-        if _skip_word(word, config.no_skip_spaces_word) or _skip_date(
-            date, config.cut_off_date
+        title = member["title"]
+        timestamp = member["timestamp"]
+        if _skip_word(title, config.no_skip_spaces_word) or _skip_date(
+            timestamp, config.cut_off_date
         ):
             continue
         request = session.get(
-            _PAGE_TEMPLATE.format(word=word), timeout=10, headers=HTTP_HEADERS
+            _PAGE_TEMPLATE.format(word=title), timeout=10, headers=HTTP_HEADERS
         )
-        for word, pron in config.extract_word_pron(word, request, config):
+
+        # word_prons = config.extract_word_pron(word, request, config)
+        for word, pron in config.extract_word_pron(title, request, config):
             # Pronunciation processing is done in NFD-space;
             # we convert back to NFC aftewards.
             normalized_pron = unicodedata.normalize("NFC", pron)
-            yield cast(Word, word), cast(Pron, normalized_pron)
+            # 'cast' is required because 'normalize' doesn't return a 'Pron'
+            yield word, cast(Pron, normalized_pron)
 
 
 def scrape(config: Config) -> Iterator[WordPronPair]:

From adf125e70454940cf2b9b0355d266b2c5c990e5c Mon Sep 17 00:00:00 2001
From: Ben Fernandes <dev.benfernandes@gmail.com>
Date: Sat, 24 Oct 2020 00:25:52 +0100
Subject: [PATCH 09/11] ISSUE-241: Markup - Using logger interpolation

---
 data/src/codes.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/data/src/codes.py b/data/src/codes.py
index 91298416..a5ca2ab5 100755
--- a/data/src/codes.py
+++ b/data/src/codes.py
@@ -98,7 +98,7 @@ def _get_language_sizes(categories: List[str]) -> Dict[str, int]:
 
             if not language_search:
                 logging.warning(
-                    f"Could not extract language from title: {page['title']}"
+                    "Could not extract language from title: %s", page["title"]
                 )
                 continue
 

From 1d3089abd14b58e42df71af6c93315dd6bf67403 Mon Sep 17 00:00:00 2001
From: Ben Fernandes <dev.benfernandes@gmail.com>
Date: Sun, 25 Oct 2020 01:37:03 +0000
Subject: [PATCH 10/11] ISSUE-241: Markups

---
 .circleci/config.yml            | 2 +-
 CHANGELOG.md                    | 3 +--
 CONTRIBUTING.md                 | 4 ++--
 mypy.ini                        | 3 ---
 tests/test_wikipron/__init__.py | 5 ++++-
 wikipron/scrape.py              | 1 -
 6 files changed, 8 insertions(+), 10 deletions(-)
 delete mode 100644 mypy.ini

diff --git a/.circleci/config.yml b/.circleci/config.yml
index e1023341..a860823b 100644
--- a/.circleci/config.yml
+++ b/.circleci/config.yml
@@ -33,7 +33,7 @@ jobs:
           command: pip list
       - run:
           name: Type checking
-          command: mypy
+          command: mypy --ignore-missing-imports project/wikipron project/tests project/data
       - run:
           name: Lint
           working_directory: ~/
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 8d638167..9992f56c 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -63,8 +63,7 @@ Unreleased
 -   Split `ban` into Latin and Balinese scripts. (\#214)
 -   Split `kir` into Cyrillic and Arabic. (\#216)
 -   Split Latin (`lat`) into its dialects. (\#233)
--   Added support for python 3.9 (\236)
--   Added MyPy coverage for `wikipron`, `data` and `tests` directories
+-   Added mypy coverage for `wikipron`, `tests` and `data` directories. (\#247)
 
 #### Fixed
 
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index 22f67250..a3cfe308 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -81,8 +81,8 @@ branch by the author.
 If you would like to help avoid wasting free Internet resources
 (every push triggers a new CI autobuild),
 you can run the following checks locally before pushing commits:
-* `mypy`
+* `mypy --ignore-missing-imports wikipron/ tests/ data/`
 * `flake8 setup.py wikipron/ tests/`
-* `black --line-length=79 --check setup.py wikipron tests data`
+* `black --line-length=79 --check setup.py wikipron/ tests/ data/`
     * You can fix any errors by running the same command without `--check`
 * `pytest tests/`
diff --git a/mypy.ini b/mypy.ini
deleted file mode 100644
index 2f97cb32..00000000
--- a/mypy.ini
+++ /dev/null
@@ -1,3 +0,0 @@
-[mypy]
-files=wikipron,data,tests
-ignore_missing_imports=true
\ No newline at end of file
diff --git a/tests/test_wikipron/__init__.py b/tests/test_wikipron/__init__.py
index 720b4a4f..a4cdd90c 100644
--- a/tests/test_wikipron/__init__.py
+++ b/tests/test_wikipron/__init__.py
@@ -19,7 +19,10 @@ def can_connect_to_wiktionary() -> bool:
         requests.get(
             "https://en.wiktionary.org/wiki/linguistics", headers=HTTP_HEADERS
         )
-    except requests.ConnectionError:
+    except (
+        requests.exceptions.ConnectionError,
+        requests.exceptions.ConnectTimeout,
+    ):
         return False
     else:
         return True
diff --git a/wikipron/scrape.py b/wikipron/scrape.py
index bc373657..d209bf0e 100644
--- a/wikipron/scrape.py
+++ b/wikipron/scrape.py
@@ -54,7 +54,6 @@ def _scrape_once(data, config: Config) -> Iterator[WordPronPair]:
             _PAGE_TEMPLATE.format(word=title), timeout=10, headers=HTTP_HEADERS
         )
 
-        # word_prons = config.extract_word_pron(word, request, config)
         for word, pron in config.extract_word_pron(title, request, config):
             # Pronunciation processing is done in NFD-space;
             # we convert back to NFC aftewards.

From afb795c1a4e2ceccd8f24c406bc9fbd7d49748f9 Mon Sep 17 00:00:00 2001
From: Ben Fernandes <dev.benfernandes@gmail.com>
Date: Sun, 25 Oct 2020 02:23:51 +0000
Subject: [PATCH 11/11] ISSUE-241: Markup - Added working dir to Circle CI
 config

---
 .circleci/config.yml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/.circleci/config.yml b/.circleci/config.yml
index a860823b..9ef16ce3 100644
--- a/.circleci/config.yml
+++ b/.circleci/config.yml
@@ -33,6 +33,7 @@ jobs:
           command: pip list
       - run:
           name: Type checking
+          working_directory: ~/
           command: mypy --ignore-missing-imports project/wikipron project/tests project/data
       - run:
           name: Lint