diff --git a/.travis.yml b/.travis.yml index efb22c4..0bd9cdc 100644 --- a/.travis.yml +++ b/.travis.yml @@ -6,7 +6,10 @@ # https://travis-ci.com/JoseALermaIII dist: bionic # required for Python >= 3.7 language: python -cache: pip # Don't delete pip install +cache: + pip: true # Don't delete pip install + directories: + - $HOME/nltk_data/corpora/ # Branch safelist branches: only: diff --git a/docs/source/conf.py b/docs/source/conf.py index 1c1a756..3529a38 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -55,6 +55,7 @@ intersphinx_mapping = { 'python': ('https://docs.python.org/3/', None), 'docx': ('https://python-docx.readthedocs.io/en/latest/', None), + 'nltk': ('http://www.nltk.org/', None), } # Default options for autodoc directives. diff --git a/docs/source/src.ch08.rst b/docs/source/src.ch08.rst new file mode 100644 index 0000000..cfbdca5 --- /dev/null +++ b/docs/source/src.ch08.rst @@ -0,0 +1,22 @@ +src.ch08 package +================ + +Submodules +---------- + +src.ch08.p1\_count\_syllables module +------------------------------------ + +.. automodule:: src.ch08.p1_count_syllables + :members: + :undoc-members: + :show-inheritance: + + +Module contents +--------------- + +.. automodule:: src.ch08 + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/source/src.rst b/docs/source/src.rst index c27558a..63a119c 100644 --- a/docs/source/src.rst +++ b/docs/source/src.rst @@ -13,6 +13,7 @@ Subpackages src.ch05 src.ch06 src.ch07 + src.ch08 Module contents --------------- diff --git a/requirements.txt b/requirements.txt index 71721d3..9250f1d 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1 +1,2 @@ -python-docx==0.8.10 \ No newline at end of file +python-docx==0.8.10 +nltk==3.4.5 \ No newline at end of file diff --git a/src/ch08/__init__.py b/src/ch08/__init__.py new file mode 100644 index 0000000..6bb9e9d --- /dev/null +++ b/src/ch08/__init__.py @@ -0,0 +1 @@ +"""Chapter 8.""" diff --git a/src/ch08/p1_count_syllables.py b/src/ch08/p1_count_syllables.py new file mode 100644 index 0000000..90a2a00 --- /dev/null +++ b/src/ch08/p1_count_syllables.py @@ -0,0 +1,113 @@ +"""Test count_syllables with a word dictionary file. + +Randomly select words from a word dictionary file and pass them through +:func:`count_syllables` to find their syllable counts. Output each word with +their respective syllable count. + +Attributes: + CMUDICT (dict): Dictionary of CMUdict's phonemes with the word as a key + and its phonemes as a list of lists. + MISSING_WORDS (dict): Dictionary with syllable counts of words + missing from CMUdict's phoneme list where the word is the key and + its syllable count as an integer value. + +""" +import json +import os +from random import sample +from string import punctuation + +import nltk +from nltk.corpus import cmudict + +from src.ch02 import DICTIONARY_FILE_PATH +from src.ch02.p1_cleanup_dictionary import cleanup_dict + +if not os.path.exists( + os.path.expanduser('~/nltk_data/corpora/cmudict/cmudict')): + # pylint: disable=fixme + # FIXME: This is nearly impossible to test. + # Patching os affects every use of os in the module. + nltk.download('cmudict') + +# Convert CMUdict into a dictionary. +CMUDICT = cmudict.dict() + +with open(os.path.join(os.path.dirname(__file__), + 'p1files/missing_words.json')) as in_file: + # Load local dictionary of words with syllable counts. + # Words as strings are keys and integers are values. + MISSING_WORDS = json.load(in_file) + + +def format_words(words: str) -> list: + """Format words for processing. + + Remove hyphens, convert to lowercase, and strip both punctuation and + possessives from word or phrase. + + Args: + words (str): Word or phrase to format for processing. + + Returns: + List of strings containing processed words. + + """ + words = words.replace('-', ' ') + word_list = words.lower().split() + for i, word in enumerate(word_list): + word = word.strip(punctuation) + if any([word.endswith("'s"), word.endswith("’s")]): + word_list[i] = word[:-2] + else: + word_list[i] = word + return word_list + + +def count_syllables(words: list) -> int: + """Use CMUdict to count syllables in English word. + + Calculate sum of syllable counts for each word in **words**. Checks + syllable counts in the :py:mod:`nltk.corpus` CMUdict phoneme list, if word + is not found in CMUdict, also checks local dictionary with syllable + counts. + + Args: + words (list): List of strings to sum number of syllables. + + Returns: + Integer representing number of syllables in **words**. + + Note: + Defaults to first element in CMUdict phoneme list. So, multiple + syllable counts are ignored. + + """ + syllables = 0 + for word in words: + if word in MISSING_WORDS: + syllables += MISSING_WORDS[word] + else: + for phonemes in CMUDICT[word][0]: + for phoneme in phonemes: + if phoneme[-1].isdigit(): + syllables += 1 + return syllables + + +def main(): + """Demonstrate count_syllables with a word dictionary file.""" + word_list = cleanup_dict(DICTIONARY_FILE_PATH) + sample_list = sample(word_list, 15) + for word in sample_list: + try: + syllables = count_syllables(format_words(word)) + except KeyError: + # Skip words in neither dictionary. + print(f'Not found: {word}') + continue + print(f'{word} {syllables}') + + +if __name__ == '__main__': + main() diff --git a/src/ch08/p1files/missing_words.json b/src/ch08/p1files/missing_words.json new file mode 100644 index 0000000..99100cc --- /dev/null +++ b/src/ch08/p1files/missing_words.json @@ -0,0 +1 @@ +{"househusband": 3, "ibices": 3, "smooching": 2, "handpicking": 3, "tuxes": 2} \ No newline at end of file diff --git a/tests/data/ch08/dictionary.txt b/tests/data/ch08/dictionary.txt new file mode 100644 index 0000000..4e77037 --- /dev/null +++ b/tests/data/ch08/dictionary.txt @@ -0,0 +1,78 @@ +a +aardvark +abracadabra +b +bee +boson +c +cat +catatonic +d +dog +dirge +e +echo +ebeneezer +f +fox +finicky +g +gecko +gopher +h +hemoglobin +hermit +i +imp +indigo +j +jack-o-lantern +journey +k +kangaroo +kilometer +l +lemon +lime +m +mesolithic +moonlight +n +none +night +o +opaque +opulent +p +penny +pepper +q +quasar +quark +r +riddle +rubber +s +slight +swift +t +tonberry +tomato +u +ultraviolet +umbra +v +venus +vertiginous +w +whip +whirl +x +xena +xenon +y +yacht +yggdrasil +z +zen +zero \ No newline at end of file diff --git a/tests/data/ch08/main/count_syllables.txt b/tests/data/ch08/main/count_syllables.txt new file mode 100644 index 0000000..036a7b9 --- /dev/null +++ b/tests/data/ch08/main/count_syllables.txt @@ -0,0 +1,15 @@ +Not found: yggdrasil +dog 1 +hermit 2 +jack-o-lantern 4 +journey 2 +bee 1 +abracadabra 5 +penny 2 +hemoglobin 4 +opaque 2 +venus 2 +umbra 2 +cat 1 +whirl 1 +zen 1 diff --git a/tests/test_chapter08.py b/tests/test_chapter08.py new file mode 100644 index 0000000..e8d1a3f --- /dev/null +++ b/tests/test_chapter08.py @@ -0,0 +1,58 @@ +"""Test Chapter 8.""" +import unittest.mock +import os +from random import Random +from io import StringIO + +import src.ch08.p1_count_syllables as count_syllables + + +class TestCountSyllables(unittest.TestCase): + """Test Count Syllables.""" + + @classmethod + def setUpClass(cls): + """Configure attributes for use in this class only.""" + cls.random = Random() + + def test_format_words(self): + """Test format_words.""" + # Test convert to lowercase. + for word in ['YOU', 'You', 'yOu', 'yoU', 'yOU', 'YOu', 'YoU', 'you']: + self.assertEqual(count_syllables.format_words(word), ['you']) + # Test remove hyphens. + self.assertEqual(count_syllables.format_words('nit-pick'), ['nit', 'pick']) + # Test remove punctuation. + self.assertEqual(count_syllables.format_words('nit-pick!'), ['nit', 'pick']) + # Test remove possessives. + for word in ['test’s', 'test\'s']: + self.assertEqual(count_syllables.format_words(word), ['test']) + # Test phrase. + self.assertEqual(count_syllables.format_words('TEST nit-pick'), ['test', 'nit', 'pick']) + + def test_count_syllables(self): + """Test count_syllables.""" + # Test word not in CMUdict. + self.assertEqual(count_syllables.count_syllables(['tuxes']), 2) + # Test word in CMUdict. + self.assertEqual(count_syllables.count_syllables(['test']), 1) + + @unittest.mock.patch('src.ch08.p1_count_syllables.DICTIONARY_FILE_PATH', 'tests/data/ch08/dictionary.txt') + @unittest.mock.patch('sys.stdout', new_callable=StringIO) + @unittest.mock.patch('src.ch08.p1_count_syllables.sample') + def test_main(self, mock_sample, mock_stdout): + """Test demo main function.""" + self.random.seed(222) + mock_sample.side_effect = self.random.sample + + count_syllables.main() + + # Test sys.stdout output. + with open(os.path.normpath('tests/data/ch08/main/count_syllables.txt'), + 'r') as file: + file_data = ''.join(file.readlines()) + self.assertEqual(mock_stdout.getvalue(), file_data) + + +if __name__ == '__main__': + unittest.main() diff --git a/tox.ini b/tox.ini index e5fe121..a7984e2 100644 --- a/tox.ini +++ b/tox.ini @@ -6,7 +6,7 @@ [tox] envlist = py36, py37, lint, pydocstyle, sphinx skip_missing_interpreters = True -skipsdist=True +skipsdist = True [testenv] deps =