Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
23 commits
Select commit Hold shift + click to select a range
1fd6af7
Add nltk 3.4.5
JoseALermaIII Oct 18, 2019
87fb3b1
Initial commit
JoseALermaIII Oct 19, 2019
7486627
Add test_count_syllables to TestCountSyllables
JoseALermaIII Oct 19, 2019
3876c4f
Initial commit
JoseALermaIII Oct 19, 2019
941fe04
Add test_main and setUpClass to TestCountSyllables
JoseALermaIII Oct 19, 2019
4cc5d4e
Initial commit
JoseALermaIII Oct 19, 2019
a1edcc5
Add src.ch08
JoseALermaIII Oct 19, 2019
3dc409a
Fix spacing for skipsdist
JoseALermaIII Oct 19, 2019
56576bb
Add nltk to intersphinx_mapping
JoseALermaIII Oct 19, 2019
2a3ac2a
Add sphinx reference to nltk in count_syllables docstring
JoseALermaIII Oct 19, 2019
e65fac8
Add another test to test_format_words
JoseALermaIII Oct 19, 2019
e0a9d50
Add nltk.corpus.cmudict to install section
JoseALermaIII Oct 19, 2019
321281e
Add nltk to pip install
JoseALermaIII Oct 19, 2019
8519175
Revert "Add nltk to pip install"
JoseALermaIII Oct 19, 2019
ae8fde9
Revert "Add nltk.corpus.cmudict to install section"
JoseALermaIII Oct 19, 2019
34b6b61
Add cmudict download to module
JoseALermaIII Oct 19, 2019
6f37d7c
Add path check to cmudict download
JoseALermaIII Oct 19, 2019
bd92303
Fix pylint line-too-long and locally disable fixme
JoseALermaIII Oct 19, 2019
52c96a0
Add nltk corpora directory to cache
JoseALermaIII Oct 19, 2019
fc3663a
Add attributes section to module docstring
JoseALermaIII Oct 22, 2019
d2c3f56
Refactor main to use random sample
JoseALermaIII Oct 25, 2019
b99cc36
Refactor main to display words in neither dictionary
JoseALermaIII Oct 25, 2019
4a73bd4
Refactor tests to reflect changes
JoseALermaIII Oct 25, 2019
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,10 @@
# https://travis-ci.com/JoseALermaIII
dist: bionic # required for Python >= 3.7
language: python
cache: pip # Don't delete pip install
cache:
pip: true # Don't delete pip install
directories:
- $HOME/nltk_data/corpora/
Comment on lines -9 to +12
Copy link
Owner Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Caching the nltk corpora should relieve load on the nltk servers.

# Branch safelist
branches:
only:
Expand Down
1 change: 1 addition & 0 deletions docs/source/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,7 @@
intersphinx_mapping = {
'python': ('https://docs.python.org/3/', None),
'docx': ('https://python-docx.readthedocs.io/en/latest/', None),
'nltk': ('http://www.nltk.org/', None),
}

# Default options for autodoc directives.
Expand Down
22 changes: 22 additions & 0 deletions docs/source/src.ch08.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
src.ch08 package
================

Submodules
----------

src.ch08.p1\_count\_syllables module
------------------------------------

.. automodule:: src.ch08.p1_count_syllables
:members:
:undoc-members:
:show-inheritance:


Module contents
---------------

.. automodule:: src.ch08
:members:
:undoc-members:
:show-inheritance:
1 change: 1 addition & 0 deletions docs/source/src.rst
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ Subpackages
src.ch05
src.ch06
src.ch07
src.ch08

Module contents
---------------
Expand Down
3 changes: 2 additions & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
@@ -1 +1,2 @@
python-docx==0.8.10
python-docx==0.8.10
nltk==3.4.5
1 change: 1 addition & 0 deletions src/ch08/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
"""Chapter 8."""
113 changes: 113 additions & 0 deletions src/ch08/p1_count_syllables.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,113 @@
"""Test count_syllables with a word dictionary file.
Randomly select words from a word dictionary file and pass them through
:func:`count_syllables` to find their syllable counts. Output each word with
their respective syllable count.
Attributes:
CMUDICT (dict): Dictionary of CMUdict's phonemes with the word as a key
and its phonemes as a list of lists.
MISSING_WORDS (dict): Dictionary with syllable counts of words
missing from CMUdict's phoneme list where the word is the key and
its syllable count as an integer value.
"""
import json
import os
from random import sample
from string import punctuation

import nltk
from nltk.corpus import cmudict

from src.ch02 import DICTIONARY_FILE_PATH
from src.ch02.p1_cleanup_dictionary import cleanup_dict

try:
    # Ask NLTK whether the corpus is installed instead of probing a
    # hard-coded ~/nltk_data path, so that NLTK_DATA and the other
    # directories on nltk.data.path are honored. Tests can patch
    # nltk.data.find directly rather than the whole os module.
    nltk.data.find('corpora/cmudict')
except LookupError:
    # Corpus is not installed anywhere NLTK searches; fetch it once.
    nltk.download('cmudict')

# Convert CMUdict into a dictionary.
CMUDICT = cmudict.dict()

# JSON text is UTF-8 by specification, so decode it explicitly rather than
# relying on the platform's locale encoding.
with open(os.path.join(os.path.dirname(__file__),
                       'p1files/missing_words.json'),
          encoding='utf-8') as in_file:
    # Load local dictionary of words with syllable counts.
    # Words as strings are keys and integers are values.
    MISSING_WORDS = json.load(in_file)


def format_words(words: str) -> list:
    """Format words for processing.

    Replace hyphens with spaces, convert to lowercase, and strip both
    surrounding punctuation and a trailing possessive (``'s`` or ``’s``)
    from every word of a word or phrase. Tokens that consist solely of
    punctuation are dropped instead of being returned as empty strings.

    Args:
        words (str): Word or phrase to format for processing.

    Returns:
        List of strings containing processed words. Empty if **words**
        holds no word characters.

    """
    formatted = []
    for token in words.replace('-', ' ').lower().split():
        word = token.strip(punctuation)
        # str.endswith accepts a tuple, covering both apostrophe styles.
        if word.endswith(("'s", "’s")):
            word = word[:-2]
        # A stray punctuation mark leaves nothing behind; skip it so callers
        # never have to look up an empty string.
        if word:
            formatted.append(word)
    return formatted


def count_syllables(words: list) -> int:
"""Use CMUdict to count syllables in English word.
Calculate sum of syllable counts for each word in **words**. Checks
syllable counts in the :py:mod:`nltk.corpus` CMUdict phoneme list, if word
is not found in CMUdict, also checks local dictionary with syllable
counts.
Args:
words (list): List of strings to sum number of syllables.
Returns:
Integer representing number of syllables in **words**.
Note:
Defaults to first element in CMUdict phoneme list. So, multiple
syllable counts are ignored.
"""
syllables = 0
for word in words:
# The local MISSING_WORDS table is consulted before CMUdict, so a word
# present in both takes its count from MISSING_WORDS.
if word in MISSING_WORDS:
syllables += MISSING_WORDS[word]
else:
# Plain indexing raises KeyError for a word in neither dictionary;
# main() catches that error to report the word as not found.
# NOTE(review): CMUDICT[word][0] is presumably one pronunciation (a list
# of phoneme strings), which would make the inner loop walk the characters
# of each phoneme and count its stress digit - confirm against the output
# of cmudict.dict().
for phonemes in CMUDICT[word][0]:
for phoneme in phonemes:
Comment on lines +89 to +92
Copy link
Owner Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

These can be implemented as MISSING_WORDS.get(word) and CMUDICT.get(word)[0]; however, the get() method doesn't raise a KeyError if the key isn't present. Instead, it returns a value of None by default, but this behavior can be changed.

Here's the thing: I'd be willing to refactor the tests to account for this change in behavior if not for one small detail. Setting values would still be done by doing MISSING_WORDS[word] == value. There isn't a set() dictionary method. Probably for good reason, but as a result, I don't want to mix syntax.

if phoneme[-1].isdigit():
syllables += 1
return syllables


def main():
    """Demonstrate count_syllables with a word dictionary file.

    Randomly pick up to 15 words from the cleaned-up dictionary file and
    print each word followed by its syllable count. Words found in neither
    CMUdict nor the local dictionary are printed with a ``Not found:``
    prefix so they can be added to one of the dictionaries.
    """
    word_list = cleanup_dict(DICTIONARY_FILE_PATH)
    # random.sample raises ValueError when asked for more items than the
    # population holds, so cap the sample size for short dictionaries.
    sample_list = sample(word_list, min(15, len(word_list)))
    for word in sample_list:
        try:
            syllables = count_syllables(format_words(word))
        except KeyError:
            # Skip words in neither dictionary.
            print(f'Not found: {word}')
            continue
        print(f'{word} {syllables}')
Comment on lines +100 to +109
Copy link
Owner Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

After reviewing the book's solution, I agree that using sample() is a better way to randomly select words to use without duplicates and makes for better looping. I also agree that displaying missing words helps add them to either dictionary.



if __name__ == '__main__':
main()
1 change: 1 addition & 0 deletions src/ch08/p1files/missing_words.json
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{"househusband": 3, "ibices": 3, "smooching": 2, "handpicking": 3, "tuxes": 2}
78 changes: 78 additions & 0 deletions tests/data/ch08/dictionary.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
a
aardvark
abracadabra
b
bee
boson
c
cat
catatonic
d
dog
dirge
e
echo
ebeneezer
f
fox
finicky
g
gecko
gopher
h
hemoglobin
hermit
i
imp
indigo
j
jack-o-lantern
journey
k
kangaroo
kilometer
l
lemon
lime
m
mesolithic
moonlight
n
none
night
o
opaque
opulent
p
penny
pepper
q
quasar
quark
r
riddle
rubber
s
slight
swift
t
tonberry
tomato
u
ultraviolet
umbra
v
venus
vertiginous
w
whip
whirl
x
xena
xenon
y
yacht
yggdrasil
z
zen
zero
15 changes: 15 additions & 0 deletions tests/data/ch08/main/count_syllables.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
Not found: yggdrasil
Copy link
Owner Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'd love to see a phoneme list for yggdrasil. In the meantime, I'd now be able to add it to missing_words.json as "yggdrasil": 3, which is handy.

dog 1
hermit 2
jack-o-lantern 4
journey 2
bee 1
abracadabra 5
penny 2
hemoglobin 4
opaque 2
venus 2
umbra 2
cat 1
whirl 1
zen 1
58 changes: 58 additions & 0 deletions tests/test_chapter08.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
"""Test Chapter 8."""
import unittest.mock
import os
from random import Random
from io import StringIO

import src.ch08.p1_count_syllables as count_syllables


class TestCountSyllables(unittest.TestCase):
    """Exercise the Chapter 8 syllable counting module."""

    @classmethod
    def setUpClass(cls):
        """Create one random generator shared by the tests in this class."""
        cls.random = Random()

    def test_format_words(self):
        """Check case folding, hyphen, punctuation and possessive handling."""
        format_words = count_syllables.format_words

        # Every capitalization collapses to the same lowercase word.
        spellings = ['YOU', 'You', 'yOu', 'yoU', 'yOU', 'YOu', 'YoU', 'you']
        for spelling in spellings:
            self.assertEqual(format_words(spelling), ['you'])

        # A hyphen splits the word in two.
        self.assertEqual(format_words('nit-pick'), ['nit', 'pick'])

        # Trailing punctuation is stripped.
        self.assertEqual(format_words('nit-pick!'), ['nit', 'pick'])

        # Both curly and straight apostrophe possessives are removed.
        for possessive in ['test’s', "test's"]:
            self.assertEqual(format_words(possessive), ['test'])

        # A phrase yields one entry per word.
        self.assertEqual(format_words('TEST nit-pick'),
                         ['test', 'nit', 'pick'])

    def test_count_syllables(self):
        """Check counts taken from the local table and from CMUdict."""
        counter = count_syllables.count_syllables
        # Word not in CMUdict.
        self.assertEqual(counter(['tuxes']), 2)
        # Word in CMUdict.
        self.assertEqual(counter(['test']), 1)

    @unittest.mock.patch('src.ch08.p1_count_syllables.DICTIONARY_FILE_PATH', 'tests/data/ch08/dictionary.txt')
    @unittest.mock.patch('sys.stdout', new_callable=StringIO)
    @unittest.mock.patch('src.ch08.p1_count_syllables.sample')
    def test_main(self, mock_sample, mock_stdout):
        """Compare main's printed output with the recorded expectation."""
        # Seed the shared generator so the mocked sample is reproducible.
        self.random.seed(222)
        mock_sample.side_effect = self.random.sample

        count_syllables.main()

        # Captured stdout must match the stored reference output exactly.
        expected_path = os.path.normpath(
            'tests/data/ch08/main/count_syllables.txt')
        with open(expected_path, 'r') as expected_file:
            expected_output = expected_file.read()
        self.assertEqual(mock_stdout.getvalue(), expected_output)


if __name__ == '__main__':
unittest.main()
2 changes: 1 addition & 1 deletion tox.ini
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
[tox]
envlist = py36, py37, lint, pydocstyle, sphinx
skip_missing_interpreters = True
skipsdist=True
skipsdist = True

[testenv]
deps =
Expand Down