From da8a3ceec10df0ce38190c19bb65fdc63f437f4e Mon Sep 17 00:00:00 2001 From: Daniel Pope Date: Mon, 6 Feb 2017 22:30:29 +0000 Subject: [PATCH 1/6] Improve imperative mood check using stemmer/wordlist --- requirements/runtime.txt | 1 + setup.py | 4 + src/pydocstyle/checker.py | 16 +- src/pydocstyle/data/imperatives.txt | 232 ++++++++++++++++++ src/pydocstyle/data/imperatives_blacklist.txt | 109 ++++++++ src/pydocstyle/parser.py | 11 + src/pydocstyle/violations.py | 4 +- src/pydocstyle/wordlists.py | 39 +++ src/tests/test_cases/capitalization.py | 4 +- src/tests/test_cases/sections.py | 22 +- src/tests/test_cases/test.py | 8 +- tox.ini | 8 +- 12 files changed, 438 insertions(+), 20 deletions(-) create mode 100644 src/pydocstyle/data/imperatives.txt create mode 100644 src/pydocstyle/data/imperatives_blacklist.txt create mode 100644 src/pydocstyle/wordlists.py diff --git a/requirements/runtime.txt b/requirements/runtime.txt index e69de29b..96db5be8 100644 --- a/requirements/runtime.txt +++ b/requirements/runtime.txt @@ -0,0 +1 @@ +snowballstemmer==1.2.1 diff --git a/setup.py b/setup.py index 3e73303d..76ad1b0b 100644 --- a/setup.py +++ b/setup.py @@ -31,6 +31,10 @@ keywords='pydocstyle, PEP 257, pep257, PEP 8, pep8, docstrings', packages=('pydocstyle',), package_dir={'': 'src'}, + package_data={'pydocstyle': ['data/*.txt']}, + install_requires=[ + 'snowballstemmer==1.2.1', + ], entry_points={ 'console_scripts': [ 'pydocstyle = pydocstyle.cli:main', diff --git a/src/pydocstyle/checker.py b/src/pydocstyle/checker.py index c94a1cf3..6454764e 100644 --- a/src/pydocstyle/checker.py +++ b/src/pydocstyle/checker.py @@ -13,6 +13,7 @@ from .parser import (Package, Module, Class, NestedClass, Definition, AllError, Method, Function, NestedFunction, Parser, StringIO) from .utils import log, is_blank +from .wordlists import IMPERATIVE_VERBS, IMPERATIVE_BLACKLIST, stem __all__ = ('check', ) @@ -361,12 +362,21 @@ def check_imperative_mood(self, function, docstring): # def context "Returns the pathname ...". """ - if docstring: + if docstring and not function.is_test: stripped = ast.literal_eval(docstring).strip() if stripped: first_word = stripped.split()[0] - if first_word.endswith('s') and not first_word.endswith('ss'): - return violations.D401(first_word[:-1], first_word) + check_word = first_word.lower() + + if check_word in IMPERATIVE_BLACKLIST: + return violations.D401b(first_word) + + correct_form = IMPERATIVE_VERBS.get(stem(check_word)) + if correct_form and correct_form != check_word: + return violations.D401( + correct_form.capitalize(), + first_word + ) @check_for(Function) def check_no_signature(self, function, docstring): # def context diff --git a/src/pydocstyle/data/imperatives.txt b/src/pydocstyle/data/imperatives.txt new file mode 100644 index 00000000..382e0f07 --- /dev/null +++ b/src/pydocstyle/data/imperatives.txt @@ -0,0 +1,232 @@ +# Imperative forms of verbs +# +# This file contains the imperative form of frequently encountered +# docstring verbs. Some of these may be more commonly encountered as +# nouns, but blacklisting them for this may cause false positives. +accept +access +add +adjust +aggregate +allow +append +apply +archive +assert +assign +attempt +authenticate +authorize +break +build +cache +calculate +call +cancel +capture +change +check +clean +clear +close +collect +combine +commit +compare +compute +configure +confirm +connect +construct +control +convert +copy +count +create +customize +declare +decode +decorate +define +delegate +delete +deprecate +derive +describe +detect +determine +display +download +drop +dump +emit +empty +enable +encapsulate +encode +end +ensure +enumerate +establish +evaluate +examine +execute +exit +expand +expect +export +extend +extract +feed +fetch +fill +filter +finalize +find +fire +fix +flag +force +format +forward +generate +get +give +go +group +handle +help +hold +identify +implement +import +indicate +init +initalise +initialise +initialize +input +insert +instantiate +intercept +invoke +iterate +join +keep +launch +list +listen +load +log +look +make +manage +manipulate +map +mark +match +merge +mock +modify +monitor +move +normalize +note +obtain +open +output +override +overwrite +pad +parse +partial +pass +perform +persist +pick +plot +poll +populate +post +prepare +print +process +produce +provide +publish +pull +put +query +raise +read +record +refer +refresh +register +reload +remove +rename +render +replace +reply +report +represent +request +require +reset +resolve +retrieve +return +roll +rollback +round +run +sample +save +scan +search +select +send +serialise +serialize +serve +set +show +simulate +source +specify +split +start +step +stop +store +strip +submit +subscribe +sum +swap +sync +synchronise +synchronize +take +tear +test +time +transform +translate +transmit +truncate +try +turn +tweak +update +upload +use +validate +verify +view +wait +walk +wrap +write +yield diff --git a/src/pydocstyle/data/imperatives_blacklist.txt b/src/pydocstyle/data/imperatives_blacklist.txt new file mode 100644 index 00000000..404aef82 --- /dev/null +++ b/src/pydocstyle/data/imperatives_blacklist.txt @@ -0,0 +1,109 @@ +# Blacklisted imperative words +# +# These are words that, if they begin a docstring, are a good indicator that +# the docstring is not written in an imperative voice. +# +# The words included in this list fall into a number of categories: +# +# - Starting with a noun/pronoun indicates that the docstring is a noun phrase +# or a sentence but not in the imperative mood +# - Adjectives are always followed by a noun, so same +# - Particles are also followed by a noun +# - Some adverbs don't really indicate an imperative sentence, for example +# "importantly" or "currently". +# - Some irregular verb forms that don't stem to the same string as the +# imperative does (eg. 'does') +:param +:return +:return: +:returns +:returns: +:rtype +a +an +the +action +always +api +base +basic +business +calculation +callback +collection +common +constructor +convenience +convenient +current +currently +custom +data +data +default +deprecated +description +description: +dict +dictionary +does +dummy +example +factory +false +final +formula +function +generic +handler +handler +helper +here +hook +implementation +importantly +internal +it +main +method +module +new +number +optional +package +parameters: +placeholder +reference +result +returns: +same +schema +setup +should +simple +some +special +sql +standard +static +string +subclasses +that +these +this +true +unique +unit +utility +what +wrapper + + +# These are nouns, but often used in the context of functions that act as +# objects; thus we do not blacklist these. +# +# context # as in context manager +# decorator +# class # as in class decorator +# property +# generator diff --git a/src/pydocstyle/parser.py b/src/pydocstyle/parser.py index c8eb3c65..8b3f6630 100644 --- a/src/pydocstyle/parser.py +++ b/src/pydocstyle/parser.py @@ -133,6 +133,17 @@ def is_public(self): else: return not self.name.startswith('_') + @property + def is_test(self): + """Return True if this function is a test function/method. + + We exclude tests from the imperative mood check, because to phrase + their docstring in the imperative mood, they would have to start with + a highly redundant "Test that ...". + + """ + return self.name.startswith('test') or self.name == 'runTest' + class NestedFunction(Function): """A Python source code nested function.""" diff --git a/src/pydocstyle/violations.py b/src/pydocstyle/violations.py index 40381f76..4e05f7d9 100644 --- a/src/pydocstyle/violations.py +++ b/src/pydocstyle/violations.py @@ -202,7 +202,9 @@ def to_rst(cls): D400 = D4xx.create_error('D400', 'First line should end with a period', 'not {0!r}') D401 = D4xx.create_error('D401', 'First line should be in imperative mood', - '{0!r}, not {1!r}') + "'{0}', not '{1}'") +D401b = D4xx.create_error('D401', 'First line should be in imperative mood; ' + 'try rephrasing', "found '{0}'") D402 = D4xx.create_error('D402', 'First line should not be the function\'s ' '"signature"') D403 = D4xx.create_error('D403', 'First word of the first line should be ' diff --git a/src/pydocstyle/wordlists.py b/src/pydocstyle/wordlists.py new file mode 100644 index 00000000..776b87bb --- /dev/null +++ b/src/pydocstyle/wordlists.py @@ -0,0 +1,39 @@ +"""Wordlists loaded from package data. + +We can treat them as part of the code for the imperative mood check, and +therefore we load them at import time, rather than on-demand. + +""" +import re +import pkgutil +import snowballstemmer + + +#: Regular expression for stripping comments from the wordlists +COMMENT_RE = re.compile(r'\s*#.*') + +#: Stemmer function for stemming words in English +stem = snowballstemmer.stemmer('english').stemWord + + +def load_wordlist(name): + """Iterate over lines of a wordlist data file. + + `name` should be the name of a package data file within the data/ + directory. + + Whitespace and #-prefixed comments are stripped from each line. + + """ + text = pkgutil.get_data('pydocstyle', 'data/' + name).decode('utf8') + for l in text.splitlines(): + l = COMMENT_RE.sub('', l).strip() + if l: + yield l + + +#: A dict mapping stemmed verbs to the imperative form +IMPERATIVE_VERBS = {stem(v): v for v in load_wordlist('imperatives.txt')} + +#: Words that are forbidden to appear as the first word in a docstring +IMPERATIVE_BLACKLIST = set(load_wordlist('imperatives_blacklist.txt')) diff --git a/src/tests/test_cases/capitalization.py b/src/tests/test_cases/capitalization.py index b688767a..91ecf45c 100644 --- a/src/tests/test_cases/capitalization.py +++ b/src/tests/test_cases/capitalization.py @@ -55,6 +55,6 @@ def more_partial_caps(): @expect("D403: First word of the first line should be properly capitalized " - "('A', not 'a')") + "('Generate', not 'generate')") def just_one_more_example(): - """a function.""" + """generate a function.""" diff --git a/src/tests/test_cases/sections.py b/src/tests/test_cases/sections.py index a3c7dec0..22e04d14 100644 --- a/src/tests/test_cases/sections.py +++ b/src/tests/test_cases/sections.py @@ -13,7 +13,7 @@ @expect("D405: Section name should be properly capitalized " "('Returns', not 'returns')") def not_capitalized(): - """Valid headline. + """Toggle the gizmo. returns ------- @@ -25,7 +25,7 @@ def not_capitalized(): @expect("D406: Section name should end with a newline " "('Returns', not 'Returns:')") def superfluous_suffix(): - """Valid headline. + """Toggle the gizmo. Returns: ------- @@ -36,7 +36,7 @@ def superfluous_suffix(): @expect(_D213) @expect("D407: Missing dashed underline after section ('Returns')") def no_underline(): - """Valid headline. + """Toggle the gizmo. Returns @@ -47,7 +47,7 @@ def no_underline(): @expect("D408: Section underline should be in the line following the " "section's name ('Returns')") def blank_line_before_underline(): - """Valid headline. + """Toggle the gizmo. Returns @@ -60,7 +60,7 @@ def blank_line_before_underline(): @expect("D409: Section underline should match the length of its name " "(Expected 7 dashes in section 'Returns', got 2)") def bad_underline_length(): - """Valid headline. + """Toggle the gizmo. Returns -- @@ -71,7 +71,7 @@ def bad_underline_length(): @expect(_D213) @expect("D410: Missing blank line after section ('Returns')") def no_blank_line_after_section(): - """Valid headline. + """Toggle the gizmo. Returns ------- @@ -82,7 +82,7 @@ def no_blank_line_after_section(): @expect(_D213) @expect("D411: Missing blank line before section ('Returns')") def no_blank_line_before_section(): - """Valid headline. + """Toggle the gizmo. The function's description. Returns @@ -94,7 +94,7 @@ def no_blank_line_before_section(): @expect(_D213) @expect("D214: Section is over-indented ('Returns')") def section_overindented(): - """Valid headline. + """Toggle the gizmo. Returns ------- @@ -105,7 +105,7 @@ def section_overindented(): @expect(_D213) @expect("D215: Section underline is over-indented (in section 'Returns')") def section_underline_overindented(): - """Valid headline. + """Toggle the gizmo. Returns ------- @@ -115,7 +115,7 @@ def section_underline_overindented(): @expect(_D213) def ignore_non_actual_section(): - """Valid headline. + """Toggle the gizmo. This is the function's description, which will also specify what it returns @@ -148,7 +148,7 @@ def section_name_in_first_line(): @expect("D407: Missing dashed underline after section ('Raises')") @expect("D410: Missing blank line after section ('Raises')") def multiple_sections(): - """Valid headline. + """Toggle the gizmo. Short summary ------------- diff --git a/src/tests/test_cases/test.py b/src/tests/test_cases/test.py index d033df4c..877c114f 100644 --- a/src/tests/test_cases/test.py +++ b/src/tests/test_cases/test.py @@ -283,6 +283,12 @@ def liouiwnlkjl(): """Returns foo.""" +@expect("D401: First line should be in imperative mood; try rephrasing " + "(found 'Constructor')") +def sdgfsdg23245(): + """Constructor for a foo.""" + + @expect('D402: First line should not be the function\'s "signature"') def foobar(): """Signature: foobar().""" @@ -323,7 +329,7 @@ def docstring_start_in_same_line(): """First Line. def function_with_lambda_arg(x=lambda y: y): - """A valid docstring.""" + """Wrap the given lambda.""" @expect('D213: Multi-line docstring summary should start at the second line') diff --git a/tox.ini b/tox.ini index 2dab0918..5011381c 100644 --- a/tox.ini +++ b/tox.ini @@ -12,11 +12,15 @@ setenv = LANG=C LC_ALL=C commands = py.test --pep8 --cache-clear -deps = -rrequirements/tests.txt +deps = + -rrequirements/runtime.txt + -rrequirements/tests.txt [testenv:docs] changedir=docs -deps = -rrequirements/docs.txt +deps = + -rrequirements/runtime.txt + -rrequirements/docs.txt commands=sphinx-build -b html . _build [pytest] From 53a312a3df125969d8da1ef773c98fdc62a5f84e Mon Sep 17 00:00:00 2001 From: Daniel Pope Date: Mon, 27 Feb 2017 21:28:17 +0000 Subject: [PATCH 2/6] Address test failures from merge --- src/tests/test_cases/sections.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/tests/test_cases/sections.py b/src/tests/test_cases/sections.py index e8912f8d..9d611437 100644 --- a/src/tests/test_cases/sections.py +++ b/src/tests/test_cases/sections.py @@ -49,7 +49,7 @@ def no_underline(): @expect(_D213) @expect("D407: Missing dashed underline after section ('Returns')") def no_underline(): - """Valid headline. + """Toggle the gizmo. Returns @@ -62,7 +62,7 @@ def no_underline(): @expect("D411: Missing blank line before section ('Yields')") @expect("D414: Section has no content ('Yields')") def consecutive_sections(): - """Valid headline. + """Toggle the gizmo. Returns ------- @@ -156,7 +156,7 @@ def section_underline_overindented(): @expect("D413: Missing blank line after last section ('Returns')") @expect("D414: Section has no content ('Returns')") def section_underline_overindented_and_contentless(): - """Valid headline. + """Toggle the gizmo. Returns ------- From fb23a9bfdc01ba66d5776e74f7c03412375791df Mon Sep 17 00:00:00 2001 From: Daniel Pope Date: Mon, 27 Feb 2017 21:44:15 +0000 Subject: [PATCH 3/6] Configure docs to build with ReadTheDocs theme --- docs/conf.py | 7 ++++++- requirements/docs.txt | 3 ++- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/docs/conf.py b/docs/conf.py index 0265e939..a942f231 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -103,7 +103,12 @@ # The theme to use for HTML and HTML Help pages. See the documentation for # a list of builtin themes. -html_theme = 'default' +try: + import sphinx_rtd_theme +except ImportError: + html_theme = 'default' +else: + html_theme = 'sphinx_rtd_theme' # Theme options are theme-specific and customize the look and feel of a theme # further. For a list of options available for each theme, see the diff --git a/requirements/docs.txt b/requirements/docs.txt index 388d6123..558c484d 100644 --- a/requirements/docs.txt +++ b/requirements/docs.txt @@ -1 +1,2 @@ -sphinxcontrib-issuetracker \ No newline at end of file +sphinxcontrib-issuetracker +sphinx_rtd_theme From bd86b9b44de241341bf3a7d9d553c4fa33a91bab Mon Sep 17 00:00:00 2001 From: Daniel Pope Date: Mon, 27 Feb 2017 21:44:59 +0000 Subject: [PATCH 4/6] Remove Sphinx markup junk from wordlists --- src/pydocstyle/data/imperatives_blacklist.txt | 9 --------- 1 file changed, 9 deletions(-) diff --git a/src/pydocstyle/data/imperatives_blacklist.txt b/src/pydocstyle/data/imperatives_blacklist.txt index 404aef82..5057b9fc 100644 --- a/src/pydocstyle/data/imperatives_blacklist.txt +++ b/src/pydocstyle/data/imperatives_blacklist.txt @@ -13,12 +13,6 @@ # "importantly" or "currently". # - Some irregular verb forms that don't stem to the same string as the # imperative does (eg. 'does') -:param -:return -:return: -:returns -:returns: -:rtype a an the @@ -43,7 +37,6 @@ data default deprecated description -description: dict dictionary does @@ -71,11 +64,9 @@ new number optional package -parameters: placeholder reference result -returns: same schema setup From f24168edd201ce2b1a7cf24c631dbdc3c415eda5 Mon Sep 17 00:00:00 2001 From: Daniel Pope Date: Mon, 27 Feb 2017 21:45:49 +0000 Subject: [PATCH 5/6] Don't use 'l' as a variable name --- src/pydocstyle/wordlists.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/pydocstyle/wordlists.py b/src/pydocstyle/wordlists.py index 776b87bb..1180c689 100644 --- a/src/pydocstyle/wordlists.py +++ b/src/pydocstyle/wordlists.py @@ -26,10 +26,10 @@ def load_wordlist(name): """ text = pkgutil.get_data('pydocstyle', 'data/' + name).decode('utf8') - for l in text.splitlines(): - l = COMMENT_RE.sub('', l).strip() - if l: - yield l + for line in text.splitlines(): + line = COMMENT_RE.sub('', line).strip() + if line: + yield line #: A dict mapping stemmed verbs to the imperative form From 490edfaab48ec010f53e1653c2ec4593fcd11a44 Mon Sep 17 00:00:00 2001 From: Daniel Pope Date: Tue, 7 Mar 2017 19:07:00 +0000 Subject: [PATCH 6/6] Remove version number for snowballstemmer --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 76ad1b0b..0d6cc2fe 100644 --- a/setup.py +++ b/setup.py @@ -33,7 +33,7 @@ package_dir={'': 'src'}, package_data={'pydocstyle': ['data/*.txt']}, install_requires=[ - 'snowballstemmer==1.2.1', + 'snowballstemmer', ], entry_points={ 'console_scripts': [