
Commit

Fixes to make validate_docstrings.py not generate warnings or unwanted output (pandas-dev#23552)
datapythonista authored and Pingviinituutti committed Feb 28, 2019
1 parent cf12120 commit 3e6f961
Showing 8 changed files with 112 additions and 115 deletions.
2 changes: 1 addition & 1 deletion pandas/core/generic.py
@@ -5100,7 +5100,7 @@ def get_ftype_counts(self):
1 b 2 2.0
2 c 3 3.0
>>> df.get_ftype_counts()
>>> df.get_ftype_counts() # doctest: +SKIP
float64:dense 1
int64:dense 1
object:dense 1
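The recurring fix in this commit is appending "# doctest: +SKIP" to examples that would warn or produce noisy output when the validation script executes them. As a standalone sketch of what the directive does (illustrative only, not code from this commit), the standard doctest module parses a skipped example but never runs it:

# Standalone sketch (not from this commit): how "# doctest: +SKIP" behaves.
import doctest
import warnings


def deprecated_example():
    """Toy docstring whose second example is kept for documentation only.

    >>> 2 + 2
    4
    >>> warnings.warn("deprecated", FutureWarning)  # doctest: +SKIP
    """


if __name__ == "__main__":
    # Only the first example is attempted; the skipped one is parsed but never
    # executed, so no warning fires and no output mismatch is reported.
    print(doctest.testmod())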
14 changes: 5 additions & 9 deletions pandas/core/indexes/base.py
@@ -1875,35 +1875,31 @@ def get_duplicates(self):
Works on different Index of types.
>>> pd.Index([1, 2, 2, 3, 3, 3, 4]).get_duplicates()
>>> pd.Index([1, 2, 2, 3, 3, 3, 4]).get_duplicates() # doctest: +SKIP
[2, 3]
>>> pd.Index([1., 2., 2., 3., 3., 3., 4.]).get_duplicates()
[2.0, 3.0]
>>> pd.Index(['a', 'b', 'b', 'c', 'c', 'c', 'd']).get_duplicates()
['b', 'c']
Note that for a DatetimeIndex, it does not return a list but a new
DatetimeIndex:
>>> dates = pd.to_datetime(['2018-01-01', '2018-01-02', '2018-01-03',
... '2018-01-03', '2018-01-04', '2018-01-04'],
... format='%Y-%m-%d')
>>> pd.Index(dates).get_duplicates()
>>> pd.Index(dates).get_duplicates() # doctest: +SKIP
DatetimeIndex(['2018-01-03', '2018-01-04'],
dtype='datetime64[ns]', freq=None)
Sorts duplicated elements even when indexes are unordered.
>>> pd.Index([1, 2, 3, 2, 3, 4, 3]).get_duplicates()
>>> pd.Index([1, 2, 3, 2, 3, 4, 3]).get_duplicates() # doctest: +SKIP
[2, 3]
Return empty array-like structure when all elements are unique.
>>> pd.Index([1, 2, 3, 4]).get_duplicates()
>>> pd.Index([1, 2, 3, 4]).get_duplicates() # doctest: +SKIP
[]
>>> dates = pd.to_datetime(['2018-01-01', '2018-01-02', '2018-01-03'],
... format='%Y-%m-%d')
>>> pd.Index(dates).get_duplicates()
>>> pd.Index(dates).get_duplicates() # doctest: +SKIP
DatetimeIndex([], dtype='datetime64[ns]', freq=None)
"""
warnings.warn("'get_duplicates' is deprecated and will be removed in "
8 changes: 4 additions & 4 deletions pandas/core/panel.py
@@ -1013,21 +1013,21 @@ def apply(self, func, axis='major', **kwargs):
Returns a Panel with the square root of each element
>>> p = pd.Panel(np.random.rand(4,3,2))
>>> p = pd.Panel(np.random.rand(4, 3, 2)) # doctest: +SKIP
>>> p.apply(np.sqrt)
Equivalent to p.sum(1), returning a DataFrame
>>> p.apply(lambda x: x.sum(), axis=1)
>>> p.apply(lambda x: x.sum(), axis=1) # doctest: +SKIP
Equivalent to previous:
>>> p.apply(lambda x: x.sum(), axis='major')
>>> p.apply(lambda x: x.sum(), axis='major') # doctest: +SKIP
Return the shapes of each DataFrame over axis 2 (i.e the shapes of
items x major), as a Series
>>> p.apply(lambda x: x.shape, axis=(0,1))
>>> p.apply(lambda x: x.shape, axis=(0,1)) # doctest: +SKIP
Returns
-------
7 changes: 0 additions & 7 deletions pandas/core/strings.py
@@ -2156,13 +2156,6 @@ def cat(self, others=None, sep=None, na_rep=None, join=None):
`join`-keyword works as in other methods.
>>> t = pd.Series(['d', 'a', 'e', 'c'], index=[3, 0, 4, 2])
>>> s.str.cat(t, join=None, na_rep='-')
0 ad
1 ba
2 -e
3 dc
dtype: object
>>>
>>> s.str.cat(t, join='left', na_rep='-')
0 aa
1 b-
2 changes: 1 addition & 1 deletion pandas/errors/__init__.py
@@ -133,7 +133,7 @@ class ParserWarning(Warning):
>>> csv = u'''a;b;c
... 1;1,8
... 1;2,1'''
>>> df = pd.read_csv(io.StringIO(csv), sep='[;,]')
>>> df = pd.read_csv(io.StringIO(csv), sep='[;,]') # doctest: +SKIP
... # ParserWarning: Falling back to the 'python' engine...
Adding `engine='python'` to `pd.read_csv` removes the Warning:
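The docstring above already points out that passing engine='python' explicitly avoids the fallback warning. A standalone sketch of that variant (assuming pandas is installed; not part of the diff):

# Standalone sketch (not part of the diff): a regex separator forces the Python
# engine, so requesting it explicitly parses the same data without ParserWarning.
import io

import pandas as pd

csv = u'''a;b;c
1;1,8
1;2,1'''

df = pd.read_csv(io.StringIO(csv), sep='[;,]', engine='python')
print(df)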
4 changes: 2 additions & 2 deletions pandas/plotting/_misc.py
@@ -206,7 +206,7 @@ def radviz(frame, class_column, ax=None, color=None, colormap=None, **kwds):
... 'versicolor', 'setosa', 'virginica',
... 'setosa']
... })
>>> rad_viz = pd.plotting.radviz(df, 'Category')
>>> rad_viz = pd.plotting.radviz(df, 'Category') # doctest: +SKIP
"""
import matplotlib.pyplot as plt
import matplotlib.patches as patches
@@ -407,7 +407,7 @@ def bootstrap_plot(series, fig=None, size=50, samples=500, **kwds):
:context: close-figs
>>> s = pd.Series(np.random.uniform(size=100))
>>> fig = pd.plotting.bootstrap_plot(s)
>>> fig = pd.plotting.bootstrap_plot(s) # doctest: +SKIP
"""
import random
import matplotlib.pyplot as plt
167 changes: 82 additions & 85 deletions scripts/tests/test_validate_docstrings.py
@@ -785,10 +785,10 @@ def test_bad_examples(self, capsys, klass, func, msgs):
assert msg in ' '.join(err[1] for err in result['errors'])


class ApiItems(object):
class TestApiItems(object):
@property
def api_doc(self):
return textwrap.dedent(io.StringIO('''
return io.StringIO(textwrap.dedent('''
.. currentmodule:: itertools
Itertools
@@ -861,93 +861,90 @@ def test_item_subsection(self, idx, subsection):
assert result[idx][3] == subsection


class MainFunction(object):
def test_num_errors_for_validate_one(self, monkeypatch):
class TestMainFunction(object):
def test_exit_status_for_validate_one(self, monkeypatch):
monkeypatch.setattr(
validate_docstrings, 'validate_one',
lambda func_name: {'docstring': 'docstring1',
'errors': [('ER01', 'err desc'),
('ER02', 'err desc'),
('ER03', 'err desc')],
'warnings': [],
'examples_errors': ''})
num_errors = validate_docstrings.main(func_name='docstring1',
prefix=None,
errors=[],
output_format='default')
assert num_errors == 3

def test_no_num_errors_for_validate_one(self, monkeypatch):
monkeypatch.setattr(
validate_docstrings, 'validate_one',
lambda func_name: {'docstring': 'docstring1',
'errors': [],
'warnings': [('WN01', 'warn desc')],
'examples_errors': ''})
num_errors = validate_docstrings.main(func_name='docstring1',
prefix=None,
errors=[],
output_format='default')
assert num_errors == 0

def test_num_errors_for_validate_all(self, monkeypatch):
validate_docstrings, 'validate_one', lambda func_name: {
'docstring': 'docstring1',
'errors': [('ER01', 'err desc'),
('ER02', 'err desc'),
('ER03', 'err desc')],
'warnings': [],
'examples_errors': ''})
exit_status = validate_docstrings.main(func_name='docstring1',
prefix=None,
errors=[],
output_format='default')
assert exit_status == 0

def test_exit_status_errors_for_validate_all(self, monkeypatch):
monkeypatch.setattr(
validate_docstrings, 'validate_all',
lambda: {'docstring1': {'errors': [('ER01', 'err desc'),
('ER02', 'err desc'),
('ER03', 'err desc')]},
'docstring2': {'errors': [('ER04', 'err desc'),
('ER05', 'err desc')]}})
num_errors = validate_docstrings.main(func_name=None,
prefix=None,
errors=[],
output_format='default')
assert num_errors == 5

def test_no_num_errors_for_validate_all(self, monkeypatch):
validate_docstrings, 'validate_all', lambda prefix: {
'docstring1': {'errors': [('ER01', 'err desc'),
('ER02', 'err desc'),
('ER03', 'err desc')],
'file': 'module1.py',
'file_line': 23},
'docstring2': {'errors': [('ER04', 'err desc'),
('ER05', 'err desc')],
'file': 'module2.py',
'file_line': 925}})
exit_status = validate_docstrings.main(func_name=None,
prefix=None,
errors=[],
output_format='default')
assert exit_status == 5

def test_no_exit_status_noerrors_for_validate_all(self, monkeypatch):
monkeypatch.setattr(
validate_docstrings, 'validate_all',
lambda: {'docstring1': {'errors': [],
'warnings': [('WN01', 'warn desc')]},
'docstring2': {'errors': []}})
num_errors = validate_docstrings.main(func_name=None,
prefix=None,
errors=[],
output_format='default')
assert num_errors == 0

def test_prefix_param_filters_docstrings(self, monkeypatch):
validate_docstrings, 'validate_all', lambda prefix: {
'docstring1': {'errors': [],
'warnings': [('WN01', 'warn desc')]},
'docstring2': {'errors': []}})
exit_status = validate_docstrings.main(func_name=None,
prefix=None,
errors=[],
output_format='default')
assert exit_status == 0

def test_exit_status_for_validate_all_json(self, monkeypatch):
print('EXECUTED')
monkeypatch.setattr(
validate_docstrings, 'validate_all',
lambda: {'Series.foo': {'errors': [('ER01', 'err desc'),
('ER02', 'err desc'),
('ER03', 'err desc')]},
'DataFrame.bar': {'errors': [('ER04', 'err desc'),
('ER05', 'err desc')]},
'Series.foobar': {'errors': [('ER06', 'err desc')]}})
num_errors = validate_docstrings.main(func_name=None,
prefix='Series.',
errors=[],
output_format='default')
assert num_errors == 4
validate_docstrings, 'validate_all', lambda prefix: {
'docstring1': {'errors': [('ER01', 'err desc'),
('ER02', 'err desc'),
('ER03', 'err desc')]},
'docstring2': {'errors': [('ER04', 'err desc'),
('ER05', 'err desc')]}})
exit_status = validate_docstrings.main(func_name=None,
prefix=None,
errors=[],
output_format='json')
assert exit_status == 0

def test_errors_param_filters_errors(self, monkeypatch):
monkeypatch.setattr(
validate_docstrings, 'validate_all',
lambda: {'Series.foo': {'errors': [('ER01', 'err desc'),
('ER02', 'err desc'),
('ER03', 'err desc')]},
'DataFrame.bar': {'errors': [('ER01', 'err desc'),
('ER02', 'err desc')]},
'Series.foobar': {'errors': [('ER01', 'err desc')]}})
num_errors = validate_docstrings.main(func_name=None,
prefix=None,
errors=['E01'],
output_format='default')
assert num_errors == 3

num_errors = validate_docstrings.main(func_name=None,
prefix=None,
errors=['E03'],
output_format='default')
assert num_errors == 1
validate_docstrings, 'validate_all', lambda prefix: {
'Series.foo': {'errors': [('ER01', 'err desc'),
('ER02', 'err desc'),
('ER03', 'err desc')],
'file': 'series.py',
'file_line': 142},
'DataFrame.bar': {'errors': [('ER01', 'err desc'),
('ER02', 'err desc')],
'file': 'frame.py',
'file_line': 598},
'Series.foobar': {'errors': [('ER01', 'err desc')],
'file': 'series.py',
'file_line': 279}})
exit_status = validate_docstrings.main(func_name=None,
prefix=None,
errors=['ER01'],
output_format='default')
assert exit_status == 3

exit_status = validate_docstrings.main(func_name=None,
prefix=None,
errors=['ER03'],
output_format='default')
assert exit_status == 1
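Two details drive most of the churn in these tests: the stubbed validate_all must now accept the prefix argument, and each entry needs 'file' and 'file_line' keys because main() formats them into its output. A minimal hypothetical test mirroring that pattern (not copied from the suite above):

# Hypothetical minimal test (names are illustrative): stub validate_all, which
# now receives a prefix, and check that main() returns one exit-status unit per
# reported error.
import validate_docstrings


def test_exit_status_counts_errors(monkeypatch):
    monkeypatch.setattr(
        validate_docstrings, 'validate_all',
        lambda prefix: {'docstring1': {'errors': [('ER01', 'err desc')],
                                       'file': 'module1.py',
                                       'file_line': 23}})
    assert validate_docstrings.main(func_name=None, prefix=None, errors=[],
                                    output_format='default') == 1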
23 changes: 17 additions & 6 deletions scripts/validate_docstrings.py
@@ -32,6 +32,15 @@
from io import StringIO
except ImportError:
from cStringIO import StringIO

# Template backend makes matplotlib to not plot anything. This is useful
# to avoid that plot windows are open from the doctests while running the
# script. Setting here before matplotlib is loaded.
# We don't warn for the number of open plots, as none is actually being opened
os.environ['MPLBACKEND'] = 'Template'
import matplotlib
matplotlib.rc('figure', max_open_warning=10000)

import numpy

BASE_PATH = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
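For context on the hunk above, a standalone sketch (assuming matplotlib is installed; not code from this commit) of why the Template backend keeps doctests from opening plot windows:

# Standalone sketch (not from this commit): a non-interactive backend selected
# via MPLBACKEND before the first matplotlib import makes show() a no-op.
import os

os.environ['MPLBACKEND'] = 'Template'   # must be set before importing matplotlib

import matplotlib
import matplotlib.pyplot as plt

# Raise the "too many open figures" threshold, as the script above does.
matplotlib.rc('figure', max_open_warning=10000)

fig, ax = plt.subplots()
ax.plot([1, 2, 3])
plt.show()                              # renders nothing under Template
print(matplotlib.get_backend())         # confirms which backend is active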
@@ -505,6 +514,9 @@ def validate_pep8(self):
file.flush()
application.run_checks([file.name])

# We need this to avoid flake8 printing the names of the files to
# the standard output
application.formatter.write = lambda line, source: None
application.report()

yield from application.guide.stats.statistics_for('')
@@ -733,6 +745,7 @@ def header(title, width=80, char='#'):
return '\n{full_line}\n{title_line}\n{full_line}\n\n'.format(
full_line=full_line, title_line=title_line)

exit_status = 0
if func_name is None:
result = validate_all(prefix)

@@ -751,28 +764,26 @@ def header(title, width=80, char='#'):
raise ValueError('Unknown output_format "{}"'.format(
output_format))

num_errors, output = 0, ''
output = ''
for name, res in result.items():
for err_code, err_desc in res['errors']:
# The script would be faster if instead of filtering the
# errors after validating them, it didn't validate them
# initially. But that would complicate the code too much
if errors and err_code not in errors:
continue
num_errors += 1
exit_status += 1
output += output_format.format(
name=name,
path=res['file'],
row=res['file_line'],
code=err_code,
text='{}: {}'.format(name, err_desc))

sys.stderr.write(output)
sys.stdout.write(output)

else:
result = validate_one(func_name)
num_errors = len(result['errors'])

sys.stderr.write(header('Docstring ({})'.format(func_name)))
sys.stderr.write('{}\n'.format(result['docstring']))
sys.stderr.write(header('Validation'))
@@ -799,7 +810,7 @@ def header(title, width=80, char='#'):
sys.stderr.write(header('Doctests'))
sys.stderr.write(result['examples_errors'])

return num_errors
return exit_status


if __name__ == '__main__':
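The script's argparse tail is truncated above; the point of the rename is that the value returned by main() becomes the process exit code, so any remaining error fails the run. A self-contained sketch of the idea (not the script's real tail):

# Self-contained sketch (not the script's real tail, which is truncated above):
# the count returned by main() becomes the process exit code, so a shell or CI
# job sees a non-zero status whenever any docstring error remains.
import sys


def main(error_codes):
    exit_status = 0
    for _ in error_codes:
        exit_status += 1        # one unit per reported error, as in the diff
    return exit_status


if __name__ == '__main__':
    sys.exit(main([('ER01', 'err desc')]))   # process exits with status 1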
