Skip to content

Commit

Permalink
TST: move .str-test to strings.py & parametrize it; precursor to pand…
Browse files Browse the repository at this point in the history
  • Loading branch information
h-vetinari authored and Pingviinituutti committed Feb 28, 2019
1 parent d93f6ed commit a884734
Show file tree
Hide file tree
Showing 2 changed files with 112 additions and 76 deletions.
76 changes: 0 additions & 76 deletions pandas/tests/series/test_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -602,82 +602,6 @@ def f():
ordered=True))
tm.assert_series_equal(result, expected)

def test_str_accessor_api_for_categorical(self):
    """
    Check that `.str` on a categorical Series mirrors the plain Series.

    Every public attribute listed by ``dir(s.str)`` (apart from the
    explicitly ignored ones) is called on both the categorical Series and
    the string Series it was created from, and the two results are
    compared. Afterwards, accessing `.str` on a categorical built from
    integers is expected to raise ``AttributeError``.
    """
    # https://github.com/pandas-dev/pandas/issues/10661
    from pandas.core.strings import StringMethods
    s = Series(list('aabb'))
    s = s + " " + s
    c = s.astype('category')
    assert isinstance(c.str, StringMethods)

    # str functions, which need special arguments;
    # each entry is (method name, positional args, keyword args)
    special_func_defs = [
        ('cat', (list("zyxw"),), {"sep": ","}),
        ('center', (10,), {}),
        ('contains', ("a",), {}),
        ('count', ("a",), {}),
        ('decode', ("UTF-8",), {}),
        ('encode', ("UTF-8",), {}),
        ('endswith', ("a",), {}),
        ('extract', ("([a-z]*) ",), {"expand": False}),
        ('extract', ("([a-z]*) ",), {"expand": True}),
        ('extractall', ("([a-z]*) ",), {}),
        ('find', ("a",), {}),
        ('findall', ("a",), {}),
        ('index', (" ",), {}),
        ('ljust', (10,), {}),
        # NOTE: args must be a real 1-tuple; a bare ("a") is just the
        # string "a" and only worked because *"a" unpacks to one char
        ('match', ("a",), {}),  # deprecated...
        ('normalize', ("NFC",), {}),
        ('pad', (10,), {}),
        ('partition', (" ",), {"expand": False}),  # not default
        ('partition', (" ",), {"expand": True}),  # default
        ('repeat', (3,), {}),
        ('replace', ("a", "z"), {}),
        ('rfind', ("a",), {}),
        ('rindex', (" ",), {}),
        ('rjust', (10,), {}),
        ('rpartition', (" ",), {"expand": False}),  # not default
        ('rpartition', (" ",), {"expand": True}),  # default
        ('slice', (0, 1), {}),
        ('slice_replace', (0, 1, "z"), {}),
        ('split', (" ",), {"expand": False}),  # default
        ('split', (" ",), {"expand": True}),  # not default
        ('startswith', ("a",), {}),
        ('wrap', (2,), {}),
        ('zfill', (10,), {})
    ]
    _special_func_names = [f[0] for f in special_func_defs]

    # * get, join: they need individual elements of type list, but
    #   we can't make a categorical with lists as individual categories.
    #   -> `s.str.split(" ").astype("category")` will error!
    # * `translate` has different interfaces for py2 vs. py3
    _ignore_names = ["get", "join", "translate"]

    # all remaining public attributes are called without any arguments
    str_func_names = [f for f in dir(s.str) if not (
        f.startswith("_") or
        f in _special_func_names or
        f in _ignore_names)]

    func_defs = [(f, (), {}) for f in str_func_names]
    func_defs.extend(special_func_defs)

    for func, args, kwargs in func_defs:
        res = getattr(c.str, func)(*args, **kwargs)
        exp = getattr(s.str, func)(*args, **kwargs)

        # pick the comparison helper matching the type of the result
        if isinstance(res, DataFrame):
            tm.assert_frame_equal(res, exp)
        else:
            tm.assert_series_equal(res, exp)

    invalid = Series([1, 2, 3]).astype('category')
    msg = "Can only use .str accessor with string"

    with pytest.raises(AttributeError, match=msg):
        invalid.str
    assert not hasattr(invalid, 'str')

def test_dt_accessor_api_for_categorical(self):
# https://github.com/pandas-dev/pandas/issues/10661
from pandas.core.indexes.accessors import Properties
Expand Down
112 changes: 112 additions & 0 deletions pandas/tests/test_strings.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,98 @@ def assert_series_or_index_equal(left, right):
assert_index_equal(left, right)


# Sample invocations of the `StringMethods` methods. Every entry is a
# `(method_name, args, kwargs)` triple; a method may appear more than once
# when several argument combinations are of interest.
_any_string_method = [
    ('cat', (), {'sep': ','}),
    ('cat', (Series(list('zyx')),), {'sep': ',', 'join': 'left'}),
    ('center', (10,), {}),
    ('contains', ('a',), {}),
    ('count', ('a',), {}),
    ('decode', ('UTF-8',), {}),
    ('encode', ('UTF-8',), {}),
    ('endswith', ('a',), {}),
    ('extract', ('([a-z]*)',), {'expand': False}),
    ('extract', ('([a-z]*)',), {'expand': True}),
    ('extractall', ('([a-z]*)',), {}),
    ('find', ('a',), {}),
    ('findall', ('a',), {}),
    ('get', (0,), {}),
    # because "index" (and "rindex") fail intentionally
    # if the string is not found, search only for empty string
    ('index', ('',), {}),
    ('join', (',',), {}),
    ('ljust', (10,), {}),
    ('match', ('a',), {}),
    ('normalize', ('NFC',), {}),
    ('pad', (10,), {}),
    ('partition', (' ',), {'expand': False}),
    ('partition', (' ',), {'expand': True}),
    ('repeat', (3,), {}),
    ('replace', ('a', 'z',), {}),
    ('rfind', ('a',), {}),
    ('rindex', ('',), {}),
    ('rjust', (10,), {}),
    ('rpartition', (' ',), {'expand': False}),
    ('rpartition', (' ',), {'expand': True}),
    ('slice', (0, 1,), {}),
    ('slice_replace', (0, 1, 'z',), {}),
    ('split', (' ',), {'expand': False}),
    ('split', (' ',), {'expand': True}),
    ('startswith', ('a',), {}),
    # translating unicode points of "a" to "d"
    ('translate', ({97: 100},), {}),
    ('wrap', (2,), {}),
    ('zfill', (10,), {}),
] + [
    # methods without positional arguments: pair each name with an empty
    # tuple and its *own* empty dict, so that entries never share a mutable
    # kwargs object and the list can grow without hitting a length cap
    (name, (), {}) for name in [
        'capitalize', 'cat', 'get_dummies',
        'isalnum', 'isalpha', 'isdecimal',
        'isdigit', 'islower', 'isnumeric',
        'isspace', 'istitle', 'isupper',
        'len', 'lower', 'lstrip', 'partition',
        'rpartition', 'rsplit', 'rstrip',
        'slice', 'slice_replace', 'split',
        'strip', 'swapcase', 'title', 'upper',
    ]
]
ids, _, _ = zip(*_any_string_method)  # use method name as fixture-id


# Import-time sanity check: every public attribute of StringMethods has to
# appear among the fixture ids; whatever is left over is not covered.
missing_methods = set(dir(strings.StringMethods)).difference(ids)
missing_methods = {m for m in missing_methods if not m.startswith('_')}
assert not missing_methods


@pytest.fixture(params=_any_string_method, ids=ids)
def any_string_method(request):
    """
    Fixture for all public methods of `StringMethods`

    This fixture returns a tuple of the method name and sample arguments
    necessary to call the method. It is parametrized over the entries of
    `_any_string_method`, with the method names used as fixture ids.

    Returns
    -------
    method_name : str
        The name of the method in `StringMethods`
    args : tuple
        Sample values for the positional arguments
    kwargs : dict
        Sample values for the keyword arguments

    Examples
    --------
    >>> def test_something(any_string_method):
    ...     s = pd.Series(['a', 'b', np.nan, 'd'])
    ...
    ...     method_name, args, kwargs = any_string_method
    ...     method = getattr(s.str, method_name)
    ...     # will not raise
    ...     method(*args, **kwargs)
    """
    # the current (method_name, args, kwargs) entry is passed through as-is
    return request.param


class TestStringMethods(object):

def test_api(self):
Expand All @@ -40,6 +132,26 @@ def test_api(self):
invalid.str
assert not hasattr(invalid, 'str')

def test_api_for_categorical(self, any_string_method):
    """
    Compare a `.str` method on a categorical Series with the plain Series.

    The method name and its sample arguments come from the
    `any_string_method` fixture; the same call is made on both Series and
    the outcomes must match.
    """
    # https://github.com/pandas-dev/pandas/issues/10661
    letters = Series(list('aabb'))
    plain = letters + " " + letters
    categorical = plain.astype('category')
    assert isinstance(categorical.str, strings.StringMethods)

    method_name, args, kwargs = any_string_method

    def call_on(obj):
        # invoke the method under test through the `.str` accessor of obj
        return getattr(obj.str, method_name)(*args, **kwargs)

    result = call_on(categorical)
    expected = call_on(plain)

    if isinstance(result, DataFrame):
        tm.assert_frame_equal(result, expected)
        return
    if isinstance(result, Series):
        tm.assert_series_equal(result, expected)
        return
    # str.cat(others=None) returns string, for example
    assert result == expected

def test_iter(self):
# GH3638
strs = 'google', 'wikimedia', 'wikipedia', 'wikitravel'
Expand Down

0 comments on commit a884734

Please sign in to comment.