Skip to content

Commit

Permalink
Merge remote-tracking branch 'upstream/master' into ea-take
Browse files Browse the repository at this point in the history
  • Loading branch information
TomAugspurger committed Apr 27, 2018
2 parents 9a6c7d4 + 6cacdde commit 7c4f625
Show file tree
Hide file tree
Showing 6 changed files with 83 additions and 32 deletions.
26 changes: 11 additions & 15 deletions pandas/core/arrays/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,10 +38,9 @@ class ExtensionArray(object):
* copy
* _concat_same_type
Some additional methods are available to satisfy pandas' internal, private
block API:
An additional method is available to satisfy pandas' internal,
private block API.
* _can_hold_na
* _formatting_values
Some methods require casting the ExtensionArray to an ndarray of Python
Expand Down Expand Up @@ -399,7 +398,8 @@ def _values_for_factorize(self):
Returns
-------
values : ndarray
An array suitable for factoraization. This should maintain order
An array suitable for factorization. This should maintain order
and be a supported dtype (Float64, Int64, UInt64, String, Object).
By default, the extension array is cast to object dtype.
na_value : object
Expand All @@ -422,7 +422,7 @@ def factorize(self, na_sentinel=-1):
Returns
-------
labels : ndarray
An interger NumPy array that's an indexer into the original
An integer NumPy array that's an indexer into the original
ExtensionArray.
uniques : ExtensionArray
An ExtensionArray containing the unique values of `self`.
Expand Down Expand Up @@ -588,16 +588,12 @@ def _concat_same_type(cls, to_concat):
"""
raise AbstractMethodError(cls)

@property
def _can_hold_na(self):
# type: () -> bool
"""Whether your array can hold missing values. True by default.
Notes
-----
Setting this to false will optimize some operations like fillna.
"""
return True
# The _can_hold_na attribute is set to True so that pandas internals
# will use the ExtensionDtype.na_value as the NA value in operations
# such as take(), reindex(), shift(), etc. In addition, those results
# will then be of the ExtensionArray subclass rather than an array
# of objects
_can_hold_na = True

@property
def _ndarray_values(self):
Expand Down
3 changes: 2 additions & 1 deletion pandas/tests/extension/base/interface.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,8 @@ def test_ndim(self, data):
assert data.ndim == 1

def test_can_hold_na_valid(self, data):
assert data._can_hold_na in {True, False}
# GH-20761
assert data._can_hold_na is True

def test_memory_usage(self, data):
s = pd.Series(data)
Expand Down
5 changes: 1 addition & 4 deletions pandas/tests/extension/base/missing.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,10 +9,7 @@

class BaseMissingTests(BaseExtensionTests):
def test_isna(self, data_missing):
if data_missing._can_hold_na:
expected = np.array([True, False])
else:
expected = np.array([False, False])
expected = np.array([True, False])

result = pd.isna(data_missing)
tm.assert_numpy_array_equal(result, expected)
Expand Down
2 changes: 1 addition & 1 deletion pandas/tests/extension/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ def na_cmp():
Should return a function of two arguments that returns
True if both arguments are (scalar) NA for your type.
By default, uses ``operator.or``
By default, uses ``operator.is_``
"""
return operator.is_

Expand Down
6 changes: 6 additions & 0 deletions pandas/tests/extension/json/array.py
Original file line number Diff line number Diff line change
Expand Up @@ -140,6 +140,12 @@ def take(self, indexer, allow_fill=False, fill_value=None):
def copy(self, deep=False):
return type(self)(self.data[:])

def astype(self, dtype, copy=True):
# NumPy has issues when all the dicts are the same length.
# np.array([UserDict(...), UserDict(...)]) fails,
# but np.array([{...}, {...}]) works, so cast.
return np.array([dict(x) for x in self], dtype=dtype, copy=copy)

def unique(self):
# Parent method doesn't work since np.array will try to infer
# a 2-dim object.
Expand Down
73 changes: 62 additions & 11 deletions pandas/tests/extension/json/test_json.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
import operator
import collections

import pytest


import pandas as pd
import pandas.util.testing as tm
from pandas.compat import PY2, PY36
from pandas.tests.extension import base

Expand Down Expand Up @@ -59,27 +61,76 @@ def data_for_grouping():
])


class TestDtype(base.BaseDtypeTests):
class BaseJSON(object):
# NumPy doesn't handle an array of equal-length UserDicts.
# The default assert_series_equal eventually does a
# Series.values, which raises. We work around it by
# converting the UserDicts to dicts.
def assert_series_equal(self, left, right, **kwargs):
if left.dtype.name == 'json':
assert left.dtype == right.dtype
left = pd.Series(JSONArray(left.values.astype(object)),
index=left.index, name=left.name)
right = pd.Series(JSONArray(right.values.astype(object)),
index=right.index, name=right.name)
tm.assert_series_equal(left, right, **kwargs)

def assert_frame_equal(self, left, right, *args, **kwargs):
tm.assert_index_equal(
left.columns, right.columns,
exact=kwargs.get('check_column_type', 'equiv'),
check_names=kwargs.get('check_names', True),
check_exact=kwargs.get('check_exact', False),
check_categorical=kwargs.get('check_categorical', True),
obj='{obj}.columns'.format(obj=kwargs.get('obj', 'DataFrame')))

jsons = (left.dtypes == 'json').index

for col in jsons:
self.assert_series_equal(left[col], right[col],
*args, **kwargs)

left = left.drop(columns=jsons)
right = right.drop(columns=jsons)
tm.assert_frame_equal(left, right, *args, **kwargs)


class TestDtype(BaseJSON, base.BaseDtypeTests):
pass


class TestInterface(base.BaseInterfaceTests):
pass
class TestInterface(BaseJSON, base.BaseInterfaceTests):
def test_custom_asserts(self):
# This would always trigger the KeyError from trying to put
# an array of equal-length UserDicts inside an ndarray.
data = JSONArray([collections.UserDict({'a': 1}),
collections.UserDict({'b': 2}),
collections.UserDict({'c': 3})])
a = pd.Series(data)
self.assert_series_equal(a, a)
self.assert_frame_equal(a.to_frame(), a.to_frame())

b = pd.Series(data.take([0, 0, 1]))
with pytest.raises(AssertionError):
self.assert_series_equal(a, b)

with pytest.raises(AssertionError):
self.assert_frame_equal(a.to_frame(), b.to_frame())


class TestConstructors(base.BaseConstructorsTests):
class TestConstructors(BaseJSON, base.BaseConstructorsTests):
pass


class TestReshaping(base.BaseReshapingTests):
class TestReshaping(BaseJSON, base.BaseReshapingTests):
pass


class TestGetitem(base.BaseGetitemTests):
class TestGetitem(BaseJSON, base.BaseGetitemTests):
pass


class TestMissing(base.BaseMissingTests):
class TestMissing(BaseJSON, base.BaseMissingTests):
@pytest.mark.xfail(reason="Setting a dict as a scalar")
def test_fillna_series(self):
"""We treat dictionaries as a mapping in fillna, not a scalar."""
Expand All @@ -94,7 +145,7 @@ def test_fillna_frame(self):
reason="Dictionary order unstable")


class TestMethods(base.BaseMethodsTests):
class TestMethods(BaseJSON, base.BaseMethodsTests):
@unhashable
def test_value_counts(self, all_data, dropna):
pass
Expand Down Expand Up @@ -126,7 +177,7 @@ def test_sort_values_missing(self, data_missing_for_sorting, ascending):
data_missing_for_sorting, ascending)


class TestCasting(base.BaseCastingTests):
class TestCasting(BaseJSON, base.BaseCastingTests):
@pytest.mark.xfail
def test_astype_str(self):
"""This currently fails in NumPy on np.array(self, dtype=str) with
Expand All @@ -139,7 +190,7 @@ def test_astype_str(self):
# internals has trouble setting sequences of values into scalar positions.


class TestGroupby(base.BaseGroupbyTests):
class TestGroupby(BaseJSON, base.BaseGroupbyTests):

@unhashable
def test_groupby_extension_transform(self):
Expand Down

0 comments on commit 7c4f625

Please sign in to comment.