Skip to content

Commit

Permalink
REF: de-duplicate check_reduce_frame (pandas-dev#54393)
Browse files Browse the repository at this point in the history
  • Loading branch information
jbrockmendel committed Aug 4, 2023
1 parent 6ffa4b7 commit 92d1d6a
Show file tree
Hide file tree
Showing 6 changed files with 70 additions and 78 deletions.
46 changes: 45 additions & 1 deletion pandas/tests/extension/base/reduce.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
from typing import final
import warnings

import pytest
Expand All @@ -15,6 +16,9 @@ class BaseReduceTests(BaseExtensionTests):
"""

def check_reduce(self, s, op_name, skipna):
# We perform the same operation on the np.float64 data and check
# that the results match. Override if you need to cast to something
# other than float64.
res_op = getattr(s, op_name)
exp_op = getattr(s.astype("float64"), op_name)
if op_name == "count":
Expand All @@ -25,6 +29,43 @@ def check_reduce(self, s, op_name, skipna):
expected = exp_op(skipna=skipna)
tm.assert_almost_equal(result, expected)

def _get_expected_reduction_dtype(self, arr, op_name: str):
    """
    Return the dtype expected when ``op_name`` reduces a DataFrame column
    backed by ``arr``.

    This default hands back ``arr.dtype`` unchanged, i.e. it assumes the
    reduction retains the dtype (float64-like behavior). ``op_name`` is not
    consulted by this default implementation.
    """
    expected_dtype = arr.dtype
    return expected_dtype

# We anticipate that authors should not need to override check_reduce_frame,
# but should be able to do any necessary overriding in
# _get_expected_reduction_dtype. If you have a use case where this
# does not hold, please let us know at github.com/pandas-dev/pandas/issues.
@final
def check_reduce_frame(self, ser: pd.Series, op_name: str, skipna: bool):
# Check that the 2D reduction done in a DataFrame reduction "looks like"
# a wrapped version of the 1D reduction done by Series.
arr = ser.array
df = pd.DataFrame({"a": arr})

kwargs = {"ddof": 1} if op_name in ["var", "std"] else {}

cmp_dtype = self._get_expected_reduction_dtype(arr, op_name)

# The DataFrame method just calls arr._reduce with keepdims=True,
# so this first check is perfunctory.
result1 = arr._reduce(op_name, skipna=skipna, keepdims=True, **kwargs)
result2 = getattr(df, op_name)(skipna=skipna, **kwargs).array
tm.assert_extension_array_equal(result1, result2)

# Check that the 2D reduction looks like a wrapped version of the
# 1D reduction
if not skipna and ser.isna().any():
expected = pd.array([pd.NA], dtype=cmp_dtype)
else:
exp_value = getattr(ser.dropna(), op_name)()
expected = pd.array([exp_value], dtype=cmp_dtype)

tm.assert_extension_array_equal(result1, expected)


class BaseNoReduceTests(BaseReduceTests):
"""we don't define any reductions"""
Expand Down Expand Up @@ -71,9 +112,12 @@ def test_reduce_series(self, data, all_numeric_reductions, skipna):
def test_reduce_frame(self, data, all_numeric_reductions, skipna):
op_name = all_numeric_reductions
s = pd.Series(data)
if not is_numeric_dtype(s):
if not is_numeric_dtype(s.dtype):
pytest.skip("not numeric dtype")

if op_name in ["count", "kurt", "sem"]:
pytest.skip(f"{op_name} not an array method")

self.check_reduce_frame(s, op_name, skipna)


Expand Down
30 changes: 8 additions & 22 deletions pandas/tests/extension/decimal/test_decimal.py
Original file line number Diff line number Diff line change
Expand Up @@ -160,27 +160,6 @@ def check_reduce(self, s, op_name, skipna):
expected = getattr(np.asarray(s), op_name)()
tm.assert_almost_equal(result, expected)

def check_reduce_frame(self, ser: pd.Series, op_name: str, skipna: bool):
    """
    Compare the keepdims array reduction with the DataFrame reduction and
    with the Series reduction wrapped in a one-element ``DecimalArray``.

    Reductions listed as not being array methods are asserted to be absent
    from the array and then skipped.
    """
    values = ser.array
    frame = pd.DataFrame({"a": values})

    not_array_methods = ["count", "kurt", "sem", "skew", "median"]
    if op_name in not_array_methods:
        assert not hasattr(values, op_name)
        pytest.skip(f"{op_name} not an array method")

    from_array = values._reduce(op_name, skipna=skipna, keepdims=True)
    from_frame = getattr(frame, op_name)(skipna=skipna).array

    tm.assert_extension_array_equal(from_array, from_frame)

    # With skipna=False and a missing value present, the expected entry is NA;
    # otherwise it is the Series-level reduction over the NA-dropped data.
    if skipna or not ser.isna().any():
        scalar = getattr(ser.dropna(), op_name)()
    else:
        scalar = pd.NA
    wrapped = DecimalArray([scalar])

    tm.assert_extension_array_equal(from_array, wrapped)

def test_reduction_without_keepdims(self):
# GH52788
# test _reduce without keepdims
Expand All @@ -205,7 +184,14 @@ def _reduce(self, name: str, *, skipna: bool = True, **kwargs):


class TestNumericReduce(Reduce, base.BaseNumericReduceTests):
    """Numeric reduction tests, skipping reductions that are not array methods."""

    @pytest.mark.parametrize("skipna", [True, False])
    def test_reduce_frame(self, data, all_numeric_reductions, skipna):
        """Skip ``skew`` and ``median``; defer everything else to the parent test."""
        op_name = all_numeric_reductions
        not_array_method = op_name in ["skew", "median"]
        if not_array_method:
            # Confirm the attribute really is absent before skipping.
            assert not hasattr(data, op_name)
            pytest.skip(f"{op_name} not an array method")

        outcome = super().test_reduce_frame(data, all_numeric_reductions, skipna)
        return outcome


class TestBooleanReduce(Reduce, base.BaseBooleanReduceTests):
Expand Down
26 changes: 9 additions & 17 deletions pandas/tests/extension/test_arrow.py
Original file line number Diff line number Diff line change
Expand Up @@ -499,15 +499,7 @@ def test_reduce_series(self, data, all_numeric_reductions, skipna, request):
request.node.add_marker(xfail_mark)
super().test_reduce_series(data, all_numeric_reductions, skipna)

def check_reduce_frame(self, ser, op_name, skipna):
arr = ser.array

if op_name in ["count", "kurt", "sem", "skew"]:
assert not hasattr(arr, op_name)
return

kwargs = {"ddof": 1} if op_name in ["var", "std"] else {}

def _get_expected_reduction_dtype(self, arr, op_name: str):
if op_name in ["max", "min"]:
cmp_dtype = arr.dtype
elif arr.dtype.name == "decimal128(7, 3)[pyarrow]":
Expand All @@ -523,15 +515,15 @@ def check_reduce_frame(self, ser, op_name, skipna):
"u": "uint64[pyarrow]",
"f": "float64[pyarrow]",
}[arr.dtype.kind]
result = arr._reduce(op_name, skipna=skipna, keepdims=True, **kwargs)
return cmp_dtype

if not skipna and ser.isna().any():
expected = pd.array([pd.NA], dtype=cmp_dtype)
else:
exp_value = getattr(ser.dropna().astype(cmp_dtype), op_name)(**kwargs)
expected = pd.array([exp_value], dtype=cmp_dtype)

tm.assert_extension_array_equal(result, expected)
@pytest.mark.parametrize("skipna", [True, False])
def test_reduce_frame(self, data, all_numeric_reductions, skipna):
    """
    Run the inherited frame-reduction test, except for ``skew``.

    For ``skew`` the array is asserted not to expose the attribute and the
    case is then reported as skipped rather than silently passing, matching
    how the other reduction tests report reductions that are not array
    methods.
    """
    op_name = all_numeric_reductions
    if op_name == "skew":
        assert not hasattr(data, op_name)
        # A bare ``return`` would record this case as passed; skip with a
        # reason so the test report shows it was not actually exercised.
        pytest.skip(f"{op_name} not an array method")
    return super().test_reduce_frame(data, all_numeric_reductions, skipna)

@pytest.mark.parametrize("typ", ["int64", "uint64", "float64"])
def test_median_not_approximate(self, typ):
Expand Down
17 changes: 2 additions & 15 deletions pandas/tests/extension/test_boolean.py
Original file line number Diff line number Diff line change
Expand Up @@ -235,13 +235,7 @@ def check_reduce(self, s, op_name, skipna):
expected = bool(expected)
tm.assert_almost_equal(result, expected)

def check_reduce_frame(self, ser: pd.Series, op_name: str, skipna: bool):
arr = ser.array

if op_name in ["count", "kurt", "sem"]:
assert not hasattr(arr, op_name)
pytest.skip(f"{op_name} not an array method")

def _get_expected_reduction_dtype(self, arr, op_name: str):
if op_name in ["mean", "median", "var", "std", "skew"]:
cmp_dtype = "Float64"
elif op_name in ["min", "max"]:
Expand All @@ -251,14 +245,7 @@ def check_reduce_frame(self, ser: pd.Series, op_name: str, skipna: bool):
cmp_dtype = "Int32" if is_windows_or_32bit else "Int64"
else:
raise TypeError("not supposed to reach this")

result = arr._reduce(op_name, skipna=skipna, keepdims=True)
if not skipna and ser.isna().any():
expected = pd.array([pd.NA], dtype=cmp_dtype)
else:
exp_value = getattr(ser.dropna().astype(cmp_dtype), op_name)()
expected = pd.array([exp_value], dtype=cmp_dtype)
tm.assert_extension_array_equal(result, expected)
return cmp_dtype


class TestBooleanReduce(base.BaseBooleanReduceTests):
Expand Down
26 changes: 4 additions & 22 deletions pandas/tests/extension/test_masked_numeric.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,8 @@
)
from pandas.tests.extension import base

is_windows_or_32bit = is_platform_windows() or not IS64

pytestmark = [
pytest.mark.filterwarnings(
"ignore:invalid value encountered in divide:RuntimeWarning"
Expand Down Expand Up @@ -246,16 +248,7 @@ def check_reduce(self, ser: pd.Series, op_name: str, skipna: bool):
expected = pd.NA
tm.assert_almost_equal(result, expected)

def check_reduce_frame(self, ser: pd.Series, op_name: str, skipna: bool):
if op_name in ["count", "kurt", "sem"]:
assert not hasattr(ser.array, op_name)
pytest.skip(f"{op_name} not an array method")

arr = ser.array
df = pd.DataFrame({"a": arr})

is_windows_or_32bit = is_platform_windows() or not IS64

def _get_expected_reduction_dtype(self, arr, op_name: str):
if tm.is_float_dtype(arr.dtype):
cmp_dtype = arr.dtype.name
elif op_name in ["mean", "median", "var", "std", "skew"]:
Expand All @@ -270,18 +263,7 @@ def check_reduce_frame(self, ser: pd.Series, op_name: str, skipna: bool):
cmp_dtype = "UInt32" if is_windows_or_32bit else "UInt64"
else:
raise TypeError("not supposed to reach this")

if not skipna and ser.isna().any():
expected = pd.array([pd.NA], dtype=cmp_dtype)
else:
exp_value = getattr(ser.dropna().astype(cmp_dtype), op_name)()
expected = pd.array([exp_value], dtype=cmp_dtype)

result1 = arr._reduce(op_name, skipna=skipna, keepdims=True)
result2 = getattr(df, op_name)(skipna=skipna).array

tm.assert_extension_array_equal(result1, result2)
tm.assert_extension_array_equal(result2, expected)
return cmp_dtype


@pytest.mark.skip(reason="Tested in tests/reductions/test_reductions.py")
Expand Down
3 changes: 2 additions & 1 deletion pandas/tests/extension/test_numpy.py
Original file line number Diff line number Diff line change
Expand Up @@ -311,7 +311,8 @@ def check_reduce(self, s, op_name, skipna):
tm.assert_almost_equal(result, expected)

@pytest.mark.skip("tests not written yet")
def check_reduce_frame(self, ser: pd.Series, op_name: str, skipna: bool):
@pytest.mark.parametrize("skipna", [True, False])
def test_reduce_frame(self, data, all_numeric_reductions, skipna):
pass

@pytest.mark.parametrize("skipna", [True, False])
Expand Down

0 comments on commit 92d1d6a

Please sign in to comment.