REF: de-duplicate check_reduce_frame (pandas-dev#54393)

MichaelTiemannOSC · Aug 4, 2023 · 92d1d6a
1 parent 6ffa4b7
commit 92d1d6a
Show file tree

Hide file tree

Showing 6 changed files with 70 additions and 78 deletions.
diff --git a/pandas/tests/extension/base/reduce.py b/pandas/tests/extension/base/reduce.py
@@ -1,3 +1,4 @@
+from typing import final
 import warnings
 
 import pytest
@@ -15,6 +16,9 @@ class BaseReduceTests(BaseExtensionTests):
     """
 
     def check_reduce(self, s, op_name, skipna):
+        # We perform the same operation on the data cast to np.float64 and
+        #  check that the results match. Override if you need to cast to
+        #  something other than float64.
         res_op = getattr(s, op_name)
         exp_op = getattr(s.astype("float64"), op_name)
         if op_name == "count":
@@ -25,6 +29,43 @@ def check_reduce(self, s, op_name, skipna):
             expected = exp_op(skipna=skipna)
         tm.assert_almost_equal(result, expected)
 
+    def _get_expected_reduction_dtype(self, arr, op_name: str):
+        # Find the expected dtype when the given reduction is done on a DataFrame
+        # column with this array.  The default assumes float64-like behavior,
+        # i.e. retains the dtype.
+        return arr.dtype
+
+    # We anticipate that authors should not need to override check_reduce_frame,
+    #  but should be able to do any necessary overriding in
+    #  _get_expected_reduction_dtype. If you have a use case where this
+    #  does not hold, please let us know at github.com/pandas-dev/pandas/issues.
+    @final
+    def check_reduce_frame(self, ser: pd.Series, op_name: str, skipna: bool):
+        # Check that the 2D reduction done in a DataFrame reduction "looks like"
+        # a wrapped version of the 1D reduction done by Series.
+        arr = ser.array
+        df = pd.DataFrame({"a": arr})
+
+        kwargs = {"ddof": 1} if op_name in ["var", "std"] else {}
+
+        cmp_dtype = self._get_expected_reduction_dtype(arr, op_name)
+
+        # The DataFrame method just calls arr._reduce with keepdims=True,
+        #  so this first check is perfunctory.
+        result1 = arr._reduce(op_name, skipna=skipna, keepdims=True, **kwargs)
+        result2 = getattr(df, op_name)(skipna=skipna, **kwargs).array
+        tm.assert_extension_array_equal(result1, result2)
+
+        # Check that the 2D reduction looks like a wrapped version of the
+        #  1D reduction
+        if not skipna and ser.isna().any():
+            expected = pd.array([pd.NA], dtype=cmp_dtype)
+        else:
+            exp_value = getattr(ser.dropna(), op_name)()
+            expected = pd.array([exp_value], dtype=cmp_dtype)
+
+        tm.assert_extension_array_equal(result1, expected)
+
 
 class BaseNoReduceTests(BaseReduceTests):
     """we don't define any reductions"""
@@ -71,9 +112,12 @@ def test_reduce_series(self, data, all_numeric_reductions, skipna):
     def test_reduce_frame(self, data, all_numeric_reductions, skipna):
         op_name = all_numeric_reductions
         s = pd.Series(data)
-        if not is_numeric_dtype(s):
+        if not is_numeric_dtype(s.dtype):
             pytest.skip("not numeric dtype")
 
+        if op_name in ["count", "kurt", "sem"]:
+            pytest.skip(f"{op_name} not an array method")
+
         self.check_reduce_frame(s, op_name, skipna)
 
 

diff --git a/pandas/tests/extension/decimal/test_decimal.py b/pandas/tests/extension/decimal/test_decimal.py
@@ -160,27 +160,6 @@ def check_reduce(self, s, op_name, skipna):
             expected = getattr(np.asarray(s), op_name)()
             tm.assert_almost_equal(result, expected)
 
-    def check_reduce_frame(self, ser: pd.Series, op_name: str, skipna: bool):
-        arr = ser.array
-        df = pd.DataFrame({"a": arr})
-
-        if op_name in ["count", "kurt", "sem", "skew", "median"]:
-            assert not hasattr(arr, op_name)
-            pytest.skip(f"{op_name} not an array method")
-
-        result1 = arr._reduce(op_name, skipna=skipna, keepdims=True)
-        result2 = getattr(df, op_name)(skipna=skipna).array
-
-        tm.assert_extension_array_equal(result1, result2)
-
-        if not skipna and ser.isna().any():
-            expected = DecimalArray([pd.NA])
-        else:
-            exp_value = getattr(ser.dropna(), op_name)()
-            expected = DecimalArray([exp_value])
-
-        tm.assert_extension_array_equal(result1, expected)
-
     def test_reduction_without_keepdims(self):
         # GH52788
         # test _reduce without keepdims
@@ -205,7 +184,14 @@ def _reduce(self, name: str, *, skipna: bool = True, **kwargs):
 
 
 class TestNumericReduce(Reduce, base.BaseNumericReduceTests):
-    pass
+    @pytest.mark.parametrize("skipna", [True, False])
+    def test_reduce_frame(self, data, all_numeric_reductions, skipna):
+        op_name = all_numeric_reductions
+        if op_name in ["skew", "median"]:
+            assert not hasattr(data, op_name)
+            pytest.skip(f"{op_name} not an array method")
+
+        return super().test_reduce_frame(data, all_numeric_reductions, skipna)
 
 
 class TestBooleanReduce(Reduce, base.BaseBooleanReduceTests):

diff --git a/pandas/tests/extension/test_arrow.py b/pandas/tests/extension/test_arrow.py
@@ -499,15 +499,7 @@ def test_reduce_series(self, data, all_numeric_reductions, skipna, request):
             request.node.add_marker(xfail_mark)
         super().test_reduce_series(data, all_numeric_reductions, skipna)
 
-    def check_reduce_frame(self, ser, op_name, skipna):
-        arr = ser.array
-
-        if op_name in ["count", "kurt", "sem", "skew"]:
-            assert not hasattr(arr, op_name)
-            return
-
-        kwargs = {"ddof": 1} if op_name in ["var", "std"] else {}
-
+    def _get_expected_reduction_dtype(self, arr, op_name: str):
         if op_name in ["max", "min"]:
             cmp_dtype = arr.dtype
         elif arr.dtype.name == "decimal128(7, 3)[pyarrow]":
@@ -523,15 +515,15 @@ def check_reduce_frame(self, ser, op_name, skipna):
                 "u": "uint64[pyarrow]",
                 "f": "float64[pyarrow]",
             }[arr.dtype.kind]
-        result = arr._reduce(op_name, skipna=skipna, keepdims=True, **kwargs)
+        return cmp_dtype
 
-        if not skipna and ser.isna().any():
-            expected = pd.array([pd.NA], dtype=cmp_dtype)
-        else:
-            exp_value = getattr(ser.dropna().astype(cmp_dtype), op_name)(**kwargs)
-            expected = pd.array([exp_value], dtype=cmp_dtype)
-
-        tm.assert_extension_array_equal(result, expected)
+    @pytest.mark.parametrize("skipna", [True, False])
+    def test_reduce_frame(self, data, all_numeric_reductions, skipna):
+        op_name = all_numeric_reductions
+        if op_name == "skew":
+            assert not hasattr(data, op_name)
+            return
+        return super().test_reduce_frame(data, all_numeric_reductions, skipna)
 
     @pytest.mark.parametrize("typ", ["int64", "uint64", "float64"])
     def test_median_not_approximate(self, typ):

diff --git a/pandas/tests/extension/test_boolean.py b/pandas/tests/extension/test_boolean.py
@@ -235,13 +235,7 @@ def check_reduce(self, s, op_name, skipna):
             expected = bool(expected)
         tm.assert_almost_equal(result, expected)
 
-    def check_reduce_frame(self, ser: pd.Series, op_name: str, skipna: bool):
-        arr = ser.array
-
-        if op_name in ["count", "kurt", "sem"]:
-            assert not hasattr(arr, op_name)
-            pytest.skip(f"{op_name} not an array method")
-
+    def _get_expected_reduction_dtype(self, arr, op_name: str):
         if op_name in ["mean", "median", "var", "std", "skew"]:
             cmp_dtype = "Float64"
         elif op_name in ["min", "max"]:
@@ -251,14 +245,7 @@ def check_reduce_frame(self, ser: pd.Series, op_name: str, skipna: bool):
             cmp_dtype = "Int32" if is_windows_or_32bit else "Int64"
         else:
             raise TypeError("not supposed to reach this")
-
-        result = arr._reduce(op_name, skipna=skipna, keepdims=True)
-        if not skipna and ser.isna().any():
-            expected = pd.array([pd.NA], dtype=cmp_dtype)
-        else:
-            exp_value = getattr(ser.dropna().astype(cmp_dtype), op_name)()
-            expected = pd.array([exp_value], dtype=cmp_dtype)
-        tm.assert_extension_array_equal(result, expected)
+        return cmp_dtype
 
 
 class TestBooleanReduce(base.BaseBooleanReduceTests):

diff --git a/pandas/tests/extension/test_masked_numeric.py b/pandas/tests/extension/test_masked_numeric.py
@@ -39,6 +39,8 @@
 )
 from pandas.tests.extension import base
 
+is_windows_or_32bit = is_platform_windows() or not IS64
+
 pytestmark = [
     pytest.mark.filterwarnings(
         "ignore:invalid value encountered in divide:RuntimeWarning"
@@ -246,16 +248,7 @@ def check_reduce(self, ser: pd.Series, op_name: str, skipna: bool):
                 expected = pd.NA
         tm.assert_almost_equal(result, expected)
 
-    def check_reduce_frame(self, ser: pd.Series, op_name: str, skipna: bool):
-        if op_name in ["count", "kurt", "sem"]:
-            assert not hasattr(ser.array, op_name)
-            pytest.skip(f"{op_name} not an array method")
-
-        arr = ser.array
-        df = pd.DataFrame({"a": arr})
-
-        is_windows_or_32bit = is_platform_windows() or not IS64
-
+    def _get_expected_reduction_dtype(self, arr, op_name: str):
         if tm.is_float_dtype(arr.dtype):
             cmp_dtype = arr.dtype.name
         elif op_name in ["mean", "median", "var", "std", "skew"]:
@@ -270,18 +263,7 @@ def check_reduce_frame(self, ser: pd.Series, op_name: str, skipna: bool):
             cmp_dtype = "UInt32" if is_windows_or_32bit else "UInt64"
         else:
             raise TypeError("not supposed to reach this")
-
-        if not skipna and ser.isna().any():
-            expected = pd.array([pd.NA], dtype=cmp_dtype)
-        else:
-            exp_value = getattr(ser.dropna().astype(cmp_dtype), op_name)()
-            expected = pd.array([exp_value], dtype=cmp_dtype)
-
-        result1 = arr._reduce(op_name, skipna=skipna, keepdims=True)
-        result2 = getattr(df, op_name)(skipna=skipna).array
-
-        tm.assert_extension_array_equal(result1, result2)
-        tm.assert_extension_array_equal(result2, expected)
+        return cmp_dtype
 
 
 @pytest.mark.skip(reason="Tested in tests/reductions/test_reductions.py")

diff --git a/pandas/tests/extension/test_numpy.py b/pandas/tests/extension/test_numpy.py
@@ -311,7 +311,8 @@ def check_reduce(self, s, op_name, skipna):
         tm.assert_almost_equal(result, expected)
 
     @pytest.mark.skip("tests not written yet")
-    def check_reduce_frame(self, ser: pd.Series, op_name: str, skipna: bool):
+    @pytest.mark.parametrize("skipna", [True, False])
+    def test_reduce_frame(self, data, all_numeric_reductions, skipna):
         pass
 
     @pytest.mark.parametrize("skipna", [True, False])