smoothing: smoothed values for scale means

Crunch-io · Sep 23, 2020 · 4a41387 · 4a41387
1 parent b3fbd03
commit 4a41387
Show file tree

Hide file tree

Showing 5 changed files with 49 additions and 7 deletions.
diff --git a/src/cr/cube/cubepart.py b/src/cr/cube/cubepart.py
@@ -516,12 +516,19 @@ def scale_mean_pairwise_indices_alt(self):
 
     @lazyproperty
     def scale_means_column(self):
+        """1D float64 ndarray of column scale means
+
+        Sum of the numeric values weighted by the row_proportions, divided by
+        the sum of the row_proportions of the columns having a numeric value.
+        """
         if np.all(np.isnan(self._columns_dimension_numeric_values)):
             return None
 
-        inner = np.nansum(self._columns_dimension_numeric_values * self.counts, axis=1)
+        inner = np.nansum(
+            self._columns_dimension_numeric_values * self.row_proportions, axis=1
+        )
         not_a_nan_index = ~np.isnan(self._columns_dimension_numeric_values)
-        denominator = np.sum(self.counts[:, not_a_nan_index], axis=1)
+        denominator = np.sum(self.row_proportions[:, not_a_nan_index], axis=1)
         return inner / denominator
 
     @lazyproperty
@@ -542,13 +549,19 @@ def scale_means_columns_margin(self):
 
     @lazyproperty
     def scale_means_row(self):
+        """1D float64 ndarray of row scale means
+
+        Sum of the numeric values weighted by the column_proportions, divided
+        by the sum of the column_proportions of the rows having a numeric value.
+        """
         if np.all(np.isnan(self._rows_dimension_numeric_values)):
             return None
         inner = np.nansum(
-            self._rows_dimension_numeric_values[:, None] * self.counts, axis=0
+            self._rows_dimension_numeric_values[:, None] * self.column_proportions,
+            axis=0,
         )
         not_a_nan_index = ~np.isnan(self._rows_dimension_numeric_values)
-        denominator = np.sum(self.counts[not_a_nan_index, :], axis=0)
+        denominator = np.sum(self.column_proportions[not_a_nan_index, :], axis=0)
         return inner / denominator
 
     @lazyproperty

diff --git a/tests/expectations/ca-subvar-ca-cat-x-cat-date-scale-means-w3.py b/tests/expectations/ca-subvar-ca-cat-x-cat-date-scale-means-w3.py
@@ -0,0 +1 @@
+[float("NaN"), float("NaN"), 2.71218211, 2.7578529]
diff --git a/tests/expectations/cat-x-cat-date-smoothed-scale-means-w3.py b/tests/expectations/cat-x-cat-date-smoothed-scale-means-w3.py
@@ -0,0 +1 @@
+[float("NaN"), float("NaN"), 2.173158, 1.967011]
diff --git a/tests/integration/test_scale_means.py b/tests/integration/test_scale_means.py
@@ -443,14 +443,14 @@ def test_var_scale_means_for_ca_itmes_x_cat():
 
     # Testing that the scale means (row and col) are equal on the 2 diverse
     # datasets
-    np.testing.assert_array_equal(
+    np.testing.assert_array_almost_equal(
         slice_.var_scale_means_column, slice2_.var_scale_means_row
     )
 
-    np.testing.assert_almost_equal(
+    np.testing.assert_array_almost_equal(
         slice2_.var_scale_means_row, [2.56410909, 5.17893869, 4.75445248, 4.81611278]
     )
-    np.testing.assert_almost_equal(
+    np.testing.assert_array_almost_equal(
         slice_.var_scale_means_column, [2.56410909, 5.17893869, 4.75445248, 4.81611278]
     )
 

diff --git a/tests/integration/test_smoothing.py b/tests/integration/test_smoothing.py
@@ -49,6 +49,33 @@ def it_provides_smoothed_col_pct_for_compatible_cubes(
             slice_.column_percentages, load_python_expression(expectation)
         )
 
+    @pytest.mark.parametrize(
+        "fixture, window, expectation",
+        (
+            (CR.CAT_X_CAT_DATE, 3, "cat-x-cat-date-smoothed-scale-means-w3"),
+            (CR.CAT_X_CAT_DATE_WGTD, 3, "cat-x-cat-date-smoothed-scale-means-w3"),
+            (
+                CR.CA_SUBVAR_X_CA_CAT_X_CAT_DATE,
+                3,
+                "ca-subvar-ca-cat-x-cat-date-scale-means-w3",
+            ),
+        ),
+    )
+    def it_provides_smoothed_scale_means_for_compatible_cubes(
+        self, fixture, window, expectation
+    ):
+        transforms = {
+            "smoothing": {
+                "method": "one_side_moving_avg",
+                "window": window,
+                "show": True,
+            }
+        }
+        slice_ = Cube(fixture, transforms=transforms).partitions[0]
+        np.testing.assert_array_almost_equal(
+            slice_.scale_means_row, load_python_expression(expectation)
+        )
+
     @pytest.mark.parametrize(
         "fixture, expectation",
         (