Merge pull request #173 from Crunch-io/means-with-insertions

fix: means with insertions
Crunch-io · Jul 1, 2019 · f1ad558
2 parents d080f36 + a824ff6
commit f1ad558
Show file tree

Hide file tree

Showing 7 changed files with 590 additions and 503 deletions.
diff --git a/HISTORY.md b/HISTORY.md
@@ -1,5 +1,8 @@
 # History of Changes
 
+#### 1.11.4
+- Fix `means` on a `_Slice` that has subtotals (insertions).
+
 #### 1.10.3
 - Refactor hidden and pruned slices
 

diff --git a/src/cr/cube/__init__.py b/src/cr/cube/__init__.py
@@ -2,6 +2,6 @@
 
 """Initialization module for crunch-cube package."""
 
-__version__ = "1.11.3"
+__version__ = "1.11.4"
 
 # NOTE: We'll be switching to 2.0.0 once we throw out the old cube and slice
diff --git a/src/cr/cube/matrix.py b/src/cr/cube/matrix.py
@@ -1061,6 +1061,14 @@ def column_index(self):
             + tuple([np.nan] * len(self._bottom_values))
         )
 
+    @lazyproperty
+    def means(self):
+        return np.array(
+            tuple([np.nan] * len(self._top_values))
+            + self._interleaved_means
+            + tuple([np.nan] * len(self._bottom_values))
+        )
+
     @lazyproperty
     def proportions(self):
         return self.values / self.margin
@@ -1134,6 +1142,16 @@ def _interleaved_column_index(self):
                     column_index.append(np.nan)
         return tuple(column_index)
 
+    @lazyproperty
+    def _interleaved_means(self):
+        means = []
+        for i, value in enumerate(self._base_vector.means):
+            means.append(value)
+            for inserted_vector in self._opposite_inserted_vectors:
+                if i == inserted_vector.anchor:
+                    means.append(np.nan)
+        return tuple(means)
+
     @lazyproperty
     def _interleaved_pvals(self):
         pvals = []
@@ -1184,11 +1202,12 @@ def _top_values(self):
         )
 
 
-class _BaseVectorAfterHiding(_BaseTransformationVector):
+class _VectorAfterHiding(_BaseTransformationVector):
     """Reflects a row or column with hidden elements removed."""
 
-    def __init__(self, base_vector):
+    def __init__(self, base_vector, opposite_vectors):
         self._base_vector = base_vector
+        self._opposite_vectors = opposite_vectors
 
     @lazyproperty
     def base(self):
@@ -1200,6 +1219,10 @@ def base(self):
     def base_values(self):
         return self._base_vector.base_values[self._visible_element_idxs]
 
+    @lazyproperty
+    def column_index(self):
+        return self._base_vector.column_index[self._visible_element_idxs]
+
     @lazyproperty
     def margin(self):
         if not isinstance(self._base_vector.margin, np.ndarray):
@@ -1210,26 +1233,6 @@ def margin(self):
     def means(self):
         return self._base_vector.means[self._visible_element_idxs]
 
-    @lazyproperty
-    def table_proportions(self):
-        return self._base_vector.table_proportions[self._visible_element_idxs]
-
-    @lazyproperty
-    def values(self):
-        return self._base_vector.values[self._visible_element_idxs]
-
-
-class _VectorAfterHiding(_BaseVectorAfterHiding):
-    """Reflects a row or column with hidden elements removed."""
-
-    def __init__(self, base_vector, opposite_vectors):
-        super(_VectorAfterHiding, self).__init__(base_vector)
-        self._opposite_vectors = opposite_vectors
-
-    @lazyproperty
-    def column_index(self):
-        return self._base_vector.column_index[self._visible_element_idxs]
-
     @lazyproperty
     def proportions(self):
         return self._base_vector.proportions[self._visible_element_idxs]
@@ -1238,6 +1241,14 @@ def proportions(self):
     def pvals(self):
         return self._base_vector.pvals[self._visible_element_idxs]
 
+    @lazyproperty
+    def table_proportions(self):
+        return self._base_vector.table_proportions[self._visible_element_idxs]
+
+    @lazyproperty
+    def values(self):
+        return self._base_vector.values[self._visible_element_idxs]
+
     @lazyproperty
     def zscore(self):
         return self._base_vector.zscore[self._visible_element_idxs]

diff --git a/tests/fixtures/cat-x-cat-mean-subtot.json b/tests/fixtures/cat-x-cat-mean-subtot.json
@@ -0,0 +1,120 @@
+{
+    "result": {
+        "counts": [
+            3,
+            3,
+            2,
+            2,
+            0
+        ],
+        "dimensions": [
+            {
+                "references": {"alias": "mean", "name": "mean"},
+                "type": {
+                    "categories": [{"id": 1, "missing": false, "name": "Mean"}],
+                    "class": "categorical"
+                }
+            },
+            {
+                "derived": false,
+                "references": {
+                    "alias": "Education",
+                    "description": "Education",
+                    "name": "Education",
+                    "view": {
+                        "column_width": null,
+                        "include_missing": false,
+                        "show_counts": false,
+                        "show_numeric_values": false,
+                        "transform": {
+                            "insertions": [
+                                {
+                                    "anchor": 1,
+                                    "args": [
+                                        2,
+                                        3,
+                                        4
+                                    ],
+                                    "function": "subtotal",
+                                    "name": "Any College"
+                                }
+                            ]
+                        }
+                    }
+                },
+                "type": {
+                    "categories": [
+                        {
+                            "id": 1,
+                            "missing": false,
+                            "name": "HS or Less",
+                            "numeric_value": 1
+                        },
+                        {
+                            "id": 2,
+                            "missing": false,
+                            "name": "Some College",
+                            "numeric_value": 2
+                        },
+                        {
+                            "id": 3,
+                            "missing": false,
+                            "name": "College Grad",
+                            "numeric_value": 3
+                        },
+                        {
+                            "id": 4,
+                            "missing": false,
+                            "name": "Grad School",
+                            "numeric_value": 4
+                        },
+                        {
+                            "id": -1,
+                            "missing": true,
+                            "name": "No Data",
+                            "numeric_value": null
+                        }
+                    ],
+                    "class": "categorical",
+                    "ordinal": false
+                }
+            }
+        ],
+        "filtered": {
+            "unweighted_n": 10,
+            "weighted_n": 10
+        },
+        "measures": {
+            "mean": {
+                "data": [
+                    38.3333333333,
+                    65.0,
+                    55.0,
+                    34.0,
+                    {
+                        "?": -8
+                    }
+                ],
+                "metadata": {
+                    "derived": true,
+                    "references": {},
+                    "type": {
+                        "class": "numeric",
+                        "integer": true,
+                        "missing_reasons": {
+                            "NaN": -8,
+                            "No Data": -1
+                        },
+                        "missing_rules": {}
+                    }
+                },
+                "n_missing": 0
+            }
+        },
+        "n": 10,
+        "unfiltered": {
+            "unweighted_n": 10,
+            "weighted_n": 10
+        }
+    }
+}
diff --git a/tests/integration/test_slices.py b/tests/integration/test_cubepart.py
@@ -329,15 +329,17 @@ def it_calculates_mr_x_mr_row_proportions(self):
         ]
         np.testing.assert_almost_equal(slice_.row_proportions, expected)
 
-    def it_calculates_mr_x_mr_column_proportions(self):
+    def it_knows_mr_x_mr_column_proportions(self):
         slice_ = Cube(CR.MR_X_MR).partitions[0]
-        expected = [
-            [1.0, 0.13302403, 0.12391245, 0.22804396],
-            [0.28566937, 1.0, 0.23498805, 0.47751837],
-            [0.43456698, 0.34959546, 1.0, 0.72838875],
-            [1.0, 1.0, 1.0, 1.0],
-        ]
-        np.testing.assert_almost_equal(slice_.column_proportions, expected)
+        np.testing.assert_almost_equal(
+            slice_.column_proportions,
+            [
+                [1.0, 0.13302403, 0.12391245, 0.22804396],
+                [0.28566937, 1.0, 0.23498805, 0.47751837],
+                [0.43456698, 0.34959546, 1.0, 0.72838875],
+                [1.0, 1.0, 1.0, 1.0],
+            ],
+        )
 
     def it_reorders_cat_x_cat(self):
         slice_ = Cube(CR.CAT_X_CAT_PRUNING_HS).partitions[0]
@@ -441,15 +443,28 @@ def it_prunes_cat_x_cat_with_hs(self):
         )
         np.testing.assert_equal(slice_.base_counts, expected)
 
+    def it_accommodates_an_all_missing_element_rows_dimension(self):
+        slice_ = _Slice(Cube(CR.CAT_X_CAT_ALL_MISSING_ROW_ELEMENTS), 0, None, None, 0)
+        row_proportions = slice_.row_proportions
+        np.testing.assert_almost_equal(row_proportions, np.array([]))
+
+    def it_knows_means_with_subtotals_on_cat_x_cat(self):
+        slice_ = _Slice(Cube(CR.CAT_X_CAT_MEAN_SUBTOT), 0, None, None, 0)
+
+        means = slice_.means
+
+        np.testing.assert_almost_equal(
+            means, np.array([[38.3333333, np.nan, 65.0, 55.0, 34.0]])
+        )
+
+
+class Describe_Strand(object):
+    """Integration-test suite for `cr.cube.cubepart._Strand` object."""
+
     def it_provides_nans_for_means_insertions(self):
         strand = CubePartition.factory(
             Cube(CR.CAT_WITH_MEANS_AND_INSERTIONS), 0, None, None, None, 0
         )
         np.testing.assert_almost_equal(
             strand.means, [19.85555556, 13.85416667, 52.78947368, np.nan, np.nan]
         )
-
-    def it_accommodates_an_all_missing_element_rows_dimension(self):
-        slice_ = _Slice(Cube(CR.CAT_X_CAT_ALL_MISSING_ROW_ELEMENTS), 0, None, None, 0)
-        row_proportions = slice_.row_proportions
-        np.testing.assert_almost_equal(row_proportions, np.array([]))