Crunch-io · slobodan-ilic · Apr 16, 2019 · Apr 16, 2019
diff --git a/README.md b/README.md
@@ -112,6 +112,9 @@ The detailed description can be found
 
 ## Changes
 
+#### 1.9.16
+- Proper t-stats for cubes with H&S
+
 #### 1.9.15
 - Implement pairwise indices for Wishart, directly in cube
 

diff --git a/src/cr/cube/__init__.py b/src/cr/cube/__init__.py
@@ -2,4 +2,4 @@
 
 """Initialization module for crunch-cube package."""
 
-__version__ = "1.9.15"
+__version__ = "1.9.16"
diff --git a/src/cr/cube/cube_slice.py b/src/cr/cube/cube_slice.py
@@ -563,14 +563,14 @@ def pairwise_indices(self, alpha=0.05, only_larger=True, hs_dims=None):
             self, alpha=alpha, only_larger=only_larger, hs_dims=hs_dims
         ).pairwise_indices
 
-    def pairwise_significance_tests(self, column_idx):
+    def pairwise_significance_tests(self, column_idx, hs_dims=None):
         """list of _ColumnPairwiseSignificance tests.
 
         Result has as many elements as there are columns in the slice. Each
         significance test contains `p_vals` and `t_stats` (ndarrays that represent
         probability values and statistical scores).
         """
-        return PairwiseSignificance(self).values[column_idx]
+        return PairwiseSignificance(self, hs_dims=hs_dims).values[column_idx]
 
     def _apply_pruning_mask(self, res, hs_dims=None):
         array = self.as_array(prune=True, include_transforms_for_dims=hs_dims)

diff --git a/src/cr/cube/measures/pairwise_significance.py b/src/cr/cube/measures/pairwise_significance.py
@@ -7,7 +7,7 @@
 import numpy as np
 from scipy.stats import t
 
-from cr.cube.util import lazyproperty, intersperse_hs_in_std_res
+from cr.cube.util import lazyproperty
 
 try:
     xrange
@@ -48,21 +48,13 @@ def values(self):
                 self._only_larger,
                 self._hs_dims,
             )
-            for col_idx in range(self._slice.shape[1])
+            for col_idx in range(self._slice.get_shape(hs_dims=self._hs_dims)[1])
         ]
 
     @lazyproperty
     def pairwise_indices(self):
         """ndarray containing tuples of pairwise indices."""
-        pwi = np.array([sig.pairwise_indices for sig in self.values]).T
-
-        if self._hs_dims and 1 in self._hs_dims:
-            # If we need to account for the dimension 1 in pairwise indices, we need
-            # to intersperse with NaNs. The dimension 0 is already tackled
-            # when determining the indices.
-            pwi = intersperse_hs_in_std_res(self._slice, (1,), pwi)
-
-        return pwi
+        return np.array([sig.pairwise_indices for sig in self.values]).T
 
 
 # pylint: disable=too-few-public-methods
@@ -88,24 +80,25 @@ def __init__(
         self._hs_dims = hs_dims
 
     @lazyproperty
-    def _t_stats(self):
-        props = self._slice.proportions(axis=0)
+    def t_stats(self):
+        props = self._slice.proportions(
+            axis=0, include_transforms_for_dims=self._hs_dims
+        )
         diff = props - props[:, [self._col_idx]]
-        margin = self._slice.margin(axis=0, weighted=self._weighted)
+        margin = self._slice.margin(
+            axis=0, weighted=self._weighted, include_transforms_for_dims=self._hs_dims
+        )
         var_props = props * (1.0 - props) / margin
         se_diff = np.sqrt(var_props + var_props[:, [self._col_idx]])
         return diff / se_diff
 
-    @lazyproperty
-    def t_stats(self):
-        return intersperse_hs_in_std_res(self._slice, self._hs_dims, self._t_stats)
-
     @lazyproperty
     def p_vals(self):
-        unweighted_n = self._slice.margin(axis=0, weighted=False)
+        unweighted_n = self._slice.margin(
+            axis=0, weighted=False, include_transforms_for_dims=self._hs_dims
+        )
         df = unweighted_n + unweighted_n[self._col_idx] - 2
-        p_vals = 2 * (1 - t.cdf(abs(self._t_stats), df=df))
-        return intersperse_hs_in_std_res(self._slice, self._hs_dims, p_vals)
+        return 2 * (1 - t.cdf(abs(self.t_stats), df=df))
 
     @lazyproperty
     def pairwise_indices(self):

diff --git a/src/cr/cube/util.py b/src/cr/cube/util.py
@@ -37,12 +37,7 @@ def compress_pruned(table):
 
 
 def intersperse_hs_in_std_res(slice_, hs_dims, res):
-
-    if not hs_dims:
-        # Don't intersperse anything, just return the result
-        return res
-
-    # Perform the insertions of place-holding rows and cols for insertions
+    """Perform the insertions of place-holding rows and cols for insertions."""
     for dim, inds in enumerate(slice_.inserted_hs_indices()):
         if dim not in hs_dims:
             continue

diff --git a/tests/integration/test_pairwise_significance.py b/tests/integration/test_pairwise_significance.py
@@ -96,17 +96,115 @@ def test_hirotsu_pairwise_indices(self):
         expected = np.array([(), (), (3, 4, 7, 9), (2,), (2,), (), (), (2,), (), (2,)])
         np.testing.assert_array_equal(actual_pairwise_indices, expected)
 
+    def test_pairwise_t_stats_with_hs(self):
+        slice_ = CrunchCube(CR.PAIRWISE_HIROTSU_ILLNESS_X_OCCUPATION_WITH_HS).slices[0]
+        expected = np.array(
+            [
+                [
+                    0.0,
+                    -0.06178448,
+                    0.30342874,
+                    -3.18865018,
+                    -3.82130608,
+                    -2.99560531,
+                    0.07456344,
+                    -0.68932699,
+                    -2.95469238,
+                    -0.46970468,
+                    -4.14956044,
+                ],
+                [
+                    0.0,
+                    1.18922394,
+                    0.38254102,
+                    3.3306654,
+                    3.45013209,
+                    2.7520633,
+                    0.59216241,
+                    0.86352416,
+                    2.54171145,
+                    1.10130414,
+                    3.3839919,
+                ],
+                [
+                    0.0,
+                    -1.7080666,
+                    -0.931165,
+                    -0.87419923,
+                    -0.24915622,
+                    -0.2367748,
+                    -0.97198009,
+                    -0.38504801,
+                    -0.03910193,
+                    -1.02720423,
+                    0.19773989,
+                ],
+            ]
+        )
+        t_stats = slice_.pairwise_significance_tests(
+            column_idx=0, hs_dims=(0, 1)
+        ).t_stats
+        np.testing.assert_almost_equal(t_stats, expected)
+
+    def test_pairwise_p_vals_with_hs(self):
+        slice_ = CrunchCube(CR.PAIRWISE_HIROTSU_ILLNESS_X_OCCUPATION_WITH_HS).slices[0]
+        expected = np.array(
+            [
+                [
+                    1.00000000e00,
+                    9.50744855e-01,
+                    7.61580875e-01,
+                    1.45494511e-03,
+                    1.35271514e-04,
+                    2.75262668e-03,
+                    9.40575477e-01,
+                    4.90755141e-01,
+                    3.16822332e-03,
+                    6.38672254e-01,
+                    3.44275537e-05,
+                ],
+                [
+                    1.00000000e00,
+                    2.34589189e-01,
+                    7.02082945e-01,
+                    8.84605265e-04,
+                    5.67562813e-04,
+                    5.94375533e-03,
+                    5.53862215e-01,
+                    3.88027539e-01,
+                    1.11098015e-02,
+                    2.71039211e-01,
+                    7.25442977e-04,
+                ],
+                [
+                    1.00000000e00,
+                    8.78852262e-02,
+                    3.51831360e-01,
+                    3.82131035e-01,
+                    8.03255937e-01,
+                    8.12841336e-01,
+                    3.31271828e-01,
+                    7.00272311e-01,
+                    9.68813218e-01,
+                    3.04581978e-01,
+                    8.43264694e-01,
+                ],
+            ]
+        )
+        p_vals = slice_.pairwise_significance_tests(column_idx=0, hs_dims=(0, 1)).p_vals
+        np.testing.assert_almost_equal(p_vals, expected)
+
     def test_pairwise_indices_with_hs(self):
-        cube = CrunchCube(CR.PAIRWISE_HIROTSU_ILLNESS_X_OCCUPATION_WITH_HS)
+        slice_ = CrunchCube(CR.PAIRWISE_HIROTSU_ILLNESS_X_OCCUPATION_WITH_HS).slices[0]
         expected = [
             [
-                (3, 4, 8, 10),
-                (3, 4, 8, 10),
-                (3, 4, 8, 10),
+                (3, 4, 5, 8, 10),
+                (3, 4, 5, 8, 10),
+                (3, 4, 5, 8, 10),
                 (),
                 (),
-                np.nan,
-                (3, 4, 8, 10),
+                (10,),
+                (3, 4, 5, 8, 10),
                 (3, 4, 10),
                 (),
                 (4, 10),
@@ -118,17 +216,17 @@ def test_pairwise_indices_with_hs(self):
                 (),
                 (0, 2, 6, 7),
                 (0, 2, 6, 7),
-                np.nan,
+                (0, 2),
                 (),
                 (),
                 (0, 2),
                 (),
                 (0, 2, 6, 7),
             ],
-            [(), (), (), (), (), np.nan, (), (), (), (), (1,)],
+            [(), (), (), (), (), (1,), (), (), (), (), (1,)],
         ]
-        actual = cube.slices[0].pairwise_indices(hs_dims=[0, 1]).tolist()
-        assert actual == expected
+        pairwise_indices = slice_.pairwise_indices(hs_dims=(0, 1)).tolist()
+        assert pairwise_indices == expected
 
     def test_hirotsu_pvals_with_hs(self):
         """The shape of the result should be 11 x 11, with H&S (at index 5)."""

diff --git a/tests/unit/test_wishart_pairwise_significance.py b/tests/unit/test_wishart_pairwise_significance.py
@@ -17,7 +17,7 @@
 class DescribePairwiseSignificance:
     def it_provides_access_to_its_values(self, request, slice_):
         shape = (2, 2)
-        slice_.shape = shape
+        slice_.get_shape.return_value = shape
         expected_test_values = PairwiseSignificance(slice_).values
         assert len(expected_test_values) == 2
         for i, column_pairwise_significance in enumerate(expected_test_values):
@@ -37,13 +37,13 @@ def it_can_calculate_t_stats(self, t_stats_fixture, slice_):
         slice_.proportions.return_value = props
         slice_.margin.return_value = margin
         np.testing.assert_almost_equal(
-            _ColumnPairwiseSignificance(slice_, col_idx)._t_stats, t_stats
+            _ColumnPairwiseSignificance(slice_, col_idx).t_stats, t_stats
         )
 
-    def it_can_calculate_p_vals(self, p_vals_fixture, slice_, _t_stats_prop_):
+    def it_can_calculate_p_vals(self, p_vals_fixture, slice_, t_stats_prop_):
         col_idx, t_stats, margin, p_vals = p_vals_fixture
         slice_.margin.return_value = margin
-        _t_stats_prop_.return_value = t_stats
+        t_stats_prop_.return_value = t_stats
         np.testing.assert_almost_equal(
             _ColumnPairwiseSignificance(slice_, col_idx).p_vals, p_vals
         )
@@ -177,10 +177,6 @@ def t_stats_fixture(self, request):
     def slice_(self, request):
         return instance_mock(request, CubeSlice)
 
-    @pytest.fixture
-    def _t_stats_prop_(self, request):
-        return property_mock(request, _ColumnPairwiseSignificance, "_t_stats")
-
     @pytest.fixture
     def t_stats_prop_(self, request):
         return property_mock(request, _ColumnPairwiseSignificance, "t_stats")