diff --git a/src/cr/cube/matrix.py b/src/cr/cube/matrix.py index 3b1ee38dd..1260051f6 100644 --- a/src/cr/cube/matrix.py +++ b/src/cr/cube/matrix.py @@ -464,7 +464,7 @@ def _variance(self): """Returns the variance for cell percentages `variance = p * (1-p)` """ - return self._counts * (1 - self._counts) + return self._counts / self.table_margin * (1 - self._counts / self.table_margin) @lazyproperty def _zscores(self): @@ -636,7 +636,11 @@ def _variance(self): """Returns the variance for cell percentages `variance = p * (1-p)` """ - return self._counts[:, 0, :] * (1 - self._counts[:, 0, :]) + return ( + self._counts[:, 0, :] + / self.table_margin[:, None] + * (1 - self._counts[:, 0, :] / self.table_margin[:, None]) + ) @lazyproperty def _zscores(self): @@ -774,7 +778,11 @@ def _variance(self): """Returns the variance for cell percentages `variance = p * (1-p)` """ - return self._counts[:, :, 0] * (1 - self._counts[:, :, 0]) + return ( + self._counts[:, :, 0] + / self.table_margin + * (1 - self._counts[:, :, 0] / self.table_margin) + ) @lazyproperty def _zscores(self): @@ -987,7 +995,11 @@ def _variance(self): """Returns the variance for cell percentages `variance = p * (1-p)` """ - return self._counts[:, 0, :, 0] * (1 - self._counts[:, 0, :, 0]) + return ( + self._counts[:, 0, :, 0] + / self.table_margin + * (1 - self._counts[:, 0, :, 0] / self.table_margin) + ) @lazyproperty def _zscores(self): @@ -1105,14 +1117,6 @@ def ordering(self): """ return (self._anchor_n, self._neg_idx, self) - @lazyproperty - def standard_deviation(self): - return self._standard_deviation - - @lazyproperty - def standard_error(self): - return self._standard_error - @lazyproperty def table_margin(self): return self._table_margin @@ -1141,27 +1145,6 @@ def _anchor_n(self): else int(self.anchor) + 1 ) - @lazyproperty - def _standard_deviation(self): - """Returns the standard deviation for cell percentages - `std_deviation = sqrt(variance)` - """ - return np.sqrt(self._variance) - - @lazyproperty - def _standard_error(self): - """Returns the standard error for cell percentages - `std_error = sqrt(variance/N)` - """ - return np.sqrt(self._variance / self.table_margin) - - @lazyproperty - def _variance(self): - """Returns the variance for cell percentages - `variance = p * (1-p)` - """ - return self._values * (1 - self._values) - @lazyproperty def _zscore(self): # TODO: remove this if statement - temporary hack until MR zscore implementation @@ -1675,27 +1658,6 @@ def numeric(self): def pruned(self): return self.base == 0 or np.isnan(self.base) - @lazyproperty - def standard_deviation(self): - """Returns the standard deviation for cell percentages - `std_deviation = sqrt(variance)` - """ - return np.sqrt(self._variance) - - @lazyproperty - def standard_error(self): - """Returns the standard error for cell percentages - `std_error = sqrt(variance/N)` - """ - return np.sqrt(self._variance / self.table_margin) - - @lazyproperty - def variance(self): - """Returns the variance for cell percentages - `variance = p * (1-p)` - """ - return self.values * (1 - self.values) - @lazyproperty def zscore(self): variance = ( @@ -1803,14 +1765,6 @@ def __init__( def pruned(self): return self.table_base == 0 - @lazyproperty - def standard_deviation(self): - return self._standard_deviation - - @lazyproperty - def standard_error(self): - return self._standard_error - @lazyproperty def table_base(self): return np.sum(self._all_bases) @@ -1884,14 +1838,6 @@ def margin(self): def pruned(self): return np.all(self.base == 0) or np.all(np.isnan(self.base)) - @lazyproperty - def standard_deviation(self): - return self._standard_deviation - - @lazyproperty - def standard_error(self): - return self._standard_error - @lazyproperty def values(self): return self._selected diff --git a/tests/integration/test_headers_and_subtotals.py b/tests/integration/test_headers_and_subtotals.py index d42f96d05..f674ad44d 100644 --- a/tests/integration/test_headers_and_subtotals.py +++ b/tests/integration/test_headers_and_subtotals.py @@ -2569,6 +2569,226 @@ def it_calculates_residuals_for_cat_x_items_x_cats_with_col_insertion(self): ], ], ) + # Test std deviation + np.testing.assert_almost_equal( + slice_.standard_deviation, + [ + [ + 0.03018845, + 0.0319175, + 0.04857593, + 0.04751389, + 0.08953993, + 0.10328323, + 0.1524741, + 0.18022756, + 0.13456855, + 0.18607541, + 0.28325803, + ], + [ + 0.0595667, + 0.04309204, + 0.04933763, + 0.07460264, + 0.11766836, + 0.14009466, + 0.20399558, + 0.2858036, + 0.24756061, + 0.30673482, + 0.43888846, + ], + [ + 0.0451034, + 0.03524264, + 0.04325743, + 0.05778187, + 0.09675494, + 0.11460302, + 0.16205685, + 0.20556123, + 0.16948566, + 0.2256755, + 0.33368666, + ], + [ + 0.02002223, + 0.02248904, + 0.01817414, + 0.01864496, + 0.05761441, + 0.0601484, + 0.09295886, + 0.10388546, + 0.09123329, + 0.11333915, + 0.17684956, + ], + [ + 0.04082826, + 0.02342972, + 0.02170135, + 0.02434758, + 0.06941901, + 0.09116899, + 0.13440853, + 0.1652839, + 0.1624643, + 0.2043584, + 0.29853975, + ], + [ + 0.03566708, + 0.01482241, + 0.02063053, + 0.03399419, + 0.05407998, + 0.07387197, + 0.10215211, + 0.13465807, + 0.12933925, + 0.14491506, + 0.23175336, + ], + [ + 0.02433953, + 0.0148974, + 0.01541497, + 0.02545434, + 0.04827421, + 0.04793991, + 0.06399285, + 0.08495292, + 0.06291114, + 0.08184793, + 0.13290769, + ], + [ + 0.01960535, + 0.01548208, + 0.01906989, + 0.02260114, + 0.04638769, + 0.04187538, + 0.07265425, + 0.09220725, + 0.07954493, + 0.09381268, + 0.15249847, + ], + ], + ) + # Test standard error + np.testing.assert_almost_equal( + slice_.standard_error, + [ + [ + 0.00034447, + 0.0003642, + 0.00055429, + 0.00054217, + 0.00102171, + 0.00117853, + 0.00173984, + 0.00205652, + 0.00153552, + 0.00212325, + 0.00323218, + ], + [ + 0.0006797, + 0.00049171, + 0.00056298, + 0.00085127, + 0.00134268, + 0.00159858, + 0.00232773, + 0.00326122, + 0.00282484, + 0.00350006, + 0.00500803, + ], + [ + 0.00051466, + 0.00040214, + 0.0004936, + 0.00065933, + 0.00110404, + 0.0013077, + 0.00184918, + 0.0023456, + 0.00193395, + 0.00257512, + 0.0038076, + ], + [ + 0.00022847, + 0.00025662, + 0.00020738, + 0.00021275, + 0.00065742, + 0.00068634, + 0.00106073, + 0.00118541, + 0.00104104, + 0.00129328, + 0.00201798, + ], + [ + 0.00046588, + 0.00026735, + 0.00024763, + 0.00027782, + 0.00079212, + 0.0010403, + 0.0015337, + 0.00188601, + 0.00185383, + 0.00233187, + 0.00340655, + ], + [ + 0.00040699, + 0.00016913, + 0.00023541, + 0.0003879, + 0.00061709, + 0.00084293, + 0.00116563, + 0.00153654, + 0.00147585, + 0.00165358, + 0.00264447, + ], + [ + 0.00027773, + 0.00016999, + 0.0001759, + 0.00029045, + 0.00055084, + 0.00054703, + 0.0007302, + 0.00096937, + 0.00071786, + 0.00093394, + 0.00151657, + ], + [ + 0.00022371, + 0.00017666, + 0.0002176, + 0.0002579, + 0.00052932, + 0.00047783, + 0.00082904, + 0.00105215, + 0.00090766, + 0.00107047, + 0.00174012, + ], + ], + ) transforms = {"columns_dimension": {"insertions": {}}} slice_no_col_insertion_ = Cube( @@ -2582,6 +2802,14 @@ def it_calculates_residuals_for_cat_x_items_x_cats_with_col_insertion(self): np.testing.assert_almost_equal( slice_no_col_insertion_.pvals, slice_.pvals[:, : slice_.pvals.shape[1] - 1] ) + np.testing.assert_almost_equal( + slice_no_col_insertion_.standard_deviation, + slice_.standard_deviation[:, : slice_.standard_deviation.shape[1] - 1], + ) + np.testing.assert_almost_equal( + slice_no_col_insertion_.standard_error, + slice_.standard_error[:, : slice_.standard_error.shape[1] - 1], + ) def it_calculates_residuals_for_ca_as_0th_with_1_col_insertion(self): # Test for multi-cube when first cube represents a categorical-array @@ -3429,4 +3657,5 @@ def it_provide_residual_test_stats_including_hs(self): slice_ = Cube(CR.CAT_X_CAT_HS_2ROWS_1COL).partitions[0] np.testing.assert_array_equal(slice_.pvals, slice_.residual_test_stats[0]) np.testing.assert_array_equal(slice_.zscore, slice_.residual_test_stats[1]) + assert slice_.residual_test_stats.shape == (2, 6, 7) diff --git a/tests/integration/test_multiple_response.py b/tests/integration/test_multiple_response.py index 9f311f4d7..3222b334f 100644 --- a/tests/integration/test_multiple_response.py +++ b/tests/integration/test_multiple_response.py @@ -246,9 +246,9 @@ def test_table_base_unpruned_cat_x_mr(): np.testing.assert_array_equal(slice_.table_base_unpruned, expected) -def test_z_scores_from_r_rows_margin(): +def test_various_measures_from_r_rows_margin(): slice_ = Cube(CR.MR_X_CAT_PROFILES_STATS_WEIGHTED).partitions[0] - expected = [ + expected_zscore = [ [ -1.465585354569577, 3.704125875262655, @@ -270,7 +270,54 @@ def test_z_scores_from_r_rows_margin(): -0.794143540856781, ], ] - np.testing.assert_almost_equal(slice_.zscore, expected) + expected_standard_dev = [ + [ + 0.32529036, + 0.3230502, + 0.24634286, + 0.17015146, + 0.15631497, + 0.310228, + 0.07333405, + 0.09462074, + ], + [ + 0.34030642, + 0.32321237, + 0.24212202, + 0.17077218, + 0.15232289, + 0.34229973, + 0.07720184, + 0.09510867, + ], + ] + expected_standard_error = [ + [ + 0.00185406, + 0.0018413, + 0.00140409, + 0.00096982, + 0.00089095, + 0.00176821, + 0.00041798, + 0.00053931, + ], + [ + 0.00193965, + 0.00184222, + 0.00138003, + 0.00097335, + 0.0008682, + 0.00195101, + 0.00044003, + 0.00054209, + ], + ] + + np.testing.assert_almost_equal(slice_.zscore, expected_zscore) + np.testing.assert_almost_equal(slice_.standard_error, expected_standard_error) + np.testing.assert_almost_equal(slice_.standard_deviation, expected_standard_dev) def test_mr_x_single_wave(): @@ -378,7 +425,7 @@ def test_cat_x_mr_x_itself_zscores(): ) -def test_cat_x_mr_and_cat_x_mr_x_itself_zscores(): +def test_cat_x_mr_and_cat_x_mr_x_itself_various_measures(): slice_ = Cube(CR.EDU_FAV5).partitions[0] slice2_ = Cube(CR.EDU_FAV5_FAV5).partitions[0] @@ -386,6 +433,10 @@ def test_cat_x_mr_and_cat_x_mr_x_itself_zscores(): slice_.column_proportions, slice2_.column_proportions ) np.testing.assert_array_almost_equal(slice_.zscore, slice2_.zscore) + np.testing.assert_array_almost_equal( + slice_.standard_deviation, slice2_.standard_deviation + ) + np.testing.assert_array_almost_equal(slice_.standard_error, slice2_.standard_error) assert slice_.shape == (4, 5) assert slice2_.shape == (4, 5)