diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 6363f0961..32801d4c2 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -11,4 +11,3 @@ repos: rev: stable hooks: - id: black - language_version: python3.6 diff --git a/src/cr/cube/cubepart.py b/src/cr/cube/cubepart.py index 4263f3b71..fba048a78 100644 --- a/src/cr/cube/cubepart.py +++ b/src/cr/cube/cubepart.py @@ -84,7 +84,7 @@ def dimension_types(self): return tuple(d.dimension_type for d in self._dimensions) def evaluate(self, measure_expr): - """ -> 1D/2D ndarray, values evaluated given the function specification + """Return 1D/2D ndarray, values evaluated given the function specification The `function_spec` contains the function to apply and its parameters, e.g.: ``` @@ -113,7 +113,7 @@ def ndim(self): @lazyproperty def population_fraction(self): - """Returns the population fraction of the cube""" + """population fraction of the cube""" return self._cube.population_fraction @lazyproperty @@ -239,6 +239,11 @@ class _Slice(CubePartition): dimensions which can be crosstabbed in a slice. """ + # ---This is the quantile of the normal Cumulative Distribution Function (CDF) at + # ---probability 97.5% (p=.975), since the computed confidence interval + # ---is ±2.5% (.025) on each side of the CDF. + Z_975 = 1.959964 + def __init__(self, cube, slice_idx, transforms, population, mask_size): super(_Slice, self).__init__(cube, transforms) self._slice_idx = slice_idx @@ -291,16 +296,29 @@ def columns_dimension_type(self): def columns_margin(self): return np.array([column.margin for column in self._matrix.columns]).T + @lazyproperty + def columns_percentages_moe(self): + """1D/2D np.float64 ndarray of margin-of-error (MoE) for columns percentages. + + The values are represented as percentages, analogue to the `table_percentages` + property. This means that the value of 3.5% will have the value 3.5 (not 0.035). + The values can be np.nan when the corresponding percentage is also np.nan, which + happens when the respective columns margin is 0. + """ + return self.Z_975 * 100 * self.columns_std_err + @lazyproperty def columns_std_dev(self): - """Returns the standard deviation for cell percentages + """standard deviation for column percentages + `std_deviation = sqrt(variance)` """ return np.sqrt(self._columns_variance) @lazyproperty def columns_std_err(self): - """Returns the standard error for cell percentages + """standard error for column percentages + `std_error = sqrt(variance/N)` """ return np.sqrt(self._columns_variance / self.columns_margin) @@ -336,17 +354,17 @@ def inserted_row_idxs(self): @lazyproperty def insertions(self): - """Returns masked array with residuals for insertions + """2D np.float64 np.ma.core.MaskedArray of residuals for insertions. - 0 1 2 3 4 5 6 - 0 inf inf inf inf inf -2.9 inf - 1 inf inf inf inf inf -4.3 inf - 2 2.5 1.3 3.3 -0.70 -7.25 -6.52 2.25 - 3 inf inf inf inf inf -2.51 inf - 4 -1.16 2.20 5.84 1.78 -8.48 -5.92 0.93 - 5 inf inf inf inf inf 9.70 inf + 0 1 2 3 4 5 6 + 0 inf inf inf inf inf -2.9 inf + 1 inf inf inf inf inf -4.3 inf + 2 2.5 1.3 3.3 -0.70 -7.25 -6.52 2.25 + 3 inf inf inf inf inf -2.51 inf + 4 -1.16 2.20 5.84 1.78 -8.48 -5.92 0.93 + 5 inf inf inf inf inf 9.70 inf - Only the insertions residuals are showed in a inf masked array + Only the insertions residuals are showed in a inf masked array. """ inserted_rows = self.inserted_row_idxs inserted_cols = self.inserted_column_idxs @@ -601,7 +619,7 @@ def scale_means_rows_margin(self): @lazyproperty def scale_median_column(self): - """ -> np.int64 ndarray of the columns scale median + """np.int64 ndarray of the columns scale median The median is calculated using the standard algebra applied to the numeric values repeated for each related counts value @@ -621,7 +639,7 @@ def scale_median_column(self): @lazyproperty def scale_median_row(self): - """ -> np.int64 ndarray of the rows scale median + """np.int64 ndarray of the rows scale median The median is calculated using the standard algebra applied to the numeric values repeated for each related counts value @@ -641,7 +659,7 @@ def scale_median_row(self): @lazyproperty def scale_median_column_margin(self): - """ -> np.int64, represents the column scale median margin""" + """np.int64 represents the column scale median margin""" if np.all(np.isnan(self._columns_dimension_numeric_values)): return None columns_margin = self.columns_margin @@ -657,7 +675,7 @@ def scale_median_column_margin(self): @lazyproperty def scale_median_row_margin(self): - """ -> np.int64, represents the rows scale median margin""" + """np.int64 represents the rows scale median margin""" if np.all(np.isnan(self._rows_dimension_numeric_values)): return None rows_margin = self.rows_margin @@ -673,28 +691,28 @@ def scale_median_row_margin(self): @lazyproperty def scale_std_dev_column(self): - """ -> 1D np.ndarray of the standard deviation column of scales""" + """1D np.ndarray of the standard deviation column of scales""" if np.all(np.isnan(self._columns_dimension_numeric_values)): return None return np.sqrt(self.var_scale_means_column) @lazyproperty def scale_std_dev_row(self): - """ -> 1D np.ndarray of the standard deviation row of scales""" + """1D np.ndarray of the standard deviation row of scales""" if np.all(np.isnan(self._rows_dimension_numeric_values)): return None return np.sqrt(self.var_scale_means_row) @lazyproperty def scale_std_err_column(self): - """ -> 1D np.ndarray of the standard error column of scales""" + """1D np.ndarray of the standard error column of scales""" if np.all(np.isnan(self._columns_dimension_numeric_values)): return None return self.scale_std_dev_column / np.sqrt(self.rows_margin) @lazyproperty def scale_std_err_row(self): - """ -> 1D np.ndarray of the standard error row of scales""" + """1D np.ndarray of the standard error row of scales""" if np.all(np.isnan(self._rows_dimension_numeric_values)): return None return self.scale_std_dev_row / np.sqrt(self.columns_margin) @@ -782,6 +800,17 @@ def table_name(self): def table_percentages(self): return self.table_proportions * 100 + @lazyproperty + def table_percentages_moe(self): + """1D/2D np.float64 ndarray of margin-of-error (MoE) for table percentages. + + The values are represented as percentages, analogue to the `table_percentages` + property. This means that the value of 3.5% will have the value 3.5 (not 0.035). + The values can be np.nan when the corresponding percentage is also np.nan, which + happens when the respective table margin is 0. + """ + return self.Z_975 * 100 * self.table_std_err + @lazyproperty def table_proportions(self): return np.array([row.table_proportions for row in self._matrix.rows]) @@ -801,7 +830,7 @@ def unweighted_counts(self): @lazyproperty def var_scale_means_column(self): - """ -> 1D np.ndarray of the column variance values for scales + """1D np.ndarray of the column variance values for scales Note: the variance for scale is defined as sum((Yi−Y~)2/(N)), where Y~ is the mean of the data. @@ -822,7 +851,7 @@ def var_scale_means_column(self): @lazyproperty def var_scale_means_row(self): - """ -> 1D np.ndarray of the row variance values for scales + """1D np.ndarray of the row variance values for scales Note: the variance for scale is defined as sum((Yi−Y~)2/(N)), where Y~ is the mean of the data. @@ -862,7 +891,8 @@ def _columns_dimension_numeric_values(self): @lazyproperty def _columns_variance(self): - """Returns the variance for cell percentages + """variance for column percentages + `variance = p * (1-p)` """ return ( @@ -1039,7 +1069,7 @@ def scale_mean(self): @lazyproperty def scale_median(self): - """ -> np.int64, the median of scales + """np.int64 the median of scales The median is calculated using the standard algebra applied to the numeric values repeated for each related counts value @@ -1053,14 +1083,14 @@ def scale_median(self): @lazyproperty def scale_std_dev(self): - """ -> np.float64, the standard deviation of scales""" + """np.float64, the standard deviation of scales""" if np.all(np.isnan(self._numeric_values)): return None return np.sqrt(self.var_scale_mean) @lazyproperty def scale_std_err(self): - """ -> np.float64, the standard error of scales""" + """np.float64, the standard error of scales""" if np.all(np.isnan(self._numeric_values)): return None counts = self._counts_as_array[self._numeric_values_mask] @@ -1085,12 +1115,12 @@ def smoothed_dimension_dict(self): @lazyproperty def standard_deviation(self): - """ -> np.ndarray, percentages standard deviation""" + """np.ndarray percentages standard deviation""" return np.sqrt(self._variance) @lazyproperty def standard_error(self): - """ -> np.ndarray, percentages standard error""" + """np.ndarray percentages standard error""" if self.dimension_types[0] == DT.MR: return np.sqrt(self._variance / self.bases) return np.sqrt(self._variance / np.sum(self.rows_margin)) @@ -1207,7 +1237,7 @@ def _numeric_values(self): @lazyproperty def _numeric_values_mask(self): - """ -> np.ndarray, boolean elements for each element in rows dimension." + """np.ndarray boolean elements for each element in rows dimension." This array contains True or False according to the nan in the numeric_values array @@ -1238,7 +1268,8 @@ def _table_proportions_as_array(self): @lazyproperty def _variance(self): - """Returns the variance for cell percentages + """variance for cell percentages + `variance = p * (1-p)` """ p = self._table_proportions_as_array diff --git a/tests/expectations/cat-hs-x-mr-col-moe.py b/tests/expectations/cat-hs-x-mr-col-moe.py new file mode 100644 index 000000000..6aa517349 --- /dev/null +++ b/tests/expectations/cat-hs-x-mr-col-moe.py @@ -0,0 +1,10 @@ +[ + [17.30181459, 7.76167031, 2.58919072, 1.96696835, 2.47551804], + [13.7464937, 8.90617586, 6.33244289, 3.30105103, 3.99757694], + [16.79140176, 10.72624566, 6.68369737, 3.71703613, 4.42265749], + [0.0, 0.0, 0.0, 0.0, 0.0], + [11.79621344, 11.12694011, 8.58251349, 4.91075221, 4.79812657], + [14.76383504, 11.06914477, 8.61521467, 5.11357472, 4.93988229], + [0.0, 0.0, 0.0, 0.0, 0.0], + [16.79140176, 10.72624566, 6.68369737, 3.71703613, 4.42265749], +] diff --git a/tests/expectations/col-per-moe-cat-x-cat-hs-2rows-1col.py b/tests/expectations/col-per-moe-cat-x-cat-hs-2rows-1col.py new file mode 100644 index 000000000..311aa16fd --- /dev/null +++ b/tests/expectations/col-per-moe-cat-x-cat-hs-2rows-1col.py @@ -0,0 +1,56 @@ +[ + [ + 13.03595844, + 7.67698551, + 3.46251469, + 4.55693081, + 4.13969905, + 3.06644326, + 7.58177966, + ], + [ + 9.31746956, + 8.36644659, + 3.78951977, + 5.23042895, + 3.72360922, + 3.15148999, + 7.65643283, + ], + [ + 11.77008734, + 8.47930382, + 3.85500973, + 5.5463129, + 4.8153303, + 3.66939254, + 7.5418196, + ], + [ + 6.0015905, + 7.16459682, + 3.25399504, + 4.39795907, + 3.1556904, + 2.63154691, + 6.03640099, + ], + [ + 10.57125967, + 8.64082889, + 3.91804373, + 5.56024488, + 4.45804303, + 3.59253748, + 8.05245981, + ], + [ + 10.91512996, + 6.50723624, + 2.9825236, + 4.90998204, + 4.89378128, + 3.57587294, + 5.83679508, + ], +] diff --git a/tests/expectations/col-std-dev-cat-x-cat-hs-2rows-1col.py b/tests/expectations/col-std-dev-cat-x-cat-hs-2rows-1col.py new file mode 100644 index 000000000..8255c3e38 --- /dev/null +++ b/tests/expectations/col-std-dev-cat-x-cat-hs-2rows-1col.py @@ -0,0 +1,56 @@ +[ + [ + 0.49326036, + 0.43967108, + 0.43739495, + 0.4093598, + 0.42242603, + 0.41688475, + 0.47060217, + ], + [ + 0.35255854, + 0.47915742, + 0.47870319, + 0.46986171, + 0.3799671, + 0.42844691, + 0.4752359, + ], + [ + 0.44536177, + 0.48562091, + 0.48697607, + 0.49823831, + 0.49136926, + 0.49885606, + 0.46812184, + ], + [ + 0.22709084, + 0.4103259, + 0.41105414, + 0.39507899, + 0.32201514, + 0.35776034, + 0.37468029, + ], + [ + 0.4, + 0.49487166, + 0.49493871, + 0.49948985, + 0.45491071, + 0.48840757, + 0.49981735, + ], + [ + 0.41301152, + 0.372678, + 0.37676108, + 0.44107522, + 0.49937461, + 0.48614202, + 0.36229072, + ], +] diff --git a/tests/expectations/col-std-err-cat-x-cat-hs-2rows-1col.py b/tests/expectations/col-std-err-cat-x-cat-hs-2rows-1col.py new file mode 100644 index 000000000..0b62f7092 --- /dev/null +++ b/tests/expectations/col-std-err-cat-x-cat-hs-2rows-1col.py @@ -0,0 +1,56 @@ +[ + [ + 0.06651121, + 0.03916901, + 0.01766622, + 0.02325007, + 0.0211213, + 0.01564541, + 0.03868326, + ], + [ + 0.04753898, + 0.04268674, + 0.01933464, + 0.02668635, + 0.01899836, + 0.01607933, + 0.03906415, + ], + [ + 0.06005257, + 0.04326255, + 0.01966878, + 0.02829803, + 0.02456846, + 0.01872173, + 0.03847938, + ], + [ + 0.03062092, + 0.03655474, + 0.01660232, + 0.02243898, + 0.01610076, + 0.01342651, + 0.03079853, + ], + [ + 0.05393599, + 0.04408667, + 0.01999039, + 0.02836912, + 0.02274554, + 0.01832961, + 0.04108473, + ], + [ + 0.05569046, + 0.03320079, + 0.01521724, + 0.02505139, + 0.02496873, + 0.01824458, + 0.02978011, + ], +] diff --git a/tests/expectations/mr-x-cat-hs-col-moe.py b/tests/expectations/mr-x-cat-hs-col-moe.py new file mode 100644 index 000000000..438842dfa --- /dev/null +++ b/tests/expectations/mr-x-cat-hs-col-moe.py @@ -0,0 +1,52 @@ +[ + [ + 20.15560409, + 13.30738328, + 12.78408718, + float("NaN"), + 6.55784933, + 7.97027717, + float("NaN"), + 5.21298079, + ], + [ + 24.45137595, + 14.1680129, + 12.66154543, + float("NaN"), + 10.44549053, + 10.28893289, + float("NaN"), + 7.33090439, + ], + [ + 23.63020102, + 15.291979, + 13.26439551, + float("NaN"), + 10.15732844, + 9.67359437, + float("NaN"), + 7.01221198, + ], + [ + 20.08846861, + 11.45121769, + 9.94950815, + float("NaN"), + 6.1292629, + 4.7740531, + float("NaN"), + 3.81368531, + ], + [ + 14.54617601, + 7.82024004, + 6.92340026, + float("NaN"), + 6.27830645, + 5.73799481, + float("NaN"), + 4.2383771, + ], +] diff --git a/tests/integration/test_cube.py b/tests/integration/test_cube.py index 8dd2b4d4e..5ec72bdb9 100644 --- a/tests/integration/test_cube.py +++ b/tests/integration/test_cube.py @@ -848,10 +848,17 @@ def test_calculate_various_measures_axis_0(self): [0.05880176, 0.03705843, 0.02576154, 0.03360238, 0.04526793, 0.07517074], [0.05880176, 0.03705843, 0.02576154, 0.03360238, 0.04526793, 0.07517074], ] + expected_col_percentages_moe = [ + [11.5249326, 7.2633194, 5.0491687, 6.5859452, 8.8723517, 14.7331947], + [11.5249326, 7.2633194, 5.0491687, 6.5859452, 8.8723517, 14.7331947], + ] np.testing.assert_almost_equal(slice_.table_std_dev, expected_table_std_dev) np.testing.assert_almost_equal(slice_.table_std_err, expected_table_std_err) np.testing.assert_almost_equal(slice_.columns_std_dev, expected_col_std_dev) np.testing.assert_almost_equal(slice_.columns_std_err, expected_col_std_err) + np.testing.assert_almost_equal( + slice_.columns_percentages_moe, expected_col_percentages_moe + ) np.testing.assert_almost_equal(slice_.zscores, expected_zscore) def test_pvals(self): @@ -987,15 +994,22 @@ def test_various_measures_admit_by_dept_unweighted_rows(self): [0.01567414, 0.01993363, 0.01575024, 0.01682826, 0.01795892, 0.00918798], [0.01567414, 0.01993363, 0.01575024, 0.01682826, 0.01795892, 0.00918798], ] + expected_col_percentages_moe = [ + [3.07207565, 3.90691882, 3.0869894, 3.29827837, 3.51988285, 1.80081013], + [3.07207565, 3.90691882, 3.0869894, 3.29827837, 3.51988285, 1.80081013], + ] np.testing.assert_almost_equal(slice_.zscores, expected_zscores) np.testing.assert_almost_equal(slice_.table_std_dev, expected_table_std_dev) np.testing.assert_almost_equal(slice_.table_std_err, expected_table_std_err) np.testing.assert_almost_equal(slice_.columns_std_dev, expected_col_std_dev) np.testing.assert_almost_equal(slice_.columns_std_err, expected_col_std_err) + np.testing.assert_almost_equal( + slice_.columns_percentages_moe, expected_col_percentages_moe + ) def test_various_measures_admit_by_gender_weighted_rows(self): - """ see + """see https://github.com/Crunch-io/whaam/blob/master/base/stats/tests/ zvalues-spec.js#L67 """ @@ -1010,12 +1024,19 @@ def test_various_measures_admit_by_gender_weighted_rows(self): expected_table_std_err = [[0.00659641, 0.00492018], [0.0070529, 0.00675348]] expected_col_std_dev = [[0.49668253, 0.45933735], [0.49668253, 0.45933735]] expected_col_std_err = [[0.00966009, 0.01080163], [0.00966009, 0.01080163]] + expected_col_percentages_moe = [ + [1.89334366, 2.11708092], + [1.89334366, 2.11708092], + ] np.testing.assert_almost_equal(slice_.zscores, expected_zscores) np.testing.assert_almost_equal(slice_.table_std_dev, expected_table_std_dev) np.testing.assert_almost_equal(slice_.table_std_err, expected_table_std_err) np.testing.assert_almost_equal(slice_.columns_std_dev, expected_col_std_dev) np.testing.assert_almost_equal(slice_.columns_std_err, expected_col_std_err) + np.testing.assert_almost_equal( + slice_.columns_percentages_moe, expected_col_percentages_moe + ) def test_selected_crosstab_as_array(self): slice_ = Cube(CR.SELECTED_CROSSTAB_4).partitions[0] diff --git a/tests/integration/test_cubepart.py b/tests/integration/test_cubepart.py index 9d1657b77..1d932e3c6 100644 --- a/tests/integration/test_cubepart.py +++ b/tests/integration/test_cubepart.py @@ -117,6 +117,25 @@ def it_provides_values_for_cat_x_cat_pruning_hs(self): [0.02675483, 0.01976428, 0.0292589, 0.05070657, np.nan, 0.0], ], ) + np.testing.assert_almost_equal( + slice_.columns_percentages_moe, + [ + [ + 13.51426726, + 10.79256616, + 16.59188199, + 22.48817088, + np.nan, + 53.31123764, + ], + [12.89084933, 10.4849174, 16.3342716, 22.23457567, np.nan, 53.31123764], + [5.39900809, 2.84154374, 0.0, 9.93830606, np.nan, 0.0], + [8.99374047, 7.09061236, 11.08349041, 14.03617724, np.nan, 53.31123764], + [8.99374047, 8.57336265, 14.52191695, 19.75240786, np.nan, 0.0], + [0.0, 0.0, 0.0, 0.0, np.nan, 0.0], + [5.24385075, 3.87372796, 5.73464003, 9.93830606, np.nan, 0.0], + ], + ) assert slice_.dimension_types == (DT.CAT, DT.CAT) assert slice_.inserted_column_idxs == (1,) assert slice_.inserted_row_idxs == (1,) @@ -176,6 +195,18 @@ def it_provides_values_for_cat_x_cat_pruning_hs(self): [0.01072157, 0.01508098, 0.01072157, 0.01072157, 0.0, 0.0], ], ) + np.testing.assert_almost_equal( + slice_.table_percentages_moe, + [ + [9.47425342, 10.23031735, 8.51416179, 6.4011466, 0.0, 2.10138857], + [5.48548061, 8.3257033, 6.91062699, 5.77563348, 0.0, 3.0001713], + [2.16468583, 2.16468583, 0.0, 2.10138857, 0.0, 0.0], + [3.67141987, 5.4641654, 4.19511734, 3.04381859, 0.0, 3.0001713], + [3.67141987, 6.66431728, 5.77563348, 4.6236001, 0.0, 0.0], + [0.0, 0.0, 0.0, 0.0, 0.0, 0.0], + [2.10138857, 2.95581821, 2.10138857, 2.10138857, 0.0, 0.0], + ], + ) np.testing.assert_almost_equal( slice_.zscores, [ @@ -204,6 +235,10 @@ def it_provides_values_for_cat_hs_x_mr(self): np.testing.assert_almost_equal( slice_.columns_std_err, load_python_expression("cat-hs-x-mr-col-stderr") ) + np.testing.assert_almost_equal( + slice_.columns_percentages_moe, + load_python_expression("cat-hs-x-mr-col-moe"), + ) np.testing.assert_almost_equal( slice_.zscores, load_python_expression("cat-hs-x-mr-zscores") ) @@ -241,6 +276,10 @@ def it_provides_values_for_mr_x_cat_hs(self): np.testing.assert_almost_equal( slice_.columns_std_err, load_python_expression("mr-x-cat-hs-col-stderr") ) + np.testing.assert_almost_equal( + slice_.columns_percentages_moe, + load_python_expression("mr-x-cat-hs-col-moe"), + ) np.testing.assert_almost_equal( slice_.pvals, load_python_expression("mr-x-cat-hs-pvals") ) diff --git a/tests/integration/test_headers_and_subtotals.py b/tests/integration/test_headers_and_subtotals.py index 49c53682b..ad3dd6abe 100644 --- a/tests/integration/test_headers_and_subtotals.py +++ b/tests/integration/test_headers_and_subtotals.py @@ -8,6 +8,7 @@ from cr.cube.cube import Cube from ..fixtures import CR +from ..util import load_python_expression class TestHeadersAndSubtotals(object): @@ -1007,125 +1008,17 @@ def test_col_labels_with_top_hs(self): def it_calculate_col_residuals_for_subtotals(self): slice_ = Cube(CR.CAT_X_CAT_HS_2ROWS_1COL).partitions[0] - np.testing.assert_almost_equal( slice_.columns_std_dev, - [ - [ - 0.49326036, - 0.43967108, - 0.43739495, - 0.4093598, - 0.42242603, - 0.41688475, - 0.47060217, - ], - [ - 0.35255854, - 0.47915742, - 0.47870319, - 0.46986171, - 0.3799671, - 0.42844691, - 0.4752359, - ], - [ - 0.44536177, - 0.48562091, - 0.48697607, - 0.49823831, - 0.49136926, - 0.49885606, - 0.46812184, - ], - [ - 0.22709084, - 0.4103259, - 0.41105414, - 0.39507899, - 0.32201514, - 0.35776034, - 0.37468029, - ], - [ - 0.4, - 0.49487166, - 0.49493871, - 0.49948985, - 0.45491071, - 0.48840757, - 0.49981735, - ], - [ - 0.41301152, - 0.372678, - 0.37676108, - 0.44107522, - 0.49937461, - 0.48614202, - 0.36229072, - ], - ], + load_python_expression("col-std-dev-cat-x-cat-hs-2rows-1col"), ) - np.testing.assert_almost_equal( slice_.columns_std_err, - [ - [ - 0.06651121, - 0.03916901, - 0.01766622, - 0.02325007, - 0.0211213, - 0.01564541, - 0.03868326, - ], - [ - 0.04753898, - 0.04268674, - 0.01933464, - 0.02668635, - 0.01899836, - 0.01607933, - 0.03906415, - ], - [ - 0.06005257, - 0.04326255, - 0.01966878, - 0.02829803, - 0.02456846, - 0.01872173, - 0.03847938, - ], - [ - 0.03062092, - 0.03655474, - 0.01660232, - 0.02243898, - 0.01610076, - 0.01342651, - 0.03079853, - ], - [ - 0.05393599, - 0.04408667, - 0.01999039, - 0.02836912, - 0.02274554, - 0.01832961, - 0.04108473, - ], - [ - 0.05569046, - 0.03320079, - 0.01521724, - 0.02505139, - 0.02496873, - 0.01824458, - 0.02978011, - ], - ], + load_python_expression("col-std-err-cat-x-cat-hs-2rows-1col"), + ) + np.testing.assert_almost_equal( + slice_.columns_percentages_moe, + load_python_expression("col-per-moe-cat-x-cat-hs-2rows-1col"), ) def it_computes_residuals_for_subtotals_1col_2rows(self): @@ -1980,6 +1873,16 @@ def it_calculates_residuals_for_columns_insertion(self): ], ) + # Test MoE for 1 column insertion + np.testing.assert_almost_equal( + slice_.table_percentages_moe, + [ + [17.21652881, 17.21652881, 0.0, 0.0, 21.77737778], + [10.58190352, 17.21652881, 14.51825185, 0.0, 19.20584194], + [0.0, 14.51825185, 10.58190352, 17.21652881, 14.51825185], + ], + ) + # Test col std dev np.testing.assert_almost_equal( slice_.columns_std_dev, @@ -2000,6 +1903,16 @@ def it_calculates_residuals_for_columns_insertion(self): ], ) + # Test col MoE + np.testing.assert_almost_equal( + slice_.columns_percentages_moe, + [ + [42.43446536, 33.54739046, 0.0, 0.0, 28.28964358], + [42.43446536, 33.54739046, 53.34346349, 0.0, 26.67173175], + [0.0, 30.00569821, 53.34346349, 0.0, 21.08585536], + ], + ) + slice_ = Cube(CR.CA_X_CAT_HS).partitions[0] # Test zscores for 2 columns insertion bottom and interleaved @@ -2163,6 +2076,20 @@ def it_calculates_residuals_for_rows_insertion(self): ], ) + # Test MoE for 1 row insertion + np.testing.assert_almost_equal( + slice_.table_percentages_moe, + [ + [9.47425342, 8.51416179, 6.4011466, 0.0, 2.10138857], + [5.48548061, 6.91062699, 5.77563348, 0.0, 3.0001713], + [2.16468583, 0.0, 2.10138857, 0.0, 0.0], + [3.67141987, 4.19511734, 3.04381859, 0.0, 3.0001713], + [3.67141987, 5.77563348, 4.6236001, 0.0, 0.0], + [0.0, 0.0, 0.0, 0.0, 0.0], + [2.10138857, 2.10138857, 2.10138857, 0.0, 0.0], + ], + ) + # Test col std deviation for 1 row insertion np.testing.assert_almost_equal( slice_.columns_std_dev, @@ -2190,6 +2117,19 @@ def it_calculates_residuals_for_rows_insertion(self): [0.02675483, 0.0292589, 0.05070657, np.nan, 0.0], ], ) + # Test col MoE for 1 row insertion + np.testing.assert_almost_equal( + slice_.columns_percentages_moe, + [ + [13.51426726, 16.59188199, 22.48817088, np.nan, 53.31123764], + [12.89084933, 16.3342716, 22.23457567, np.nan, 53.31123764], + [5.39900809, 0.0, 9.93830606, np.nan, 0.0], + [8.99374047, 11.08349041, 14.03617724, np.nan, 53.31123764], + [8.99374047, 14.52191695, 19.75240786, np.nan, 0.0], + [0.0, 0.0, 0.0, np.nan, 0.0], + [5.24385075, 5.73464003, 9.93830606, np.nan, 0.0], + ], + ) slice_ = Cube(CR.FOOD_GROUP_X_SHAPE_OF_PASTA_2ROWS_INSERTION).partitions[0] @@ -2497,6 +2437,15 @@ def it_calculates_residuals_for_cat_x_cat_with_missing_1_col_insertion(self): ], ) + # Test MoE for 1 column insertion at left + np.testing.assert_almost_equal( + slice_.table_percentages_moe, + [ + [2.74144167, 0.0, 0.0, 2.01473624, 2.34177324, 1.19627333, 1.47999058], + [2.78474723, 2.06247142, 2.41916703, 0.0, 0.0, 1.41377833, 1.4556357], + ], + ) + # Test col std dev for 1 column insertion at left np.testing.assert_almost_equal( slice_.columns_std_dev, @@ -2515,6 +2464,15 @@ def it_calculates_residuals_for_cat_x_cat_with_missing_1_col_insertion(self): ], ) + # Test MoE err for 1 column insertion at left + np.testing.assert_almost_equal( + slice_.columns_percentages_moe, + [ + [3.30479837, 0.0, 0.0, 0.0, 0.0, 8.42914245, 7.58210469], + [3.30479837, 0.0, 0.0, 0.0, 0.0, 8.42914245, 7.58210469], + ], + ) + def it_calculates_residuals_for_cat_x_num_hs_pruned_with_3_rows_insertions(self): transforms = { "rows_dimension": {"prune": True}, diff --git a/tests/integration/test_multiple_response.py b/tests/integration/test_multiple_response.py index 52ef302e3..3af00237a 100644 --- a/tests/integration/test_multiple_response.py +++ b/tests/integration/test_multiple_response.py @@ -375,11 +375,62 @@ def test_various_measures_from_r_rows_margin(): ], ] + expected_col_percentages_moe = [ + [ + 1.1068377, + 1.1476323, + 1.5644486, + 2.2820312, + 2.5268353, + 1.1247901, + 5.2236999, + 4.1444169, + ], + [ + 1.1068377, + 1.1476323, + 1.5644486, + 2.2820312, + 2.5268353, + 1.1247901, + 5.2236999, + 4.1444169, + ], + ] + expected_table_percentages_moe = [ + [ + 0.36338981, + 0.36088727, + 0.27519563, + 0.19008035, + 0.17462327, + 0.34656327, + 0.08192326, + 0.10570314, + ], + [ + 0.38016461, + 0.36106843, + 0.27048043, + 0.19077377, + 0.17016363, + 0.38239139, + 0.08624406, + 0.10624822, + ], + ] + np.testing.assert_almost_equal(slice_.zscores, expected_zscores) np.testing.assert_almost_equal(slice_.table_std_err, expected_table_std_err) np.testing.assert_almost_equal(slice_.table_std_dev, expected_table_std_dev) np.testing.assert_almost_equal(slice_.columns_std_dev, expected_col_std_dev) np.testing.assert_almost_equal(slice_.columns_std_err, expected_col_std_err) + np.testing.assert_almost_equal( + slice_.columns_percentages_moe, expected_col_percentages_moe + ) + np.testing.assert_almost_equal( + slice_.table_percentages_moe, expected_table_percentages_moe + ) def test_mr_x_single_wave(): @@ -441,6 +492,10 @@ def test_std_deviation_std_error_array_x_mr_by_row(): slice_.table_std_err, [[0.02978762, 0.00971635, 0.03292998], [0.02918338, 0.03472281, 0.02929588]], ) + np.testing.assert_array_almost_equal( + slice_.table_percentages_moe, + [[5.83826629, 1.90437053, 6.45415801], [5.71983772, 6.80554616, 5.74188756]], + ) np.testing.assert_array_almost_equal( slice_.columns_std_dev, [[0.49978635, 0.20331906, 0.49121125], [0.49978635, 0.20331906, 0.49121125]], @@ -449,6 +504,10 @@ def test_std_deviation_std_error_array_x_mr_by_row(): slice_.columns_std_err, [[0.05158518, 0.02113084, 0.04615627], [0.05158518, 0.02113084, 0.04615627]], ) + np.testing.assert_array_almost_equal( + slice_.columns_percentages_moe, + [[10.11050978, 4.14156918, 9.04646295], [10.11050978, 4.14156918, 9.04646295]], + ) def test_array_x_mr_by_cell(): @@ -493,6 +552,15 @@ def test_cat_x_mr_aug_zscores(): [0.00409039, 0.00545342, 0.00594324, 0.0055188, 0.00600108], ], ) + np.testing.assert_array_almost_equal( + slice_.table_percentages_moe, + [ + [1.08261432, 1.16171617, 1.27809263, 1.11256038, 1.48112581], + [1.09260745, 1.22848624, 1.50359016, 1.181889, 1.52217263], + [0.98049777, 1.24772811, 1.39572199, 1.20304775, 1.42889422], + [0.80170092, 1.06885106, 1.16485428, 1.08166431, 1.17618943], + ], + ) np.testing.assert_almost_equal( slice_.columns_std_dev, [ @@ -511,6 +579,15 @@ def test_cat_x_mr_aug_zscores(): [0.023515, 0.02167449, 0.01813225, 0.02305351, 0.01661564], ], ) + np.testing.assert_almost_equal( + slice_.columns_percentages_moe, + [ + [5.75714482, 4.52392763, 3.82664558, 4.61399677, 3.89836724], + [5.78910352, 4.70374716, 4.28140404, 4.81520916, 3.97036572], + [5.39112616, 4.75233018, 4.08096539, 4.87263958, 3.80094107], + [4.60885461, 4.24812161, 3.55385613, 4.51840483, 3.25660577], + ], + ) def test_cat_x_mr_and_cat_x_mr_augmented_various_measures(): @@ -523,12 +600,18 @@ def test_cat_x_mr_and_cat_x_mr_augmented_various_measures(): np.testing.assert_array_almost_equal(slice_.zscores, slice2_.zscores) np.testing.assert_array_almost_equal(slice_.table_std_dev, slice2_.table_std_dev) np.testing.assert_array_almost_equal(slice_.table_std_err, slice2_.table_std_err) + np.testing.assert_array_almost_equal( + slice_.table_percentages_moe, slice2_.table_percentages_moe + ) np.testing.assert_array_almost_equal( slice_.columns_std_dev, slice2_.columns_std_dev ) np.testing.assert_array_almost_equal( slice_.columns_std_err, slice2_.columns_std_err ) + np.testing.assert_array_almost_equal( + slice_.columns_percentages_moe, slice2_.columns_percentages_moe + ) assert slice_.shape == (4, 5) assert slice2_.shape == (4, 5)