From 105a55e0e02bedb558691dc48651d7ea7e7436e7 Mon Sep 17 00:00:00 2001 From: Slobodan Ilic Date: Tue, 27 Oct 2020 16:58:17 +0100 Subject: [PATCH 1/6] black --- .pre-commit-config.yaml | 1 - src/cr/cube/cubepart.py | 34 ++++++++++++++++------------------ 2 files changed, 16 insertions(+), 19 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 6363f0961..32801d4c2 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -11,4 +11,3 @@ repos: rev: stable hooks: - id: black - language_version: python3.6 diff --git a/src/cr/cube/cubepart.py b/src/cr/cube/cubepart.py index 4263f3b71..5eb6e0c02 100644 --- a/src/cr/cube/cubepart.py +++ b/src/cr/cube/cubepart.py @@ -84,7 +84,7 @@ def dimension_types(self): return tuple(d.dimension_type for d in self._dimensions) def evaluate(self, measure_expr): - """ -> 1D/2D ndarray, values evaluated given the function specification + """-> 1D/2D ndarray, values evaluated given the function specification The `function_spec` contains the function to apply and its parameters, e.g.: ``` @@ -337,17 +337,15 @@ def inserted_row_idxs(self): @lazyproperty def insertions(self): """Returns masked array with residuals for insertions - - 0 1 2 3 4 5 6 - 0 inf inf inf inf inf -2.9 inf - 1 inf inf inf inf inf -4.3 inf - 2 2.5 1.3 3.3 -0.70 -7.25 -6.52 2.25 - 3 inf inf inf inf inf -2.51 inf - 4 -1.16 2.20 5.84 1.78 -8.48 -5.92 0.93 - 5 inf inf inf inf inf 9.70 inf - - Only the insertions residuals are showed in a inf masked array - """ + 0 1 2 3 4 5 6 + 0 inf inf inf inf inf -2.9 inf + 1 inf inf inf inf inf -4.3 inf + 2 2.5 1.3 3.3 -0.70 -7.25 -6.52 2.25 + 3 inf inf inf inf inf -2.51 inf + 4 -1.16 2.20 5.84 1.78 -8.48 -5.92 0.93 + 5 inf inf inf inf inf 9.70 inf + + Only the insertions residuals are showed in a inf masked array""" inserted_rows = self.inserted_row_idxs inserted_cols = self.inserted_column_idxs if not inserted_cols and not inserted_rows: @@ -601,7 +599,7 @@ def scale_means_rows_margin(self): @lazyproperty def scale_median_column(self): - """ -> np.int64 ndarray of the columns scale median + """-> np.int64 ndarray of the columns scale median The median is calculated using the standard algebra applied to the numeric values repeated for each related counts value @@ -621,7 +619,7 @@ def scale_median_column(self): @lazyproperty def scale_median_row(self): - """ -> np.int64 ndarray of the rows scale median + """-> np.int64 ndarray of the rows scale median The median is calculated using the standard algebra applied to the numeric values repeated for each related counts value @@ -801,7 +799,7 @@ def unweighted_counts(self): @lazyproperty def var_scale_means_column(self): - """ -> 1D np.ndarray of the column variance values for scales + """-> 1D np.ndarray of the column variance values for scales Note: the variance for scale is defined as sum((Yi−Y~)2/(N)), where Y~ is the mean of the data. @@ -822,7 +820,7 @@ def var_scale_means_column(self): @lazyproperty def var_scale_means_row(self): - """ -> 1D np.ndarray of the row variance values for scales + """-> 1D np.ndarray of the row variance values for scales Note: the variance for scale is defined as sum((Yi−Y~)2/(N)), where Y~ is the mean of the data. @@ -1039,7 +1037,7 @@ def scale_mean(self): @lazyproperty def scale_median(self): - """ -> np.int64, the median of scales + """-> np.int64, the median of scales The median is calculated using the standard algebra applied to the numeric values repeated for each related counts value @@ -1207,7 +1205,7 @@ def _numeric_values(self): @lazyproperty def _numeric_values_mask(self): - """ -> np.ndarray, boolean elements for each element in rows dimension." + """-> np.ndarray, boolean elements for each element in rows dimension." This array contains True or False according to the nan in the numeric_values array From c4c5f3b82cf5dba34277ce436dc6caba9e12ded4 Mon Sep 17 00:00:00 2001 From: Slobodan Ilic Date: Wed, 28 Oct 2020 13:58:45 +0100 Subject: [PATCH 2/6] tests: Margin of Error for columns and table --- tests/expectations/cat-hs-x-mr-col-moe.py | 10 ++ tests/expectations/mr-x-cat-hs-col-moe.py | 52 ++++++++ tests/integration/test_cube.py | 14 +- tests/integration/test_cubepart.py | 37 ++++++ .../integration/test_headers_and_subtotals.py | 124 ++++++++++++++++++ tests/integration/test_multiple_response.py | 75 +++++++++++ 6 files changed, 311 insertions(+), 1 deletion(-) create mode 100644 tests/expectations/cat-hs-x-mr-col-moe.py create mode 100644 tests/expectations/mr-x-cat-hs-col-moe.py diff --git a/tests/expectations/cat-hs-x-mr-col-moe.py b/tests/expectations/cat-hs-x-mr-col-moe.py new file mode 100644 index 000000000..6aa517349 --- /dev/null +++ b/tests/expectations/cat-hs-x-mr-col-moe.py @@ -0,0 +1,10 @@ +[ + [17.30181459, 7.76167031, 2.58919072, 1.96696835, 2.47551804], + [13.7464937, 8.90617586, 6.33244289, 3.30105103, 3.99757694], + [16.79140176, 10.72624566, 6.68369737, 3.71703613, 4.42265749], + [0.0, 0.0, 0.0, 0.0, 0.0], + [11.79621344, 11.12694011, 8.58251349, 4.91075221, 4.79812657], + [14.76383504, 11.06914477, 8.61521467, 5.11357472, 4.93988229], + [0.0, 0.0, 0.0, 0.0, 0.0], + [16.79140176, 10.72624566, 6.68369737, 3.71703613, 4.42265749], +] diff --git a/tests/expectations/mr-x-cat-hs-col-moe.py b/tests/expectations/mr-x-cat-hs-col-moe.py new file mode 100644 index 000000000..438842dfa --- /dev/null +++ b/tests/expectations/mr-x-cat-hs-col-moe.py @@ -0,0 +1,52 @@ +[ + [ + 20.15560409, + 13.30738328, + 12.78408718, + float("NaN"), + 6.55784933, + 7.97027717, + float("NaN"), + 5.21298079, + ], + [ + 24.45137595, + 14.1680129, + 12.66154543, + float("NaN"), + 10.44549053, + 10.28893289, + float("NaN"), + 7.33090439, + ], + [ + 23.63020102, + 15.291979, + 13.26439551, + float("NaN"), + 10.15732844, + 9.67359437, + float("NaN"), + 7.01221198, + ], + [ + 20.08846861, + 11.45121769, + 9.94950815, + float("NaN"), + 6.1292629, + 4.7740531, + float("NaN"), + 3.81368531, + ], + [ + 14.54617601, + 7.82024004, + 6.92340026, + float("NaN"), + 6.27830645, + 5.73799481, + float("NaN"), + 4.2383771, + ], +] diff --git a/tests/integration/test_cube.py b/tests/integration/test_cube.py index 8dd2b4d4e..a0d13ca8e 100644 --- a/tests/integration/test_cube.py +++ b/tests/integration/test_cube.py @@ -848,10 +848,15 @@ def test_calculate_various_measures_axis_0(self): [0.05880176, 0.03705843, 0.02576154, 0.03360238, 0.04526793, 0.07517074], [0.05880176, 0.03705843, 0.02576154, 0.03360238, 0.04526793, 0.07517074], ] + expected_col_moe = [ + [11.5249326, 7.2633194, 5.0491687, 6.5859452, 8.8723517, 14.7331947], + [11.5249326, 7.2633194, 5.0491687, 6.5859452, 8.8723517, 14.7331947], + ] np.testing.assert_almost_equal(slice_.table_std_dev, expected_table_std_dev) np.testing.assert_almost_equal(slice_.table_std_err, expected_table_std_err) np.testing.assert_almost_equal(slice_.columns_std_dev, expected_col_std_dev) np.testing.assert_almost_equal(slice_.columns_std_err, expected_col_std_err) + np.testing.assert_almost_equal(slice_.columns_moe, expected_col_moe) np.testing.assert_almost_equal(slice_.zscores, expected_zscore) def test_pvals(self): @@ -987,15 +992,20 @@ def test_various_measures_admit_by_dept_unweighted_rows(self): [0.01567414, 0.01993363, 0.01575024, 0.01682826, 0.01795892, 0.00918798], [0.01567414, 0.01993363, 0.01575024, 0.01682826, 0.01795892, 0.00918798], ] + expected_col_moe = [ + [3.07207565, 3.90691882, 3.0869894, 3.29827837, 3.51988285, 1.80081013], + [3.07207565, 3.90691882, 3.0869894, 3.29827837, 3.51988285, 1.80081013], + ] np.testing.assert_almost_equal(slice_.zscores, expected_zscores) np.testing.assert_almost_equal(slice_.table_std_dev, expected_table_std_dev) np.testing.assert_almost_equal(slice_.table_std_err, expected_table_std_err) np.testing.assert_almost_equal(slice_.columns_std_dev, expected_col_std_dev) np.testing.assert_almost_equal(slice_.columns_std_err, expected_col_std_err) + np.testing.assert_almost_equal(slice_.columns_moe, expected_col_moe) def test_various_measures_admit_by_gender_weighted_rows(self): - """ see + """see https://github.com/Crunch-io/whaam/blob/master/base/stats/tests/ zvalues-spec.js#L67 """ @@ -1010,12 +1020,14 @@ def test_various_measures_admit_by_gender_weighted_rows(self): expected_table_std_err = [[0.00659641, 0.00492018], [0.0070529, 0.00675348]] expected_col_std_dev = [[0.49668253, 0.45933735], [0.49668253, 0.45933735]] expected_col_std_err = [[0.00966009, 0.01080163], [0.00966009, 0.01080163]] + expected_col_moe = [[1.89334366, 2.11708092], [1.89334366, 2.11708092]] np.testing.assert_almost_equal(slice_.zscores, expected_zscores) np.testing.assert_almost_equal(slice_.table_std_dev, expected_table_std_dev) np.testing.assert_almost_equal(slice_.table_std_err, expected_table_std_err) np.testing.assert_almost_equal(slice_.columns_std_dev, expected_col_std_dev) np.testing.assert_almost_equal(slice_.columns_std_err, expected_col_std_err) + np.testing.assert_almost_equal(slice_.columns_moe, expected_col_moe) def test_selected_crosstab_as_array(self): slice_ = Cube(CR.SELECTED_CROSSTAB_4).partitions[0] diff --git a/tests/integration/test_cubepart.py b/tests/integration/test_cubepart.py index 9d1657b77..4a69f5c58 100644 --- a/tests/integration/test_cubepart.py +++ b/tests/integration/test_cubepart.py @@ -117,6 +117,25 @@ def it_provides_values_for_cat_x_cat_pruning_hs(self): [0.02675483, 0.01976428, 0.0292589, 0.05070657, np.nan, 0.0], ], ) + np.testing.assert_almost_equal( + slice_.columns_moe, + [ + [ + 13.51426726, + 10.79256616, + 16.59188199, + 22.48817088, + np.nan, + 53.31123764, + ], + [12.89084933, 10.4849174, 16.3342716, 22.23457567, np.nan, 53.31123764], + [5.39900809, 2.84154374, 0.0, 9.93830606, np.nan, 0.0], + [8.99374047, 7.09061236, 11.08349041, 14.03617724, np.nan, 53.31123764], + [8.99374047, 8.57336265, 14.52191695, 19.75240786, np.nan, 0.0], + [0.0, 0.0, 0.0, 0.0, np.nan, 0.0], + [5.24385075, 3.87372796, 5.73464003, 9.93830606, np.nan, 0.0], + ], + ) assert slice_.dimension_types == (DT.CAT, DT.CAT) assert slice_.inserted_column_idxs == (1,) assert slice_.inserted_row_idxs == (1,) @@ -176,6 +195,18 @@ def it_provides_values_for_cat_x_cat_pruning_hs(self): [0.01072157, 0.01508098, 0.01072157, 0.01072157, 0.0, 0.0], ], ) + np.testing.assert_almost_equal( + slice_.table_moe, + [ + [9.47425342, 10.23031735, 8.51416179, 6.4011466, 0.0, 2.10138857], + [5.48548061, 8.3257033, 6.91062699, 5.77563348, 0.0, 3.0001713], + [2.16468583, 2.16468583, 0.0, 2.10138857, 0.0, 0.0], + [3.67141987, 5.4641654, 4.19511734, 3.04381859, 0.0, 3.0001713], + [3.67141987, 6.66431728, 5.77563348, 4.6236001, 0.0, 0.0], + [0.0, 0.0, 0.0, 0.0, 0.0, 0.0], + [2.10138857, 2.95581821, 2.10138857, 2.10138857, 0.0, 0.0], + ], + ) np.testing.assert_almost_equal( slice_.zscores, [ @@ -204,6 +235,9 @@ def it_provides_values_for_cat_hs_x_mr(self): np.testing.assert_almost_equal( slice_.columns_std_err, load_python_expression("cat-hs-x-mr-col-stderr") ) + np.testing.assert_almost_equal( + slice_.columns_moe, load_python_expression("cat-hs-x-mr-col-moe") + ) np.testing.assert_almost_equal( slice_.zscores, load_python_expression("cat-hs-x-mr-zscores") ) @@ -241,6 +275,9 @@ def it_provides_values_for_mr_x_cat_hs(self): np.testing.assert_almost_equal( slice_.columns_std_err, load_python_expression("mr-x-cat-hs-col-stderr") ) + np.testing.assert_almost_equal( + slice_.columns_moe, load_python_expression("mr-x-cat-hs-col-moe") + ) np.testing.assert_almost_equal( slice_.pvals, load_python_expression("mr-x-cat-hs-pvals") ) diff --git a/tests/integration/test_headers_and_subtotals.py b/tests/integration/test_headers_and_subtotals.py index 49c53682b..9696c8c4d 100644 --- a/tests/integration/test_headers_and_subtotals.py +++ b/tests/integration/test_headers_and_subtotals.py @@ -1127,6 +1127,65 @@ def it_calculate_col_residuals_for_subtotals(self): ], ], ) + np.testing.assert_almost_equal( + slice_.columns_moe, + [ + [ + 13.03595844, + 7.67698551, + 3.46251469, + 4.55693081, + 4.13969905, + 3.06644326, + 7.58177966, + ], + [ + 9.31746956, + 8.36644659, + 3.78951977, + 5.23042895, + 3.72360922, + 3.15148999, + 7.65643283, + ], + [ + 11.77008734, + 8.47930382, + 3.85500973, + 5.5463129, + 4.8153303, + 3.66939254, + 7.5418196, + ], + [ + 6.0015905, + 7.16459682, + 3.25399504, + 4.39795907, + 3.1556904, + 2.63154691, + 6.03640099, + ], + [ + 10.57125967, + 8.64082889, + 3.91804373, + 5.56024488, + 4.45804303, + 3.59253748, + 8.05245981, + ], + [ + 10.91512996, + 6.50723624, + 2.9825236, + 4.90998204, + 4.89378128, + 3.57587294, + 5.83679508, + ], + ], + ) def it_computes_residuals_for_subtotals_1col_2rows(self): slice_ = Cube(CR.CAT_X_CAT_HS_2ROWS_1COL).partitions[0] @@ -1980,6 +2039,16 @@ def it_calculates_residuals_for_columns_insertion(self): ], ) + # Test MoE for 1 column insertion + np.testing.assert_almost_equal( + slice_.table_moe, + [ + [17.21652881, 17.21652881, 0.0, 0.0, 21.77737778], + [10.58190352, 17.21652881, 14.51825185, 0.0, 19.20584194], + [0.0, 14.51825185, 10.58190352, 17.21652881, 14.51825185], + ], + ) + # Test col std dev np.testing.assert_almost_equal( slice_.columns_std_dev, @@ -2000,6 +2069,16 @@ def it_calculates_residuals_for_columns_insertion(self): ], ) + # Test col MoE + np.testing.assert_almost_equal( + slice_.columns_moe, + [ + [42.43446536, 33.54739046, 0.0, 0.0, 28.28964358], + [42.43446536, 33.54739046, 53.34346349, 0.0, 26.67173175], + [0.0, 30.00569821, 53.34346349, 0.0, 21.08585536], + ], + ) + slice_ = Cube(CR.CA_X_CAT_HS).partitions[0] # Test zscores for 2 columns insertion bottom and interleaved @@ -2163,6 +2242,20 @@ def it_calculates_residuals_for_rows_insertion(self): ], ) + # Test MoE for 1 row insertion + np.testing.assert_almost_equal( + slice_.table_moe, + [ + [9.47425342, 8.51416179, 6.4011466, 0.0, 2.10138857], + [5.48548061, 6.91062699, 5.77563348, 0.0, 3.0001713], + [2.16468583, 0.0, 2.10138857, 0.0, 0.0], + [3.67141987, 4.19511734, 3.04381859, 0.0, 3.0001713], + [3.67141987, 5.77563348, 4.6236001, 0.0, 0.0], + [0.0, 0.0, 0.0, 0.0, 0.0], + [2.10138857, 2.10138857, 2.10138857, 0.0, 0.0], + ], + ) + # Test col std deviation for 1 row insertion np.testing.assert_almost_equal( slice_.columns_std_dev, @@ -2190,6 +2283,19 @@ def it_calculates_residuals_for_rows_insertion(self): [0.02675483, 0.0292589, 0.05070657, np.nan, 0.0], ], ) + # Test col MoE for 1 row insertion + np.testing.assert_almost_equal( + slice_.columns_moe, + [ + [13.51426726, 16.59188199, 22.48817088, np.nan, 53.31123764], + [12.89084933, 16.3342716, 22.23457567, np.nan, 53.31123764], + [5.39900809, 0.0, 9.93830606, np.nan, 0.0], + [8.99374047, 11.08349041, 14.03617724, np.nan, 53.31123764], + [8.99374047, 14.52191695, 19.75240786, np.nan, 0.0], + [0.0, 0.0, 0.0, np.nan, 0.0], + [5.24385075, 5.73464003, 9.93830606, np.nan, 0.0], + ], + ) slice_ = Cube(CR.FOOD_GROUP_X_SHAPE_OF_PASTA_2ROWS_INSERTION).partitions[0] @@ -2497,6 +2603,15 @@ def it_calculates_residuals_for_cat_x_cat_with_missing_1_col_insertion(self): ], ) + # Test MoE for 1 column insertion at left + np.testing.assert_almost_equal( + slice_.table_moe, + [ + [2.74144167, 0.0, 0.0, 2.01473624, 2.34177324, 1.19627333, 1.47999058], + [2.78474723, 2.06247142, 2.41916703, 0.0, 0.0, 1.41377833, 1.4556357], + ], + ) + # Test col std dev for 1 column insertion at left np.testing.assert_almost_equal( slice_.columns_std_dev, @@ -2515,6 +2630,15 @@ def it_calculates_residuals_for_cat_x_cat_with_missing_1_col_insertion(self): ], ) + # Test MoE err for 1 column insertion at left + np.testing.assert_almost_equal( + slice_.columns_moe, + [ + [3.30479837, 0.0, 0.0, 0.0, 0.0, 8.42914245, 7.58210469], + [3.30479837, 0.0, 0.0, 0.0, 0.0, 8.42914245, 7.58210469], + ], + ) + def it_calculates_residuals_for_cat_x_num_hs_pruned_with_3_rows_insertions(self): transforms = { "rows_dimension": {"prune": True}, diff --git a/tests/integration/test_multiple_response.py b/tests/integration/test_multiple_response.py index 52ef302e3..ca49e3c5b 100644 --- a/tests/integration/test_multiple_response.py +++ b/tests/integration/test_multiple_response.py @@ -375,11 +375,58 @@ def test_various_measures_from_r_rows_margin(): ], ] + expected_col_moe = [ + [ + 1.1068377, + 1.1476323, + 1.5644486, + 2.2820312, + 2.5268353, + 1.1247901, + 5.2236999, + 4.1444169, + ], + [ + 1.1068377, + 1.1476323, + 1.5644486, + 2.2820312, + 2.5268353, + 1.1247901, + 5.2236999, + 4.1444169, + ], + ] + expected_table_moe = [ + [ + 0.36338981, + 0.36088727, + 0.27519563, + 0.19008035, + 0.17462327, + 0.34656327, + 0.08192326, + 0.10570314, + ], + [ + 0.38016461, + 0.36106843, + 0.27048043, + 0.19077377, + 0.17016363, + 0.38239139, + 0.08624406, + 0.10624822, + ], + ] + np.testing.assert_almost_equal(slice_.zscores, expected_zscores) np.testing.assert_almost_equal(slice_.table_std_err, expected_table_std_err) np.testing.assert_almost_equal(slice_.table_std_dev, expected_table_std_dev) np.testing.assert_almost_equal(slice_.columns_std_dev, expected_col_std_dev) np.testing.assert_almost_equal(slice_.columns_std_err, expected_col_std_err) + np.testing.assert_almost_equal(slice_.columns_moe, expected_col_moe) + np.testing.assert_almost_equal(slice_.table_moe, expected_table_moe) def test_mr_x_single_wave(): @@ -441,6 +488,10 @@ def test_std_deviation_std_error_array_x_mr_by_row(): slice_.table_std_err, [[0.02978762, 0.00971635, 0.03292998], [0.02918338, 0.03472281, 0.02929588]], ) + np.testing.assert_array_almost_equal( + slice_.table_moe, + [[5.83826629, 1.90437053, 6.45415801], [5.71983772, 6.80554616, 5.74188756]], + ) np.testing.assert_array_almost_equal( slice_.columns_std_dev, [[0.49978635, 0.20331906, 0.49121125], [0.49978635, 0.20331906, 0.49121125]], @@ -449,6 +500,10 @@ def test_std_deviation_std_error_array_x_mr_by_row(): slice_.columns_std_err, [[0.05158518, 0.02113084, 0.04615627], [0.05158518, 0.02113084, 0.04615627]], ) + np.testing.assert_array_almost_equal( + slice_.columns_moe, + [[10.11050978, 4.14156918, 9.04646295], [10.11050978, 4.14156918, 9.04646295]], + ) def test_array_x_mr_by_cell(): @@ -493,6 +548,15 @@ def test_cat_x_mr_aug_zscores(): [0.00409039, 0.00545342, 0.00594324, 0.0055188, 0.00600108], ], ) + np.testing.assert_array_almost_equal( + slice_.table_moe, + [ + [1.08261432, 1.16171617, 1.27809263, 1.11256038, 1.48112581], + [1.09260745, 1.22848624, 1.50359016, 1.181889, 1.52217263], + [0.98049777, 1.24772811, 1.39572199, 1.20304775, 1.42889422], + [0.80170092, 1.06885106, 1.16485428, 1.08166431, 1.17618943], + ], + ) np.testing.assert_almost_equal( slice_.columns_std_dev, [ @@ -511,6 +575,15 @@ def test_cat_x_mr_aug_zscores(): [0.023515, 0.02167449, 0.01813225, 0.02305351, 0.01661564], ], ) + np.testing.assert_almost_equal( + slice_.columns_moe, + [ + [5.75714482, 4.52392763, 3.82664558, 4.61399677, 3.89836724], + [5.78910352, 4.70374716, 4.28140404, 4.81520916, 3.97036572], + [5.39112616, 4.75233018, 4.08096539, 4.87263958, 3.80094107], + [4.60885461, 4.24812161, 3.55385613, 4.51840483, 3.25660577], + ], + ) def test_cat_x_mr_and_cat_x_mr_augmented_various_measures(): @@ -523,12 +596,14 @@ def test_cat_x_mr_and_cat_x_mr_augmented_various_measures(): np.testing.assert_array_almost_equal(slice_.zscores, slice2_.zscores) np.testing.assert_array_almost_equal(slice_.table_std_dev, slice2_.table_std_dev) np.testing.assert_array_almost_equal(slice_.table_std_err, slice2_.table_std_err) + np.testing.assert_array_almost_equal(slice_.table_moe, slice2_.table_moe) np.testing.assert_array_almost_equal( slice_.columns_std_dev, slice2_.columns_std_dev ) np.testing.assert_array_almost_equal( slice_.columns_std_err, slice2_.columns_std_err ) + np.testing.assert_array_almost_equal(slice_.columns_moe, slice2_.columns_moe) assert slice_.shape == (4, 5) assert slice2_.shape == (4, 5) From dc4327ed7b2895ca658d54f3fcd1d26dc83c5ea1 Mon Sep 17 00:00:00 2001 From: Slobodan Ilic Date: Wed, 28 Oct 2020 14:03:27 +0100 Subject: [PATCH 3/6] feat: implement MoE --- src/cr/cube/cubepart.py | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/src/cr/cube/cubepart.py b/src/cr/cube/cubepart.py index 5eb6e0c02..5dc7c57ca 100644 --- a/src/cr/cube/cubepart.py +++ b/src/cr/cube/cubepart.py @@ -239,6 +239,8 @@ class _Slice(CubePartition): dimensions which can be crosstabbed in a slice. """ + z_alpha = 1.959964 + def __init__(self, cube, slice_idx, transforms, population, mask_size): super(_Slice, self).__init__(cube, transforms) self._slice_idx = slice_idx @@ -291,6 +293,13 @@ def columns_dimension_type(self): def columns_margin(self): return np.array([column.margin for column in self._matrix.columns]).T + @lazyproperty + def columns_moe(self): + """Returns the margin of error (MoE) for cell percentages + `moe = z(0.25) * 100 * std_error` + """ + return self.z_alpha * 100 * self.columns_std_err + @lazyproperty def columns_std_dev(self): """Returns the standard deviation for cell percentages @@ -762,6 +771,10 @@ def table_margin(self): def table_margin_unpruned(self): return self._matrix.table_margin_unpruned + @lazyproperty + def table_moe(self): + return self.z_alpha * 100 * self.table_std_err + @lazyproperty def table_name(self): """Provides differentiated name for each stacked table of a 3D cube.""" From 71fdbc35d0a320e3fbf67106f4c59ee1eac3ac23 Mon Sep 17 00:00:00 2001 From: Slobodan Ilic Date: Wed, 28 Oct 2020 16:55:46 +0100 Subject: [PATCH 4/6] Comments from Ernesto and Mike --- src/cr/cube/cubepart.py | 33 +++++++++++++++++++-------------- 1 file changed, 19 insertions(+), 14 deletions(-) diff --git a/src/cr/cube/cubepart.py b/src/cr/cube/cubepart.py index 5dc7c57ca..aa9318480 100644 --- a/src/cr/cube/cubepart.py +++ b/src/cr/cube/cubepart.py @@ -239,7 +239,9 @@ class _Slice(CubePartition): dimensions which can be crosstabbed in a slice. """ - z_alpha = 1.959964 + # quantile of the normal cdf at .975 because the confidence + # interval is ± (.025 on each side) + Z_975 = 1.959964 def __init__(self, cube, slice_idx, transforms, population, mask_size): super(_Slice, self).__init__(cube, transforms) @@ -295,21 +297,21 @@ def columns_margin(self): @lazyproperty def columns_moe(self): - """Returns the margin of error (MoE) for cell percentages - `moe = z(0.25) * 100 * std_error` + """Returns the margin of error (MoE) for col percentages + `moe = Z_975 * 100 * std_error` (the * 100 part accounts for percentages) """ - return self.z_alpha * 100 * self.columns_std_err + return self.Z_975 * 100 * self.columns_std_err @lazyproperty def columns_std_dev(self): - """Returns the standard deviation for cell percentages + """Returns the standard deviation for col percentages `std_deviation = sqrt(variance)` """ return np.sqrt(self._columns_variance) @lazyproperty def columns_std_err(self): - """Returns the standard error for cell percentages + """Returns the standard error for col percentages `std_error = sqrt(variance/N)` """ return np.sqrt(self._columns_variance / self.columns_margin) @@ -648,7 +650,7 @@ def scale_median_row(self): @lazyproperty def scale_median_column_margin(self): - """ -> np.int64, represents the column scale median margin""" + """ -> np.int64 represents the column scale median margin""" if np.all(np.isnan(self._columns_dimension_numeric_values)): return None columns_margin = self.columns_margin @@ -664,7 +666,7 @@ def scale_median_column_margin(self): @lazyproperty def scale_median_row_margin(self): - """ -> np.int64, represents the rows scale median margin""" + """ -> np.int64 represents the rows scale median margin""" if np.all(np.isnan(self._rows_dimension_numeric_values)): return None rows_margin = self.rows_margin @@ -773,7 +775,10 @@ def table_margin_unpruned(self): @lazyproperty def table_moe(self): - return self.z_alpha * 100 * self.table_std_err + """Returns the margin of error (MoE) for table percentages + `moe = Z_975 * 100 * std_error` (the * 100 part accounts for percentages) + """ + return self.Z_975 * 100 * self.table_std_err @lazyproperty def table_name(self): @@ -873,7 +878,7 @@ def _columns_dimension_numeric_values(self): @lazyproperty def _columns_variance(self): - """Returns the variance for cell percentages + """Returns the variance for col percentages `variance = p * (1-p)` """ return ( @@ -1050,7 +1055,7 @@ def scale_mean(self): @lazyproperty def scale_median(self): - """-> np.int64, the median of scales + """-> np.int64 the median of scales The median is calculated using the standard algebra applied to the numeric values repeated for each related counts value @@ -1096,12 +1101,12 @@ def smoothed_dimension_dict(self): @lazyproperty def standard_deviation(self): - """ -> np.ndarray, percentages standard deviation""" + """ -> np.ndarray percentages standard deviation""" return np.sqrt(self._variance) @lazyproperty def standard_error(self): - """ -> np.ndarray, percentages standard error""" + """ -> np.ndarray percentages standard error""" if self.dimension_types[0] == DT.MR: return np.sqrt(self._variance / self.bases) return np.sqrt(self._variance / np.sum(self.rows_margin)) @@ -1218,7 +1223,7 @@ def _numeric_values(self): @lazyproperty def _numeric_values_mask(self): - """-> np.ndarray, boolean elements for each element in rows dimension." + """-> np.ndarray boolean elements for each element in rows dimension." This array contains True or False according to the nan in the numeric_values array From 4d10e2465db81eb055b23632c931a8591fd6b52a Mon Sep 17 00:00:00 2001 From: Slobodan Ilic Date: Thu, 29 Oct 2020 20:08:32 +0100 Subject: [PATCH 5/6] cr: address steve's comments --- src/cr/cube/cubepart.py | 71 ++++--- .../col-per-moe-cat-x-cat-hs-2rows-1col.py | 56 ++++++ .../col-std-dev-cat-x-cat-hs-2rows-1col.py | 56 ++++++ .../col-std-err-cat-x-cat-hs-2rows-1col.py | 56 ++++++ tests/integration/test_cube.py | 21 +- tests/integration/test_cubepart.py | 10 +- .../integration/test_headers_and_subtotals.py | 188 +----------------- tests/integration/test_multiple_response.py | 28 ++- 8 files changed, 259 insertions(+), 227 deletions(-) create mode 100644 tests/expectations/col-per-moe-cat-x-cat-hs-2rows-1col.py create mode 100644 tests/expectations/col-std-dev-cat-x-cat-hs-2rows-1col.py create mode 100644 tests/expectations/col-std-err-cat-x-cat-hs-2rows-1col.py diff --git a/src/cr/cube/cubepart.py b/src/cr/cube/cubepart.py index aa9318480..407af6b54 100644 --- a/src/cr/cube/cubepart.py +++ b/src/cr/cube/cubepart.py @@ -84,7 +84,7 @@ def dimension_types(self): return tuple(d.dimension_type for d in self._dimensions) def evaluate(self, measure_expr): - """-> 1D/2D ndarray, values evaluated given the function specification + """Returns 1D/2D ndarray, values evaluated given the function specification The `function_spec` contains the function to apply and its parameters, e.g.: ``` @@ -239,8 +239,9 @@ class _Slice(CubePartition): dimensions which can be crosstabbed in a slice. """ - # quantile of the normal cdf at .975 because the confidence - # interval is ± (.025 on each side) + # ---This is the quantile of the normal Cumulative Distribution Function (CDF) at + # ---probability 97.5% (p=.975), since the computed confidence interval + # ---is ±2.5% (.025) on each side of the CDF. Z_975 = 1.959964 def __init__(self, cube, slice_idx, transforms, population, mask_size): @@ -296,22 +297,26 @@ def columns_margin(self): return np.array([column.margin for column in self._matrix.columns]).T @lazyproperty - def columns_moe(self): - """Returns the margin of error (MoE) for col percentages - `moe = Z_975 * 100 * std_error` (the * 100 part accounts for percentages) + def columns_percentages_moe(self): + """1D/2D np.float64 ndarray of margin-of-error (MoE) for columns percentages. + + The values are represented as percentages, analogue to the `table_percentages` + property. This means that the value of 3.5% will have the value 3.5 (not 0.035). + The values can be np.nan when the corresponding percentage is also np.nan, which + happens when the respective columns margin is 0. """ return self.Z_975 * 100 * self.columns_std_err @lazyproperty def columns_std_dev(self): - """Returns the standard deviation for col percentages + """Returns the standard deviation for column percentages `std_deviation = sqrt(variance)` """ return np.sqrt(self._columns_variance) @lazyproperty def columns_std_err(self): - """Returns the standard error for col percentages + """Returns the standard error for column percentages `std_error = sqrt(variance/N)` """ return np.sqrt(self._columns_variance / self.columns_margin) @@ -347,7 +352,8 @@ def inserted_row_idxs(self): @lazyproperty def insertions(self): - """Returns masked array with residuals for insertions + """2D np.float64 np.ma.core.MaskedArray of residuals for insertions. + 0 1 2 3 4 5 6 0 inf inf inf inf inf -2.9 inf 1 inf inf inf inf inf -4.3 inf @@ -356,7 +362,8 @@ def insertions(self): 4 -1.16 2.20 5.84 1.78 -8.48 -5.92 0.93 5 inf inf inf inf inf 9.70 inf - Only the insertions residuals are showed in a inf masked array""" + Only the insertions residuals are showed in a inf masked array. + """ inserted_rows = self.inserted_row_idxs inserted_cols = self.inserted_column_idxs if not inserted_cols and not inserted_rows: @@ -610,7 +617,7 @@ def scale_means_rows_margin(self): @lazyproperty def scale_median_column(self): - """-> np.int64 ndarray of the columns scale median + """np.int64 ndarray of the columns scale median The median is calculated using the standard algebra applied to the numeric values repeated for each related counts value @@ -630,7 +637,7 @@ def scale_median_column(self): @lazyproperty def scale_median_row(self): - """-> np.int64 ndarray of the rows scale median + """np.int64 ndarray of the rows scale median The median is calculated using the standard algebra applied to the numeric values repeated for each related counts value @@ -650,7 +657,7 @@ def scale_median_row(self): @lazyproperty def scale_median_column_margin(self): - """ -> np.int64 represents the column scale median margin""" + """np.int64 represents the column scale median margin""" if np.all(np.isnan(self._columns_dimension_numeric_values)): return None columns_margin = self.columns_margin @@ -666,7 +673,7 @@ def scale_median_column_margin(self): @lazyproperty def scale_median_row_margin(self): - """ -> np.int64 represents the rows scale median margin""" + """np.int64 represents the rows scale median margin""" if np.all(np.isnan(self._rows_dimension_numeric_values)): return None rows_margin = self.rows_margin @@ -682,28 +689,28 @@ def scale_median_row_margin(self): @lazyproperty def scale_std_dev_column(self): - """ -> 1D np.ndarray of the standard deviation column of scales""" + """1D np.ndarray of the standard deviation column of scales""" if np.all(np.isnan(self._columns_dimension_numeric_values)): return None return np.sqrt(self.var_scale_means_column) @lazyproperty def scale_std_dev_row(self): - """ -> 1D np.ndarray of the standard deviation row of scales""" + """1D np.ndarray of the standard deviation row of scales""" if np.all(np.isnan(self._rows_dimension_numeric_values)): return None return np.sqrt(self.var_scale_means_row) @lazyproperty def scale_std_err_column(self): - """ -> 1D np.ndarray of the standard error column of scales""" + """1D np.ndarray of the standard error column of scales""" if np.all(np.isnan(self._columns_dimension_numeric_values)): return None return self.scale_std_dev_column / np.sqrt(self.rows_margin) @lazyproperty def scale_std_err_row(self): - """ -> 1D np.ndarray of the standard error row of scales""" + """1D np.ndarray of the standard error row of scales""" if np.all(np.isnan(self._rows_dimension_numeric_values)): return None return self.scale_std_dev_row / np.sqrt(self.columns_margin) @@ -774,9 +781,13 @@ def table_margin_unpruned(self): return self._matrix.table_margin_unpruned @lazyproperty - def table_moe(self): - """Returns the margin of error (MoE) for table percentages - `moe = Z_975 * 100 * std_error` (the * 100 part accounts for percentages) + def table_percentages_moe(self): + """1D/2D np.float64 ndarray of margin-of-error (MoE) for table percentages. + + The values are represented as percentages, analogue to the `table_percentages` + property. This means that the value of 3.5% will have the value 3.5 (not 0.035). + The values can be np.nan when the corresponding percentage is also np.nan, which + happens when the respective table margin is 0. """ return self.Z_975 * 100 * self.table_std_err @@ -817,7 +828,7 @@ def unweighted_counts(self): @lazyproperty def var_scale_means_column(self): - """-> 1D np.ndarray of the column variance values for scales + """1D np.ndarray of the column variance values for scales Note: the variance for scale is defined as sum((Yi−Y~)2/(N)), where Y~ is the mean of the data. @@ -838,7 +849,7 @@ def var_scale_means_column(self): @lazyproperty def var_scale_means_row(self): - """-> 1D np.ndarray of the row variance values for scales + """1D np.ndarray of the row variance values for scales Note: the variance for scale is defined as sum((Yi−Y~)2/(N)), where Y~ is the mean of the data. @@ -878,7 +889,7 @@ def _columns_dimension_numeric_values(self): @lazyproperty def _columns_variance(self): - """Returns the variance for col percentages + """Returns the variance for column percentages `variance = p * (1-p)` """ return ( @@ -1055,7 +1066,7 @@ def scale_mean(self): @lazyproperty def scale_median(self): - """-> np.int64 the median of scales + """np.int64 the median of scales The median is calculated using the standard algebra applied to the numeric values repeated for each related counts value @@ -1069,14 +1080,14 @@ def scale_median(self): @lazyproperty def scale_std_dev(self): - """ -> np.float64, the standard deviation of scales""" + """np.float64, the standard deviation of scales""" if np.all(np.isnan(self._numeric_values)): return None return np.sqrt(self.var_scale_mean) @lazyproperty def scale_std_err(self): - """ -> np.float64, the standard error of scales""" + """np.float64, the standard error of scales""" if np.all(np.isnan(self._numeric_values)): return None counts = self._counts_as_array[self._numeric_values_mask] @@ -1101,12 +1112,12 @@ def smoothed_dimension_dict(self): @lazyproperty def standard_deviation(self): - """ -> np.ndarray percentages standard deviation""" + """np.ndarray percentages standard deviation""" return np.sqrt(self._variance) @lazyproperty def standard_error(self): - """ -> np.ndarray percentages standard error""" + """np.ndarray percentages standard error""" if self.dimension_types[0] == DT.MR: return np.sqrt(self._variance / self.bases) return np.sqrt(self._variance / np.sum(self.rows_margin)) @@ -1223,7 +1234,7 @@ def _numeric_values(self): @lazyproperty def _numeric_values_mask(self): - """-> np.ndarray boolean elements for each element in rows dimension." + """np.ndarray boolean elements for each element in rows dimension." This array contains True or False according to the nan in the numeric_values array diff --git a/tests/expectations/col-per-moe-cat-x-cat-hs-2rows-1col.py b/tests/expectations/col-per-moe-cat-x-cat-hs-2rows-1col.py new file mode 100644 index 000000000..311aa16fd --- /dev/null +++ b/tests/expectations/col-per-moe-cat-x-cat-hs-2rows-1col.py @@ -0,0 +1,56 @@ +[ + [ + 13.03595844, + 7.67698551, + 3.46251469, + 4.55693081, + 4.13969905, + 3.06644326, + 7.58177966, + ], + [ + 9.31746956, + 8.36644659, + 3.78951977, + 5.23042895, + 3.72360922, + 3.15148999, + 7.65643283, + ], + [ + 11.77008734, + 8.47930382, + 3.85500973, + 5.5463129, + 4.8153303, + 3.66939254, + 7.5418196, + ], + [ + 6.0015905, + 7.16459682, + 3.25399504, + 4.39795907, + 3.1556904, + 2.63154691, + 6.03640099, + ], + [ + 10.57125967, + 8.64082889, + 3.91804373, + 5.56024488, + 4.45804303, + 3.59253748, + 8.05245981, + ], + [ + 10.91512996, + 6.50723624, + 2.9825236, + 4.90998204, + 4.89378128, + 3.57587294, + 5.83679508, + ], +] diff --git a/tests/expectations/col-std-dev-cat-x-cat-hs-2rows-1col.py b/tests/expectations/col-std-dev-cat-x-cat-hs-2rows-1col.py new file mode 100644 index 000000000..8255c3e38 --- /dev/null +++ b/tests/expectations/col-std-dev-cat-x-cat-hs-2rows-1col.py @@ -0,0 +1,56 @@ +[ + [ + 0.49326036, + 0.43967108, + 0.43739495, + 0.4093598, + 0.42242603, + 0.41688475, + 0.47060217, + ], + [ + 0.35255854, + 0.47915742, + 0.47870319, + 0.46986171, + 0.3799671, + 0.42844691, + 0.4752359, + ], + [ + 0.44536177, + 0.48562091, + 0.48697607, + 0.49823831, + 0.49136926, + 0.49885606, + 0.46812184, + ], + [ + 0.22709084, + 0.4103259, + 0.41105414, + 0.39507899, + 0.32201514, + 0.35776034, + 0.37468029, + ], + [ + 0.4, + 0.49487166, + 0.49493871, + 0.49948985, + 0.45491071, + 0.48840757, + 0.49981735, + ], + [ + 0.41301152, + 0.372678, + 0.37676108, + 0.44107522, + 0.49937461, + 0.48614202, + 0.36229072, + ], +] diff --git a/tests/expectations/col-std-err-cat-x-cat-hs-2rows-1col.py b/tests/expectations/col-std-err-cat-x-cat-hs-2rows-1col.py new file mode 100644 index 000000000..0b62f7092 --- /dev/null +++ b/tests/expectations/col-std-err-cat-x-cat-hs-2rows-1col.py @@ -0,0 +1,56 @@ +[ + [ + 0.06651121, + 0.03916901, + 0.01766622, + 0.02325007, + 0.0211213, + 0.01564541, + 0.03868326, + ], + [ + 0.04753898, + 0.04268674, + 0.01933464, + 0.02668635, + 0.01899836, + 0.01607933, + 0.03906415, + ], + [ + 0.06005257, + 0.04326255, + 0.01966878, + 0.02829803, + 0.02456846, + 0.01872173, + 0.03847938, + ], + [ + 0.03062092, + 0.03655474, + 0.01660232, + 0.02243898, + 0.01610076, + 0.01342651, + 0.03079853, + ], + [ + 0.05393599, + 0.04408667, + 0.01999039, + 0.02836912, + 0.02274554, + 0.01832961, + 0.04108473, + ], + [ + 0.05569046, + 0.03320079, + 0.01521724, + 0.02505139, + 0.02496873, + 0.01824458, + 0.02978011, + ], +] diff --git a/tests/integration/test_cube.py b/tests/integration/test_cube.py index a0d13ca8e..5ec72bdb9 100644 --- a/tests/integration/test_cube.py +++ b/tests/integration/test_cube.py @@ -848,7 +848,7 @@ def test_calculate_various_measures_axis_0(self): [0.05880176, 0.03705843, 0.02576154, 0.03360238, 0.04526793, 0.07517074], [0.05880176, 0.03705843, 0.02576154, 0.03360238, 0.04526793, 0.07517074], ] - expected_col_moe = [ + expected_col_percentages_moe = [ [11.5249326, 7.2633194, 5.0491687, 6.5859452, 8.8723517, 14.7331947], [11.5249326, 7.2633194, 5.0491687, 6.5859452, 8.8723517, 14.7331947], ] @@ -856,7 +856,9 @@ def test_calculate_various_measures_axis_0(self): np.testing.assert_almost_equal(slice_.table_std_err, expected_table_std_err) np.testing.assert_almost_equal(slice_.columns_std_dev, expected_col_std_dev) np.testing.assert_almost_equal(slice_.columns_std_err, expected_col_std_err) - np.testing.assert_almost_equal(slice_.columns_moe, expected_col_moe) + np.testing.assert_almost_equal( + slice_.columns_percentages_moe, expected_col_percentages_moe + ) np.testing.assert_almost_equal(slice_.zscores, expected_zscore) def test_pvals(self): @@ -992,7 +994,7 @@ def test_various_measures_admit_by_dept_unweighted_rows(self): [0.01567414, 0.01993363, 0.01575024, 0.01682826, 0.01795892, 0.00918798], [0.01567414, 0.01993363, 0.01575024, 0.01682826, 0.01795892, 0.00918798], ] - expected_col_moe = [ + expected_col_percentages_moe = [ [3.07207565, 3.90691882, 3.0869894, 3.29827837, 3.51988285, 1.80081013], [3.07207565, 3.90691882, 3.0869894, 3.29827837, 3.51988285, 1.80081013], ] @@ -1002,7 +1004,9 @@ def test_various_measures_admit_by_dept_unweighted_rows(self): np.testing.assert_almost_equal(slice_.table_std_err, expected_table_std_err) np.testing.assert_almost_equal(slice_.columns_std_dev, expected_col_std_dev) np.testing.assert_almost_equal(slice_.columns_std_err, expected_col_std_err) - np.testing.assert_almost_equal(slice_.columns_moe, expected_col_moe) + np.testing.assert_almost_equal( + slice_.columns_percentages_moe, expected_col_percentages_moe + ) def test_various_measures_admit_by_gender_weighted_rows(self): """see @@ -1020,14 +1024,19 @@ def test_various_measures_admit_by_gender_weighted_rows(self): expected_table_std_err = [[0.00659641, 0.00492018], [0.0070529, 0.00675348]] expected_col_std_dev = [[0.49668253, 0.45933735], [0.49668253, 0.45933735]] expected_col_std_err = [[0.00966009, 0.01080163], [0.00966009, 0.01080163]] - expected_col_moe = [[1.89334366, 2.11708092], [1.89334366, 2.11708092]] + expected_col_percentages_moe = [ + [1.89334366, 2.11708092], + [1.89334366, 2.11708092], + ] np.testing.assert_almost_equal(slice_.zscores, expected_zscores) np.testing.assert_almost_equal(slice_.table_std_dev, expected_table_std_dev) np.testing.assert_almost_equal(slice_.table_std_err, expected_table_std_err) np.testing.assert_almost_equal(slice_.columns_std_dev, expected_col_std_dev) np.testing.assert_almost_equal(slice_.columns_std_err, expected_col_std_err) - np.testing.assert_almost_equal(slice_.columns_moe, expected_col_moe) + np.testing.assert_almost_equal( + slice_.columns_percentages_moe, expected_col_percentages_moe + ) def test_selected_crosstab_as_array(self): slice_ = Cube(CR.SELECTED_CROSSTAB_4).partitions[0] diff --git a/tests/integration/test_cubepart.py b/tests/integration/test_cubepart.py index 4a69f5c58..1d932e3c6 100644 --- a/tests/integration/test_cubepart.py +++ b/tests/integration/test_cubepart.py @@ -118,7 +118,7 @@ def it_provides_values_for_cat_x_cat_pruning_hs(self): ], ) np.testing.assert_almost_equal( - slice_.columns_moe, + slice_.columns_percentages_moe, [ [ 13.51426726, @@ -196,7 +196,7 @@ def it_provides_values_for_cat_x_cat_pruning_hs(self): ], ) np.testing.assert_almost_equal( - slice_.table_moe, + slice_.table_percentages_moe, [ [9.47425342, 10.23031735, 8.51416179, 6.4011466, 0.0, 2.10138857], [5.48548061, 8.3257033, 6.91062699, 5.77563348, 0.0, 3.0001713], @@ -236,7 +236,8 @@ def it_provides_values_for_cat_hs_x_mr(self): slice_.columns_std_err, load_python_expression("cat-hs-x-mr-col-stderr") ) np.testing.assert_almost_equal( - slice_.columns_moe, load_python_expression("cat-hs-x-mr-col-moe") + slice_.columns_percentages_moe, + load_python_expression("cat-hs-x-mr-col-moe"), ) np.testing.assert_almost_equal( slice_.zscores, load_python_expression("cat-hs-x-mr-zscores") @@ -276,7 +277,8 @@ def it_provides_values_for_mr_x_cat_hs(self): slice_.columns_std_err, load_python_expression("mr-x-cat-hs-col-stderr") ) np.testing.assert_almost_equal( - slice_.columns_moe, load_python_expression("mr-x-cat-hs-col-moe") + slice_.columns_percentages_moe, + load_python_expression("mr-x-cat-hs-col-moe"), ) np.testing.assert_almost_equal( slice_.pvals, load_python_expression("mr-x-cat-hs-pvals") diff --git a/tests/integration/test_headers_and_subtotals.py b/tests/integration/test_headers_and_subtotals.py index 9696c8c4d..ad3dd6abe 100644 --- a/tests/integration/test_headers_and_subtotals.py +++ b/tests/integration/test_headers_and_subtotals.py @@ -8,6 +8,7 @@ from cr.cube.cube import Cube from ..fixtures import CR +from ..util import load_python_expression class TestHeadersAndSubtotals(object): @@ -1007,184 +1008,17 @@ def test_col_labels_with_top_hs(self): def it_calculate_col_residuals_for_subtotals(self): slice_ = Cube(CR.CAT_X_CAT_HS_2ROWS_1COL).partitions[0] - np.testing.assert_almost_equal( slice_.columns_std_dev, - [ - [ - 0.49326036, - 0.43967108, - 0.43739495, - 0.4093598, - 0.42242603, - 0.41688475, - 0.47060217, - ], - [ - 0.35255854, - 0.47915742, - 0.47870319, - 0.46986171, - 0.3799671, - 0.42844691, - 0.4752359, - ], - [ - 0.44536177, - 0.48562091, - 0.48697607, - 0.49823831, - 0.49136926, - 0.49885606, - 0.46812184, - ], - [ - 0.22709084, - 0.4103259, - 0.41105414, - 0.39507899, - 0.32201514, - 0.35776034, - 0.37468029, - ], - [ - 0.4, - 0.49487166, - 0.49493871, - 0.49948985, - 0.45491071, - 0.48840757, - 0.49981735, - ], - [ - 0.41301152, - 0.372678, - 0.37676108, - 0.44107522, - 0.49937461, - 0.48614202, - 0.36229072, - ], - ], + load_python_expression("col-std-dev-cat-x-cat-hs-2rows-1col"), ) - np.testing.assert_almost_equal( slice_.columns_std_err, - [ - [ - 0.06651121, - 0.03916901, - 0.01766622, - 0.02325007, - 0.0211213, - 0.01564541, - 0.03868326, - ], - [ - 0.04753898, - 0.04268674, - 0.01933464, - 0.02668635, - 0.01899836, - 0.01607933, - 0.03906415, - ], - [ - 0.06005257, - 0.04326255, - 0.01966878, - 0.02829803, - 0.02456846, - 0.01872173, - 0.03847938, - ], - [ - 0.03062092, - 0.03655474, - 0.01660232, - 0.02243898, - 0.01610076, - 0.01342651, - 0.03079853, - ], - [ - 0.05393599, - 0.04408667, - 0.01999039, - 0.02836912, - 0.02274554, - 0.01832961, - 0.04108473, - ], - [ - 0.05569046, - 0.03320079, - 0.01521724, - 0.02505139, - 0.02496873, - 0.01824458, - 0.02978011, - ], - ], + load_python_expression("col-std-err-cat-x-cat-hs-2rows-1col"), ) np.testing.assert_almost_equal( - slice_.columns_moe, - [ - [ - 13.03595844, - 7.67698551, - 3.46251469, - 4.55693081, - 4.13969905, - 3.06644326, - 7.58177966, - ], - [ - 9.31746956, - 8.36644659, - 3.78951977, - 5.23042895, - 3.72360922, - 3.15148999, - 7.65643283, - ], - [ - 11.77008734, - 8.47930382, - 3.85500973, - 5.5463129, - 4.8153303, - 3.66939254, - 7.5418196, - ], - [ - 6.0015905, - 7.16459682, - 3.25399504, - 4.39795907, - 3.1556904, - 2.63154691, - 6.03640099, - ], - [ - 10.57125967, - 8.64082889, - 3.91804373, - 5.56024488, - 4.45804303, - 3.59253748, - 8.05245981, - ], - [ - 10.91512996, - 6.50723624, - 2.9825236, - 4.90998204, - 4.89378128, - 3.57587294, - 5.83679508, - ], - ], + slice_.columns_percentages_moe, + load_python_expression("col-per-moe-cat-x-cat-hs-2rows-1col"), ) def it_computes_residuals_for_subtotals_1col_2rows(self): @@ -2041,7 +1875,7 @@ def it_calculates_residuals_for_columns_insertion(self): # Test MoE for 1 column insertion np.testing.assert_almost_equal( - slice_.table_moe, + slice_.table_percentages_moe, [ [17.21652881, 17.21652881, 0.0, 0.0, 21.77737778], [10.58190352, 17.21652881, 14.51825185, 0.0, 19.20584194], @@ -2071,7 +1905,7 @@ def it_calculates_residuals_for_columns_insertion(self): # Test col MoE np.testing.assert_almost_equal( - slice_.columns_moe, + slice_.columns_percentages_moe, [ [42.43446536, 33.54739046, 0.0, 0.0, 28.28964358], [42.43446536, 33.54739046, 53.34346349, 0.0, 26.67173175], @@ -2244,7 +2078,7 @@ def it_calculates_residuals_for_rows_insertion(self): # Test MoE for 1 row insertion np.testing.assert_almost_equal( - slice_.table_moe, + slice_.table_percentages_moe, [ [9.47425342, 8.51416179, 6.4011466, 0.0, 2.10138857], [5.48548061, 6.91062699, 5.77563348, 0.0, 3.0001713], @@ -2285,7 +2119,7 @@ def it_calculates_residuals_for_rows_insertion(self): ) # Test col MoE for 1 row insertion np.testing.assert_almost_equal( - slice_.columns_moe, + slice_.columns_percentages_moe, [ [13.51426726, 16.59188199, 22.48817088, np.nan, 53.31123764], [12.89084933, 16.3342716, 22.23457567, np.nan, 53.31123764], @@ -2605,7 +2439,7 @@ def it_calculates_residuals_for_cat_x_cat_with_missing_1_col_insertion(self): # Test MoE for 1 column insertion at left np.testing.assert_almost_equal( - slice_.table_moe, + slice_.table_percentages_moe, [ [2.74144167, 0.0, 0.0, 2.01473624, 2.34177324, 1.19627333, 1.47999058], [2.78474723, 2.06247142, 2.41916703, 0.0, 0.0, 1.41377833, 1.4556357], @@ -2632,7 +2466,7 @@ def it_calculates_residuals_for_cat_x_cat_with_missing_1_col_insertion(self): # Test MoE err for 1 column insertion at left np.testing.assert_almost_equal( - slice_.columns_moe, + slice_.columns_percentages_moe, [ [3.30479837, 0.0, 0.0, 0.0, 0.0, 8.42914245, 7.58210469], [3.30479837, 0.0, 0.0, 0.0, 0.0, 8.42914245, 7.58210469], diff --git a/tests/integration/test_multiple_response.py b/tests/integration/test_multiple_response.py index ca49e3c5b..3af00237a 100644 --- a/tests/integration/test_multiple_response.py +++ b/tests/integration/test_multiple_response.py @@ -375,7 +375,7 @@ def test_various_measures_from_r_rows_margin(): ], ] - expected_col_moe = [ + expected_col_percentages_moe = [ [ 1.1068377, 1.1476323, @@ -397,7 +397,7 @@ def test_various_measures_from_r_rows_margin(): 4.1444169, ], ] - expected_table_moe = [ + expected_table_percentages_moe = [ [ 0.36338981, 0.36088727, @@ -425,8 +425,12 @@ def test_various_measures_from_r_rows_margin(): np.testing.assert_almost_equal(slice_.table_std_dev, expected_table_std_dev) np.testing.assert_almost_equal(slice_.columns_std_dev, expected_col_std_dev) np.testing.assert_almost_equal(slice_.columns_std_err, expected_col_std_err) - np.testing.assert_almost_equal(slice_.columns_moe, expected_col_moe) - np.testing.assert_almost_equal(slice_.table_moe, expected_table_moe) + np.testing.assert_almost_equal( + slice_.columns_percentages_moe, expected_col_percentages_moe + ) + np.testing.assert_almost_equal( + slice_.table_percentages_moe, expected_table_percentages_moe + ) def test_mr_x_single_wave(): @@ -489,7 +493,7 @@ def test_std_deviation_std_error_array_x_mr_by_row(): [[0.02978762, 0.00971635, 0.03292998], [0.02918338, 0.03472281, 0.02929588]], ) np.testing.assert_array_almost_equal( - slice_.table_moe, + slice_.table_percentages_moe, [[5.83826629, 1.90437053, 6.45415801], [5.71983772, 6.80554616, 5.74188756]], ) np.testing.assert_array_almost_equal( @@ -501,7 +505,7 @@ def test_std_deviation_std_error_array_x_mr_by_row(): [[0.05158518, 0.02113084, 0.04615627], [0.05158518, 0.02113084, 0.04615627]], ) np.testing.assert_array_almost_equal( - slice_.columns_moe, + slice_.columns_percentages_moe, [[10.11050978, 4.14156918, 9.04646295], [10.11050978, 4.14156918, 9.04646295]], ) @@ -549,7 +553,7 @@ def test_cat_x_mr_aug_zscores(): ], ) np.testing.assert_array_almost_equal( - slice_.table_moe, + slice_.table_percentages_moe, [ [1.08261432, 1.16171617, 1.27809263, 1.11256038, 1.48112581], [1.09260745, 1.22848624, 1.50359016, 1.181889, 1.52217263], @@ -576,7 +580,7 @@ def test_cat_x_mr_aug_zscores(): ], ) np.testing.assert_almost_equal( - slice_.columns_moe, + slice_.columns_percentages_moe, [ [5.75714482, 4.52392763, 3.82664558, 4.61399677, 3.89836724], [5.78910352, 4.70374716, 4.28140404, 4.81520916, 3.97036572], @@ -596,14 +600,18 @@ def test_cat_x_mr_and_cat_x_mr_augmented_various_measures(): np.testing.assert_array_almost_equal(slice_.zscores, slice2_.zscores) np.testing.assert_array_almost_equal(slice_.table_std_dev, slice2_.table_std_dev) np.testing.assert_array_almost_equal(slice_.table_std_err, slice2_.table_std_err) - np.testing.assert_array_almost_equal(slice_.table_moe, slice2_.table_moe) + np.testing.assert_array_almost_equal( + slice_.table_percentages_moe, slice2_.table_percentages_moe + ) np.testing.assert_array_almost_equal( slice_.columns_std_dev, slice2_.columns_std_dev ) np.testing.assert_array_almost_equal( slice_.columns_std_err, slice2_.columns_std_err ) - np.testing.assert_array_almost_equal(slice_.columns_moe, slice2_.columns_moe) + np.testing.assert_array_almost_equal( + slice_.columns_percentages_moe, slice2_.columns_percentages_moe + ) assert slice_.shape == (4, 5) assert slice2_.shape == (4, 5) From 21d6d76c868134dc74c8b9f7a09622d87d129567 Mon Sep 17 00:00:00 2001 From: Steve Canny Date: Fri, 30 Oct 2020 12:57:43 -0700 Subject: [PATCH 6/6] fix: small CR fixes --- src/cr/cube/cubepart.py | 38 +++++++++++++++++++++----------------- 1 file changed, 21 insertions(+), 17 deletions(-) diff --git a/src/cr/cube/cubepart.py b/src/cr/cube/cubepart.py index 407af6b54..fba048a78 100644 --- a/src/cr/cube/cubepart.py +++ b/src/cr/cube/cubepart.py @@ -84,7 +84,7 @@ def dimension_types(self): return tuple(d.dimension_type for d in self._dimensions) def evaluate(self, measure_expr): - """Returns 1D/2D ndarray, values evaluated given the function specification + """Return 1D/2D ndarray, values evaluated given the function specification The `function_spec` contains the function to apply and its parameters, e.g.: ``` @@ -113,7 +113,7 @@ def ndim(self): @lazyproperty def population_fraction(self): - """Returns the population fraction of the cube""" + """population fraction of the cube""" return self._cube.population_fraction @lazyproperty @@ -309,14 +309,16 @@ def columns_percentages_moe(self): @lazyproperty def columns_std_dev(self): - """Returns the standard deviation for column percentages + """standard deviation for column percentages + `std_deviation = sqrt(variance)` """ return np.sqrt(self._columns_variance) @lazyproperty def columns_std_err(self): - """Returns the standard error for column percentages + """standard error for column percentages + `std_error = sqrt(variance/N)` """ return np.sqrt(self._columns_variance / self.columns_margin) @@ -780,17 +782,6 @@ def table_margin(self): def table_margin_unpruned(self): return self._matrix.table_margin_unpruned - @lazyproperty - def table_percentages_moe(self): - """1D/2D np.float64 ndarray of margin-of-error (MoE) for table percentages. - - The values are represented as percentages, analogue to the `table_percentages` - property. This means that the value of 3.5% will have the value 3.5 (not 0.035). - The values can be np.nan when the corresponding percentage is also np.nan, which - happens when the respective table margin is 0. - """ - return self.Z_975 * 100 * self.table_std_err - @lazyproperty def table_name(self): """Provides differentiated name for each stacked table of a 3D cube.""" @@ -809,6 +800,17 @@ def table_name(self): def table_percentages(self): return self.table_proportions * 100 + @lazyproperty + def table_percentages_moe(self): + """1D/2D np.float64 ndarray of margin-of-error (MoE) for table percentages. + + The values are represented as percentages, analogue to the `table_percentages` + property. This means that the value of 3.5% will have the value 3.5 (not 0.035). + The values can be np.nan when the corresponding percentage is also np.nan, which + happens when the respective table margin is 0. + """ + return self.Z_975 * 100 * self.table_std_err + @lazyproperty def table_proportions(self): return np.array([row.table_proportions for row in self._matrix.rows]) @@ -889,7 +891,8 @@ def _columns_dimension_numeric_values(self): @lazyproperty def _columns_variance(self): - """Returns the variance for column percentages + """variance for column percentages + `variance = p * (1-p)` """ return ( @@ -1265,7 +1268,8 @@ def _table_proportions_as_array(self): @lazyproperty def _variance(self): - """Returns the variance for cell percentages + """variance for cell percentages + `variance = p * (1-p)` """ p = self._table_proportions_as_array