From 48e9b5bf064eb1308ea79fe23eca718a41a739e9 Mon Sep 17 00:00:00 2001 From: Michael Malecki Date: Sun, 21 Apr 2019 14:48:38 -0400 Subject: [PATCH 1/3] add cube with funky weights, and expectations of their ttests --- src/cr/cube/measures/pairwise_significance.py | 6 +-- .../integration/test_pairwise_significance.py | 41 +++++++++++++++++++ 2 files changed, 44 insertions(+), 3 deletions(-) diff --git a/src/cr/cube/measures/pairwise_significance.py b/src/cr/cube/measures/pairwise_significance.py index 8a2c4ca3c..72a338535 100644 --- a/src/cr/cube/measures/pairwise_significance.py +++ b/src/cr/cube/measures/pairwise_significance.py @@ -85,10 +85,10 @@ def t_stats(self): axis=0, include_transforms_for_dims=self._hs_dims ) diff = props - props[:, [self._col_idx]] - margin = self._slice.margin( - axis=0, weighted=self._weighted, include_transforms_for_dims=self._hs_dims + unweighted_margin = self._slice.margin( + axis=0, weighted=False, include_transforms_for_dims=self._hs_dims ) - var_props = props * (1.0 - props) / margin + var_props = props * (1.0 - props) / unweighted_margin se_diff = np.sqrt(var_props + var_props[:, [self._col_idx]]) return diff / se_diff diff --git a/tests/integration/test_pairwise_significance.py b/tests/integration/test_pairwise_significance.py index 2e1f8eb6e..1b42913af 100644 --- a/tests/integration/test_pairwise_significance.py +++ b/tests/integration/test_pairwise_significance.py @@ -352,3 +352,44 @@ def test_pairwise_indices_larger_and_smaller(self): ) pairwise_indices = cube.slices[0].pairwise_indices(only_larger=False) np.testing.assert_array_equal(pairwise_indices, expected_indices) + + def test_ttests_use_unweighted_n_for_variance(self): + """The weights on this cube demonstrate much higher variance (less + extreme t values, and higher associated p-values) than if weighted_n + were used in the variance estimate of the test statistic. + """ + cube = CrunchCube(CR.CAT_X_CAT_WEIGHTED_TTESTS) + actual = cube.slices[0].pairwise_significance_tests( + column_idx=0, hs_dims=(0, 1) + ) + expected_tstats = np.array( + [ + [0.0, 1.3892930788974391, 0.8869425734660505, 1.402945620973322], + [0.0, 0.1903540333363253, 0.30894158244285624, 0.3994739596013725], + [0.0, 0.03761142927757482, 1.2682277741610029, 0.36476016345069556], + [0.0, -1.187392798652706, -1.0206496663686406, -1.35111583891054], + [0.0, -1.742783579889951, -2.425391682127969, -3.0738474093706927], + ] + ).reshape(5, 4) + expected_pvals = np.array( + [ + [1.0, 0.1673820620286901, 0.37579738470724267, 0.16373028998420036], + [1.0, 0.8493616019040273, 0.7575734897713429, 0.6903959137827367], + [1.0, 0.9700615941125716, 0.20566822638024163, 0.7160606992310101], + [1.0, 0.23747780923355655, 0.30821629616167123, 0.17970733830083074], + [1.0, 0.0839987707197456, 0.015862691173528676, 0.002723927327002773], + ] + ).reshape(5, 4) + np.testing.assert_almost_equal(actual.t_stats, expected_tstats) + np.testing.assert_almost_equal(actual.p_vals, expected_pvals) + pairwise_indices = cube.slices[0].pairwise_indices() + expected_indices = np.array( + [ + [(), (), (), ()], + [(), (), (), ()], + [(), (), (), ()], + [(), (), (), ()], + [(2, 3), (), (), ()], + ] + ) + np.testing.assert_array_equal(pairwise_indices, expected_indices) From 3bda34b2fe2f52c9054866ce320cc0a2acf721d2 Mon Sep 17 00:00:00 2001 From: Michael Malecki Date: Sun, 21 Apr 2019 15:03:32 -0400 Subject: [PATCH 2/3] version bump --- src/cr/cube/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/cr/cube/__init__.py b/src/cr/cube/__init__.py index 50579922b..0b1704810 100644 --- a/src/cr/cube/__init__.py +++ b/src/cr/cube/__init__.py @@ -2,4 +2,4 @@ """Initialization module for crunch-cube package.""" -__version__ = "1.9.16" +__version__ = "1.9.17" From 0e3b1ebba3c03e7139db693541009e983508a41d Mon Sep 17 00:00:00 2001 From: Michael Malecki Date: Sun, 21 Apr 2019 15:16:33 -0400 Subject: [PATCH 3/3] add file --- tests/fixtures/cat-x-cat-weighted-ttests.json | 227 ++++++++++++++++++ 1 file changed, 227 insertions(+) create mode 100644 tests/fixtures/cat-x-cat-weighted-ttests.json diff --git a/tests/fixtures/cat-x-cat-weighted-ttests.json b/tests/fixtures/cat-x-cat-weighted-ttests.json new file mode 100644 index 000000000..a47ceb60e --- /dev/null +++ b/tests/fixtures/cat-x-cat-weighted-ttests.json @@ -0,0 +1,227 @@ +{ + "value": { + "query": { + "dimensions": [ + { + "variable": "/api/datasets/123/variables/ac62f49fe0944baf8030839227d7cf06" + }, { + "variable": "/api/datasets/123/variables/772559e879734d5dbda31d67bb0e7a2c/" + } + ], + "filters": [], + "measures": { + "count": { + "args": [], + "function": "cube_count" + } + }, + "weight": "/api/datasets/123/variables/c1820eb7befd4704beacfdbcb430969c/" + }, + "result": { + "counts": [ + 10, + 17, + 110, + 13, + 0, + 19, + 11, + 113, + 11, + 0, + 11, + 17, + 14, + 14, + 0, + 3, + 0, + 3, + 0, + 0, + 18, + 13, + 11, + 3, + 3, + 3, + 3, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0 + ], + "dimensions": [ + { + "derived": false, + "references": { + "alias": "ShutdownBlame", + "description": "If President Oba", + "discarded": false, + "header_order": 0, + "name": "ShutdownBlame" + }, + "type": { + "categories": [ + { + "id": 1, + "missing": false, + "name": "President Obama", + "numeric_value": 1 + }, { + "id": 2, + "missing": false, + "name": "Republicans", + "numeric_value": 2 + }, { + "id": 3, + "missing": false, + "name": "Both", + "numeric_value": 3 + }, { + "id": 4, + "missing": false, + "name": "Neither", + "numeric_value": 4 + }, { + "id": 5, + "missing": false, + "name": "Not sure", + "numeric_value": 5 + }, { + "id": 8, + "missing": true, + "name": "Skipped", + "numeric_value": 8 + }, { + "id": 9, + "missing": true, + "name": "Not Asked", + "numeric_value": 9 + }, { + "id": -1, + "missing": true, + "name": "No Data", + "numeric_value": null + } + ], + "class": "categorical", + "ordinal": false + } + }, { + "derived": false, + "references": { + "alias": "Age4", + "description": "4 Category Age", + "discarded": false, + "header_order": 13, + "name": "Age4" + }, + "type": { + "categories": [ + { + "id": 1, + "missing": false, + "name": "18-29", + "numeric_value": 1 + }, { + "id": 2, + "missing": false, + "name": "30-44", + "numeric_value": 2 + }, { + "id": 3, + "missing": false, + "name": "45-64", + "numeric_value": 3 + }, { + "id": 4, + "missing": false, + "name": "65+", + "numeric_value": 4 + }, { + "id": -1, + "missing": true, + "name": "No Data", + "numeric_value": null + } + ], + "class": "categorical", + "ordinal": false + } + } + ], + "element": "crunch:cube", + "measures": { + "count": { + "data": [ + 60.86625141813965, + 97.88936399229519, + 100.15929984533358, + 53.85016845098138, + 0, + 79.36184123878606, + 91.08041828026134, + 113.52172513707207, + 51.24133618727803, + 0, + 51.6860301097489, + 57.19341887477966, + 94.46614309659532, + 34.21679409150737, + 0, + 7.1814887299023, + 0.648732097517952, + 2.160525278672988, + 0, + 0, + 48.05709086803253, + 23.04698949579007, + 21.86062399732678, + 3.2671706433211516, + 1.24732636818006, + 4.27630335512173, + 2.720958443356048, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0 + ], + "metadata": { + "derived": true, + "references": {}, + "type": { + "class": "numeric", + "integer": false, + "missing_reasons": { + "No Data": -1 + }, + "missing_rules": {} + } + }, + "n_missing": 4 + } + }, + "missing": 4, + "n": 1000 + } + } +}