Skip to content

Commit

Permalink
Merge 0e3b1eb into 4514297
Browse files Browse the repository at this point in the history
  • Loading branch information
malecki committed Apr 21, 2019
2 parents 4514297 + 0e3b1eb commit ad9c7db
Show file tree
Hide file tree
Showing 4 changed files with 272 additions and 4 deletions.
2 changes: 1 addition & 1 deletion src/cr/cube/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,4 +2,4 @@

"""Initialization module for crunch-cube package."""

__version__ = "1.9.16"
__version__ = "1.9.17"
6 changes: 3 additions & 3 deletions src/cr/cube/measures/pairwise_significance.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,10 +85,10 @@ def t_stats(self):
axis=0, include_transforms_for_dims=self._hs_dims
)
diff = props - props[:, [self._col_idx]]
margin = self._slice.margin(
axis=0, weighted=self._weighted, include_transforms_for_dims=self._hs_dims
unweighted_margin = self._slice.margin(
axis=0, weighted=False, include_transforms_for_dims=self._hs_dims
)
var_props = props * (1.0 - props) / margin
var_props = props * (1.0 - props) / unweighted_margin
se_diff = np.sqrt(var_props + var_props[:, [self._col_idx]])
return diff / se_diff

Expand Down
227 changes: 227 additions & 0 deletions tests/fixtures/cat-x-cat-weighted-ttests.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,227 @@
{
"value": {
"query": {
"dimensions": [
{
"variable": "/api/datasets/123/variables/ac62f49fe0944baf8030839227d7cf06"
}, {
"variable": "/api/datasets/123/variables/772559e879734d5dbda31d67bb0e7a2c/"
}
],
"filters": [],
"measures": {
"count": {
"args": [],
"function": "cube_count"
}
},
"weight": "/api/datasets/123/variables/c1820eb7befd4704beacfdbcb430969c/"
},
"result": {
"counts": [
10,
17,
110,
13,
0,
19,
11,
113,
11,
0,
11,
17,
14,
14,
0,
3,
0,
3,
0,
0,
18,
13,
11,
3,
3,
3,
3,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0
],
"dimensions": [
{
"derived": false,
"references": {
"alias": "ShutdownBlame",
"description": "If President Oba",
"discarded": false,
"header_order": 0,
"name": "ShutdownBlame"
},
"type": {
"categories": [
{
"id": 1,
"missing": false,
"name": "President Obama",
"numeric_value": 1
}, {
"id": 2,
"missing": false,
"name": "Republicans",
"numeric_value": 2
}, {
"id": 3,
"missing": false,
"name": "Both",
"numeric_value": 3
}, {
"id": 4,
"missing": false,
"name": "Neither",
"numeric_value": 4
}, {
"id": 5,
"missing": false,
"name": "Not sure",
"numeric_value": 5
}, {
"id": 8,
"missing": true,
"name": "Skipped",
"numeric_value": 8
}, {
"id": 9,
"missing": true,
"name": "Not Asked",
"numeric_value": 9
}, {
"id": -1,
"missing": true,
"name": "No Data",
"numeric_value": null
}
],
"class": "categorical",
"ordinal": false
}
}, {
"derived": false,
"references": {
"alias": "Age4",
"description": "4 Category Age",
"discarded": false,
"header_order": 13,
"name": "Age4"
},
"type": {
"categories": [
{
"id": 1,
"missing": false,
"name": "18-29",
"numeric_value": 1
}, {
"id": 2,
"missing": false,
"name": "30-44",
"numeric_value": 2
}, {
"id": 3,
"missing": false,
"name": "45-64",
"numeric_value": 3
}, {
"id": 4,
"missing": false,
"name": "65+",
"numeric_value": 4
}, {
"id": -1,
"missing": true,
"name": "No Data",
"numeric_value": null
}
],
"class": "categorical",
"ordinal": false
}
}
],
"element": "crunch:cube",
"measures": {
"count": {
"data": [
60.86625141813965,
97.88936399229519,
100.15929984533358,
53.85016845098138,
0,
79.36184123878606,
91.08041828026134,
113.52172513707207,
51.24133618727803,
0,
51.6860301097489,
57.19341887477966,
94.46614309659532,
34.21679409150737,
0,
7.1814887299023,
0.648732097517952,
2.160525278672988,
0,
0,
48.05709086803253,
23.04698949579007,
21.86062399732678,
3.2671706433211516,
1.24732636818006,
4.27630335512173,
2.720958443356048,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0
],
"metadata": {
"derived": true,
"references": {},
"type": {
"class": "numeric",
"integer": false,
"missing_reasons": {
"No Data": -1
},
"missing_rules": {}
}
},
"n_missing": 4
}
},
"missing": 4,
"n": 1000
}
}
}
41 changes: 41 additions & 0 deletions tests/integration/test_pairwise_significance.py
Original file line number Diff line number Diff line change
Expand Up @@ -352,3 +352,44 @@ def test_pairwise_indices_larger_and_smaller(self):
)
pairwise_indices = cube.slices[0].pairwise_indices(only_larger=False)
np.testing.assert_array_equal(pairwise_indices, expected_indices)

def test_ttests_use_unweighted_n_for_variance(self):
    """Pairwise t-tests on a weighted cube use the unweighted N for variance.

    The fixture's weights produce a much higher variance (less extreme
    t-values and higher associated p-values) than would result if the
    weighted N were used in the variance estimate of the test statistic.
    The expected values below pin the t-statistics, the p-values and the
    resulting pairwise indices for column 0 as the reference column.
    """
    cube = CrunchCube(CR.CAT_X_CAT_WEIGHTED_TTESTS)
    slice_ = cube.slices[0]

    actual = slice_.pairwise_significance_tests(column_idx=0, hs_dims=(0, 1))

    # The literals are already 5 rows x 4 columns, so no reshape is needed.
    expected_tstats = np.array(
        [
            [0.0, 1.3892930788974391, 0.8869425734660505, 1.402945620973322],
            [0.0, 0.1903540333363253, 0.30894158244285624, 0.3994739596013725],
            [0.0, 0.03761142927757482, 1.2682277741610029, 0.36476016345069556],
            [0.0, -1.187392798652706, -1.0206496663686406, -1.35111583891054],
            [0.0, -1.742783579889951, -2.425391682127969, -3.0738474093706927],
        ]
    )
    expected_pvals = np.array(
        [
            [1.0, 0.1673820620286901, 0.37579738470724267, 0.16373028998420036],
            [1.0, 0.8493616019040273, 0.7575734897713429, 0.6903959137827367],
            [1.0, 0.9700615941125716, 0.20566822638024163, 0.7160606992310101],
            [1.0, 0.23747780923355655, 0.30821629616167123, 0.17970733830083074],
            [1.0, 0.0839987707197456, 0.015862691173528676, 0.002723927327002773],
        ]
    )
    np.testing.assert_almost_equal(actual.t_stats, expected_tstats)
    np.testing.assert_almost_equal(actual.p_vals, expected_pvals)

    # The cells are tuples of differing length (ragged), so the dtype must be
    # given explicitly: NumPy 1.24+ refuses to infer an object array from
    # ragged nested sequences and raises ValueError instead.
    expected_indices = np.array(
        [
            [(), (), (), ()],
            [(), (), (), ()],
            [(), (), (), ()],
            [(), (), (), ()],
            [(2, 3), (), (), ()],
        ],
        dtype=object,
    )
    pairwise_indices = slice_.pairwise_indices()
    np.testing.assert_array_equal(pairwise_indices, expected_indices)

0 comments on commit ad9c7db

Please sign in to comment.