Skip to content

Commit

Permalink
Merge 30d851d into b1d5f3c
Browse files Browse the repository at this point in the history
  • Loading branch information
slobodan-ilic committed Apr 23, 2019
2 parents b1d5f3c + 30d851d commit cb2ac12
Show file tree
Hide file tree
Showing 4 changed files with 214 additions and 11 deletions.
5 changes: 5 additions & 0 deletions src/cr/cube/cube_slice.py
Original file line number Diff line number Diff line change
Expand Up @@ -563,6 +563,11 @@ def pairwise_indices(self, alpha=0.05, only_larger=True, hs_dims=None):
self, alpha=alpha, only_larger=only_larger, hs_dims=hs_dims
).pairwise_indices

def summary_pairwise_indices(self, alpha=0.05, only_larger=True, hs_dims=None):
    """Return the column-summary pairwise indices for this slice.

    Builds a `PairwiseSignificance` for this slice, forwarding `alpha`,
    `only_larger` and `hs_dims` unchanged, and returns its
    `summary_pairwise_indices` attribute.
    """
    significance = PairwiseSignificance(
        self, alpha=alpha, only_larger=only_larger, hs_dims=hs_dims
    )
    return significance.summary_pairwise_indices

def pairwise_significance_tests(self, column_idx, hs_dims=None):
"""list of _ColumnPairwiseSignificance tests.
Expand Down
63 changes: 54 additions & 9 deletions src/cr/cube/measures/pairwise_significance.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,17 @@ def pairwise_indices(self):
"""ndarray containing tuples of pairwise indices."""
return np.array([sig.pairwise_indices for sig in self.values]).T

@lazyproperty
def summary_pairwise_indices(self):
    """ndarray containing tuples of pairwise indices for the column summary.

    The result is a 1-D object array; its length is the second dimension of
    the first item's `t_stats`, and it holds the `summary_pairwise_indices`
    of every item in `self.values`, in order.
    """
    n_cols = self.values[0].t_stats.shape[1]
    # Pre-allocate with dtype=object and fill by slice assignment, presumably
    # so each tuple is stored as one element whatever its length.
    indices = np.empty(n_cols, dtype=object)
    indices[:] = [sig.summary_pairwise_indices for sig in self.values]
    return indices


# pylint: disable=too-few-public-methods
class _ColumnPairwiseSignificance:
Expand All @@ -79,30 +90,64 @@ def __init__(
self._only_larger = only_larger
self._hs_dims = hs_dims

@lazyproperty
def _unweighted_col_margin(self):
return self._slice.margin(
axis=0, weighted=False, include_transforms_for_dims=self._hs_dims
)

@lazyproperty
def t_stats(self):
    """ndarray of t-statistics of every column against the reference column.

    For each row, the column proportions (axis 0) are compared with the
    proportion in column `self._col_idx`. The variance of each proportion
    is `p * (1 - p) / n`, with `n` taken from the unweighted column margin,
    and the standard error of a difference is the square root of the sum
    of the two variances.
    """
    props = self._slice.proportions(
        axis=0, include_transforms_for_dims=self._hs_dims
    )
    # `[self._col_idx]` (a list) keeps the reference column 2-D so it
    # broadcasts across all columns.
    diff = props - props[:, [self._col_idx]]
    # Reuse the cached margin property instead of asking the slice for the
    # same unweighted margin a second time.
    var_props = props * (1.0 - props) / self._unweighted_col_margin
    se_diff = np.sqrt(var_props + var_props[:, [self._col_idx]])
    return diff / se_diff

@lazyproperty
def p_vals(self):
    """ndarray of two-tailed p-values for `t_stats`.

    Each p-value is `2 * (1 - cdf(|t|))` under Student's t distribution with
    `self._df` degrees of freedom.
    """
    # Use the `_df` property rather than `unweighted_n[self._col_idx]`:
    # `_df` picks the reference column along the column axis when the
    # unweighted margin is 2-D, where plain indexing would select a row.
    return 2 * (1 - t.cdf(abs(self.t_stats), df=self._df))

@lazyproperty
def pairwise_indices(self):
    """list with one tuple of significant column indices per row.

    A column is significant in a row when its p-value is below
    `self._alpha`. When `self._only_larger` is set, the column's t-statistic
    must also be negative.
    """
    is_significant = self.p_vals < self._alpha
    if not self._only_larger:
        keep = is_significant
    else:
        keep = np.logical_and(self.t_stats < 0, is_significant)
    return [tuple(np.nonzero(row_mask)[0]) for row_mask in keep]

@lazyproperty
def summary_pairwise_indices(self):
    """tuple of column indices that are significant in the column summary.

    A column qualifies when its summary p-value is below `self._alpha`.
    When `self._only_larger` is set, its summary t-statistic must also be
    negative.
    """
    is_significant = self.summary_p_vals < self._alpha
    if not self._only_larger:
        keep = is_significant
    else:
        keep = np.logical_and(self.summary_t_stats < 0, is_significant)
    return tuple(np.nonzero(keep)[0])

@lazyproperty
def summary_t_stats(self):
    """ndarray of t-statistics comparing column-margin proportions.

    Each column's share of the total margin is compared with the share of
    column `self._col_idx`. The variance of a share is `p * (1 - p) / total`.
    """
    # NOTE(review): the shares divide the *unweighted* column margin by a
    # total obtained with `weighted=self._weighted` -- confirm this mix is
    # intended when `_weighted` is true.
    total = self._slice.margin(weighted=self._weighted)
    shares = self._unweighted_col_margin / total
    ref_idx = self._col_idx
    variances = shares * (1.0 - shares) / total
    std_err = np.sqrt(variances + variances[ref_idx])
    return (shares - shares[ref_idx]) / std_err

@lazyproperty
def summary_p_vals(self):
    """ndarray of two-tailed p-values for `summary_t_stats`.

    Computed as `2 * (1 - cdf(|t|))` under Student's t distribution with
    `self._df` degrees of freedom.
    """
    abs_t_stats = abs(self.summary_t_stats)
    cdf_values = t.cdf(abs_t_stats, df=self._df)
    return 2 * (1 - cdf_values)

@lazyproperty
def _df(self):
selected_unweighted_n = (
self._unweighted_n[self._col_idx]
if self._unweighted_n.ndim < 2
else self._unweighted_n[:, self._col_idx][:, None]
)
return self._unweighted_n + selected_unweighted_n - 2

@lazyproperty
def _unweighted_n(self):
return self._slice.margin(
axis=0, weighted=False, include_transforms_for_dims=self._hs_dims
)
57 changes: 56 additions & 1 deletion tests/integration/test_pairwise_significance.py
Original file line number Diff line number Diff line change
Expand Up @@ -315,7 +315,7 @@ def test_compare_to_column(self):
np.testing.assert_almost_equal(actual.t_stats, expected_tstats)
np.testing.assert_almost_equal(actual.p_vals, expected_pvals)

def test_pairwise_indices_only_larger(self):
def test_cat_x_cat_pairwise_indices_only_larger(self):
cube = CrunchCube(CR.PAIRWISE_HIROTSU_OCCUPATION_X_ILLNESS)
expected_indices = np.array(
[
Expand All @@ -334,6 +334,48 @@ def test_pairwise_indices_only_larger(self):
pairwise_indices = cube.slices[0].pairwise_indices()
np.testing.assert_array_equal(pairwise_indices, expected_indices)

def test_mr_x_cat_pairwise_indices_only_larger(self):
    """Default pairwise indices of the first slice of `CR.MR_X_CAT_HS`."""
    cube = CrunchCube(CR.MR_X_CAT_HS)
    # The tuples have different lengths. Request dtype=object explicitly:
    # NumPy >= 1.24 raises ValueError when asked to build an array from
    # ragged nested sequences without it.
    expected_indices = np.array(
        [
            [(1, 3, 4), (), (), (), (), ()],
            [(), (), (), (), (), ()],
            [(), (), (), (0,), (0,), ()],
            [(), (), (), (), (1,), ()],
            [(), (), (), (), (), ()],
        ],
        dtype=object,
    )
    pairwise_indices = cube.slices[0].pairwise_indices()
    np.testing.assert_array_equal(pairwise_indices, expected_indices)

def test_cat_x_mr_pairwise_indices_only_larger(self):
    """Default pairwise indices of the first slice of `CR.CAT_X_MR_HS`."""
    cube = CrunchCube(CR.CAT_X_MR_HS)
    # The tuples have different lengths. Request dtype=object explicitly:
    # NumPy >= 1.24 raises ValueError when asked to build an array from
    # ragged nested sequences without it.
    expected_indices = np.array(
        [
            [(1, 2, 3, 4), (2, 3), (), (), (2,)],
            [(), (), (), (), (3,)],
            [(), (), (), (), ()],
            [(), (0,), (0,), (0,), (0,)],
            [(), (), (), (0, 1, 4), ()],
            [(), (), (), (), ()],
        ],
        dtype=object,
    )
    pairwise_indices = cube.slices[0].pairwise_indices()
    np.testing.assert_array_equal(pairwise_indices, expected_indices)

def test_mr_x_mr_pairwise_indices_only_larger(self):
    """Default pairwise indices of the first slice of `CR.MR_X_MR`."""
    cube = CrunchCube(CR.MR_X_MR)
    # The tuples have different lengths. Request dtype=object explicitly:
    # NumPy >= 1.24 raises ValueError when asked to build an array from
    # ragged nested sequences without it.
    expected_indices = np.array(
        [
            [(1, 2, 3), (), (), ()],
            [(), (0, 2, 3), (), (2,)],
            [(), (), (0, 1, 3), (1,)],
            [(), (), (), ()],
        ],
        dtype=object,
    )
    pairwise_indices = cube.slices[0].pairwise_indices()
    np.testing.assert_array_equal(pairwise_indices, expected_indices)

def test_pairwise_indices_larger_and_smaller(self):
cube = CrunchCube(CR.PAIRWISE_HIROTSU_OCCUPATION_X_ILLNESS)
expected_indices = np.array(
Expand All @@ -353,6 +395,19 @@ def test_pairwise_indices_larger_and_smaller(self):
pairwise_indices = cube.slices[0].pairwise_indices(only_larger=False)
np.testing.assert_array_equal(pairwise_indices, expected_indices)

def test_cat_x_cat_summary_pairwise_indices(self):
    """Summary pairwise indices of `CR.PAIRWISE_HIROTSU_OCCUPATION_X_ILLNESS`."""
    slice_ = CrunchCube(CR.PAIRWISE_HIROTSU_OCCUPATION_X_ILLNESS).slices[0]

    # Only larger
    pairwise_indices = slice_.summary_pairwise_indices()
    # The tuples have different lengths. Request dtype=object explicitly:
    # NumPy >= 1.24 raises ValueError for ragged sequences without it.
    expected_indices = np.array([(2,), (0, 2), ()], dtype=object)
    np.testing.assert_array_equal(pairwise_indices, expected_indices)

    # Larger and smaller
    pairwise_indices = slice_.summary_pairwise_indices(only_larger=False)
    expected_indices = np.array([(1, 2), (0, 2), (0, 1)], dtype="i,i")
    np.testing.assert_array_equal(pairwise_indices, expected_indices)

def test_ttests_use_unweighted_n_for_variance(self):
"""The weights on this cube demonstrate much higher variance (less
extreme t values, and higher associated p-values) than if weighted_n
Expand Down
100 changes: 99 additions & 1 deletion tests/unit/test_wishart_pairwise_significance.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,8 +57,82 @@ def it_knows_its_pairwise_indices(
cps = _ColumnPairwiseSignificance(slice_, None, only_larger=only_larger)
assert cps.pairwise_indices == pairwise_indices

def it_can_calculate_summary_t_stats(
    self, slice_, _unweighted_col_margin_prop_, summary_t_stats_fixture
):
    """`summary_t_stats` matches the fixture's expected values.

    The column margin is supplied through the property mock and the total
    margin (its sum) through the mocked `slice_.margin`.
    """
    margin, col_idx, expected_t_stats = summary_t_stats_fixture[:3]
    slice_.margin.return_value = np.sum(margin)
    _unweighted_col_margin_prop_.return_value = margin
    significance = _ColumnPairwiseSignificance(slice_, col_idx)
    np.testing.assert_almost_equal(significance.summary_t_stats, expected_t_stats)

def it_can_calculate_summary_p_vals(
    self,
    slice_,
    _unweighted_col_margin_prop_,
    _unweighted_n_prop,
    summary_t_stats_fixture,
):
    """`summary_p_vals` matches the fixture's expected values.

    The same margin is supplied for both the column margin and the
    unweighted n; the total margin (its sum) comes from `slice_.margin`.
    """
    margin, col_idx, _, expected_p_vals = summary_t_stats_fixture
    for prop_mock in (_unweighted_col_margin_prop_, _unweighted_n_prop):
        prop_mock.return_value = margin
    slice_.margin.return_value = np.sum(margin)
    significance = _ColumnPairwiseSignificance(slice_, col_idx)
    np.testing.assert_almost_equal(significance.summary_p_vals, expected_p_vals)

def it_can_calculate_summary_pairwise_indices(
    self,
    slice_,
    summary_pairwise_indices_fixture,
    summary_p_vals_prop,
    summary_t_stats_prop,
):
    """`summary_pairwise_indices` matches the fixture's expected tuple.

    Both the summary t-stats and the summary p-values are supplied through
    property mocks.
    """
    fixture = summary_pairwise_indices_fixture
    only_larger, col_idx, t_stats, p_vals, expected = fixture
    summary_t_stats_prop.return_value = t_stats
    summary_p_vals_prop.return_value = p_vals
    significance = _ColumnPairwiseSignificance(
        slice_, col_idx, only_larger=only_larger
    )
    np.testing.assert_array_equal(significance.summary_pairwise_indices, expected)

# fixtures -------------------------------------------------------

@pytest.fixture(
params=[
(True, 1, [-0.6793662, 0.0, -1], [0.6201015, 1.0, 0.01], (2,)),
(False, 0, None, [1, 0.01, 0.01], (1, 2)),
]
)
def summary_pairwise_indices_fixture(self, request):
    """Unpack `request.param`, converting the t-stats and p-values to ndarrays.

    Returns `(only_larger, col_idx, t_stats, p_vals, expected)`.
    """
    only_larger, col_idx, raw_t_stats, raw_p_vals, expected = request.param
    t_stats_array = np.array(raw_t_stats)
    p_vals_array = np.array(raw_p_vals)
    return only_larger, col_idx, t_stats_array, p_vals_array, expected

@pytest.fixture(
params=[
(
[1, 2, 3],
0,
[0.0, 0.67936622, 1.30930734],
[np.nan, 0.62010151, 0.32063378],
),
([1, 2, 3], 1, [-0.6793662, 0.0, 0.5940885], [0.6201015, 1.0, 0.5942728]),
]
)
def summary_t_stats_fixture(self, request):
    """Unpack `request.param`, converting all but the column index to ndarrays.

    Returns `(margin, col_idx, expected_t_stats, expected_p_vals)`.
    """
    margin, col_idx, expected_t_stats, expected_p_vals = request.param
    margin_array = np.array(margin)
    t_stats_array = np.array(expected_t_stats)
    p_vals_array = np.array(expected_p_vals)
    return margin_array, col_idx, t_stats_array, p_vals_array

@pytest.fixture(
params=[
(
Expand Down Expand Up @@ -164,7 +238,13 @@ def p_vals_fixture(self, request):
[0.0, -0.8586079707543924, -1.1774569464270872],
[0.0, 4.663801762560106, 3.743253010905157],
],
)
),
(
0,
[[0.25, 0.75], [0.75, 0.25]],
[[1, 2], [3, 4]],
[[0.0, 0.94280904], [0.0, -1.51185789]],
),
]
)
def t_stats_fixture(self, request):
Expand All @@ -177,10 +257,28 @@ def t_stats_fixture(self, request):
def slice_(self, request):
    """Return the result of `instance_mock` for `CubeSlice`."""
    cube_slice_mock = instance_mock(request, CubeSlice)
    return cube_slice_mock

@pytest.fixture
def _unweighted_col_margin_prop_(self, request):
    """Return `property_mock` for `_ColumnPairwiseSignificance._unweighted_col_margin`."""
    prop_name = "_unweighted_col_margin"
    return property_mock(request, _ColumnPairwiseSignificance, prop_name)

@pytest.fixture
def _unweighted_n_prop(self, request):
    """Return `property_mock` for `_ColumnPairwiseSignificance._unweighted_n`."""
    prop_name = "_unweighted_n"
    return property_mock(request, _ColumnPairwiseSignificance, prop_name)

@pytest.fixture
def t_stats_prop_(self, request):
    """Return `property_mock` for `_ColumnPairwiseSignificance.t_stats`."""
    prop_name = "t_stats"
    return property_mock(request, _ColumnPairwiseSignificance, prop_name)

@pytest.fixture
def summary_p_vals_prop(self, request):
    """Return `property_mock` for `_ColumnPairwiseSignificance.summary_p_vals`."""
    prop_name = "summary_p_vals"
    return property_mock(request, _ColumnPairwiseSignificance, prop_name)

@pytest.fixture
def summary_t_stats_prop(self, request):
    """Return `property_mock` for `_ColumnPairwiseSignificance.summary_t_stats`."""
    prop_name = "summary_t_stats"
    return property_mock(request, _ColumnPairwiseSignificance, prop_name)

@pytest.fixture
def p_vals_prop_(self, request):
    """Return `property_mock` for `_ColumnPairwiseSignificance.p_vals`."""
    prop_name = "p_vals"
    return property_mock(request, _ColumnPairwiseSignificance, prop_name)

0 comments on commit cb2ac12

Please sign in to comment.