Skip to content

Commit

Permalink
Merge 436a442 into da8b65c
Browse files Browse the repository at this point in the history
  • Loading branch information
slobodan-ilic committed Apr 16, 2019
2 parents da8b65c + 436a442 commit e2eb27d
Show file tree
Hide file tree
Showing 7 changed files with 133 additions and 48 deletions.
3 changes: 3 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -112,6 +112,9 @@ The detailed description can be found

## Changes

#### 1.9.16
- Proper t-stats for cubes with H&S

#### 1.9.15
- Implement pairwise indices for Wishart, directly in cube

Expand Down
2 changes: 1 addition & 1 deletion src/cr/cube/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,4 +2,4 @@

"""Initialization module for crunch-cube package."""

__version__ = "1.9.15"
__version__ = "1.9.16"
4 changes: 2 additions & 2 deletions src/cr/cube/cube_slice.py
Original file line number Diff line number Diff line change
Expand Up @@ -563,14 +563,14 @@ def pairwise_indices(self, alpha=0.05, only_larger=True, hs_dims=None):
self, alpha=alpha, only_larger=only_larger, hs_dims=hs_dims
).pairwise_indices

def pairwise_significance_tests(self, column_idx):
def pairwise_significance_tests(self, column_idx, hs_dims=None):
"""list of _ColumnPairwiseSignificance tests.
Result has as many elements as there are columns in the slice. Each
significance test contains `p_vals` and `t_stats` (ndarrays that represent
probability values and statistical scores).
"""
return PairwiseSignificance(self).values[column_idx]
return PairwiseSignificance(self, hs_dims=hs_dims).values[column_idx]

def _apply_pruning_mask(self, res, hs_dims=None):
array = self.as_array(prune=True, include_transforms_for_dims=hs_dims)
Expand Down
35 changes: 14 additions & 21 deletions src/cr/cube/measures/pairwise_significance.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
import numpy as np
from scipy.stats import t

from cr.cube.util import lazyproperty, intersperse_hs_in_std_res
from cr.cube.util import lazyproperty

try:
xrange
Expand Down Expand Up @@ -48,21 +48,13 @@ def values(self):
self._only_larger,
self._hs_dims,
)
for col_idx in range(self._slice.shape[1])
for col_idx in range(self._slice.get_shape(hs_dims=self._hs_dims)[1])
]

@lazyproperty
def pairwise_indices(self):
"""ndarray containing tuples of pairwise indices."""
pwi = np.array([sig.pairwise_indices for sig in self.values]).T

if self._hs_dims and 1 in self._hs_dims:
# If we need to account for the dimension 1 in pairwise indices, we need
# to intersperse with NaNs. The dimension 0 is already tackled
# when determining the indices.
pwi = intersperse_hs_in_std_res(self._slice, (1,), pwi)

return pwi
return np.array([sig.pairwise_indices for sig in self.values]).T


# pylint: disable=too-few-public-methods
Expand All @@ -88,24 +80,25 @@ def __init__(
self._hs_dims = hs_dims

@lazyproperty
def _t_stats(self):
props = self._slice.proportions(axis=0)
def t_stats(self):
props = self._slice.proportions(
axis=0, include_transforms_for_dims=self._hs_dims
)
diff = props - props[:, [self._col_idx]]
margin = self._slice.margin(axis=0, weighted=self._weighted)
margin = self._slice.margin(
axis=0, weighted=self._weighted, include_transforms_for_dims=self._hs_dims
)
var_props = props * (1.0 - props) / margin
se_diff = np.sqrt(var_props + var_props[:, [self._col_idx]])
return diff / se_diff

@lazyproperty
def t_stats(self):
return intersperse_hs_in_std_res(self._slice, self._hs_dims, self._t_stats)

@lazyproperty
def p_vals(self):
unweighted_n = self._slice.margin(axis=0, weighted=False)
unweighted_n = self._slice.margin(
axis=0, weighted=False, include_transforms_for_dims=self._hs_dims
)
df = unweighted_n + unweighted_n[self._col_idx] - 2
p_vals = 2 * (1 - t.cdf(abs(self._t_stats), df=df))
return intersperse_hs_in_std_res(self._slice, self._hs_dims, p_vals)
return 2 * (1 - t.cdf(abs(self.t_stats), df=df))

@lazyproperty
def pairwise_indices(self):
Expand Down
7 changes: 1 addition & 6 deletions src/cr/cube/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,12 +37,7 @@ def compress_pruned(table):


def intersperse_hs_in_std_res(slice_, hs_dims, res):

if not hs_dims:
# Don't intersperse anything, just return the result
return res

# Perform the insertions of place-holding rows and cols for insertions
"""Perform the insertions of place-holding rows and cols for insertions."""
for dim, inds in enumerate(slice_.inserted_hs_indices()):
if dim not in hs_dims:
continue
Expand Down
118 changes: 108 additions & 10 deletions tests/integration/test_pairwise_significance.py
Original file line number Diff line number Diff line change
Expand Up @@ -96,17 +96,115 @@ def test_hirotsu_pairwise_indices(self):
expected = np.array([(), (), (3, 4, 7, 9), (2,), (2,), (), (), (2,), (), (2,)])
np.testing.assert_array_equal(actual_pairwise_indices, expected)

def test_pairwise_t_stats_with_hs(self):
slice_ = CrunchCube(CR.PAIRWISE_HIROTSU_ILLNESS_X_OCCUPATION_WITH_HS).slices[0]
expected = np.array(
[
[
0.0,
-0.06178448,
0.30342874,
-3.18865018,
-3.82130608,
-2.99560531,
0.07456344,
-0.68932699,
-2.95469238,
-0.46970468,
-4.14956044,
],
[
0.0,
1.18922394,
0.38254102,
3.3306654,
3.45013209,
2.7520633,
0.59216241,
0.86352416,
2.54171145,
1.10130414,
3.3839919,
],
[
0.0,
-1.7080666,
-0.931165,
-0.87419923,
-0.24915622,
-0.2367748,
-0.97198009,
-0.38504801,
-0.03910193,
-1.02720423,
0.19773989,
],
]
)
t_stats = slice_.pairwise_significance_tests(
column_idx=0, hs_dims=(0, 1)
).t_stats
np.testing.assert_almost_equal(t_stats, expected)

def test_pairwise_p_vals_with_hs(self):
slice_ = CrunchCube(CR.PAIRWISE_HIROTSU_ILLNESS_X_OCCUPATION_WITH_HS).slices[0]
expected = np.array(
[
[
1.00000000e00,
9.50744855e-01,
7.61580875e-01,
1.45494511e-03,
1.35271514e-04,
2.75262668e-03,
9.40575477e-01,
4.90755141e-01,
3.16822332e-03,
6.38672254e-01,
3.44275537e-05,
],
[
1.00000000e00,
2.34589189e-01,
7.02082945e-01,
8.84605265e-04,
5.67562813e-04,
5.94375533e-03,
5.53862215e-01,
3.88027539e-01,
1.11098015e-02,
2.71039211e-01,
7.25442977e-04,
],
[
1.00000000e00,
8.78852262e-02,
3.51831360e-01,
3.82131035e-01,
8.03255937e-01,
8.12841336e-01,
3.31271828e-01,
7.00272311e-01,
9.68813218e-01,
3.04581978e-01,
8.43264694e-01,
],
]
)
p_vals = slice_.pairwise_significance_tests(column_idx=0, hs_dims=(0, 1)).p_vals
np.testing.assert_almost_equal(p_vals, expected)

def test_pairwise_indices_with_hs(self):
cube = CrunchCube(CR.PAIRWISE_HIROTSU_ILLNESS_X_OCCUPATION_WITH_HS)
slice_ = CrunchCube(CR.PAIRWISE_HIROTSU_ILLNESS_X_OCCUPATION_WITH_HS).slices[0]
expected = [
[
(3, 4, 8, 10),
(3, 4, 8, 10),
(3, 4, 8, 10),
(3, 4, 5, 8, 10),
(3, 4, 5, 8, 10),
(3, 4, 5, 8, 10),
(),
(),
np.nan,
(3, 4, 8, 10),
(10,),
(3, 4, 5, 8, 10),
(3, 4, 10),
(),
(4, 10),
Expand All @@ -118,17 +216,17 @@ def test_pairwise_indices_with_hs(self):
(),
(0, 2, 6, 7),
(0, 2, 6, 7),
np.nan,
(0, 2),
(),
(),
(0, 2),
(),
(0, 2, 6, 7),
],
[(), (), (), (), (), np.nan, (), (), (), (), (1,)],
[(), (), (), (), (), (1,), (), (), (), (), (1,)],
]
actual = cube.slices[0].pairwise_indices(hs_dims=[0, 1]).tolist()
assert actual == expected
pairwise_indices = slice_.pairwise_indices(hs_dims=(0, 1)).tolist()
assert pairwise_indices == expected

def test_hirotsu_pvals_with_hs(self):
"""The shape of the result should be 11 x 11, with H&S (at index 5)."""
Expand Down
12 changes: 4 additions & 8 deletions tests/unit/test_wishart_pairwise_significance.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
class DescribePairwiseSignificance:
def it_provides_access_to_its_values(self, request, slice_):
shape = (2, 2)
slice_.shape = shape
slice_.get_shape.return_value = shape
expected_test_values = PairwiseSignificance(slice_).values
assert len(expected_test_values) == 2
for i, column_pairwise_significance in enumerate(expected_test_values):
Expand All @@ -37,13 +37,13 @@ def it_can_calculate_t_stats(self, t_stats_fixture, slice_):
slice_.proportions.return_value = props
slice_.margin.return_value = margin
np.testing.assert_almost_equal(
_ColumnPairwiseSignificance(slice_, col_idx)._t_stats, t_stats
_ColumnPairwiseSignificance(slice_, col_idx).t_stats, t_stats
)

def it_can_calculate_p_vals(self, p_vals_fixture, slice_, _t_stats_prop_):
def it_can_calculate_p_vals(self, p_vals_fixture, slice_, t_stats_prop_):
col_idx, t_stats, margin, p_vals = p_vals_fixture
slice_.margin.return_value = margin
_t_stats_prop_.return_value = t_stats
t_stats_prop_.return_value = t_stats
np.testing.assert_almost_equal(
_ColumnPairwiseSignificance(slice_, col_idx).p_vals, p_vals
)
Expand Down Expand Up @@ -177,10 +177,6 @@ def t_stats_fixture(self, request):
def slice_(self, request):
return instance_mock(request, CubeSlice)

@pytest.fixture
def _t_stats_prop_(self, request):
return property_mock(request, _ColumnPairwiseSignificance, "_t_stats")

@pytest.fixture
def t_stats_prop_(self, request):
return property_mock(request, _ColumnPairwiseSignificance, "t_stats")
Expand Down

0 comments on commit e2eb27d

Please sign in to comment.