Skip to content

Commit

Permalink
Merge 4a72626 into 7e6c75a
Browse files Browse the repository at this point in the history
  • Loading branch information
slobodan-ilic committed Apr 5, 2019
2 parents 7e6c75a + 4a72626 commit 9e91756
Show file tree
Hide file tree
Showing 8 changed files with 488 additions and 37 deletions.
19 changes: 6 additions & 13 deletions src/cr/cube/cube_slice.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
lazyproperty,
memoize,
intersperse_hs_in_std_res,
apply_pruning_mask,
)

try:
Expand Down Expand Up @@ -262,7 +263,7 @@ def index_table(self, axis=None, baseline=None, prune=False):

indexes = proportions / baseline * 100

return self._apply_pruning_mask(indexes) if prune else indexes
return apply_pruning_mask(self, indexes) if prune else indexes

@lazyproperty
def is_double_mr(self):
Expand Down Expand Up @@ -510,7 +511,7 @@ def pvals(self, weighted=True, prune=False, hs_dims=None):
stats = self.zscore(weighted=weighted, prune=prune, hs_dims=hs_dims)
pvals = 2 * (1 - norm.cdf(np.abs(stats)))

return self._apply_pruning_mask(pvals, hs_dims) if prune else pvals
return apply_pruning_mask(self, pvals, hs_dims) if prune else pvals

def zscore(self, weighted=True, prune=False, hs_dims=None):
"""Return ndarray with slices's standardized residuals (Z-scores).
Expand Down Expand Up @@ -540,11 +541,11 @@ def zscore(self, weighted=True, prune=False, hs_dims=None):
zscore = intersperse_hs_in_std_res(self, hs_dims, zscore)

if prune:
return self._apply_pruning_mask(zscore, hs_dims)
return apply_pruning_mask(self, zscore, hs_dims)

return zscore

def pairwise_indices(self, alpha=0.05, only_larger=True, hs_dims=None):
def pairwise_indices(self, alpha=0.05, only_larger=True, hs_dims=None, prune=False):
"""Indices of columns where p < alpha for column-comparison t-tests
Returns an array of tuples of columns that are significant at p<alpha,
Expand All @@ -558,7 +559,7 @@ def pairwise_indices(self, alpha=0.05, only_larger=True, hs_dims=None):
self,
hs_dims,
PairwiseSignificance(
self, alpha=alpha, only_larger=only_larger, hs_dims=hs_dims
self, alpha=alpha, only_larger=only_larger, hs_dims=hs_dims, prune=prune
).pairwise_indices,
)

Expand All @@ -571,14 +572,6 @@ def pairwise_significance_tests(self, column_idx):
"""
return PairwiseSignificance(self).values[column_idx]

def _apply_pruning_mask(self, res, hs_dims=None):
array = self.as_array(prune=True, include_transforms_for_dims=hs_dims)

if not isinstance(array, np.ma.core.MaskedArray):
return res

return np.ma.masked_array(res, mask=array.mask)

def _array_type_std_res(self, counts, total, colsum, rowsum):
"""Return ndarray containing standard residuals for array values.
Expand Down
56 changes: 45 additions & 11 deletions src/cr/cube/measures/pairwise_significance.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,12 @@
import numpy as np
from scipy.stats import t

from cr.cube.util import lazyproperty, intersperse_hs_in_std_res
from cr.cube.util import (
lazyproperty,
intersperse_hs_in_std_res,
apply_pruning_mask,
compress_pruned,
)

try:
xrange
Expand All @@ -20,14 +25,22 @@ class PairwiseSignificance:
"""Implementation of p-vals and t-tests for each column proportions comparison."""

def __init__(
self, slice_, axis=0, weighted=True, alpha=0.05, only_larger=True, hs_dims=None
self,
slice_,
axis=0,
weighted=True,
alpha=0.05,
only_larger=True,
hs_dims=None,
prune=False,
):
self._slice = slice_
self._axis = axis
self._weighted = weighted
self._alpha = alpha
self._only_larger = only_larger
self._hs_dims = hs_dims
self._prune = prune

@lazyproperty
def values(self):
Expand All @@ -45,14 +58,21 @@ def values(self):
self._alpha,
self._only_larger,
self._hs_dims,
self._prune,
)
for col_idx in range(self._slice.shape[1])
for col_idx in range(self._slice.shape[1 - self._axis])
if not self._is_pruned(col_idx)
]

@lazyproperty
def pairwise_indices(self):
    """ndarray of the `pairwise_indices` of every item in `values`, transposed.

    Each item of `self.values` contributes its own `pairwise_indices`; the
    collected results are turned into an array, and `.T` is applied to it.
    """
    per_column = [column_sig.pairwise_indices for column_sig in self.values]
    return np.array(per_column).T

def _is_pruned(self, col_idx):
if not self._prune:
return False
return not np.any(self._slice.as_array().T[col_idx])


# pylint: disable=too-few-public-methods
class _ColumnPairwiseSignificance:
Expand All @@ -67,6 +87,7 @@ def __init__(
alpha=0.05,
only_larger=True,
hs_dims=None,
prune=False,
):
self._slice = slice_
self._col_idx = col_idx
Expand All @@ -75,30 +96,43 @@ def __init__(
self._alpha = alpha
self._only_larger = only_larger
self._hs_dims = hs_dims
self._prune = prune

@lazyproperty
def _props(self):
    """Proportions of the slice, requested along `self._axis`."""
    axis = self._axis
    return self._slice.proportions(axis=axis)

@lazyproperty
def _t_stats(self):
    """t-statistics of every column's proportions against the reference column.

    The reference column is `self._col_idx`. The statistic is the difference
    in proportions divided by the standard error of that difference, where
    each column's variance is `p * (1 - p) / margin`.

    Proportions and margin are both taken along `self._axis`; the earlier
    hard-coded `axis=0` computation was overwritten before use and has been
    dropped.
    """
    props = self._props
    ref_col = [self._col_idx]  # list index keeps the column 2D for broadcasting
    diff = props - props[:, ref_col]
    margin = self._slice.margin(axis=self._axis, weighted=self._weighted)
    var_props = props * (1.0 - props) / margin
    se_diff = np.sqrt(var_props + var_props[:, ref_col])
    return diff / se_diff

@lazyproperty
def t_stats(self):
    """t-statistics with H&S interspersed and, optionally, pruning applied.

    The raw `_t_stats` are first passed through `intersperse_hs_in_std_res`
    for `self._hs_dims`. When `self._prune` is set, the slice's pruning mask
    is applied and the result is passed through `compress_pruned`.

    There is a single return at the end, so the pruning branch is actually
    reachable.
    """
    t_stats = intersperse_hs_in_std_res(self._slice, self._hs_dims, self._t_stats)
    if self._prune:
        t_stats = apply_pruning_mask(self._slice, t_stats, self._hs_dims)
        t_stats = compress_pruned(t_stats)
    return t_stats

@lazyproperty
def p_vals(self):
    """Two-sided p-values for the column comparisons against `self._col_idx`.

    Degrees of freedom are the unweighted margin of each column plus that of
    the reference column, minus 2. The p-value is `2 * (1 - cdf(|t|))` using
    the Student t distribution. The margin is taken along `self._axis`.

    The values are then passed through `intersperse_hs_in_std_res` and, when
    `self._prune` is set, masked with the slice's pruning mask and passed
    through `compress_pruned`. There is a single return at the end, so the
    pruning branch is reachable.
    """
    unweighted_n = self._slice.margin(axis=self._axis, weighted=False)
    df = unweighted_n + unweighted_n[self._col_idx] - 2
    p_vals = 2 * (1 - t.cdf(abs(self._t_stats), df=df))
    p_vals = intersperse_hs_in_std_res(self._slice, self._hs_dims, p_vals)
    if self._prune:
        p_vals = apply_pruning_mask(self._slice, p_vals, self._hs_dims)
        p_vals = compress_pruned(p_vals)
    return p_vals

@lazyproperty
def pairwise_indices(self):
    """List with one tuple of significant column indices per row.

    An entry is significant when its p-value is below `self._alpha`. With
    `self._only_larger` set, the entry's t-statistic must also be negative.
    For each row of the resulting boolean array, the positions of the True
    entries are returned as a tuple.
    """
    significance = self.p_vals < self._alpha
    if self._only_larger:
        significance = np.logical_and(self.t_stats < 0, significance)
    return [tuple(np.where(sig_row)[0]) for sig_row in significance]
9 changes: 9 additions & 0 deletions src/cr/cube/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,15 @@ def compress_pruned(table):
return table


def apply_pruning_mask(slice_, res, hs_dims=None):
    """Return `res` masked with the pruning mask of `slice_`.

    The pruned array of `slice_` is requested with transforms included for
    `hs_dims`. If it is a masked array, its mask is applied to `res`;
    otherwise `res` is returned unchanged.
    """
    pruned = slice_.as_array(prune=True, include_transforms_for_dims=hs_dims)

    if isinstance(pruned, np.ma.core.MaskedArray):
        # ---the pruned array's mask marks the entries to hide in `res`---
        return np.ma.masked_array(res, mask=pruned.mask)

    return res


def intersperse_hs_in_std_res(slice_, hs_dims, res):

if not hs_dims:
Expand Down
1 change: 1 addition & 0 deletions tests/fixtures/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,3 +58,4 @@ def _load_to_cache(self, fixture_name):

CR = LazyCubeResponseLoader(".") # ---mnemonic: CR = 'cube-response'---
SM = LazyCubeResponseLoader("./scale_means")  # ---mnemonic: SM = 'scale-means'---
PW = LazyCubeResponseLoader("./pairwise")  # ---mnemonic: PW = 'pairwise'---

0 comments on commit 9e91756

Please sign in to comment.