Skip to content

Commit

Permalink
Address PR comments from Steve, part 1
Browse files Browse the repository at this point in the history
* Make as much functionality in PairwisePvalues (as possible) a lazyproperty
* Make class member fields private
* Normalize PairwisePvalues
  • Loading branch information
slobodan-ilic committed Jan 17, 2019
1 parent 99f6076 commit 2c464aa
Show file tree
Hide file tree
Showing 2 changed files with 61 additions and 41 deletions.
97 changes: 58 additions & 39 deletions src/cr/cube/measures/pairwise_pvalues.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,63 +12,82 @@
try:
xrange
except NameError: # pragma: no cover
# pylint: disable=invalid-name
xrange = range


class PairwisePvalues(object):
# pylint: disable=too-few-public-methods
class PairwisePvalues:
"""Value object providing matrix of pairwise-comparison P-values"""

def __init__(self, slice_, axis=0):
self.slice = slice_
self.axis = axis
def __init__(self, slice_, axis=0, weighted=True):
    """Remember the slice, the comparison axis and the weighting flag.

    Nothing is computed here; the three arguments are only stored on
    private attributes for the lazily evaluated properties to read.
    """
    self._slice, self._axis, self._weighted = slice_, axis, weighted

@lazyproperty
def values(self):
    """Square ndarray of pairwise-comparison p-values along axis.

    Computed as ``1 - wishartCDF(chisq, n_min, n_max)``, where `chisq` is
    the matrix of pairwise Chi-squared statistics and `n_min` / `n_max`
    are the zero-based smaller / larger dimension sizes of the slice.
    """
    return 1.0 - wishartCDF(self._pairwise_chisq, self._n_min, self._n_max)

@lazyproperty
def n_max(self):
"""zero based int representing the bigger of the two cube's dimension."""
return max(self.slice.get_shape()) - 1
def _categorical_pairwise_chisq(self):
    """ndarray containing pairwise comparisons along axis.

    Returns a square, symmetric matrix of test statistics for the null
    hypothesis that each vector along *axis* is equal to each other.
    The diagonal (a vector compared with itself) is left at zero.
    """
    # Read each property once instead of on every inner-loop iteration.
    numel = self._numel
    proportions = self._proportions
    margin = self._margin
    wts = self._wts

    chisq = np.zeros([numel, numel])
    # The matrix is symmetric, so each unordered pair (j < i) is computed
    # once and mirrored across the diagonal.
    for i in xrange(1, numel):
        for j in xrange(i):
            diff = proportions[:, i] - proportions[:, j]
            chisq[i, j] = chisq[j, i] = np.sum(np.square(diff) / wts) / (
                1 / margin[i] + 1 / margin[j]
            )

    return chisq

@lazyproperty
def n_min(self):
"""zero based int representing the smaller of the two cube's dimension."""
return min(self.slice.get_shape()) - 1
def _margin(self):
    """ndarray of floats: the slice's margin taken along the chosen axis."""
    axis = self._axis
    return self._slice.margin(axis=axis)

def pairwise_chisq(self, axis=0, weighted=True):
"""Return square ndarray of pairwise Chi-squared statistics along axis.
@lazyproperty
def _off_margin(self):
    """ndarray of floats: the slice's margin along the axis opposite to ours."""
    opposite_axis = 1 - self._axis
    return self._slice.margin(axis=opposite_axis)

@lazyproperty
def _pairwise_chisq(self):
    """ndarray of pairwise Chi-squared statistics along axis.

    Currently this simply returns the categorical pairwise statistics;
    the comparison axis is the one stored on the instance.

    NOTE(review): the previous docstring stated that only column
    comparison (axis=0) is implemented -- confirm whether that
    restriction still holds.
    """
    return self._categorical_pairwise_chisq

def _categorical_pairwise_chisq(self, axis, weighted):
"""Return ndarray containing pairwise comparisons along axis
@lazyproperty
def _proportions(self):
    """Proportions of the slice, as computed by the slice along our axis."""
    axis = self._axis
    return self._slice.proportions(axis=axis)

Returns a square, symmetric matrix of test statistics for the null
hypothesis that each vector along *axis* is equal to each other.
"""
slice_ = self.slice

margin = slice_.margin(axis=axis)
offmargin = slice_.margin(axis=(1 - axis))
proportions = slice_.proportions(axis=axis)
wts = offmargin / slice_.margin()
counts = slice_.as_array(weighted=weighted)
elements = counts.shape[1 - axis]
chisq = np.zeros([elements, elements])
for i in xrange(1, elements):
for j in xrange(0, elements - 1):
chisq[i, j] = chisq[j, i] = np.sum(
np.square(proportions[:, i] - proportions[:, j]) / wts
) / (1 / margin[i] + 1 / margin[j])
@lazyproperty
def _n_max(self):
    """Zero-based int: size of the slice's largest dimension, minus one."""
    shape = self._slice.get_shape()
    largest = max(shape)
    return largest - 1

return chisq
@lazyproperty
def _n_min(self):
    """Zero-based int: size of the slice's smallest dimension, minus one."""
    shape = self._slice.get_shape()
    smallest = min(shape)
    return smallest - 1

@lazyproperty
def _numel(self):
    """int: length of the slice's shape on the dimension opposite to axis."""
    shape = self._slice.get_shape()
    opposite_axis = 1 - self._axis
    return shape[opposite_axis]

@lazyproperty
def _wts(self):
    # TODO: @mike - come up with a better docstring and property name
    """Off-axis margin divided by the slice's overall margin.

    Used as the per-element weights (the divisor of the squared
    proportion differences) in the pairwise Chi-squared computation.
    """
    grand_total = self._slice.margin()
    off_margin = self._off_margin
    return off_margin / grand_total
5 changes: 3 additions & 2 deletions tests/integration/test_pairwise.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
# encoding: utf-8
# pylint: disable=protected-access

"""Integration tests for pairwise comparisons."""

Expand All @@ -22,7 +23,7 @@ def test_same_col_counts(self):
cube = CrunchCube(CR.SAME_COUNTS_3x4)
pairwise_pvalues = PairwisePvalues(cube.slices[0], axis=0)
expected = np.zeros([4, 4])
actual = pairwise_pvalues.pairwise_chisq(axis=0)
actual = pairwise_pvalues._pairwise_chisq
np.testing.assert_equal(actual, expected)

def test_hirotsu_chisq(self):
Expand Down Expand Up @@ -153,7 +154,7 @@ def test_hirotsu_chisq(self):
],
]
)
actual = pairwise_pvalues.pairwise_chisq(axis=0)
actual = pairwise_pvalues._pairwise_chisq
np.testing.assert_almost_equal(actual, expected)

def test_same_col_pvals(self):
Expand Down

0 comments on commit 2c464aa

Please sign in to comment.