Skip to content

Commit

Permalink
pvals measures for subtotals
Browse files Browse the repository at this point in the history
  • Loading branch information
ernestoarbitrio committed Oct 21, 2019
1 parent 781e038 commit b0c3040
Show file tree
Hide file tree
Showing 3 changed files with 775 additions and 58 deletions.
86 changes: 50 additions & 36 deletions src/cr/cube/matrix.py
Original file line number Diff line number Diff line change
Expand Up @@ -950,6 +950,10 @@ def table_margin(self):
def values(self):
    """Return the element-wise sum of `.values` across all addend vectors."""
    # Stack each addend's values as one row, then collapse the rows.
    stacked = np.array([addend.values for addend in self._addend_vectors])
    return np.sum(stacked, axis=0)

@lazyproperty
def _pvals(self):
return 2 * (1 - norm.cdf(np.abs(self._zscore)))

@lazyproperty
def _zscore(self):
variance = (
Expand Down Expand Up @@ -979,6 +983,14 @@ def pruned(self):
np.array([row.base for row in self._matrix.rows])
)

@lazyproperty
def pvals(self):
    """Return the value of `self._pvals` under the public name `pvals`."""
    computed_pvals = self._pvals
    return computed_pvals

@lazyproperty
def zscore(self):
    """Return the value of `self._zscore` under the public name `zscore`."""
    computed_zscore = self._zscore
    return computed_zscore

@lazyproperty
def _addend_vectors(self):
return tuple(
Expand All @@ -999,10 +1011,6 @@ def _expected_counts(self):
def _opposite_margins(self):
return self._addend_vectors[0].opposite_margins

@lazyproperty
def zscore(self):
    """Return the value of `self._zscore` under the public name `zscore`."""
    computed_zscore = self._zscore
    return computed_zscore


class _InsertionRow(_BaseMatrixInsertionVector):
"""Represents an inserted (subtotal) row."""
Expand All @@ -1021,7 +1029,7 @@ def pruned(self):

@lazyproperty
def pvals(self):
return np.array([np.nan] * len(self._matrix.columns))
return self._pvals

@lazyproperty
def zscore(self):
Expand Down Expand Up @@ -1141,11 +1149,7 @@ def proportions(self):

@lazyproperty
def pvals(self):
return np.array(
tuple([np.nan] * len(self._top_values))
+ self._interleaved_pvals
+ tuple([np.nan] * len(self._bottom_values))
)
return np.array(self._top_pvals + self._interleaved_pvals + self._bottom_pvals)

@lazyproperty
def table_proportions(self):
Expand Down Expand Up @@ -1178,6 +1182,12 @@ def _bottom_base_values(self):
for col in self._bottom_insertions
)

@lazyproperty
def _bottom_pvals(self):
return tuple(
vector.pvals[self._vector_idx] for vector in self._bottom_insertions
)

@lazyproperty
def _bottom_values(self):
return tuple(
Expand Down Expand Up @@ -1235,7 +1245,7 @@ def _interleaved_pvals(self):
pvals.append(value)
for inserted_vector in self._opposite_inserted_vectors:
if i == inserted_vector.anchor:
pvals.append(np.nan)
pvals.append(inserted_vector.pvals[self._vector_idx])
return tuple(pvals)

@lazyproperty
Expand All @@ -1261,6 +1271,13 @@ def _interleaved_zscore(self):
zscore.append(inserted_vector.zscore[self._vector_idx])
return tuple(zscore)

@lazyproperty
def _top_base_values(self):
return tuple(
np.sum(self._base_vector.base_values[col.addend_idxs])
for col in self._top_insertions
)

@lazyproperty
def _top_insertions(self):
return tuple(
Expand All @@ -1270,15 +1287,8 @@ def _top_insertions(self):
)

@lazyproperty
def _top_zscores(self):
return tuple(vector.zscore[self._vector_idx] for vector in self._top_insertions)

@lazyproperty
def _top_base_values(self):
return tuple(
np.sum(self._base_vector.base_values[col.addend_idxs])
for col in self._top_insertions
)
def _top_pvals(self):
return tuple(vector.pvals[self._vector_idx] for vector in self._top_insertions)

@lazyproperty
def _top_values(self):
Expand All @@ -1287,6 +1297,10 @@ def _top_values(self):
for col in self._top_insertions
)

@lazyproperty
def _top_zscores(self):
return tuple(vector.zscore[self._vector_idx] for vector in self._top_insertions)


class _VectorAfterHiding(_BaseTransformationVector):
"""Reflects a row or column with hidden elements removed."""
Expand Down Expand Up @@ -1379,14 +1393,6 @@ def column_index(self):
def label(self):
    """Return the `label` of the wrapped base vector."""
    base = self._base_vector
    return base.label

@lazyproperty
def _opposing_order(self):
return (
slice(None)
if self._opposing_order_arg is None
else self._opposing_order_arg
)

@lazyproperty
def pvals(self):
    """Return the `pvals` of the wrapped base vector."""
    base = self._base_vector
    return base.pvals
Expand All @@ -1399,6 +1405,14 @@ def values(self):
def zscore(self):
    """Return the `zscore` of the wrapped base vector."""
    base = self._base_vector
    return base.zscore

@lazyproperty
def _opposing_order(self):
return (
slice(None)
if self._opposing_order_arg is None
else self._opposing_order_arg
)


# ===OPERAND VECTORS===

Expand Down Expand Up @@ -1513,14 +1527,6 @@ def table_margin(self):
def values(self):
    """Return `self._counts` under the public name `values`."""
    counts = self._counts
    return counts

@lazyproperty
def _residuals(self):
return self.values - self._expected_counts

@lazyproperty
def _expected_counts(self):
return self._opposite_margins * self.margin / self.table_margin

@lazyproperty
def zscore(self):
variance = (
Expand All @@ -1535,6 +1541,14 @@ def zscore(self):

return self._residuals / np.sqrt(variance)

@lazyproperty
def _residuals(self):
return self.values - self._expected_counts

@lazyproperty
def _expected_counts(self):
return self._opposite_margins * self.margin / self.table_margin


class _CatXMrVector(_CategoricalVector):
"""Used for categorical dimension when opposing dimension is multiple-response."""
Expand Down
12 changes: 6 additions & 6 deletions tests/integration/test_cubepart.py
Original file line number Diff line number Diff line change
Expand Up @@ -89,13 +89,13 @@ def it_calculates_various_measures(self):
np.testing.assert_almost_equal(
slice_.pvals,
[
[0.03851757, np.nan, 0.54097586, 0.21071341, np.nan, 0.23299113],
[0.03851757, 0.0922145, 0.54097586, 0.21071341, np.nan, 0.23299113],
[0.04198008, 0.0922145, 0.50508577, 0.28105398, np.nan, 0.1797169],
[0.73113976, 0.41072494, 0.28019785, 0.32642279, np.nan, 0.79310382],
[0.36684711, 0.29203707, 0.98652895, 0.85178994, np.nan, 0.00305394],
[0.06398878, 0.47430453, 0.21130996, 0.26884987, np.nan, 0.4212984],
[np.nan, np.nan, np.nan, np.nan, np.nan, np.nan],
[0.73113976, np.nan, 0.28019785, 0.32642279, np.nan, 0.79310382],
[0.36684711, np.nan, 0.98652895, 0.85178994, np.nan, 0.00305394],
[0.06398878, np.nan, 0.21130996, 0.26884987, np.nan, 0.4212984],
[np.nan, np.nan, np.nan, np.nan, np.nan, np.nan],
[0.82020207, np.nan, 0.91486794, 0.58880283, np.nan, 0.75048675],
[0.82020207, 0.70318269, 0.91486794, 0.58880283, np.nan, 0.75048675],
],
)

Expand Down
Loading

0 comments on commit b0c3040

Please sign in to comment.