Skip to content

Commit

Permalink
Merge 9a03e6f into adbab45
Browse files Browse the repository at this point in the history
  • Loading branch information
ernestoarbitrio committed Apr 20, 2021
2 parents adbab45 + 9a03e6f commit 671a42b
Show file tree
Hide file tree
Showing 4 changed files with 180 additions and 104 deletions.
33 changes: 7 additions & 26 deletions src/cr/cube/matrix/assembler.py
Original file line number Diff line number Diff line change
Expand Up @@ -191,30 +191,11 @@ def pairwise_means_indices(self, alpha, only_larger):
Raises `ValueError` if the cube-result does not include `means` cube-measures.
"""

def pairwise_indices(p_vals, t_stats):
"""1D ndarray of tuples of int pairwise indices of each column."""
significance = p_vals < alpha
if only_larger:
significance = np.logical_and(t_stats < 0, significance)
col_signif = np.empty((len(significance),), dtype=object)
col_signif[:] = [tuple(np.where(sig_row)[0]) for sig_row in significance]
return col_signif

t_stats = [
self.pairwise_significance_means_t_stats(col)
for col in range(len(self._column_order))
]
p_vals = [
self.pairwise_significance_means_p_vals(col)
for col in range(len(self._column_order))
]
indices = np.array([pairwise_indices(p, t) for p, t in zip(p_vals, t_stats)]).T
# --- a None value indicates "cannot calculate", which is distinct from
# --- () that means "not significance"
for idx in self.inserted_column_idxs:
indices[:, idx] = None
return indices
return self._assemble_matrix(
self._measures.pairwise_means_indices(
self._column_order, alpha, only_larger
).blocks
)

def pairwise_significance_p_vals(self, subvar_idx):
"""2D optional np.float64 ndarray of overlaps-p_vals matrices for subvar idx.
Expand Down Expand Up @@ -243,7 +224,7 @@ def pairwise_significance_means_p_vals(self, column_idx):
"""
return self._assemble_matrix(
self._measures.pairwise_significance_means_p_vals(
column_idx, self.inserted_column_idxs
column_idx, self._column_order
).blocks
)

Expand All @@ -254,7 +235,7 @@ def pairwise_significance_means_t_stats(self, column_idx):
"""
return self._assemble_matrix(
self._measures.pairwise_significance_means_t_stats(
column_idx, self.inserted_column_idxs
column_idx, self._column_order
).blocks
)

Expand Down
227 changes: 158 additions & 69 deletions src/cr/cube/matrix/measure.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
from scipy.stats import t

from cr.cube.matrix.cubemeasure import CubeMeasures
from cr.cube.matrix.subtotals import SumSubtotals, NanSubtotals
from cr.cube.matrix.subtotals import SumSubtotals, NanSubtotals, NoneSubtotals
from cr.cube.util import lazyproperty


Expand Down Expand Up @@ -62,6 +62,17 @@ def pairwise_indices(self, alpha, only_larger):
self._dimensions, self, self._cube_measures, alpha, only_larger
)

def pairwise_means_indices(self, column_order, alpha, only_larger):
    """_PairwiseMeansIndices measure object for this cube-result.

    `column_order`, `alpha` and `only_larger` are passed through unchanged to the
    `_PairwiseMeansIndices` constructor, along with this object's dimensions and
    cube-measures.
    """
    return _PairwiseMeansIndices(
        self._dimensions,
        self,
        self._cube_measures,
        column_order,
        alpha,
        only_larger,
    )

def pairwise_p_vals_for_subvar(self, subvar_idx):
"""_PairwiseSigPVals measure object for this cube-result and selected subvar"""
return _PairwiseSigPVals(
Expand All @@ -74,7 +85,7 @@ def pairwise_t_stats_for_subvar(self, subvar_idx):
self._dimensions, self, self._cube_measures, subvar_idx
)

def pairwise_significance_means_p_vals(self, column_idx, inserted_col_idxs):
def pairwise_significance_means_p_vals(self, column_idx, column_order):
"""_PairwiseMeansSigPVals measure object for this cube-result.
The `column_idx` is the reference column on which calculate the pairwise sig
Expand All @@ -86,17 +97,17 @@ def pairwise_significance_means_p_vals(self, column_idx, inserted_col_idxs):
self,
self._cube_measures,
column_idx,
inserted_col_idxs,
column_order,
)

def pairwise_significance_means_t_stats(self, column_idx, inserted_col_idxs):
def pairwise_significance_means_t_stats(self, column_idx, column_order):
"""_PairwiseMeansSigTStats measure object for this cube-result."""
return _PairwiseMeansSigTStats(
self._dimensions,
self,
self._cube_measures,
column_idx,
inserted_col_idxs,
column_order,
)

@lazyproperty
Expand Down Expand Up @@ -486,6 +497,138 @@ def blocks(self):
)


class _PairwiseIndices(_BaseSecondOrderMeasure):
    """Pairwise-significance indices measure for a matrix.

    Each cell holds a tuple of the column indices found significant for that cell,
    given the `alpha` threshold and the `only_larger` flag.
    """

    def __init__(
        self, dimensions, second_order_measures, cube_measures, alpha, only_larger
    ):
        super(_PairwiseIndices, self).__init__(
            dimensions, second_order_measures, cube_measures
        )
        self._alpha = alpha
        self._only_larger = only_larger

    @lazyproperty
    def blocks(self):
        """2D array of the four 2D "blocks" making up this measure."""
        # --- one 1D object array per significance test; transposed after stacking
        per_test_indices = [
            self._pairwise_indices(sig_test.p_vals, sig_test.t_stats)
            for sig_test in self._values
        ]
        return NanSubtotals.blocks(np.array(per_test_indices).T, self._dimensions)

    def _pairwise_indices(self, p_vals, t_stats):
        """Return 1D object ndarray of int-tuples, one tuple per row of `p_vals`.

        A position is included in a row's tuple when its p-value is below alpha
        and, when `only_larger` is set, its t-stat is also negative.
        """
        is_significant = p_vals < self._alpha
        if self._only_larger:
            is_significant = np.logical_and(t_stats < 0, is_significant)

        index_tuples = [tuple(np.where(row)[0]) for row in is_significant]

        # --- an object array is required so each cell can hold a tuple
        result = np.empty((len(is_significant),), dtype=object)
        result[:] = index_tuples
        return result

    @lazyproperty
    def _values(self):
        """list of _PairwiseSigPVals test objects, one per overlaps column.

        Each significance test provides the `p_vals` and `t_stats` used by
        `.blocks`.
        """
        n_cols = self._cube_measures.cube_overlaps.overlaps.shape[1]
        return [
            _PairwiseSigPVals(
                self._dimensions,
                self._second_order_measures,
                self._cube_measures,
                subvar_idx,
            )
            for subvar_idx in range(n_cols)
        ]


class _PairwiseMeansIndices(_BaseSecondOrderMeasure):
    """Provides pairwise significance indices measure for means of a matrix.

    `column_order` is the sequence of column idxs in their final (display) order.
    Negative entries are treated as inserted columns: they get no significance
    test of their own and are never reported as a pairwise index.
    """

    def __init__(
        self,
        dimensions,
        second_order_measures,
        cube_measures,
        column_order,
        alpha,
        only_larger,
    ):
        super(_PairwiseMeansIndices, self).__init__(
            dimensions, second_order_measures, cube_measures
        )
        self._column_order = column_order
        self._alpha = alpha
        self._only_larger = only_larger

    @lazyproperty
    def blocks(self):
        """2D array of the four 2D "blocks" making up this measure."""
        pairwise_indices = np.array(
            [self._pairwise_indices(v.p_vals, v.t_stats) for v in self._values]
        ).T
        return NoneSubtotals.blocks(pairwise_indices, self._dimensions)

    @lazyproperty
    def _mapped_idxs(self):
        """dict mapping each non-negative column idx to its position in column-order.

        Negative (insertion) entries are left out on purpose. A dict is used rather
        than a pre-sized list so a negative entry cannot alias a slot through
        negative indexing, and so a column idx that is greater than or equal to
        `len(column_order)` cannot raise `IndexError`.
        """
        return {
            int(item_idx): sort_idx
            for sort_idx, item_idx in enumerate(self._column_order)
            if item_idx >= 0
        }

    def _pairwise_indices(self, p_vals, t_stats):
        """1D ndarray containing tuples of int pairwise indices of each column.

        The raw indices are adjusted to `column_order`. If the indices without
        insertions are:

            [
                [(), (4,), (), (), ()],
                [(), (), (), (), ()],
                [(), (0, 2, 4), (), (0, 2, 4), ()],
                [(), (), (), (), ()],
            ]

        with 4 insertions in the positions (0,3,5,8), they will be:

            [
                [(), (7,), (), (), ()],
                [(), (), (), (), ()],
                [(), (1, 4, 7), (), (1, 4, 7), ()],
                [(), (), (), (), ()],
            ]

        An index that does not appear in `column_order` is dropped.
        """
        significance = p_vals < self._alpha
        if self._only_larger:
            significance = np.logical_and(t_stats < 0, significance)

        # --- computed once per measure, not once per row ---
        mapped_idxs = self._mapped_idxs

        col_significance = np.empty((len(significance),), dtype=object)
        col_significance[:] = [
            tuple(
                mapped_idxs[pairwise_idx]
                for pairwise_idx in np.where(row)[0].tolist()
                if pairwise_idx in mapped_idxs
            )
            for row in significance
        ]
        return col_significance

    @lazyproperty
    def _values(self):
        """list of _PairwiseMeansSigPVals test objects.

        There is one element for each non-negative entry of `column_order`; the
        selected column passed to each test is that entry's position in
        `column_order`. Each test provides the `p_vals` and `t_stats` used by
        `.blocks`.
        """
        return [
            _PairwiseMeansSigPVals(
                self._dimensions,
                self._second_order_measures,
                self._cube_measures,
                col_idx,
                self._column_order,
            )
            for col_idx, col_order in enumerate(self._column_order)
            if col_order >= 0
        ]


class _PairwiseSigTStats(_BaseSecondOrderMeasure):
"""Provides pairwise significance t-stats measure for matrix and selected subvar.
Expand Down Expand Up @@ -594,13 +737,13 @@ def __init__(
second_order_measures,
cube_measures,
selected_column_idx,
inserted_col_idxs,
column_order,
):
super(_PairwiseMeansSigTStats, self).__init__(
dimensions, second_order_measures, cube_measures
)
self._selected_column_idx = selected_column_idx
self._inserted_col_idxs = inserted_col_idxs
self._column_order = column_order

@lazyproperty
def blocks(self):
Expand All @@ -612,13 +755,6 @@ def t_stats(self):
"""2D float64 ndarray of means t-stats significance for the selected column."""
return self._t_stats[self._selected_column_idx]

@lazyproperty
def _n_cols(self):
"""integer number of columns including subtotals."""
return self._cube_measures.cube_means.means.shape[-1] + len(
self._inserted_col_idxs
)

@lazyproperty
def _t_stats(self):
"""List of 2D float64 ndarrays representing t-stats for means pairwise testing.
Expand All @@ -633,14 +769,14 @@ def _t_stats(self):
col_bases = self._cube_measures.unweighted_cube_counts.column_bases
t_stats = []
offset = 0
for col in range(self._n_cols):
if col in self._inserted_col_idxs:
for col_idx, col_order in enumerate(self._column_order):
if col_order < 0:
offset += 1
t_stats.append(np.full(means.shape, np.nan))
else:
combined_variance = variance[:, col - offset] + variance.T
diff = means.T - means[:, col - offset]
n = col_bases[:, col - offset] + col_bases.T
combined_variance = variance[:, col_idx - offset] + variance.T
diff = means.T - means[:, col_idx - offset]
n = col_bases[:, col_idx - offset] + col_bases.T
t_stats.append(diff.T * np.sqrt(n.T / combined_variance.T))
return t_stats

Expand Down Expand Up @@ -668,64 +804,17 @@ def _p_vals(self):
col_bases = self._cube_measures.unweighted_cube_counts.column_bases
p_vals = []
offset = 0
for col in range(self._n_cols):
if col in self._inserted_col_idxs:
for col_idx, col_order in enumerate(self._column_order):
if col_order < 0:
offset += 1
p_vals.append(np.full(t_stats.shape, np.nan))
else:
n = col_bases[:, col - offset] + col_bases.T
n = col_bases[:, col_idx - offset] + col_bases.T
df = 2 * (n - 1)
p_vals.append(2 * (1 - t.cdf(abs(t_stats), df=df.T)))
return p_vals


class _PairwiseIndices(_BaseSecondOrderMeasure):
    """Pairwise-significance indices measure for a matrix.

    Each cell holds a tuple of the column indices found significant for that cell,
    given the `alpha` threshold and the `only_larger` flag.
    """

    def __init__(
        self, dimensions, second_order_measures, cube_measures, alpha, only_larger
    ):
        super(_PairwiseIndices, self).__init__(
            dimensions, second_order_measures, cube_measures
        )
        self._alpha = alpha
        self._only_larger = only_larger

    @lazyproperty
    def blocks(self):
        """2D array of the four 2D "blocks" making up this measure."""
        # --- one 1D object array per significance test; transposed after stacking
        per_test_indices = [
            self._pairwise_indices(sig_test.p_vals, sig_test.t_stats)
            for sig_test in self._values
        ]
        return NanSubtotals.blocks(np.array(per_test_indices).T, self._dimensions)

    def _pairwise_indices(self, p_vals, t_stats):
        """Return 1D object ndarray of int-tuples, one tuple per row of `p_vals`.

        A position is included in a row's tuple when its p-value is below alpha
        and, when `only_larger` is set, its t-stat is also negative.
        """
        is_significant = p_vals < self._alpha
        if self._only_larger:
            is_significant = np.logical_and(t_stats < 0, is_significant)

        index_tuples = [tuple(np.where(row)[0]) for row in is_significant]

        # --- an object array is required so each cell can hold a tuple
        result = np.empty((len(is_significant),), dtype=object)
        result[:] = index_tuples
        return result

    @lazyproperty
    def _values(self):
        """list of _PairwiseSigPVals test objects, one per overlaps column.

        Each significance test provides the `p_vals` and `t_stats` used by
        `.blocks`.
        """
        n_cols = self._cube_measures.cube_overlaps.overlaps.shape[1]
        return [
            _PairwiseSigPVals(
                self._dimensions,
                self._second_order_measures,
                self._cube_measures,
                subvar_idx,
            )
            for subvar_idx in range(n_cols)
        ]


class _RowProportions(_BaseSecondOrderMeasure):
"""Provides the row-proportions measure for a matrix.
Expand Down
12 changes: 9 additions & 3 deletions src/cr/cube/matrix/subtotals.py
Original file line number Diff line number Diff line change
Expand Up @@ -136,17 +136,23 @@ class NanSubtotals(_BaseSubtotals):
Each subtotal value (and intersection value) is `np.nan`.
"""

filler = np.nan

def _intersection(self, row_subtotal, column_subtotal):
"""Unconditionally return `filler` (np.nan for this class) for each intersection cell."""
return np.nan
return self.filler

def _subtotal_column(self, subtotal):
"""Return (n_rows,) ndarray of `filler` values (np.nan for this class)."""
return np.full(self._nrows, np.nan)
return np.full(self._nrows, self.filler)

def _subtotal_row(self, subtotal):
"""Return (n_cols,) ndarray of `filler` values (np.nan for this class)."""
return np.full(self._ncols, np.nan)
return np.full(self._ncols, self.filler)


class NoneSubtotals(NanSubtotals):
    """Subtotal "blocks" where each subtotal value (and intersection value) is `None`.

    Only the `filler` class attribute differs from `NanSubtotals`.
    """

    # --- value placed in every subtotal and intersection cell ---
    filler = None


class SumSubtotals(_BaseSubtotals):
Expand Down
Loading

0 comments on commit 671a42b

Please sign in to comment.