Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Two level pairwise t #204

Merged
merged 19 commits into from
Jun 10, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion src/cr/cube/cube_slice.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
from cr.cube.legacy_min_base_size_mask import MinBaseSizeMask
from cr.cube.measures.scale_means import ScaleMeans
from cr.cube.measures.wishart_pairwise_significance import WishartPairwiseSignificance
from cr.cube.measures.pairwise_significance import PairwiseSignificance
from cr.cube.measures.old_pairwise_significance import PairwiseSignificance
from cr.cube.util import (
compress_pruned,
lazyproperty,
Expand Down
186 changes: 149 additions & 37 deletions src/cr/cube/cubepart.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@

from cr.cube.enum import DIMENSION_TYPE as DT
from cr.cube.min_base_size_mask import MinBaseSizeMask
from cr.cube.measures.new_pairwise_significance import NewPairwiseSignificance
from cr.cube.measures.pairwise_significance import PairwiseSignificance
from cr.cube.matrix import TransformedMatrix
from cr.cube.scalar import MeansScalar
from cr.cube.stripe import TransformedStripe
Expand All @@ -34,8 +34,9 @@ class CubePartition(object):
These represent 2, 1, or 0 dimensions of a cube, respectively.
"""

def __init__(self, cube):
def __init__(self, cube, transforms=None):
self._cube = cube
self._transforms_arg = transforms

@classmethod
def factory(
Expand Down Expand Up @@ -114,13 +115,97 @@ def variable_name(self):
"""str representing the name of the superheading variable."""
return self._dimensions[0 if self.ndim < 2 else 1].name

@lazyproperty
def _alpha(self):
"""float confidence-interval threshold for pairwise-t (sig) tests."""
return self._alpha_values[0]

@lazyproperty
def _alpha_alt(self):
"""Alternate float confidence-interval threshold or None.

This is an optional secondary confidence interval allowing two-level
significance testing. Value is None if no alternate alpha was specified by user.
"""
return self._alpha_values[1]

@lazyproperty
def _alpha_values(self):
"""Pair (tuple) of confidence-interval thresholds to be used for t-tests.

The second value is optional and is None when no secondary alpha value was
defined for the cube-set.
"""
value = self._transforms_dict.get("pairwise_indices", {}).get("alpha")

# --- handle omitted, None, [], (), {}, "", 0, and 0.0 cases ---
if not value:
return (0.05, None)
ernestoarbitrio marked this conversation as resolved.
Show resolved Hide resolved

# --- reject invalid types ---
if not isinstance(value, (float, list, tuple)):
raise TypeError(
"transforms.pairwise_indices.alpha, when defined, must be a list of 1 "
"or 2 float values between 0.0 and 1.0 exclusive. Got %r" % value
)

# --- legacy float "by-itself" case ---
if isinstance(value, float):
if not 0.0 < value < 1.0:
raise ValueError(
"alpha value, when provided, must be between 0.0 and 1.0 "
"exclusive. Got %r" % value
)
return (value, None)

# --- sequence case ---
for x in value[:2]:
if not isinstance(x, float) or not 0.0 < x < 1.0:
raise ValueError(
"transforms.pairwise_indices.alpha must be a list of 1 or 2 float "
"values between 0.0 and 1.0 exclusive. Got %r" % value
)

if len(value) == 1:
return (value[0], None)

return tuple(sorted(value[:2]))

@lazyproperty
def _dimensions(self):
"""tuple of Dimension object for each dimension in cube-partition."""
raise NotImplementedError(
"must be implemented by each subclass"
) # pragma: no cover

@lazyproperty
def _only_larger(self):
"""True if only the larger of reciprocal pairwise-t values should appear.

In general, pairwise-t tests are reciprocal. That is, if A is significant with
respect to B, then B is significant with respect to A. Having a letter in both
columns can produce a cluttered appearance. When this flag is set by the user,
only the cell in the reciprocal pair having the largest value gets a letter.
Defaults to True unless explicitly set False.
"""
return (
False
if self._transforms_dict.get("pairwise_indices", {}).get(
"only_larger", True
)
is False
else True
)

@lazyproperty
def _transforms_dict(self):
"""dict holding transforms for this partition, provided as `transforms` arg.

This value is an empty dict (`{}`) when no transforms were specified on
construction.
"""
return {} if self._transforms_arg is None else self._transforms_arg


class _Slice(CubePartition):
"""2D cube partition.
Expand All @@ -131,9 +216,8 @@ class _Slice(CubePartition):
"""

def __init__(self, cube, slice_idx, transforms, population, mask_size):
super(_Slice, self).__init__(cube)
super(_Slice, self).__init__(cube, transforms)
self._slice_idx = slice_idx
self._transforms_arg = transforms
self._population = population
self._mask_size = mask_size

Expand Down Expand Up @@ -279,13 +363,37 @@ def overlaps_tstats(self):

@lazyproperty
def pairwise_indices(self):
alpha = self._transforms_dict.get("pairwise_indices", {}).get("alpha", 0.05)
only_larger = self._transforms_dict.get("pairwise_indices", {}).get(
"only_larger", True
"""2D ndarray of tuple of int column-idxs meeting pairwise-t threshold.

Like::

[
[(1, 3, 4), (), (0,), (), ()],
[(2,), (1, 2), (), (), (0, 3)],
[(), (), (), (), ()],
]

Has the same shape as `.counts`. Each int represents the offset of another
column in the same row with a confidence interval meeting the threshold defined
for this analysis.
"""
return PairwiseSignificance.pairwise_indices(
self, self._alpha, self._only_larger
)

@lazyproperty
def pairwise_indices_alt(self):
"""2D ndarray of tuple of int column-idxs meeting alternate threshold.

This value is None if no alternate threshold has been defined.
"""
return (
None
if self._alpha_alt is None
else PairwiseSignificance.pairwise_indices(
self, self._alpha_alt, self._only_larger
)
)
return NewPairwiseSignificance(
self, alpha=alpha, only_larger=only_larger
).pairwise_indices

@lazyproperty
def pairwise_significance_tests(self):
Expand All @@ -296,7 +404,7 @@ def pairwise_significance_tests(self):
probability values and statistical scores).
"""
return tuple(
NewPairwiseSignificance(self).values[column_idx]
PairwiseSignificance(self).values[column_idx]
for column_idx in range(len(self._matrix.columns))
)

Expand Down Expand Up @@ -375,13 +483,33 @@ def rows_margin(self):

@lazyproperty
def scale_mean_pairwise_indices(self):
alpha = self._transforms_dict.get("pairwise_indices", {}).get("alpha", 0.05)
only_larger = self._transforms_dict.get("pairwise_indices", {}).get(
"only_larger", True
"""Sequence of column-idx tuples indicating pairwise-t result of scale-means.

The calculation is based on the mean of the scale (category numeric-values) for
each column. The length of the array is that of the columns-dimension.
"""
return tuple(
PairwiseSignificance.scale_mean_pairwise_indices(
self, self._alpha, self._only_larger
).tolist()
)

@lazyproperty
def scale_mean_pairwise_indices_alt(self):
"""Sequence of column-idx tuples indicating pairwise-t result of scale-means.

Same calculation as `.scale_mean_pairwise_indices` using the `._alpha_alt`
value. None when no secondary alpha value was specified. The length of the
sequence is that of the columns-dimension.
"""
if self._alpha_alt is None:
return None

return tuple(
PairwiseSignificance.scale_mean_pairwise_indices(
self, self._alpha_alt, self._only_larger
).tolist()
)
return NewPairwiseSignificance(
self, alpha=alpha, only_larger=only_larger
).scale_mean_pairwise_indices

@lazyproperty
def scale_means_column(self):
Expand Down Expand Up @@ -540,12 +668,8 @@ def shape(self):

@lazyproperty
def summary_pairwise_indices(self):
alpha = self._transforms_dict.get("pairwise_indices", {}).get("alpha", 0.05)
only_larger = self._transforms_dict.get("pairwise_indices", {}).get(
"only_larger", True
)
return NewPairwiseSignificance(
self, alpha=alpha, only_larger=only_larger
return PairwiseSignificance(
self, self._alpha, self._only_larger
).summary_pairwise_indices

@lazyproperty
Expand Down Expand Up @@ -735,16 +859,6 @@ def _transform_dicts(self):
self._transforms_dict.get("columns_dimension", {}),
)

@lazyproperty
def _transforms_dict(self):
"""dict containing all transforms for this slice, provided as `transforms` arg.


This value is an empty dict (`{}`) when no transforms were specified on
construction.
"""
return self._transforms_arg if self._transforms_arg is not None else {}


class _Strand(CubePartition):
"""1D cube-partition.
Expand All @@ -754,8 +868,7 @@ class _Strand(CubePartition):
"""

def __init__(self, cube, transforms, population, ca_as_0th, slice_idx, mask_size):
super(_Strand, self).__init__(cube)
self._transforms_arg = transforms
super(_Strand, self).__init__(cube, transforms)
self._population = population
self._ca_as_0th = ca_as_0th
self._slice_idx = slice_idx
Expand Down Expand Up @@ -1041,8 +1154,7 @@ def _rows_dimension(self):
@lazyproperty
def _row_transforms_dict(self):
"""Transforms dict for the single (rows) dimension of this strand."""
transforms_dict = {} if self._transforms_arg is None else self._transforms_arg
return transforms_dict.get("rows_dimension", {})
return self._transforms_dict.get("rows_dimension", {})

@lazyproperty
def _stripe(self):
Expand Down
Loading