Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

tstats correct for CATxMRxITSELF Cubes #186

Merged
merged 6 commits into from
Dec 5, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,7 @@ celerybeat-schedule
.env

# virtualenv
Pipfile
.venv
venv/
venv3/
Expand Down
16 changes: 16 additions & 0 deletions src/cr/cube/cube.py
Original file line number Diff line number Diff line change
Expand Up @@ -268,6 +268,22 @@ def inflate(self):
self._mask_size,
)

@lazyproperty
def is_mr_by_itself(self):
    """True when the last two dimensions of this cube are the same MR variable.

    A cube counts as "MR x itself" when all three of these hold:

    * it has at least three dimensions,
    * its last two dimensions are both of type `DT.MR`,
    * those two dimensions share the same alias.
    """
    # ---there must be at least three dimensions---
    if self.ndim < 3:
        return False
    # ---the last two must both be MR---
    if not all(dim_type == DT.MR for dim_type in self.dimension_types[-2:]):
        return False
    # ---and both must carry the same alias (a single distinct value)---
    return len({dimension.alias for dimension in self.dimensions[-2:]}) == 1

@lazyproperty
def is_weighted(self):
"""True if cube response contains weighted data."""
Expand Down
59 changes: 56 additions & 3 deletions src/cr/cube/cubepart.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,9 +57,8 @@ def factory(
return _Slice(cube, slice_idx, transforms, population, mask_size)

@lazyproperty
def variable_name(self):
    """str name of the superheading variable.

    Taken from the first dimension when there are fewer than two dimensions,
    otherwise from the second one.
    """
    if self.ndim < 2:
        dimension_idx = 0
    else:
        dimension_idx = 1
    return self._dimensions[dimension_idx].name
def cube_is_mr_by_itself(self):
    """bool, unconditionally False in this implementation."""
    return False

@lazyproperty
def dimension_types(self):
Expand Down Expand Up @@ -97,6 +96,11 @@ def shape(self):
"must be implemented by each subclass"
) # pragma: no cover

@lazyproperty
def variable_name(self):
    """str name of the superheading variable.

    Taken from the first dimension when there are fewer than two dimensions,
    otherwise from the second one.
    """
    if self.ndim < 2:
        dimension_idx = 0
    else:
        dimension_idx = 1
    return self._dimensions[dimension_idx].name


class _Slice(CubePartition):
"""2D cube partition.
Expand Down Expand Up @@ -167,6 +171,10 @@ def columns_margin(self):
def counts(self):
    """ndarray built from the `.values` of each row of this slice's matrix."""
    row_values = [matrix_row.values for matrix_row in self._matrix.rows]
    return np.array(row_values)

@lazyproperty
def cube_is_mr_by_itself(self):
    """Value of `is_mr_by_itself` as reported by the cube behind this slice."""
    cube = self._cube
    return cube.is_mr_by_itself

@lazyproperty
def description(self):
"""str description of this slice, which it takes from its rows-dimension."""
Expand All @@ -182,6 +190,35 @@ def inserted_column_idxs(self):
def inserted_row_idxs(self):
    """tuple of int offsets of the matrix rows flagged as insertions."""
    idxs = []
    for idx, matrix_row in enumerate(self._matrix.rows):
        if matrix_row.is_insertion:
            idxs.append(idx)
    return tuple(idxs)

@lazyproperty
def insertions(self):
    """Return p-values and z-scores for the inserted rows and columns only.

    Every cell that lies on neither an inserted row nor an inserted column is
    replaced with `np.inf`, so only the insertion residuals remain visible.
    For example, with insertions at rows 2 and 4 and at column 5, one of the
    two stacked arrays looks like:

           0     1     2     3     4     5     6
      0  inf   inf   inf   inf   inf  -2.9    inf
      1  inf   inf   inf   inf   inf  -4.3    inf
      2  2.5   1.3   3.3  -0.70 -7.25 -6.52   2.25
      3  inf   inf   inf   inf   inf  -2.51   inf
      4 -1.16  2.20  5.84  1.78 -8.48 -5.92   0.93
      5  inf   inf   inf   inf   inf   9.70   inf

    Returns `[]` when the slice has no inserted rows and no inserted columns.
    Otherwise returns an ndarray stacking the inf-filled p-values (index 0)
    and the inf-filled z-scores (index 1).
    """
    # NOTE(review): per the discussion on the originating pull request, np.inf
    # is serialized downstream as the string "Infinity", and a way to request
    # only rows or only columns was proposed as a follow-up.
    inserted_rows = self.inserted_row_idxs
    inserted_cols = self.inserted_column_idxs
    # ---nothing to report unless at least one row or column is an insertion---
    if not inserted_rows and not inserted_cols:
        return []

    pvals = self.pvals
    # ---True marks a cell lying on an inserted row or an inserted column---
    is_insertion = np.zeros(pvals.shape, dtype=bool)
    is_insertion[list(inserted_rows), :] = True
    is_insertion[:, list(inserted_cols)] = True
    # ---everything that is not an insertion gets hidden behind np.inf---
    hidden = np.logical_not(is_insertion)

    masked_pvals = np.ma.masked_array(pvals, hidden).filled(np.inf)
    masked_zscores = np.ma.masked_array(self.zscore, hidden).filled(np.inf)
    return np.stack([masked_pvals, masked_zscores])

@lazyproperty
def is_empty(self):
    """True when any extent in this slice's `shape` equals zero."""
    for extent in self.shape:
        if extent == 0:
            return True
    return False
Expand All @@ -202,6 +239,10 @@ def name(self):
"""
return self.rows_dimension_name

@lazyproperty
def overlaps_tstats(self):
    """The `overlaps_tstats` value exposed by this slice's matrix."""
    matrix = self._matrix
    return matrix.overlaps_tstats

@lazyproperty
def pairwise_indices(self):
alpha = self._transforms_dict.get("pairwise_indices", {}).get("alpha", 0.05)
Expand Down Expand Up @@ -245,6 +286,14 @@ def population_counts(self):
def pvals(self):
    """ndarray built from the `.pvals` of each row of this slice's matrix."""
    per_row_pvals = [matrix_row.pvals for matrix_row in self._matrix.rows]
    return np.array(per_row_pvals)

@lazyproperty
def residual_test_stats(self):
    """ndarray stacking `pvals` (index 0) and `zscore` (index 1), with HS.

    Public method used as cube_method for the SOA API.
    """
    stats = [self.pvals, self.zscore]
    return np.stack(stats)

@lazyproperty
def row_base(self):
    """ndarray built from the `.base` of each row of this slice's matrix."""
    bases = [matrix_row.base for matrix_row in self._matrix.rows]
    return np.array(bases)
Expand Down Expand Up @@ -458,6 +507,10 @@ def table_name(self):

title = self._cube.name
table_name = self._cube.dimensions[0].valid_elements[self._slice_idx].label

if self._cube.is_mr_by_itself:
return title

return "%s: %s" % (title, table_name)

@lazyproperty
Expand Down
9 changes: 9 additions & 0 deletions src/cr/cube/dimension.py
Original file line number Diff line number Diff line change
Expand Up @@ -258,6 +258,15 @@ def __init__(self, dimension_dict, dimension_type, dimension_transforms=None):
self._dimension_type = dimension_type
self._dimension_transforms_arg = dimension_transforms

@lazyproperty
def alias(self):
    """Alias of this dimension, or None when it has none.

    The value is read from the "alias" entry of the "references" mapping in
    the dimension dict. Matching aliases are one of the mandatory conditions
    for identifying an MRxItself cube.
    """
    references = self._dimension_dict["references"]
    return references.get("alias")

@lazyproperty
def all_elements(self):
"""_AllElements object providing cats or subvars of this dimension.
Expand Down
Loading