Skip to content

Commit

Permalink
spike: MR stripe overlaps
Browse files Browse the repository at this point in the history
  • Loading branch information
slobodan-ilic committed Mar 15, 2021
1 parent dbbc76a commit 0e3fe8e
Show file tree
Hide file tree
Showing 9 changed files with 439 additions and 1 deletion.
64 changes: 64 additions & 0 deletions src/cr/cube/cube.py
Original file line number Diff line number Diff line change
Expand Up @@ -356,6 +356,15 @@ def n_responses(self):
"""Total (int) number of responses considered."""
return self._cube_response["result"].get("n", 0)

@lazyproperty
def overlaps(self):
    """Optional np.float64 ndarray of overlap values, None without that measure.

    The raw overlap array is indexed by `self._valid_idxs` and the result is
    converted to np.float64. None is returned when the cube-result measures
    expose no overlaps object.
    """
    overlap_measure = self._measures.overlaps
    if overlap_measure is None:
        return None
    # ---select with `_valid_idxs` first, then coerce the selection to float---
    selected = overlap_measure.raw_cube_array[self._valid_idxs]
    return selected.astype(np.float64)

@lazyproperty
def partitions(self):
"""Sequence of _Slice, _Strand, or _Nub objects from this cube-result."""
Expand Down Expand Up @@ -632,6 +641,17 @@ def missing_count(self):
return self.sums.missing_count
return self._cube_dict["result"].get("missing", 0)

@lazyproperty
def overlaps(self):
    """Optional _OverlapMeasure object giving access to the overlap values.

    None is returned when the measure object has no raw cube array, i.e. no
    overlaps are available on the cube result.
    """
    measure = _OverlapMeasure(
        self._cube_dict, self._all_dimensions, self._cube_idx_arg
    )
    if measure.raw_cube_array is None:
        return None
    return measure

@lazyproperty
def population_fraction(self):
"""The filtered/unfiltered ratio for cube response.
Expand Down Expand Up @@ -773,6 +793,50 @@ def _flat_values(self):
).flatten()


class _OverlapMeasure(_BaseMeasure):
    """Statistical overlap values from a cube-response.

    The measure payload is looked up at `result.measures.overlap` in the cube
    dict. `missing_count`, `_flat_values` and `raw_cube_array` all tolerate an
    absent payload; `_shape` requires the payload to be present.
    """

    @lazyproperty
    def missing_count(self):
        """Numeric value representing count of missing rows in response.

        Read from the `n_missing` item of the overlap payload. 0 is returned
        when that item is absent or when the cube-response has no overlap
        payload at all.
        """
        payload = self._measure_payload
        if payload is None:
            return 0
        return payload.get("n_missing", 0)

    @lazyproperty
    def _measure_payload(self):
        """Optional dict for the `overlap` measure, None when not in the response."""
        return self._cube_dict["result"].get("measures", {}).get("overlap")

    @lazyproperty
    def _flat_values(self):
        """Optional 1D np.ndarray of np.float64 overlap values from cube response.

        Overlap data may include missing items represented by a dict like
        {'?': -1} in the cube response. These are replaced by np.nan in the
        returned value. None is returned when there is no overlap payload.
        """
        payload = self._measure_payload
        if payload is None:
            return None
        return np.array(
            # ---`isinstance` so dict subclasses (e.g. OrderedDict) also count as
            # ---missing-markers instead of failing the float conversion
            tuple(np.nan if isinstance(x, dict) else x for x in payload["data"]),
            dtype=np.float64,
        ).flatten()

    @lazyproperty
    def _shape(self):
        """Tuple of int: the dimensions' shape extended by the subvariable count.

        The subvariable count is the length of `metadata.type.subvariables` in
        the overlap payload, so this must only be used when a payload exists.
        """
        n_subvars = len(self._measure_payload["metadata"]["type"]["subvariables"])
        return self._all_dimensions.shape + (n_subvars,)

    @lazyproperty
    def raw_cube_array(self):
        """Optional read-only ndarray of overlap values reshaped to `_shape`.

        None is returned when the cube-response contains no overlap values.
        """
        flat_values = self._flat_values
        if flat_values is None:
            return None
        raw_cube_array = flat_values.reshape(self._shape)
        # ---must be read-only to avoid hard-to-find bugs---
        raw_cube_array.flags.writeable = False
        return raw_cube_array


class _SumMeasure(_BaseMeasure):
"""Statistical sum values from a cube-response."""

Expand Down
14 changes: 14 additions & 0 deletions src/cr/cube/cubepart.py
Original file line number Diff line number Diff line change
Expand Up @@ -1288,6 +1288,20 @@ def min_base_size_mask(self):
def name(self):
return self.rows_dimension_name

@lazyproperty
def overlaps(self):
    """np.float64 ndarray of MR cube overlaps for the rows of this strand.

    The value is delegated to the assembler. A ValueError is raised when this
    is accessed on a cube-result that does not contain an overlaps
    cube-measure.
    """
    assembled_overlaps = self._assembler.overlaps
    return assembled_overlaps

@lazyproperty
def population_counts(self):
"""1D np.float64 ndarray of population count for each row of strand.
Expand Down
1 change: 1 addition & 0 deletions src/cr/cube/enums.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,7 @@ class CUBE_MEASURE(enum.Enum):

COUNT = "count"
MEAN = "mean"
OVERLAP = "overlap"
STDDEV = "stddev"
SUM = "sum"
VALID_COUNT_UNWEIGHTED = "valid_count_unweighted"
Expand Down
12 changes: 12 additions & 0 deletions src/cr/cube/stripe/assembler.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,18 @@ def means(self):
"""
return self._assemble_vector(self._measures.means.blocks)

@lazyproperty
def overlaps(self):
    """np.float64 ndarray of MR cube overlaps for the rows of this stripe.

    Raises ValueError when the cube-result does not include an overlap
    cube-measure.
    """
    # ---No blocks are assembled here: insertions on MRs are handled directly in
    # ---zz9 and arrive as part of the data rather than being inserted by the
    # ---`cr.cube` library, so the measure's base-values are returned as-is.
    overlaps_measure = self._measures.overlaps
    return overlaps_measure.base_values

@lazyproperty
def row_count(self):
"""int count of rows in this stripe.
Expand Down
53 changes: 53 additions & 0 deletions src/cr/cube/stripe/cubemeasure.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,11 @@ def cube_means(self):
"""_BaseCubeMeans subclass object for this stripe."""
return _BaseCubeMeans.factory(self._cube, self._rows_dimension)

@lazyproperty
def cube_overlaps(self):
    """Overlaps cube-measure object (a _BaseCubeOverlaps subclass) for this stripe."""
    cube, rows_dimension = self._cube, self._rows_dimension
    return _BaseCubeOverlaps.factory(cube, rows_dimension)

@lazyproperty
def cube_sum(self):
"""_BaseCubeSum subclass object for this stripe."""
Expand Down Expand Up @@ -83,6 +88,33 @@ def means(self):
) # pragma: no cover


class _BaseCubeOverlaps(_BaseCubeMeasure):
    """Behaviors shared by the overlaps cube-measure variants."""

    def __init__(self, rows_dimension, overlaps):
        super(_BaseCubeOverlaps, self).__init__(rows_dimension)
        self._overlaps = overlaps

    @classmethod
    def factory(cls, cube, rows_dimension):
        """Return the _BaseCubeOverlaps subclass instance suited to `cube`.

        An MR rows-dimension gets a `_MrCubeOverlaps` object, any other
        dimension-type gets a `_CatCubeOverlaps` object. Raises ValueError when
        `cube.overlaps` is None.
        """
        overlaps = cube.overlaps
        if overlaps is None:
            raise ValueError("cube-result does not contain cube-overlaps measure")
        if rows_dimension.dimension_type == DT.MR:
            return _MrCubeOverlaps(rows_dimension, overlaps)
        return _CatCubeOverlaps(rows_dimension, overlaps)

    @lazyproperty
    def overlaps(self):
        """ndarray of overlap values for the stripe rows; subclasses must override."""
        message = "`%s` must implement `.overlaps`" % type(self).__name__
        raise NotImplementedError(message)  # pragma: no cover


class _CatCubeMeans(_BaseCubeMeans):
"""Means cube-measure for a non-MR stripe."""

Expand All @@ -92,6 +124,15 @@ def means(self):
return self._means


class _CatCubeOverlaps(_BaseCubeOverlaps):
    """Overlaps cube-measure for a non-MR stripe."""

    @lazyproperty
    def overlaps(self):
        """np.float64 ndarray of cube overlaps for the stripe rows.

        Returns the overlaps array given at construction, unchanged.
        """
        # ---the base-class constructor stores the values as `_overlaps`; there
        # ---is no `_means` attribute on an overlaps cube-measure
        return self._overlaps


class _MrCubeMeans(_BaseCubeMeans):
"""Means cube-measure for an MR stripe.
Expand All @@ -104,6 +145,18 @@ def means(self):
return self._means[:, 0]


class _MrCubeOverlaps(_BaseCubeOverlaps):
    """Overlaps cube-measure for an MR stripe.

    The overlaps array received at construction has at least two axes; the
    second is presumably the MR selected/not-selected axis (TODO confirm).
    """

    @lazyproperty
    def overlaps(self):
        """np.float64 ndarray of overlaps for each stripe row.

        This is the index-0 slice along the second axis of the stored array.
        """
        first_of_second_axis = self._overlaps[:, 0]
        return first_of_second_axis


# === SUM ===


Expand Down
22 changes: 22 additions & 0 deletions src/cr/cube/stripe/measure.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,11 @@ def means(self):
"""_Means measure object for this stripe."""
return _Means(self._rows_dimension, self, self._cube_measures)

@lazyproperty
def overlaps(self):
"""_Overlaps measure object for this stripe."""
return _Overlaps(self._rows_dimension, self, self._cube_measures)

@lazyproperty
def pruning_base(self):
"""1D np.float64 ndarray of unweighted-N for each stripe row."""
Expand Down Expand Up @@ -187,6 +192,23 @@ def subtotal_values(self):
return NanSubtotals.subtotal_values(self.base_values, self._rows_dimension)


class _Overlaps(_BaseSecondOrderMeasure):
    """Provides the MR cube overlaps measure for a stripe.

    Relies on the presence of the overlaps cube-measure in the cube-result.
    """

    # Only base-values are provided here, never subtotals. MR is the only variable
    # type that can have overlaps and cr.cube performs no insertions on MR
    # variables. zz9 insertions are used instead, and those are included
    # automatically because cr.cube treats them as data.

    @lazyproperty
    def base_values(self):
        """np.float64 ndarray of overlap values for each row."""
        cube_overlaps = self._cube_measures.cube_overlaps
        return cube_overlaps.overlaps


class _ScaledCounts(_BaseSecondOrderMeasure):
"""Provides access to table-totals related to numeric-values/scaled-counts."""

Expand Down
1 change: 1 addition & 0 deletions tests/fixtures/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,5 +58,6 @@ def _load_to_cache(self, fixture_name):

CR = LazyCubeResponseLoader(".") # ---mnemonic: CR = 'cube-response'---
NA = LazyCubeResponseLoader("./numeric_arrays")  # ---mnemonic: NA = 'numeric-arrays'---
OL = LazyCubeResponseLoader("./overlaps")  # ---mnemonic: OL = 'overlaps'---
SM = LazyCubeResponseLoader("./scale_means")  # ---mnemonic: SM = 'scale-means'---
TR = LazyCubeResponseLoader("./transforms")  # ---mnemonic: TR = 'transforms'---
Loading

0 comments on commit 0e3fe8e

Please sign in to comment.