Skip to content

Commit

Permalink
Merge a72fb0b into 722c4ae
Browse files Browse the repository at this point in the history
  • Loading branch information
ernestoarbitrio committed Apr 20, 2021
2 parents 722c4ae + a72fb0b commit a698ee3
Show file tree
Hide file tree
Showing 23 changed files with 5,816 additions and 362 deletions.
78 changes: 61 additions & 17 deletions src/cr/cube/cube.py
Original file line number Diff line number Diff line change
Expand Up @@ -251,8 +251,8 @@ def counts_with_missings(self):
categories.
"""
return (
self._measures.valid_counts.raw_cube_array
if self._measures.valid_counts is not None
self._measures.unweighted_valid_counts.raw_cube_array
if self._measures.unweighted_valid_counts is not None
else self._measures.weighted_counts.raw_cube_array
if self.has_weighted_counts
else self._measures.unweighted_counts.raw_cube_array
Expand Down Expand Up @@ -441,22 +441,32 @@ def unweighted_counts(self):
with the valid counts measure.
"""
unweighted_counts = (
self._measures.valid_counts
if self._measures.valid_counts is not None
self._measures.unweighted_valid_counts
if self._measures.unweighted_valid_counts is not None
else self._measures.unweighted_counts
)
return unweighted_counts.raw_cube_array[self._valid_idxs]

@lazyproperty
def unweighted_valid_counts(self):
    """Optional float64 ndarray of unweighted valid counts.

    Returns None when `self._measures` has no unweighted valid-counts measure.
    Otherwise returns that measure's raw cube array, indexed by
    `self._valid_idxs` and cast to np.float64.
    """
    measure = self._measures.unweighted_valid_counts
    if measure is None:
        return None
    valid_values = measure.raw_cube_array[self._valid_idxs]
    return valid_values.astype(np.float64)

@lazyproperty
def valid_counts_summary(self):
"""Optional ndarray of summary valid counts"""
if not self._measures.valid_counts:
if not self._measures.unweighted_valid_counts:
return None
# --- In case of ndim >= 2 the sum should be done on the second axis to get
# --- the correct sequence of valid counts (e.g. CA_SUBVAR).
axis = 1 if len(self._all_dimensions) >= 2 else 0
return np.sum(
self._measures.valid_counts.raw_cube_array[self._valid_idxs], axis=axis
self._measures.unweighted_valid_counts.raw_cube_array[self._valid_idxs],
axis=axis,
)

@lazyproperty
Expand All @@ -480,8 +490,8 @@ def weighted_counts(self):
are replaced with the valid counts measure.
"""
weighted_counts = (
self._measures.valid_counts
if self._measures.valid_counts is not None
self._measures.weighted_valid_counts
if self._measures.weighted_valid_counts is not None
else self._measures.weighted_counts
)
return (
Expand All @@ -490,6 +500,15 @@ def weighted_counts(self):
else None
)

@lazyproperty
def weighted_valid_counts(self):
    """Optional float64 ndarray of weighted valid counts.

    Returns None when `self._measures` has no weighted valid-counts measure.
    Otherwise returns that measure's raw cube array, indexed by
    `self._valid_idxs` and cast to np.float64.
    """
    measure = self._measures.weighted_valid_counts
    if measure is None:
        return None
    valid_values = measure.raw_cube_array[self._valid_idxs]
    return valid_values.astype(np.float64)

@lazyproperty
def _all_dimensions(self):
"""The AllDimensions object for this cube.
Expand Down Expand Up @@ -679,8 +698,8 @@ def means(self):
@lazyproperty
def missing_count(self):
"""numeric representing count of missing rows in cube response."""
if self.valid_counts is not None:
return self.valid_counts.missing_count
if self.unweighted_valid_counts is not None:
return self.unweighted_valid_counts.missing_count
# The check on the means measure is needed for retro-compatibility with the old
# fixtures that don't have valid_counts.
if self.means is not None:
Expand Down Expand Up @@ -770,12 +789,12 @@ def unweighted_counts(self):
)

@lazyproperty
def valid_counts(self):
"""_ValidCountsMeasure object for this cube.
def unweighted_valid_counts(self):
"""_UnweightedValidCountsMeasure object for this cube.
Can be None when cube doesn't have valid counts.
Can be None when cube doesn't have unweighted valid counts.
"""
valid_counts = _ValidCountsMeasure(
valid_counts = _UnweightedValidCountsMeasure(
self._cube_dict, self._all_dimensions, self._cube_idx_arg
)
return valid_counts if valid_counts.raw_cube_array is not None else None
Expand All @@ -802,6 +821,17 @@ def weighted_counts(self):
)
return weighted_counts if weighted_counts.raw_cube_array is not None else None

@lazyproperty
def weighted_valid_counts(self):
    """_WeightedValidCountsMeasure object for this cube, or None.

    None is returned when the measure object's `raw_cube_array` is None, i.e. the
    cube doesn't have weighted valid counts.
    """
    measure = _WeightedValidCountsMeasure(
        self._cube_dict, self._all_dimensions, self._cube_idx_arg
    )
    if measure.raw_cube_array is None:
        return None
    return measure


class _BaseMeasure(object):
"""Base class for measure objects."""
Expand Down Expand Up @@ -999,8 +1029,8 @@ def _flat_values(self):
return np.array(self._cube_dict["result"]["counts"], dtype=np.float64)


class _ValidCountsMeasure(_BaseMeasure):
"""Valid counts for cube."""
class _UnweightedValidCountsMeasure(_BaseMeasure):
"""Unweighted Valid counts for cube."""

@lazyproperty
def missing_count(self):
Expand All @@ -1011,7 +1041,7 @@ def missing_count(self):

@lazyproperty
def _flat_values(self):
"""Optional 1D np.ndarray of np.float64 valid counts."""
"""Optional 1D np.ndarray of np.float64 unweighted valid counts."""
valid_counts = (
self._cube_dict["result"]["measures"]
.get("valid_count_unweighted", {})
Expand Down Expand Up @@ -1043,3 +1073,17 @@ def _flat_values(self):
return None

return np.array(weighted_counts, dtype=np.float64)


class _WeightedValidCountsMeasure(_BaseMeasure):
    """Weighted valid-counts measure of a cube-result."""

    @lazyproperty
    def _flat_values(self):
        """Optional 1D np.ndarray of np.float64 weighted valid counts.

        The values are read from the "data" item of the "valid_count_weighted"
        measure in the cube-result. None is returned when that measure is absent
        or its data is empty.
        """
        measures = self._cube_dict["result"]["measures"]
        data = measures.get("valid_count_weighted", {}).get("data", [])
        if not data:
            return None
        return np.array(data, dtype=np.float64)
50 changes: 50 additions & 0 deletions src/cr/cube/cubepart.py
Original file line number Diff line number Diff line change
Expand Up @@ -1263,6 +1263,26 @@ def unweighted_counts(self):
"""2D np.float64 ndarray of unweighted count for each slice matrix cell."""
return self._assembler.unweighted_counts

@lazyproperty
def unweighted_valid_counts(self):
    """Unweighted valid counts as produced by `self._assembler`.

    Raises `ValueError` with an explanatory message when the assembler raises
    `ValueError`, which is taken here to mean the cube-result has no
    valid-count-unweighted measure.
    """
    try:
        valid_counts = self._assembler.unweighted_valid_counts
    except ValueError:
        raise ValueError(
            "`.unweighted_valid_counts` is undefined for a cube-result without "
            "a valid count unweighted measure"
        )
    else:
        return valid_counts

@lazyproperty
def weighted_valid_counts(self):
    """Weighted valid counts as produced by `self._assembler`.

    Raises `ValueError` with an explanatory message when the assembler raises
    `ValueError`, which is taken here to mean the cube-result has no
    valid-count-weighted measure.
    """
    try:
        valid_counts = self._assembler.weighted_valid_counts
    except ValueError:
        raise ValueError(
            "`.weighted_valid_counts` is undefined for a cube-result without "
            "a valid count weighted measure"
        )
    else:
        return valid_counts

@lazyproperty
def zscores(self):
"""2D np.float64 ndarray of std-res value for each cell of matrix.
Expand Down Expand Up @@ -1746,6 +1766,21 @@ def unweighted_counts(self):
"""1D np.float64 ndarray of unweighted count for each row of stripe."""
return self._assembler.unweighted_counts

@lazyproperty
def unweighted_valid_counts(self):
    """1D np.float64 ndarray of unweighted valid count for each row of strand.

    Raises `ValueError` when the assembler raises `ValueError`, which is taken
    here to mean the cube-result does not contain an unweighted valid count
    cube-measure.
    """
    try:
        valid_counts = self._assembler.unweighted_valid_counts
    except ValueError:
        raise ValueError(
            "`.unweighted_valid_counts` is undefined for a cube-result without a "
            "valid count unweighted measure"
        )
    else:
        return valid_counts

@lazyproperty
def weighted_bases(self):
"""1D np.float64 ndarray of table-proportion denominator for each row.
Expand All @@ -1756,6 +1791,21 @@ def weighted_bases(self):
"""
return self._assembler.weighted_bases

@lazyproperty
def weighted_valid_counts(self):
    """1D np.float64 ndarray of weighted valid count for each row of strand.

    Raises `ValueError` when the assembler raises `ValueError`, which is taken
    here to mean the cube-result does not contain a weighted valid count
    cube-measure.
    """
    try:
        valid_counts = self._assembler.weighted_valid_counts
    except ValueError:
        raise ValueError(
            "`.weighted_valid_counts` is undefined for a cube-result without a "
            "valid count weighted measure"
        )
    else:
        return valid_counts

# ---implementation (helpers)-------------------------------------

@lazyproperty
Expand Down
3 changes: 3 additions & 0 deletions src/cr/cube/enums.py
Original file line number Diff line number Diff line change
Expand Up @@ -96,13 +96,16 @@ class CUBE_MEASURE(enum.Enum):
SUM = "sum"
VALID_OVERLAP = "valid_overlap"
VALID_COUNT_UNWEIGHTED = "valid_count_unweighted"
VALID_COUNT_WEIGHTED = "valid_count_weighted"


NUMERIC_MEASURES = frozenset(
(
CUBE_MEASURE.SUM,
CUBE_MEASURE.MEAN,
CUBE_MEASURE.STDDEV,
CUBE_MEASURE.VALID_COUNT_UNWEIGHTED,
CUBE_MEASURE.VALID_COUNT_WEIGHTED,
MEASURE.COL_SHARE_SUM,
MEASURE.ROW_SHARE_SUM,
MEASURE.TOTAL_SHARE_SUM,
Expand Down
29 changes: 22 additions & 7 deletions src/cr/cube/matrix/assembler.py
Original file line number Diff line number Diff line change
Expand Up @@ -108,19 +108,15 @@ def column_weighted_bases(self):
def columns_base(self):
"""1D/2D np.float64 ndarray of unweighted-N for each slice column/cell."""
# --- an MR_X slice produces a 2D column-base (each cell has its own N) ---
columns_base = self._cube_result_matrix.columns_base
rows_dim_type = self._rows_dimension.dimension_type
columns_base = self._measures.columns_base
if rows_dim_type in (DT.MR_SUBVAR, DT.NUM_ARRAY):
return self._assemble_matrix(
SumSubtotals.blocks(columns_base, self._dimensions, diff_cols_nan=True)
)

# --- otherwise columns-base is a vector ---
return self._assemble_vector(
columns_base,
self._column_subtotals,
self._column_order,
diffs_nan=True,
columns_base, self._column_subtotals, self._column_order, diffs_nan=True
)

@lazyproperty
Expand Down Expand Up @@ -528,7 +524,8 @@ def table_weighted_bases(self):
def total_share_sum(self):
"""2D optional np.float64 ndarray of total share of sum for each cell.
Raises `ValueError` if the cube-result does not include a sum cube-measure.
Raises `ValueError` if the cube-result does not include a sum
cube-measure.
"""
return self._assemble_matrix(self._measures.total_share_sum.blocks)

Expand All @@ -537,11 +534,29 @@ def unweighted_counts(self):
"""2D np.float64 ndarray of unweighted-count for each cell."""
return self._assemble_matrix(self._measures.unweighted_counts.blocks)

@lazyproperty
def unweighted_valid_counts(self):
    """2D np.float64 ndarray of unweighted valid count for each cell.

    The result is assembled from the blocks of the unweighted-valid-counts
    measure. Raises `ValueError` if the cube-result does not include a
    valid-count-unweighted cube-measure.
    """
    blocks = self._measures.unweighted_valid_counts.blocks
    return self._assemble_matrix(blocks)

@lazyproperty
def weighted_counts(self):
    """2D np.float64 ndarray of weighted-count for each cell.

    The result is assembled from the blocks of the weighted-counts measure.
    """
    blocks = self._measures.weighted_counts.blocks
    return self._assemble_matrix(blocks)

@lazyproperty
def weighted_valid_counts(self):
    """2D np.float64 ndarray of weighted valid count for each cell.

    The result is assembled from the blocks of the weighted-valid-counts
    measure. Raises `ValueError` if the cube-result does not include a
    valid-count-weighted cube-measure.
    """
    blocks = self._measures.weighted_valid_counts.blocks
    return self._assemble_matrix(blocks)

@lazyproperty
def zscores(self):
"""2D np.float64 ndarray of std-res value for each cell of matrix.
Expand Down
Loading

0 comments on commit a698ee3

Please sign in to comment.