From 7dee0c596fb122bfb4773abd4a6874373dd1cf71 Mon Sep 17 00:00:00 2001 From: Arbitrio Date: Thu, 11 Feb 2021 10:59:54 +0100 Subject: [PATCH 01/10] spike: cube_sum --- src/cr/cube/cube.py | 184 ++++++--- src/cr/cube/cubepart.py | 28 +- src/cr/cube/enums.py | 4 + src/cr/cube/matrix/assembler.py | 11 +- src/cr/cube/matrix/cubemeasure.py | 179 +++----- src/cr/cube/stripe/assembler.py | 8 + src/cr/cube/stripe/cubemeasure.py | 96 ++++- src/cr/cube/stripe/measure.py | 32 ++ tests/fixtures/cat-sum.json | 162 ++++++++ tests/fixtures/mr-sum.json | 300 ++++++++++++++ tests/fixtures/numeric-measures-x-cat.json | 190 +++++++++ .../num-arr-sum-grouped-by-cat.json | 217 ++++++++++ tests/fixtures/sum-cat-x-mr.json | 386 ++++++++++++++++++ tests/fixtures/sum-mr-x-cat.json | 386 ++++++++++++++++++ tests/integration/test_cube.py | 27 +- tests/integration/test_cubepart.py | 30 +- tests/integration/test_matrix.py | 12 +- tests/integration/test_numeric_array.py | 33 +- 18 files changed, 2077 insertions(+), 208 deletions(-) create mode 100644 tests/fixtures/cat-sum.json create mode 100644 tests/fixtures/mr-sum.json create mode 100644 tests/fixtures/numeric-measures-x-cat.json create mode 100644 tests/fixtures/numeric_arrays/num-arr-sum-grouped-by-cat.json create mode 100644 tests/fixtures/sum-cat-x-mr.json create mode 100644 tests/fixtures/sum-mr-x-cat.json diff --git a/src/cr/cube/cube.py b/src/cr/cube/cube.py index e8059a552..fc10711c0 100644 --- a/src/cr/cube/cube.py +++ b/src/cr/cube/cube.py @@ -14,7 +14,10 @@ from cr.cube.cubepart import CubePartition from cr.cube.dimension import AllDimensions -from cr.cube.enums import DIMENSION_TYPE as DT +from cr.cube.enums import ( + DIMENSION_TYPE as DT, + QUANTITY_OF_INTEREST_MEASURES as QOI_MEASURES, +) from cr.cube.util import lazyproperty np.seterr(divide="ignore", invalid="ignore") @@ -44,6 +47,9 @@ def __init__(self, cube_responses, transforms, population, min_base): self._population = population self._min_base = min_base + def 
__getitem__(self, index): + return self._cubes[index] + @lazyproperty def can_show_pairwise(self): """True if all 2D cubes in a multi-cube set can provide pairwise comparison.""" @@ -61,11 +67,6 @@ def description(self): """str description of first cube in this set.""" return self._cubes[0].description - @lazyproperty - def has_means(self): - """True if cubes in this set include a means measure.""" - return self._cubes[0].has_means - @lazyproperty def has_weighted_counts(self): """True if cube-responses include a weighted-count measure.""" @@ -160,9 +161,9 @@ def iter_cubes(): population=self._population, mask_size=self._min_base, ) - # --- numeric-mean cubes require inflation to restore their + # --- numeric-measures cubes require inflation to restore their # --- rows-dimension, others don't - yield cube.inflate() if self._is_numeric_mean else cube + yield cube.inflate() if self._is_numeric_measure else cube return tuple(iter_cubes()) @@ -172,7 +173,7 @@ def _is_multi_cube(self): return len(self._cube_responses) > 1 @lazyproperty - def _is_numeric_mean(self): + def _is_numeric_measure(self): """True when CubeSet is special-case "numeric-mean" case requiring inflation. 
When a numeric variable appears as the rows-dimension in a multitable analysis, @@ -232,13 +233,22 @@ def __repr__(self): except Exception: return super(Cube, self).__repr__() + @lazyproperty + def available_measures(self): + """Tuple of available measures in the cube response.""" + return tuple(self._cube_response.get("result", {}).get("measures", {}).keys()) + @lazyproperty def counts(self): return self.counts_with_missings[self._valid_idxs] @lazyproperty def counts_with_missings(self): - return self._measure(self.is_weighted).raw_cube_array + return ( + self._measures.weighted_counts.raw_cube_array + if self.is_weighted + else self._measures.unweighted_counts.raw_cube_array + ) @lazyproperty def cube_index(self): @@ -270,11 +280,6 @@ def dimensions(self): """ return self._all_dimensions.apparent_dimensions - @lazyproperty - def has_means(self): - """True if cube includes a means measure.""" - return self._measures.means is not None - def inflate(self): """Return new Cube object with rows-dimension added. @@ -283,10 +288,15 @@ def inflate(self): """ cube_dict = self._cube_dict dimensions = cube_dict["result"]["dimensions"] + default = "-".join(self._numeric_measures) + # --- The default value in case of numeric variable is the combination of all + # --- the measures expressed in the cube response. 
+ alias = self._numeric_measure_references.get("alias", default) + name = self._numeric_measure_references.get("name", default) rows_dimension = { - "references": {"alias": "mean", "name": "mean"}, + "references": {"alias": alias, "name": name}, "type": { - "categories": [{"id": 1, "name": "Mean"}], + "categories": [{"id": 1, "name": name.title()}], "class": "categorical", }, } @@ -304,6 +314,15 @@ def is_weighted(self): """True if cube response contains weighted data.""" return self._measures.is_weighted + @lazyproperty + def means(self): + """float64 ndarray of the cube_means if the measure exists.""" + if self._measures.means: + return self._measures.means.raw_cube_array[self._valid_idxs].astype( + np.float64 + ) + return None + @lazyproperty def missing(self): """Get missing count of a cube.""" @@ -326,6 +345,11 @@ def ndim(self): """int count of dimensions for this cube.""" return len(self.dimensions) + @lazyproperty + def n_responses(self): + """Total (int) number of responses considered.""" + return self._cube_response["result"].get("n", 0) + @lazyproperty def partitions(self): """Sequence of _Slice, _Strand, or _Nub objects from this cube-result.""" @@ -352,6 +376,15 @@ def population_fraction(self): """ return self._measures.population_fraction + @lazyproperty + def sum(self): + """float64 ndarray of the cube_means if the measure exists.""" + if self._measures.sum: + return self._measures.sum.raw_cube_array[self._valid_idxs].astype( + np.float64 + ) + return None + @lazyproperty def title(self): """str alternate-name given to cube-result. @@ -362,11 +395,6 @@ def title(self): """ return self._cube_dict["result"].get("title", "Untitled") - @lazyproperty - def n_responses(self): - """Total (int) number of responses considered.""" - return self._cube_response["result"].get("n", 0) - @lazyproperty def unweighted_counts(self): """ndarray of unweighted counts, valid elements only. 
@@ -381,7 +409,7 @@ def unweighted_counts(self): @lazyproperty def valid_counts(self): """ndarray of valid counts, valid elements only.""" - valid_counts = self._measure(self.is_weighted).valid_counts + valid_counts = self._base_measure.valid_counts if valid_counts.any(): return valid_counts[self._valid_idxs] return np.empty(0) @@ -408,6 +436,11 @@ def _all_dimensions(self): """ return AllDimensions(dimension_dicts=self._cube_dict["result"]["dimensions"]) + @lazyproperty + def _base_measure(self): + """...""" + return _BaseMeasure(self._cube_dict, self._all_dimensions, self._cube_idx_arg) + @lazyproperty def _ca_as_0th(self): """True if slicing is to be performed in so-called "CA-as-0th" mode. @@ -426,7 +459,7 @@ def _ca_as_0th(self): def _cube_dict(self): """dict containing raw cube response, parsed from JSON payload.""" cube_dict = copy.deepcopy(self._cube_response) - if self._mean_subvariables: + if self._numeric_measure_subvariables: dimensions = cube_dict.get("result", {}).get("dimensions", []) # ---dim inflation--- # ---In case of numeric arrays, we need to inflate the row dimension @@ -458,39 +491,33 @@ def _is_single_filter_col_cube(self): return self._cube_dict["result"].get("is_single_col_cube", False) @lazyproperty - def _mean_references(self): - """Dict of mean references, tipically for numeric arrays.""" + def _numeric_measure_references(self): + """Dict of numeric measure references, tipically for numeric means or sum.""" + if not self._numeric_measures: + return {} cube_response = self._cube_response cube_measures = cube_response.get("result", {}).get("measures", {}) - metadata = cube_measures.get("mean", {}).get("metadata", {}) + metadata = cube_measures.get(self._numeric_measures[0], {}).get("metadata", {}) return metadata.get("references", {}) @lazyproperty - def _mean_subvariables(self): + def _numeric_measure_subvariables(self): """List of mean subvariables, tipically for numeric arrays.""" + if not self._numeric_measures: + return [] 
cube_response = self._cube_response cube_measures = cube_response.get("result", {}).get("measures", {}) - metadata = cube_measures.get("mean", {}).get("metadata", {}) + metadata = cube_measures.get(self._numeric_measures[0], {}).get("metadata", {}) return metadata.get("type", {}).get("subvariables", []) - def _measure(self, weighted): - """_BaseMeasure subclass representing primary measure for this cube. - - If the cube response includes a means measure, the return value is - means. Otherwise it is counts, with the choice between weighted or - unweighted determined by *weighted*. + @lazyproperty + def _numeric_measures(self): + """List of numeric measures expressed in the cube_response. - Note that weighted counts are provided on an "as-available" basis. - When *weighted* is True and the cube response is not weighted, - unweighted counts are returned. + Basically the numeric measures are the intersection between all the measures + within the cube response and the defined QUANTITY_OF_INTEREST_MEASURES. 
""" - return ( - self._measures.means - if self._measures.means is not None - else self._measures.weighted_counts - if weighted - else self._measures.unweighted_counts - ) + return list(set(self.available_measures).intersection(QOI_MEASURES)) @lazyproperty def _measures(self): @@ -504,20 +531,20 @@ def _measures(self): @lazyproperty def _numeric_array_dimension(self): """Rows dimension object according to the mean subvariables.""" - if not self._mean_subvariables: + if not self._numeric_measure_subvariables: return None - subrefs = self._mean_references.get("subreferences", []) + subrefs = self._numeric_measure_references.get("subreferences", []) rows_dimension = { "references": { - "alias": self._mean_references.get("alias", "mean"), - "name": self._mean_references.get("name", "mean"), + "alias": self._numeric_measure_references.get("alias"), + "name": self._numeric_measure_references.get("name"), }, "type": {"elements": [], "class": "enum", "subtype": {"class": "num_arr"}}, } # ---In case of numeric arrays the row dimension should contains additional # ---information related to the subreferences for each subvariable of the # ---array. - for i, _ in enumerate(self._mean_subvariables): + for i, _ in enumerate(self._numeric_measure_subvariables): # ---The row dimensions elements must be expanded with the alias and the # ---name of the numeric array mean measure subreferences. 
rows_dimension["type"].get("elements", []).append( @@ -528,7 +555,7 @@ def _numeric_array_dimension(self): "alias": subrefs[i].get("alias") if subrefs else None, "name": subrefs[i].get("name") if subrefs else None, }, - "id": self._mean_subvariables[i], + "id": self._numeric_measure_subvariables[i], }, }, ) @@ -552,7 +579,7 @@ def _valid_idxs(self): ) def _reshape_idxs(valid_indices): - if self._measure(self.is_weighted).requires_array_transposition: + if self._base_measure.requires_array_transposition: if len(self._all_dimensions) == 3: # ---In case of 3D array and a numeric array is involved we have to # ---change the order of the valid idxs, from [0,1,2] to [1,2,0]. @@ -658,6 +685,20 @@ def population_fraction(self): except Exception: return 1.0 + @lazyproperty + def sum(self): + """_SumMeasure object providing access to cube sum values. + None when the cube response does not contain a sum measure. + """ + sum_measure_dict = ( + self._cube_dict.get("result", {}).get("measures", {}).get("sum") + ) + return ( + _SumMeasure(self._cube_dict, self._all_dimensions, self._cube_idx_arg) + if sum_measure_dict + else None + ) + @lazyproperty def unweighted_counts(self): """_UnweightedCountMeasure object for this cube. @@ -779,17 +820,34 @@ def _flat_values(self): ) +class _SumMeasure(_BaseMeasure): + """Statistical sum values from a cube-response.""" + + @lazyproperty + def missing_count(self): + """numeric representing count of missing rows reflected in response.""" + return self._cube_dict["result"]["measures"]["sum"].get("n_missing", 0) + + @lazyproperty + def _flat_values(self): + """Return tuple of mean values as found in cube response. + Mean data may include missing items represented by a dict like + {'?': -1} in the cube response. These are replaced by np.nan in the + returned value. 
+ """ + return tuple( + np.nan if type(x) is dict else x + for x in self._cube_dict["result"]["measures"]["sum"]["data"] + ) + + class _UnweightedCountMeasure(_BaseMeasure): """Unweighted counts for cube.""" @lazyproperty def _flat_values(self): """tuple of int counts before weighting.""" - if ( - self._cube_dict["result"]["measures"] - .get("valid_count_unweighted", {}) - .get("data") - ): + if self.valid_counts.size > 0: # ---If valid_count are expressed in the cube dict, returns its data. # ---This condition can happen in case of numeric array cube response. # ---Under this circumstances the numeric array measures will contain the @@ -806,4 +864,14 @@ class _WeightedCountMeasure(_BaseMeasure): @lazyproperty def _flat_values(self): """tuple of numeric counts after weighting.""" - return tuple(self._cube_dict["result"]["measures"]["count"]["data"]) + if self.valid_counts.size > 0: + # ---If valid_count are expressed in the cube dict, returns its data. + # ---This condition can happen in case of numeric array cube response. + # ---Under this circumstances the numeric array measures will contain the + # ---mean measure and a valid count measure for the unweighted counts. 
+ return tuple( + self._cube_dict["result"]["measures"]["valid_count_unweighted"]["data"] + ) + if self._cube_dict["result"]["measures"].get("count", {}).get("data", []): + return tuple(self._cube_dict["result"]["measures"]["count"]["data"]) + return tuple(self._cube_dict["result"]["counts"]) diff --git a/src/cr/cube/cubepart.py b/src/cr/cube/cubepart.py index fb54ca36f..1e797160d 100644 --- a/src/cr/cube/cubepart.py +++ b/src/cr/cube/cubepart.py @@ -98,11 +98,6 @@ def evaluate(self, measure_expr): raise NotImplementedError("Function {} is not available.".format(function)) return SingleSidedMovingAvgSmoother(self, measure_expr).values - @lazyproperty - def has_means(self): - """True if cube-result includes means values.""" - return self._cube.has_means - @lazyproperty def ndim(self): """int count of dimensions for this partition.""" @@ -927,6 +922,15 @@ def smoothed_dimension_dict(self): # from the matrix later on. return self._columns_dimension._dimension_dict + @lazyproperty + def sum(self): + """2D optional np.float64 ndarray of sum value for each table cell. + Cell value is `np.nan` for each cell corresponding to an inserted subtotal + (sum of addend cells cannot simply be added to get the sum of the subtotal). + Raises `ValueError` if the cube-result does not include a sum cube-measure. + """ + return self._assembler.sum + @lazyproperty def summary_pairwise_indices(self): return PairwiseSignificance( @@ -1258,10 +1262,6 @@ def means(self): Raises ValueError when accessed on a cube-result that does not contain a means cube-measure. """ - if not self._cube.has_means: - raise ValueError( - "`.means` is undefined for a cube-result without a means measure" - ) return self._assembler.means @lazyproperty @@ -1412,6 +1412,14 @@ def smoothed_dimension_dict(self): """dict, row dimension definition""" return self._rows_dimension._dimension_dict + @lazyproperty + def sum(self): + """1D np.float64 ndarray of sum for each row of strand. 
+ Raises ValueError when accessed on a cube-result that does not contain a sum + cube-measure. + """ + return self._assembler.sum + @lazyproperty def table_base_range(self): """[min, max] np.int64 ndarray range of unweighted-N for this stripe. @@ -1569,4 +1577,4 @@ def _dimensions(self): @lazyproperty def _scalar(self): """The pre-transforms data-array for this slice.""" - return MeansScalar(self._cube.counts, self._cube.unweighted_counts) + return MeansScalar(self._cube.means, self._cube.unweighted_counts) diff --git a/src/cr/cube/enums.py b/src/cr/cube/enums.py index af3bc69cd..175db2c01 100644 --- a/src/cr/cube/enums.py +++ b/src/cr/cube/enums.py @@ -75,7 +75,11 @@ class MEASURE(enum.Enum): COL_INDEX = "col_index" COL_PERCENT = "col_percent" MEAN = "mean" + SUM = "sum" TABLE_STDERR = "table_stderr" UNWEIGHTED_COUNT = "count_unweighted" WEIGHTED_COUNT = "count_weighted" Z_SCORE = "z_score" + + +QUANTITY_OF_INTEREST_MEASURES = {MEASURE.SUM.value, MEASURE.MEAN.value} diff --git a/src/cr/cube/matrix/assembler.py b/src/cr/cube/matrix/assembler.py index 277cbc8e2..3beccb0fa 100644 --- a/src/cr/cube/matrix/assembler.py +++ b/src/cr/cube/matrix/assembler.py @@ -164,8 +164,6 @@ def means(self): Raises `ValueError` if the cube-result does not include a means cube-measure. """ - if not self._cube.has_means: - raise ValueError("cube-result does not include a means cube-measure") return self._assemble_matrix( NanSubtotals.blocks(self._cube_result_matrix.means, self._dimensions) ) @@ -254,6 +252,15 @@ def rows_margin(self): self._cube_result_matrix.rows_margin, self._row_subtotals, self._row_order ) + @lazyproperty + def sum(self): + """2D optional np.float64 ndarray of sum for each cell. + Raises `ValueError` if the cube-result does not include a sum cube-measure. 
+ """ + return self._assemble_matrix( + NanSubtotals.blocks(self._cube_result_matrix.sum, self._dimensions) + ) + @lazyproperty def table_base(self): """Scalar, 1D, or 2D ndarray of np.int64 unweighted-N for this slice. diff --git a/src/cr/cube/matrix/cubemeasure.py b/src/cr/cube/matrix/cubemeasure.py index a2665f401..8dd8fe7d6 100644 --- a/src/cr/cube/matrix/cubemeasure.py +++ b/src/cr/cube/matrix/cubemeasure.py @@ -761,22 +761,26 @@ class BaseCubeResultMatrix(object): """Base class for all cube-result matrix (2D second-order analyzer) objects.""" def __init__( - self, dimensions, weighted_counts, unweighted_counts, counts_with_missings=None + self, + dimensions, + weighted_counts, + unweighted_counts, + counts_with_missings=None, + means=None, + sum=None, ): self._dimensions = dimensions self._weighted_counts = weighted_counts self._unweighted_counts = unweighted_counts self._counts_with_missings = counts_with_missings + self._means = means + self._sum = sum @classmethod def factory(cls, cube, dimensions, slice_idx): """Return a base-matrix object of appropriate type for `cube`.""" - # --- means cube gets one of the means-matrix types --- - if cube.has_means: - return cls._means_matrix_factory(cube, dimensions, slice_idx) - - # --- everything else gets a more conventional matrix --- - return cls._regular_matrix_factory(cube, dimensions, slice_idx) + MatrixCls = cls._regular_matrix_class(cube.dimension_types[-2:]) + return MatrixCls(dimensions, *cls._sliced_counts(cube, slice_idx)) @lazyproperty def column_index(self): @@ -932,40 +936,15 @@ def _array_type_std_res(self, counts, total, rowsum, colsum): variance = rowsum * colsum * (total - rowsum) * (total - colsum) / total ** 3 return (counts - expected_counts) / np.sqrt(variance) - @classmethod - def _means_matrix_factory(cls, cube, dimensions, slice_idx): - """ -> matrix object appropriate to means `cube`.""" - dimension_types = cube.dimension_types[-2:] - - if dimension_types == (DT.MR, DT.MR): - # --- this 
MEANS_MR_X_MR case hasn't arisen yet --- - raise NotImplementedError( - "MR x MR with means is not implemented" - ) # pragma: no cover - - MatrixCls = ( - _NumArrayXMrMeansMatrix - if dimension_types == (DT.NUM_ARRAY, DT.MR) - else _NumArrayXCatMeansMatrix - if dimension_types[0] == DT.NUM_ARRAY - else _MrXCatMeansMatrix - if dimension_types[0] == DT.MR - else _CatXMrMeansMatrix - if dimension_types[1] == DT.MR - else _CatXCatMeansMatrix - ) - counts, unweighted_counts = ( - (cube.counts[slice_idx], cube.unweighted_counts[slice_idx]) - if cube.ndim == 3 - else (cube.counts, cube.unweighted_counts) - ) - return MatrixCls(dimensions, counts, unweighted_counts) - @staticmethod def _regular_matrix_class(dimension_types): """Return BaseCubeResultMatrix subclass appropriate to `dimension_types`.""" return ( - _MrXMrMatrix + _NumArrayXMrMatrix + if dimension_types == (DT.NUM_ARRAY, DT.MR) + else _NumArrayXCatMatrix + if dimension_types[0] == DT.NUM_ARRAY + else _MrXMrMatrix if dimension_types == (DT.MR, DT.MR) else _MrXCatMatrix if dimension_types[0] == DT.MR @@ -975,7 +954,7 @@ def _regular_matrix_class(dimension_types): ) @staticmethod - def _regular_matrix_counts_slice(cube, slice_idx): + def _regular_matrix_slice(cube, slice_idx): """return `np.s_` object with correct slicing for the cube type.""" if cube.ndim <= 2: return np.s_[:] @@ -989,12 +968,6 @@ def _regular_matrix_counts_slice(cube, slice_idx): # --- appropriate slice (element of the 0th dimension). return np.s_[slice_idx] - @classmethod - def _regular_matrix_factory(cls, cube, dimensions, slice_idx): - """ -> matrix object for non-means slice.""" - MatrixCls = cls._regular_matrix_class(cube.dimension_types[-2:]) - return MatrixCls(dimensions, *cls._sliced_counts(cube, slice_idx)) - @classmethod def _sliced_counts(cls, cube, slice_idx): """Return tuple of cube counts, prepared for regular matrix construction. 
@@ -1008,8 +981,14 @@ def _sliced_counts(cls, cube, slice_idx): need to extract only the selected counts, since we're "just" dealing with the tabulation. """ - i = cls._regular_matrix_counts_slice(cube, slice_idx) - return (cube.counts[i], cube.unweighted_counts[i], cube.counts_with_missings[i]) + i = cls._regular_matrix_slice(cube, slice_idx) + return ( + cube.counts[i], + cube.unweighted_counts[i], + cube.counts_with_missings[i], + cube.means[i] if cube.means is not None else None, + cube.sum[i] if cube.sum is not None else None, + ) @lazyproperty def _valid_row_idxs(self): @@ -1063,6 +1042,15 @@ def columns_pruning_base(self): """ return np.sum(self._unweighted_counts, axis=0) + @lazyproperty + def means(self): + """2D np.float64 ndarray of mean for each valid matrix cell.""" + if self._means is None: + raise ValueError( + "`.means` is undefined for a cube-result without a means measure" + ) + return self._means + @lazyproperty def rows_base(self): """1D ndarray of np.int64 unweighted-N for each matrix row.""" @@ -1085,6 +1073,15 @@ def rows_pruning_base(self): """ return np.sum(self._unweighted_counts, axis=1) + @lazyproperty + def sum(self): + """2D np.float64 ndarray of sum for each valid matrix cell.""" + if self._sum is None: + raise ValueError( + "`.sum` is undefined for a cube-result without a sum measure" + ) + return self._sum + @lazyproperty def table_base(self): """np.int64 count of actual respondents who answered both questions. @@ -1237,6 +1234,11 @@ def columns_pruning_base(self): """ return np.sum(self._unweighted_counts, axis=(0, 2)) + @lazyproperty + def means(self): + """2D np.float64 ndarray of mean for each valid matrix cell.""" + return self._means[:, :, 0] + @lazyproperty def rows_base(self): """2D np.int64 ndarray of row-wise unweighted-N for this matrix. 
@@ -1268,6 +1270,11 @@ def rows_pruning_base(self): """ return np.sum(self._unweighted_counts, axis=(1, 2)) + @lazyproperty + def sum(self): + """2D np.float64 ndarray of mean for each valid matrix cell.""" + return self._sum[:, :, 0] + @lazyproperty def table_base(self): """1D np.int64 unweighted N for each column of matrix. @@ -1409,6 +1416,11 @@ def columns_pruning_base(self): """ return np.sum(self._unweighted_counts, axis=(0, 1)) + @lazyproperty + def means(self): + """2D np.float64 ndarray of mean for each valid matrix cell.""" + return self._means[:, 0, :] + @lazyproperty def rows_base(self): """1D ndarray of np.int64 unweighted-N for each matrix row. @@ -1435,6 +1447,11 @@ def rows_pruning_base(self): """ return np.sum(self._unweighted_counts, axis=(1, 2)) + @lazyproperty + def sum(self): + """2D np.float64 ndarray of sum for each valid matrix cell.""" + return self._sum[:, 0, :] + @lazyproperty def table_base(self): """1D np.int64 ndarray of unweighted N for each row of matrix. @@ -1740,73 +1757,7 @@ def _table_proportion_variances(self): return p * (1 - p) -# === LEGACY MEANS MATRIX OBJECTS === - - -class _CatXCatMeansMatrix(_CatXCatMatrix): - """CAT_X_CAT matrix for means measure. - - A means matrix has an array of mean values instead of a `counts` array. - """ - - def __init__(self, dimensions, means, unweighted_counts): - super(_CatXCatMeansMatrix, self).__init__(dimensions, None, unweighted_counts) - self._means = means - - @lazyproperty - def means(self): - """2D np.float64 ndarray of mean for each valid matrix cell.""" - return self._means - - @lazyproperty - def rows_margin(self): - """(nrows,) ndarray of np.nan. (A means matrix has no rows margin).""" - nrows = self._means.shape[0] - return np.full((nrows,), np.nan) - - @lazyproperty - def weighted_counts(self): - """2D ndarray of np.nan for each valid matrix cell. - - Weighted-counts have no meaning for a means matrix (although unweighted counts - do). 
- """ - return np.full(self._means.shape, np.nan) - - -class _CatXMrMeansMatrix(_CatXMrMatrix): - """Basis for CAT_X_MR slice having mean measure instead of counts.""" - - def __init__(self, dimensions, means, unweighted_counts): - counts = np.zeros(means.shape) - super(_CatXMrMeansMatrix, self).__init__(dimensions, counts, unweighted_counts) - self._means = means - - @lazyproperty - def means(self): - """2D np.float64 ndarray of mean for each valid matrix cell.""" - return self._means[:, :, 0] - - -class _MrXCatMeansMatrix(_MrXCatMatrix): - """MR_X_CAT slice with means measure instead of counts. - - Note that its (weighted) counts are all set to zero. A means slice still has - meaningful unweighted counts. - """ - - def __init__(self, dimensions, means, unweighted_counts): - counts = np.zeros(means.shape) - super(_MrXCatMeansMatrix, self).__init__(dimensions, counts, unweighted_counts) - self._means = means - - @lazyproperty - def means(self): - """2D np.float64 ndarray of mean for each valid matrix cell.""" - return self._means[:, 0, :] - - -class _NumArrayXCatMeansMatrix(_CatXCatMeansMatrix): +class _NumArrayXCatMatrix(_CatXCatMatrix): """NUM_ARR_X_CAT slice with means measure instead of counts.""" @lazyproperty @@ -1819,7 +1770,7 @@ def columns_base(self): return self._unweighted_counts -class _NumArrayXMrMeansMatrix(_CatXMrMeansMatrix): +class _NumArrayXMrMatrix(_CatXMrMatrix): """NUM_ARR_X_MR slice with means measure instead of counts.""" @lazyproperty diff --git a/src/cr/cube/stripe/assembler.py b/src/cr/cube/stripe/assembler.py index 44fe524e6..e66c14765 100644 --- a/src/cr/cube/stripe/assembler.py +++ b/src/cr/cube/stripe/assembler.py @@ -134,6 +134,14 @@ def scale_stderr(self): """ return self._measures.scaled_counts.scale_stderr + @lazyproperty + def sum(self): + """1D np.float64 ndarray of sum for each row. + The array is filled with `np.nan` when the cube-result does not include a sum + cube-measure. 
+ """ + return self._assemble_vector(self._measures.sum.blocks) + @lazyproperty def table_base_range(self): """[min, max] np.int64 ndarray range of (total) unweighted-N for this stripe. diff --git a/src/cr/cube/stripe/cubemeasure.py b/src/cr/cube/stripe/cubemeasure.py index bd098af08..9f88afecd 100644 --- a/src/cr/cube/stripe/cubemeasure.py +++ b/src/cr/cube/stripe/cubemeasure.py @@ -26,8 +26,21 @@ def __init__(self, cube, rows_dimension, ca_as_0th, slice_idx): @lazyproperty def cube_means(self): """_BaseCubeMeans subclass object for this stripe.""" + if self._cube.means is None: + raise ValueError( + "`.means` is undefined for a cube-result without a means measure" + ) return _BaseCubeMeans.factory(self._cube, self._rows_dimension) + @lazyproperty + def cube_sum(self): + """_BaseCubeMeans subclass object for this stripe.""" + if self._cube.sum is None: + raise ValueError( + "`.sum` is undefined for a cube-result without a sum measure" + ) + return _BaseCubeSum.factory(self._cube, self._rows_dimension) + @lazyproperty def unweighted_cube_counts(self): """_BaseUnweightedCubeCounts subclass object for this stripe.""" @@ -63,9 +76,7 @@ def __init__(self, rows_dimension, means): @classmethod def factory(cls, cube, rows_dimension): """Return _BaseCubeMeans subclass instance appropriate to `cube`.""" - # --- TODO: note that `cube.counts` is improperly overloaded to return means - # --- when cube.has_means. This needs to be fixed. - means = cube.counts + means = cube.means MeansCls = ( _MrCubeMeans if rows_dimension.dimension_type == DT.MR else _CatCubeMeans ) @@ -100,6 +111,56 @@ def means(self): return self._means[:, 0] +class _NumArrCubeMeans(_BaseCubeMeans): + """Means cube-measure for an MR stripe. + + Its `.means` is a 2D ndarray with axes (rows, sel/not). 
+ """ + + @lazyproperty + def means(self): + """1D np.float64 ndarray of mean for each stripe row.""" + return self._means + + +# === SUM === + + +class _BaseCubeSum(_BaseCubeMeasure): + """Base class for means cube-measure variants.""" + + def __init__(self, rows_dimension, sum): + super(_BaseCubeSum, self).__init__(rows_dimension) + self._sum = sum + + @classmethod + def factory(cls, cube, rows_dimension): + """Return _BaseCubeSum subclass instance appropriate to `cube`.""" + sum = cube.sum + SumCls = _MrCubeSum if rows_dimension.dimension_type == DT.MR else _CatCubeSum + return SumCls(rows_dimension, sum) + + +class _CatCubeSum(_BaseCubeSum): + """Means cube-measure for a non-MR stripe.""" + + @lazyproperty + def sum(self): + """1D np.float64 ndarray of mean for each stripe row.""" + return self._sum + + +class _MrCubeSum(_BaseCubeSum): + """Means cube-measure for an MR stripe. + Its `.means` is a 2D ndarray with axes (rows, sel/not). + """ + + @lazyproperty + def sum(self): + """1D np.float64 ndarray of mean for each stripe row.""" + return self._sum[:, 0] + + # === UNWEIGHTED COUNTS === @@ -118,6 +179,9 @@ def factory(cls, cube, rows_dimension, ca_as_0th, slice_idx): rows_dimension, cube.unweighted_counts[slice_idx] ) + if rows_dimension.dimension_type == DT.NUM_ARRAY: + return _NumArrUnweightedCubeCounts(rows_dimension, cube.unweighted_counts) + if rows_dimension.dimension_type == DT.MR: return _MrUnweightedCubeCounts(rows_dimension, cube.unweighted_counts) @@ -210,6 +274,32 @@ def unweighted_counts(self): return self._unweighted_counts[:, 0] +class _NumArrUnweightedCubeCounts(_BaseUnweightedCubeCounts): + """Unweighted-counts cube-measure for a non-MR stripe.""" + + @lazyproperty + def bases(self): + """1D np.int64 ndarray of table-proportion denonimator (base) for each row. + + Each row in a CAT stripe has the same base (the table-base). 
+ """ + return self._unweighted_counts + + @lazyproperty + def pruning_base(self): + """1D np.int64 ndarray of unweighted-N for each matrix row. + + Because this matrix has no MR dimension, this is simply the unweighted count for + each row. + """ + return self._unweighted_counts + + @lazyproperty + def unweighted_counts(self): + """1D np.int64 ndarray of unweighted-count for each row of stripe.""" + return self._unweighted_counts + + # === WEIGHTED COUNTS === diff --git a/src/cr/cube/stripe/measure.py b/src/cr/cube/stripe/measure.py index c2476f0de..09827c34e 100644 --- a/src/cr/cube/stripe/measure.py +++ b/src/cr/cube/stripe/measure.py @@ -47,6 +47,11 @@ def scaled_counts(self): """ return _ScaledCounts(self._rows_dimension, self, self._cube_measures) + @lazyproperty + def sum(self): + """_Sum measure object for this stripe.""" + return _Sum(self._rows_dimension, self, self._cube_measures) + @lazyproperty def table_proportion_stddevs(self): """_TableProportionStddevs measure object for this stripe.""" @@ -305,6 +310,33 @@ def _weighted_counts(self): return self._weighted_cube_counts.weighted_counts[self._has_numeric_value] +class _Sum(_BaseSecondOrderMeasure): + """Provides the means measure for a stripe. + Relies on the presence of a means cube-measure in the cube-result. + """ + + @lazyproperty + def base_values(self): + """1D np.float64 ndarray of sum for each row.""" + return self._cube_sum.sum + + @lazyproperty + def subtotal_values(self): + """1D ndarray of np.nan for each row-subtotal. + Sum values cannot be subtotaled and each subtotal value is unconditionally + np.nan. + """ + return NanSubtotals.subtotal_values(self.base_values, self._rows_dimension) + + @lazyproperty + def _cube_sum(self): + """_BaseCubeMeans subclass instance for this measure. + Provides the means measures from the cube-result, encapsulating variation based + on dimensionality. 
+ """ + return self._cube_measures.cube_sum + + class _TableProportionStddevs(_BaseSecondOrderMeasure): """Provides the table-proportion standard-deviation measure for a stripe.""" diff --git a/tests/fixtures/cat-sum.json b/tests/fixtures/cat-sum.json new file mode 100644 index 000000000..849c00b83 --- /dev/null +++ b/tests/fixtures/cat-sum.json @@ -0,0 +1,162 @@ +{ + "query": { + "measures": { + "valid_count_unweighted": { + "function": "cube_valid_count", + "args": [ + { + "variable": "3a7cfcaf0d5349fe854057603f63b2f3" + } + ] + }, + "sum": { + "function": "cube_sum", + "args": [ + { + "variable": "3a7cfcaf0d5349fe854057603f63b2f3" + } + ] + } + }, + "dimensions": [ + { + "variable": "03694f029f9a408cb56f7cadfe9e9b48" + } + ], + "weight": null + }, + "query_environment": { + "filter": [] + }, + "result": { + "dimensions": [ + { + "derived": false, + "references": { + "alias": "Gender", + "name": "Gender" + }, + "type": { + "ordinal": false, + "class": "categorical", + "categories": [ + { + "numeric_value": null, + "id": 1, + "name": "Male", + "missing": false + }, + { + "numeric_value": null, + "id": 2, + "name": "Female", + "missing": false + }, + { + "numeric_value": null, + "id": -1, + "name": "No Data", + "missing": true + } + ] + } + } + ], + "missing": 0, + "measures": { + "valid_count_unweighted": { + "data": [ + 3, + 2, + 0 + ], + "n_missing": 0, + "metadata": { + "derived": true, + "references": { + "alias": "Movies", + "name": "Movies", + "view": { + "summary_statistic": "sum" + } + }, + "type": { + "integer": false, + "class": "numeric", + "missing_rules": {}, + "missing_reasons": { + "No Data": -1 + } + } + } + }, + "sum": { + "data": [ + 88, + 77, + 0 + ], + "n_missing": 0, + "metadata": { + "derived": true, + "references": { + "alias": "Movies", + "name": "Movies", + "view": { + "summary_statistic": "sum" + } + }, + "type": { + "integer": null, + "class": "numeric", + "missing_rules": {}, + "missing_reasons": { + "No Data": -1 + } + } + } + } + }, + 
"n": 5, + "filter_stats": { + "filtered_complete": { + "unweighted": { + "selected": 5, + "other": 0, + "missing": 0 + }, + "weighted": { + "selected": 5, + "other": 0, + "missing": 0 + } + }, + "filtered": { + "unweighted": { + "selected": 5, + "other": 0, + "missing": 0 + }, + "weighted": { + "selected": 5, + "other": 0, + "missing": 0 + } + } + }, + "unfiltered": { + "unweighted_n": 5, + "weighted_n": 5 + }, + "filtered": { + "unweighted_n": 5, + "weighted_n": 5 + }, + "counts": [ + 3, + 2, + 0 + ], + "element": "crunch:cube" + } +} diff --git a/tests/fixtures/mr-sum.json b/tests/fixtures/mr-sum.json new file mode 100644 index 000000000..4e5653233 --- /dev/null +++ b/tests/fixtures/mr-sum.json @@ -0,0 +1,300 @@ +{ + "element": "shoji:view", + "value": { + "query": { + "dimensions": [ + { + "args": [ + { + "args": [ + { + "variable": "3ceb4ea17cd34c6295c26e3d43017757" + } + ], + "function": "as_selected" + }, + { + "value": "subvariables" + } + ], + "function": "dimension" + }, + { + "args": [ + { + "variable": "3ceb4ea17cd34c6295c26e3d43017757" + } + ], + "function": "as_selected" + } + ], + "measures": { + "sum": { + "args": [ + { + "variable": "38bb586090ec488fb58177f687572db6" + } + ], + "function": "cube_sum" + }, + "valid_count_unweighted": { + "args": [ + { + "variable": "38bb586090ec488fb58177f687572db6" + } + ], + "function": "cube_valid_count" + } + }, + "weight": null + }, + "query_environment": { + "filter": [] + }, + "result": { + "counts": [ + 2, + 1, + 1, + 1, + 2, + 1, + 1, + 2, + 1 + ], + "dimensions": [ + { + "derived": true, + "references": { + "alias": "M", + "name": "M", + "subreferences": [ + { + "alias": "X", + "description": null, + "name": "X" + }, + { + "alias": "Y", + "description": null, + "name": "Y" + }, + { + "alias": "Z", + "description": null, + "name": "Z" + } + ], + "uniform_basis": false + }, + "type": { + "class": "enum", + "elements": [ + { + "id": 1, + "missing": false, + "value": { + "derived": false, + "id": 
"72OcG2j95yZHLX7YUk7Xkg000000", + "references": { + "alias": "X", + "description": null, + "name": "X" + } + } + }, + { + "id": 2, + "missing": false, + "value": { + "derived": false, + "id": "72OcG2j95yZHLX7YUk7Xkg000001", + "references": { + "alias": "Y", + "description": null, + "name": "Y" + } + } + }, + { + "id": 3, + "missing": false, + "value": { + "derived": false, + "id": "72OcG2j95yZHLX7YUk7Xkg000002", + "references": { + "alias": "Z", + "description": null, + "name": "Z" + } + } + } + ], + "subtype": { + "class": "variable" + } + } + }, + { + "derived": true, + "references": { + "alias": "M", + "name": "M", + "subreferences": [ + { + "alias": "X", + "description": null, + "name": "X" + }, + { + "alias": "Y", + "description": null, + "name": "Y" + }, + { + "alias": "Z", + "description": null, + "name": "Z" + } + ], + "uniform_basis": false + }, + "type": { + "categories": [ + { + "id": 1, + "missing": false, + "name": "Selected", + "numeric_value": 1, + "selected": true + }, + { + "id": 0, + "missing": false, + "name": "Other", + "numeric_value": 0 + }, + { + "id": -1, + "missing": true, + "name": "No Data", + "numeric_value": null + } + ], + "class": "categorical", + "ordinal": false, + "subvariables": [ + "72OcG2j95yZHLX7YUk7Xkg000000", + "72OcG2j95yZHLX7YUk7Xkg000001", + "72OcG2j95yZHLX7YUk7Xkg000002" + ] + } + } + ], + "element": "crunch:cube", + "filter_stats": { + "filtered": { + "unweighted": { + "missing": 0, + "other": 0, + "selected": 4 + }, + "weighted": { + "missing": 0, + "other": 0, + "selected": 4 + } + }, + "filtered_complete": { + "unweighted": { + "missing": 0, + "other": 0, + "selected": 4 + }, + "weighted": { + "missing": 0, + "other": 0, + "selected": 4 + } + } + }, + "filtered": { + "unweighted_n": 4, + "weighted_n": 4 + }, + "measures": { + "sum": { + "data": [ + 3, + 2, + 3, + 2, + 3, + 3, + 2, + 3, + 3 + ], + "metadata": { + "derived": true, + "references": { + "alias": "Movies", + "name": "Movies", + "view": { + 
"summary_statistic": "sum" + } + }, + "type": { + "class": "numeric", + "integer": null, + "missing_reasons": { + "No Data": -1 + }, + "missing_rules": {} + } + }, + "n_missing": 1 + }, + "valid_count_unweighted": { + "data": [ + 2, + 1, + 1, + 1, + 2, + 1, + 1, + 2, + 1 + ], + "metadata": { + "derived": true, + "references": { + "alias": "Movies", + "name": "Movies", + "view": { + "summary_statistic": "sum" + } + }, + "type": { + "class": "numeric", + "integer": false, + "missing_reasons": { + "No Data": -1 + }, + "missing_rules": {} + } + }, + "n_missing": 1 + } + }, + "missing": 1, + "n": 4, + "unfiltered": { + "unweighted_n": 4, + "weighted_n": 4 + } + } + } +} diff --git a/tests/fixtures/numeric-measures-x-cat.json b/tests/fixtures/numeric-measures-x-cat.json new file mode 100644 index 000000000..6b033b0d0 --- /dev/null +++ b/tests/fixtures/numeric-measures-x-cat.json @@ -0,0 +1,190 @@ +{ + "query": { + "measures": { + "valid_count_unweighted": { + "function": "cube_valid_count", + "args": [ + { + "variable": "6b1df04bb3644ec88998b638a1ff9968" + } + ] + }, + "sum": { + "function": "cube_sum", + "args": [ + { + "variable": "6b1df04bb3644ec88998b638a1ff9968" + } + ] + }, + "mean": { + "function": "cube_mean", + "args": [ + { + "variable": "6b1df04bb3644ec88998b638a1ff9968" + } + ] + } + }, + "dimensions": [ + { + "variable": "8b1301ab6a254f34992e30f07d652775" + } + ], + "weight": null + }, + "query_environment": { + "filter": [] + }, + "result": { + "dimensions": [ + { + "derived": false, + "references": { + "alias": "Gender", + "name": "Gender" + }, + "type": { + "ordinal": false, + "class": "categorical", + "categories": [ + { + "numeric_value": null, + "id": 1, + "name": "Male", + "missing": false + }, + { + "numeric_value": null, + "id": 2, + "name": "Female", + "missing": false + }, + { + "numeric_value": null, + "id": -1, + "name": "No Data", + "missing": true + } + ] + } + } + ], + "missing": 0, + "measures": { + "valid_count_unweighted": { + "data": [ + 
3, + 2, + 0 + ], + "n_missing": 0, + "metadata": { + "derived": true, + "references": { + "alias": "Movies", + "name": "Movies" + }, + "type": { + "integer": false, + "class": "numeric", + "missing_rules": {}, + "missing_reasons": { + "No Data": -1 + } + } + } + }, + "sum": { + "data": [ + 8.0, + 7.0, + 0.0 + ], + "n_missing": 0, + "metadata": { + "derived": true, + "references": { + "alias": "Movies", + "name": "Movies" + }, + "type": { + "integer": null, + "class": "numeric", + "missing_rules": {}, + "missing_reasons": { + "No Data": -1 + } + } + } + }, + "mean": { + "data": [ + 2.6666666667, + 3.5, + { + "?": -8 + } + ], + "n_missing": 0, + "metadata": { + "derived": true, + "references": { + "alias": "Movies", + "name": "Movies" + }, + "type": { + "integer": true, + "class": "numeric", + "missing_rules": {}, + "missing_reasons": { + "No Data": -1, + "NaN": -8 + } + } + } + } + }, + "n": 5, + "filter_stats": { + "filtered_complete": { + "unweighted": { + "selected": 5, + "other": 0, + "missing": 0 + }, + "weighted": { + "selected": 5, + "other": 0, + "missing": 0 + } + }, + "filtered": { + "unweighted": { + "selected": 5, + "other": 0, + "missing": 0 + }, + "weighted": { + "selected": 5, + "other": 0, + "missing": 0 + } + } + }, + "unfiltered": { + "unweighted_n": 5, + "weighted_n": 5 + }, + "filtered": { + "unweighted_n": 5, + "weighted_n": 5 + }, + "counts": [ + 3, + 2, + 0 + ], + "element": "crunch:cube" + } +} diff --git a/tests/fixtures/numeric_arrays/num-arr-sum-grouped-by-cat.json b/tests/fixtures/numeric_arrays/num-arr-sum-grouped-by-cat.json new file mode 100644 index 000000000..4989fdfbf --- /dev/null +++ b/tests/fixtures/numeric_arrays/num-arr-sum-grouped-by-cat.json @@ -0,0 +1,217 @@ +{ + "element": "shoji:view", + "value": { + "query": { + "measures": { + "valid_count_unweighted": { + "function": "cube_valid_count", + "args": [ + { + "variable": "bcfba831c5d8480c815d4b08f70bbd4e" + } + ] + }, + "sum": { + "function": "cube_sum", + "args": [ + { + 
"variable": "bcfba831c5d8480c815d4b08f70bbd4e" + } + ] + } + }, + "dimensions": [ + { + "variable": "9a82ac0a10764b49be5f2bafefe7a0f9" + } + ], + "weight": null + }, + "query_environment": { + "filter": [] + }, + "result": { + "dimensions": [ + { + "derived": false, + "references": { + "alias": "Gender", + "name": "Gender" + }, + "type": { + "ordinal": false, + "class": "categorical", + "categories": [ + { + "numeric_value": null, + "id": 1, + "name": "Male", + "missing": false + }, + { + "numeric_value": null, + "id": 2, + "name": "Female", + "missing": false + }, + { + "numeric_value": null, + "id": -1, + "name": "No Data", + "missing": true + } + ] + } + } + ], + "missing": 0, + "measures": { + "valid_count_unweighted": { + "data": [ + 3, + 3, + 3, + 2, + 2, + 2, + 0, + 0, + 0 + ], + "n_missing": 0, + "metadata": { + "derived": true, + "references": { + "alias": "Ticket sold", + "uniform_basis": false, + "subreferences": [ + { + "alias": "Dark Knight", + "name": "Dark Knight" + }, + { + "alias": "Fight Club", + "name": "Fight Club" + }, + { + "alias": "Meets the Par.", + "name": "Meets the Par." + } + ], + "name": "Ticket", + "view": { + "summary_statistic": "sum" + } + }, + "type": { + "integer": false, + "subvariables": [ + "S1", + "S2", + "S3" + ], + "class": "numeric", + "missing_reasons": { + "No Data": -1 + }, + "missing_rules": {} + } + } + }, + "sum": { + "data": [ + 4, + 3, + 2, + 3, + 0, + 3, + 0, + 0, + 0 + ], + "n_missing": 0, + "metadata": { + "derived": true, + "references": { + "alias": "Ticket sold", + "uniform_basis": false, + "subreferences": [ + { + "alias": "Dark Knight", + "name": "Dark Knight" + }, + { + "alias": "Fight Club", + "name": "Fight Club" + }, + { + "alias": "Meets the Par.", + "name": "Meets the Par." 
+ } + ], + "name": "Ticket", + "view": { + "summary_statistic": "sum" + } + }, + "type": { + "integer": null, + "subvariables": [ + "S1", + "S2", + "S3" + ], + "class": "numeric", + "missing_reasons": { + "No Data": -1 + }, + "missing_rules": {} + } + } + } + }, + "n": 5, + "filter_stats": { + "filtered_complete": { + "unweighted": { + "selected": 5, + "other": 0, + "missing": 0 + }, + "weighted": { + "selected": 5, + "other": 0, + "missing": 0 + } + }, + "filtered": { + "unweighted": { + "selected": 5, + "other": 0, + "missing": 0 + }, + "weighted": { + "selected": 5, + "other": 0, + "missing": 0 + } + } + }, + "unfiltered": { + "unweighted_n": 5, + "weighted_n": 5 + }, + "filtered": { + "unweighted_n": 5, + "weighted_n": 5 + }, + "counts": [ + 3, + 2, + 0 + ], + "element": "crunch:cube" + } + } +} diff --git a/tests/fixtures/sum-cat-x-mr.json b/tests/fixtures/sum-cat-x-mr.json new file mode 100644 index 000000000..3446a9ccb --- /dev/null +++ b/tests/fixtures/sum-cat-x-mr.json @@ -0,0 +1,386 @@ +{ + "query": { + "measures": { + "valid_count_unweighted": { + "function": "cube_valid_count", + "args": [ + { + "variable": "abdff06f5dfd42d09478cc535fc484a9" + } + ] + }, + "sum": { + "function": "cube_sum", + "args": [ + { + "variable": "abdff06f5dfd42d09478cc535fc484a9" + } + ] + } + }, + "dimensions": [ + { + "variable": "6H7ZxHgHP71rIt0g2dsVJ6000006" + }, + { + "function": "dimension", + "args": [ + { + "function": "as_selected", + "args": [ + { + "variable": "727cf07595d848bbb68ec59f3307362a" + } + ] + }, + { + "value": "subvariables" + } + ] + }, + { + "function": "as_selected", + "args": [ + { + "variable": "727cf07595d848bbb68ec59f3307362a" + } + ] + } + ], + "weight": null + }, + "query_environment": { + "filter": [] + }, + "result": { + "dimensions": [ + { + "references": { + "alias": "cat", + "name": "cat", + "description": null + }, + "derived": false, + "type": { + "ordinal": false, + "class": "categorical", + "categories": [ + { + "numeric_value": null, + 
"id": 1, + "name": "a", + "missing": false + }, + { + "numeric_value": null, + "id": 2, + "name": "b", + "missing": false + }, + { + "numeric_value": null, + "id": -1, + "name": "No Data", + "missing": true + } + ] + } + }, + { + "derived": true, + "references": { + "subreferences": [ + { + "alias": "X", + "name": "X", + "description": null + }, + { + "alias": "Y", + "name": "Y", + "description": null + }, + { + "alias": "Z", + "name": "Z", + "description": null + } + ], + "uniform_basis": false, + "name": "M", + "alias": "M" + }, + "type": { + "subtype": { + "class": "variable" + }, + "elements": [ + { + "id": 1, + "value": { + "references": { + "alias": "X", + "name": "X", + "description": null + }, + "derived": false, + "id": "6H7ZxHgHP71rIt0g2dsVJ6000000" + }, + "missing": false + }, + { + "id": 2, + "value": { + "references": { + "alias": "Y", + "name": "Y", + "description": null + }, + "derived": false, + "id": "6H7ZxHgHP71rIt0g2dsVJ6000001" + }, + "missing": false + }, + { + "id": 3, + "value": { + "references": { + "alias": "Z", + "name": "Z", + "description": null + }, + "derived": false, + "id": "6H7ZxHgHP71rIt0g2dsVJ6000002" + }, + "missing": false + } + ], + "class": "enum" + } + }, + { + "references": { + "subreferences": [ + { + "alias": "X", + "name": "X", + "description": null + }, + { + "alias": "Y", + "name": "Y", + "description": null + }, + { + "alias": "Z", + "name": "Z", + "description": null + } + ], + "uniform_basis": false, + "name": "M", + "alias": "M" + }, + "derived": true, + "type": { + "ordinal": false, + "subvariables": [ + "6H7ZxHgHP71rIt0g2dsVJ6000000", + "6H7ZxHgHP71rIt0g2dsVJ6000001", + "6H7ZxHgHP71rIt0g2dsVJ6000002" + ], + "class": "categorical", + "categories": [ + { + "numeric_value": 1, + "selected": true, + "id": 1, + "name": "Selected", + "missing": false + }, + { + "numeric_value": 0, + "id": 0, + "name": "Other", + "missing": false + }, + { + "numeric_value": null, + "id": -1, + "name": "No Data", + "missing": true + } + ] 
+ } + } + ], + "missing": 1, + "measures": { + "valid_count_unweighted": { + "data": [ + 2, + 0, + 0, + 1, + 1, + 0, + 1, + 1, + 0, + 0, + 1, + 1, + 0, + 1, + 1, + 0, + 1, + 1, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0 + ], + "n_missing": 1, + "metadata": { + "derived": true, + "references": { + "alias": "Movies", + "name": "Movies", + "view": { + "summary_statistic": "sum" + } + }, + "type": { + "integer": false, + "class": "numeric", + "missing_rules": {}, + "missing_reasons": { + "No Data": -1 + } + } + } + }, + "sum": { + "data": [ + 3, + 0, + 0, + 2, + 1, + 0, + 2, + 1, + 0, + 0, + 2, + 3, + 0, + 2, + 3, + 0, + 2, + 3, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0 + ], + "n_missing": 1, + "metadata": { + "derived": true, + "references": { + "alias": "Movies", + "name": "Movies", + "view": { + "summary_statistic": "sum" + } + }, + "type": { + "integer": null, + "class": "numeric", + "missing_rules": {}, + "missing_reasons": { + "No Data": -1 + } + } + } + } + }, + "n": 4, + "filter_stats": { + "filtered_complete": { + "unweighted": { + "selected": 4, + "other": 0, + "missing": 0 + }, + "weighted": { + "selected": 4, + "other": 0, + "missing": 0 + } + }, + "filtered": { + "unweighted": { + "selected": 4, + "other": 0, + "missing": 0 + }, + "weighted": { + "selected": 4, + "other": 0, + "missing": 0 + } + } + }, + "unfiltered": { + "unweighted_n": 4, + "weighted_n": 4 + }, + "filtered": { + "unweighted_n": 4, + "weighted_n": 4 + }, + "counts": [ + 2, + 0, + 0, + 1, + 1, + 0, + 1, + 1, + 0, + 0, + 1, + 1, + 0, + 1, + 1, + 0, + 1, + 1, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0 + ], + "element": "crunch:cube" + } +} diff --git a/tests/fixtures/sum-mr-x-cat.json b/tests/fixtures/sum-mr-x-cat.json new file mode 100644 index 000000000..d2866486f --- /dev/null +++ b/tests/fixtures/sum-mr-x-cat.json @@ -0,0 +1,386 @@ +{ + "query": { + "measures": { + "valid_count_unweighted": { + "function": "cube_valid_count", + "args": [ + { + "variable": 
"4d2dbd48338b45e3abb7f0f64975f0c9" + } + ] + }, + "sum": { + "function": "cube_sum", + "args": [ + { + "variable": "4d2dbd48338b45e3abb7f0f64975f0c9" + } + ] + } + }, + "dimensions": [ + { + "function": "dimension", + "args": [ + { + "function": "as_selected", + "args": [ + { + "variable": "0d85b19293704f48bf991358f1fb3a7f" + } + ] + }, + { + "value": "subvariables" + } + ] + }, + { + "function": "as_selected", + "args": [ + { + "variable": "0d85b19293704f48bf991358f1fb3a7f" + } + ] + }, + { + "variable": "6zVdSaG9ijnwgEIxTO6lo1000006" + } + ], + "weight": null + }, + "query_environment": { + "filter": [] + }, + "result": { + "dimensions": [ + { + "derived": true, + "references": { + "subreferences": [ + { + "alias": "X", + "name": "X", + "description": null + }, + { + "alias": "Y", + "name": "Y", + "description": null + }, + { + "alias": "Z", + "name": "Z", + "description": null + } + ], + "uniform_basis": false, + "name": "M", + "alias": "M" + }, + "type": { + "subtype": { + "class": "variable" + }, + "elements": [ + { + "id": 1, + "value": { + "references": { + "alias": "X", + "name": "X", + "description": null + }, + "derived": false, + "id": "6zVdSaG9ijnwgEIxTO6lo1000000" + }, + "missing": false + }, + { + "id": 2, + "value": { + "references": { + "alias": "Y", + "name": "Y", + "description": null + }, + "derived": false, + "id": "6zVdSaG9ijnwgEIxTO6lo1000001" + }, + "missing": false + }, + { + "id": 3, + "value": { + "references": { + "alias": "Z", + "name": "Z", + "description": null + }, + "derived": false, + "id": "6zVdSaG9ijnwgEIxTO6lo1000002" + }, + "missing": false + } + ], + "class": "enum" + } + }, + { + "references": { + "subreferences": [ + { + "alias": "X", + "name": "X", + "description": null + }, + { + "alias": "Y", + "name": "Y", + "description": null + }, + { + "alias": "Z", + "name": "Z", + "description": null + } + ], + "uniform_basis": false, + "name": "M", + "alias": "M" + }, + "derived": true, + "type": { + "ordinal": false, + 
"subvariables": [ + "6zVdSaG9ijnwgEIxTO6lo1000000", + "6zVdSaG9ijnwgEIxTO6lo1000001", + "6zVdSaG9ijnwgEIxTO6lo1000002" + ], + "class": "categorical", + "categories": [ + { + "numeric_value": 1, + "selected": true, + "id": 1, + "name": "Selected", + "missing": false + }, + { + "numeric_value": 0, + "id": 0, + "name": "Other", + "missing": false + }, + { + "numeric_value": null, + "id": -1, + "name": "No Data", + "missing": true + } + ] + } + }, + { + "references": { + "alias": "cat", + "name": "cat", + "description": null + }, + "derived": false, + "type": { + "ordinal": false, + "class": "categorical", + "categories": [ + { + "numeric_value": null, + "id": 1, + "name": "a", + "missing": false + }, + { + "numeric_value": null, + "id": 2, + "name": "b", + "missing": false + }, + { + "numeric_value": null, + "id": -1, + "name": "No Data", + "missing": true + } + ] + } + } + ], + "missing": 1, + "measures": { + "valid_count_unweighted": { + "data": [ + 2, + 0, + 0, + 0, + 1, + 0, + 0, + 1, + 0, + 1, + 0, + 0, + 1, + 1, + 0, + 0, + 1, + 0, + 1, + 0, + 0, + 1, + 1, + 0, + 0, + 1, + 0 + ], + "n_missing": 1, + "metadata": { + "derived": true, + "references": { + "alias": "Movies", + "name": "Movies", + "view": { + "summary_statistic": "sum" + } + }, + "type": { + "integer": false, + "class": "numeric", + "missing_rules": {}, + "missing_reasons": { + "No Data": -1 + } + } + } + }, + "sum": { + "data": [ + 3, + 0, + 0, + 0, + 2, + 0, + 0, + 3, + 0, + 2, + 0, + 0, + 1, + 2, + 0, + 0, + 3, + 0, + 2, + 0, + 0, + 1, + 2, + 0, + 0, + 3, + 0 + ], + "n_missing": 1, + "metadata": { + "derived": true, + "references": { + "alias": "Movies", + "name": "Movies", + "view": { + "summary_statistic": "sum" + } + }, + "type": { + "integer": null, + "class": "numeric", + "missing_rules": {}, + "missing_reasons": { + "No Data": -1 + } + } + } + } + }, + "n": 4, + "filter_stats": { + "filtered_complete": { + "unweighted": { + "selected": 4, + "other": 0, + "missing": 0 + }, + "weighted": { + 
"selected": 4, + "other": 0, + "missing": 0 + } + }, + "filtered": { + "unweighted": { + "selected": 4, + "other": 0, + "missing": 0 + }, + "weighted": { + "selected": 4, + "other": 0, + "missing": 0 + } + } + }, + "unfiltered": { + "unweighted_n": 4, + "weighted_n": 4 + }, + "filtered": { + "unweighted_n": 4, + "weighted_n": 4 + }, + "counts": [ + 2, + 0, + 0, + 0, + 1, + 0, + 0, + 1, + 0, + 1, + 0, + 0, + 1, + 1, + 0, + 0, + 1, + 0, + 1, + 0, + 0, + 1, + 1, + 0, + 0, + 1, + 0 + ], + "element": "crunch:cube" + } +} diff --git a/tests/integration/test_cube.py b/tests/integration/test_cube.py index f2f643f87..3fce0fa8b 100644 --- a/tests/integration/test_cube.py +++ b/tests/integration/test_cube.py @@ -35,7 +35,6 @@ def it_provides_values_for_cat_x_cat(self): assert cube.description == "Pet Owners" assert cube.dimension_types == (DT.CAT, DT.CAT) assert isinstance(cube.dimensions, _ApparentDimensions) - assert cube.has_means is False assert cube.is_weighted is False assert cube.missing == 5 assert cube.name == "v4" @@ -274,7 +273,31 @@ def it_handles_means_cat_hs_x_cat_hs(self): def it_knows_if_it_has_means(self): slice_ = Cube(CR.MEANS_CAT_HS_X_CAT_HS).partitions[0] - assert slice_.has_means + assert slice_.means.any() + + +class DescribeIntegrated_SumMeasure(object): + def it_provides_sum_measure_for_CAT(self): + cube = Cube(CR.CAT_SUM) + partition = cube.partitions[0] + + np.testing.assert_array_equal(partition.sum, [88.0, 77.0]) + np.testing.assert_array_equal(partition.table_base_range, [5, 5]) + + def it_provides_sum_measure_for_MR(self): + cube = Cube(CR.MR_SUM) + partition = cube.partitions[0] + + np.testing.assert_array_almost_equal(partition.sum, [3.0, 2.0, 2.0]) + np.testing.assert_array_almost_equal(partition.table_base_range, [3, 3]) + + def it_provides_sum_and_mean_measure_for_CAT(self): + cube = Cube(CR.NUMERIC_MEASURES_X_CAT) + partition = cube.partitions[0] + + np.testing.assert_array_almost_equal(partition.means, [2.66666667, 3.5]) + 
np.testing.assert_array_almost_equal(partition.sum, [8, 7]) + np.testing.assert_array_almost_equal(partition.counts, [3, 2]) class DescribeIntegrated_UnweightedCountMeasure(object): diff --git a/tests/integration/test_cubepart.py b/tests/integration/test_cubepart.py index dc38f18de..233f7c696 100644 --- a/tests/integration/test_cubepart.py +++ b/tests/integration/test_cubepart.py @@ -74,6 +74,19 @@ def it_provides_values_for_cat_x_cat(self): ] assert slice_.unweighted_counts.tolist() == [[5, 2], [5, 3]] assert slice_.variable_name == "v7" + # A cube without means or sum available in the response throws an exception. + with pytest.raises(ValueError) as e: + slice_.means + assert ( + str(e.value) + == "`.means` is undefined for a cube-result without a means measure" + ) + with pytest.raises(ValueError) as e: + slice_.sum + assert ( + str(e.value) + == "`.sum` is undefined for a cube-result without a sum measure" + ) def it_provides_values_for_cat_hs_mt_x_cat_hs_mt(self): slice_ = Cube(CR.CAT_HS_MT_X_CAT_HS_MT, population=1000).partitions[0] @@ -322,9 +335,9 @@ def it_provides_values_for_mean_cat_x_cat_hs(self): slice_.means, np.array([[24.43935757, 37.32122746, np.nan, 55.48571956, 73.02427659]]), ) - np.testing.assert_array_almost_equal(slice_.rows_margin, np.array([np.nan])) + np.testing.assert_array_almost_equal(slice_.rows_margin, np.array([1500.0])) np.testing.assert_array_almost_equal( - slice_.columns_margin, np.array([np.nan] * len(slice_.counts[0, :])) + slice_.columns_margin, np.array([189, 395, 584, 606, 310]) ) @pytest.mark.skip(reason="Needs change to Cube.counts (and add Cube.means)") @@ -831,7 +844,6 @@ def it_provides_values_for_univariate_cat(self): assert strand.counts.tolist() == [10, 5] assert strand.cube_index == 0 assert strand.dimension_types == (DT.CAT,) - assert strand.has_means is False assert strand.inserted_row_idxs == () assert strand.is_empty is False with pytest.raises(ValueError) as e: @@ -839,6 +851,11 @@ def 
it_provides_values_for_univariate_cat(self): assert str(e.value) == ( "`.means` is undefined for a cube-result without a means measure" ) + with pytest.raises(ValueError) as e: + strand.sum + assert str(e.value) == ( + "`.sum` is undefined for a cube-result without a sum measure" + ) assert strand.min_base_size_mask.tolist() == [False, False] assert strand.name == "v7" assert strand.ndim == 1 @@ -892,10 +909,9 @@ def it_provides_values_for_univariate_cat_means_and_counts(self): assert strand.shape == (4,) assert strand.table_base_range.tolist() == [1628, 1628] # --- means cube that also has counts has a table-margin --- - # TODO: This should be something like [2685.782, 2685.782] because weighted - # cube_counts are present in the payload. It's NaN here because there's - # currently no way to get both means and counts from the same Cube object. - assert strand.table_margin_range == pytest.approx([np.nan, np.nan], nan_ok=True) + assert strand.table_margin_range == pytest.approx( + [16029.22309748, 16029.22309748] + ) @pytest.mark.xfail(reason="NumArr", raises=AssertionError, strict=True) # --- remove this stub test once this is fixed. 
This assertion will live in the test diff --git a/tests/integration/test_matrix.py b/tests/integration/test_matrix.py index 3441f8f1c..b7775acd4 100644 --- a/tests/integration/test_matrix.py +++ b/tests/integration/test_matrix.py @@ -985,7 +985,7 @@ def it_computes_columns_margin_for_cat_hs_x_cat_hs_hiddens_explicit_order(self): def it_computes_means_cat_x_cat_columns_margin(self): slice_ = Cube(CR.MEANS_CAT_HS_X_CAT_HS).partitions[0] np.testing.assert_almost_equal( - slice_.columns_margin, np.array([np.nan, np.nan, np.nan, np.nan, np.nan]) + slice_.columns_margin, np.array([431, 494, 294, 1219, 433]) ) def it_computes_cat_x_mr_columns_margin(self): @@ -1048,7 +1048,7 @@ def it_computes_means_cat_x_cat_rows_margin(self): slice_ = Cube(CR.MEANS_CAT_HS_X_CAT_HS).partitions[0] np.testing.assert_almost_equal( slice_.rows_margin, - np.array([np.nan, np.nan, np.nan, np.nan, np.nan, np.nan, np.nan]), + np.array([55, 126, 613, 710, 310, 400, 148]), ) def it_computes_cat_x_mr_rows_margin(self): @@ -1486,6 +1486,14 @@ def it_computes_the_sort_by_value_row_order_to_help( assert assembler._row_order.tolist() == expected_value + def it_computes_sum_cat_x_mr(self): + slice_ = Cube(CR.SUM_CAT_X_MR).partitions[0] + np.testing.assert_almost_equal(slice_.sum, [[3.0, 2.0, 2.0], [0.0, 0.0, 0.0]]) + + def it_computes_sum_mr_x_cat(self): + slice_ = Cube(CR.SUM_MR_X_CAT).partitions[0] + np.testing.assert_almost_equal(slice_.sum, [[3.0, 0.0], [2.0, 0.0], [2.0, 0.0]]) + class Describe_BaseOrderHelper(object): """Integration-test suite for `cr.cube.matrix._BaseOrderHelper`.""" diff --git a/tests/integration/test_numeric_array.py b/tests/integration/test_numeric_array.py index 5fb0daffc..86aed1831 100644 --- a/tests/integration/test_numeric_array.py +++ b/tests/integration/test_numeric_array.py @@ -10,10 +10,10 @@ from ..fixtures import NA -class TestNumericArrays: +class TestNumericArrays(object): """Test-suite for numeric-array behaviors.""" - def test_num_arr_scale_measures(self): + def 
test_num_arr_means_scale_measures(self): slice_ = Cube(NA.NUM_ARR_MEANS_SCALE_MEASURES).partitions[0] np.testing.assert_array_almost_equal( @@ -28,10 +28,10 @@ def test_num_arr_scale_measures(self): ], ) np.testing.assert_array_almost_equal( - slice_.rows_scale_median, [4.0, 3.0, 3.0, 4.0] + slice_.rows_scale_median, [3.0, 3.0, 3.0, 3.0] ) - def test_num_arr_grouped_by_cat(self): + def test_num_arr_means_grouped_by_cat(self): """Test means on numeric array, grouped by single categorical dimension.""" slice_ = Cube(NA.NUM_ARR_MEANS_GROUPED_BY_CAT).partitions[0] @@ -46,7 +46,7 @@ def test_num_arr_grouped_by_cat(self): ) np.testing.assert_almost_equal(slice_.columns_base, [[3, 2], [3, 1], [1, 1]]) - def test_num_arr_grouped_by_date(self): + def test_num_arr_means_grouped_by_date(self): """Test means on numeric array, grouped by single categorical dimension.""" slice_ = Cube(NA.NUM_ARR_MEANS_GROUPED_BY_DATE).partitions[0] @@ -61,7 +61,7 @@ def test_num_arr_grouped_by_date(self): ) np.testing.assert_almost_equal(slice_.columns_base, [[10, 9], [8, 10], [9, 10]]) - def test_num_arr_grouped_by_cat_weighted(self): + def test_num_arr_means_grouped_by_cat_weighted(self): """Test means on numeric array, grouped by single categorical dimension.""" slice_ = Cube(NA.NUM_ARR_MEANS_GROUPED_BY_CAT_WEIGHTED).partitions[0] @@ -77,7 +77,7 @@ def test_num_arr_grouped_by_cat_weighted(self): ) np.testing.assert_almost_equal(slice_.columns_base, [[3, 2], [3, 1], [1, 1]]) - def test_num_arr_x_mr(self): + def test_num_arr_means_x_mr(self): slice_ = Cube(NA.NUM_ARR_MEANS_X_MR).partitions[0] np.testing.assert_almost_equal( @@ -95,14 +95,27 @@ def test_num_arr_x_mr(self): slice_.columns_base, [[38, 14, 6, 18, 38], [38, 14, 6, 18, 38]] ) - @pytest.mark.xfail(reason="NumArray problems", raises=AssertionError, strict=True) def test_num_arr_means_no_grouping(self): """Test means on no-dimensions measure of numeric array.""" strand = Cube(NA.NUM_ARR_MEANS_NO_GROUPING).partitions[0] assert 
strand.means == pytest.approx([2.5, 25.0]) assert strand.unweighted_counts.tolist() == [6, 6] - # --- unweighted-bases (and therefore table_base_range) fails because num-array - # --- base is computed differently it looks like assert strand.unweighted_bases.tolist() == [6, 6] assert strand.table_base_range.tolist() == [6, 6] + + def test_num_arr_sum_grouped_by_cat(self): + """Test sum on numeric array, grouped by single categorical dimension.""" + slice_ = Cube(NA.NUM_ARR_SUM_GROUPED_BY_CAT).partitions[0] + + np.testing.assert_almost_equal( + slice_.sum, + [ + # --------Gender------------ + # M F + [4.0, 3.0], # S1 (Ticket Sold) + [3.0, 0.0], # S2 (Ticket Sold) + [2.0, 3.0], # S3 (Ticket Sold) + ], + ) + np.testing.assert_almost_equal(slice_.columns_base, [[3, 2], [3, 2], [3, 2]]) From ffde4f0f441714d2e813c76ef680cd540061e917 Mon Sep 17 00:00:00 2001 From: Arbitrio Date: Tue, 2 Mar 2021 13:58:36 +0100 Subject: [PATCH 02/10] fix xfail and skip marked test --- tests/integration/test_cubepart.py | 26 ++++++++++---------------- tests/integration/test_stripe.py | 4 +++- 2 files changed, 13 insertions(+), 17 deletions(-) diff --git a/tests/integration/test_cubepart.py b/tests/integration/test_cubepart.py index 233f7c696..c52eb5583 100644 --- a/tests/integration/test_cubepart.py +++ b/tests/integration/test_cubepart.py @@ -340,16 +340,15 @@ def it_provides_values_for_mean_cat_x_cat_hs(self): slice_.columns_margin, np.array([189, 395, 584, 606, 310]) ) - @pytest.mark.skip(reason="Needs change to Cube.counts (and add Cube.means)") - # --- this fails because `Cube.counts` returns the _means_ measure (and not the - # --- cube-count measure, as it should now). This needs to be fixed, including - # --- resolving whether we should continue to return NaNs or raise an exception (as - # --- would be the general behavior when requesting a measure on a cube that lacks - # --- cube-measure its computation is based on. 
def but_it_has_no_counts_because_there_is_no_cube_count_measure(self): slice_ = Cube(CR.MEANS_CAT_X_CAT_HS).partitions[0] - assert slice_.counts == pytest.approx( - np.array([[np.nan, np.nan, np.nan, np.nan, np.nan]]), nan_ok=True + + # This fixture has both cube_counts and cube_means measure, for this reason + # both measures are available at cubepart level. + assert slice_.counts == pytest.approx(np.array([[189, 395, 584, 606, 310]])) + assert slice_.means == pytest.approx( + np.array([[24.43935757, 37.32122746, np.nan, 55.48571956, 73.02427659]]), + nan_ok=True, ) def it_provides_values_for_mr_x_cat_hs(self): @@ -913,18 +912,13 @@ def it_provides_values_for_univariate_cat_means_and_counts(self): [16029.22309748, 16029.22309748] ) - @pytest.mark.xfail(reason="NumArr", raises=AssertionError, strict=True) - # --- remove this stub test once this is fixed. This assertion will live in the test - # --- immediately above, once fixed there. def it_provides_table_margin_range_for_univariate_cat_means_and_counts(self): """The cube_mean and cube_count measures can appear together.""" strand = Cube(CR.CAT_MEANS_AND_COUNTS).partitions[0] - # --- means cube that also has counts has a table-margin-range --- - # TODO: This should be something like [2685.782, 2685.782] because weighted - # cube_counts are present in the payload. It's NaN here because there's - # currently no way to get both means and counts from the same Cube object. - assert strand.table_margin_range == pytest.approx([2685.782, 2685.782]) + # for a cube with numeric measure like mean, table margin are calculated on the + # counts and not on the means. 
+ assert strand.table_margin_range == pytest.approx([16029.223097, 16029.223097]) def it_provides_values_for_univariate_datetime(self): strand = Cube(CR.DATE, population=9001).partitions[0] diff --git a/tests/integration/test_stripe.py b/tests/integration/test_stripe.py index 7b5bd16ca..bde7c1e32 100644 --- a/tests/integration/test_stripe.py +++ b/tests/integration/test_stripe.py @@ -62,7 +62,9 @@ def it_provides_values_for_univariate_cat_means(self): assert assembler.scale_stddev is None assert assembler.scale_stderr is None assert assembler.table_base_range == pytest.approx([661, 661]) - assert assembler.table_margin_range == pytest.approx([86.4992, 86.4992]) + # for a cube with numeric measure like mean, table margin and table base are the + # same because they are both calculated on the counts. + assert assembler.table_margin_range == pytest.approx([661, 661]) def it_provides_values_for_univariate_mr(self): cube = Cube(CR.MR_WGTD) From 4aa3300ef47fae4eabfa2f7291ec9bd66e2948d9 Mon Sep 17 00:00:00 2001 From: Arbitrio Date: Tue, 2 Mar 2021 15:37:29 +0100 Subject: [PATCH 03/10] refactor cubemeasure and its related tests --- src/cr/cube/matrix/cubemeasure.py | 16 +++ tests/unit/matrix/test_cubemeasure.py | 194 ++++++++------------------ 2 files changed, 71 insertions(+), 139 deletions(-) diff --git a/src/cr/cube/matrix/cubemeasure.py b/src/cr/cube/matrix/cubemeasure.py index 8dd8fe7d6..e936387e1 100644 --- a/src/cr/cube/matrix/cubemeasure.py +++ b/src/cr/cube/matrix/cubemeasure.py @@ -1237,6 +1237,10 @@ def columns_pruning_base(self): @lazyproperty def means(self): """2D np.float64 ndarray of mean for each valid matrix cell.""" + if self._means is None: + raise ValueError( + "`.means` is undefined for a cube-result without a means measure" + ) return self._means[:, :, 0] @lazyproperty @@ -1273,6 +1277,10 @@ def rows_pruning_base(self): @lazyproperty def sum(self): """2D np.float64 ndarray of mean for each valid matrix cell.""" + if self._sum is None: + raise 
ValueError( + "`.sum` is undefined for a cube-result without a sum measure" + ) return self._sum[:, :, 0] @lazyproperty @@ -1419,6 +1427,10 @@ def columns_pruning_base(self): @lazyproperty def means(self): """2D np.float64 ndarray of mean for each valid matrix cell.""" + if self._means is None: + raise ValueError( + "`.means` is undefined for a cube-result without a means measure" + ) return self._means[:, 0, :] @lazyproperty @@ -1450,6 +1462,10 @@ def rows_pruning_base(self): @lazyproperty def sum(self): """2D np.float64 ndarray of sum for each valid matrix cell.""" + if self._sum is None: + raise ValueError( + "`.sum` is undefined for a cube-result without a sum measure" + ) return self._sum[:, 0, :] @lazyproperty diff --git a/tests/unit/matrix/test_cubemeasure.py b/tests/unit/matrix/test_cubemeasure.py index 6bca86cd9..469bbcc73 100644 --- a/tests/unit/matrix/test_cubemeasure.py +++ b/tests/unit/matrix/test_cubemeasure.py @@ -14,16 +14,13 @@ _BaseUnweightedCubeCounts, _BaseWeightedCubeCounts, _CatXCatMatrix, - _CatXCatMeansMatrix, _CatXCatUnweightedCubeCounts, _CatXCatWeightedCubeCounts, _CatXMrMatrix, - _CatXMrMeansMatrix, _CatXMrUnweightedCubeCounts, _CatXMrWeightedCubeCounts, CubeMeasures, _MrXCatMatrix, - _MrXCatMeansMatrix, _MrXCatUnweightedCubeCounts, _MrXCatWeightedCubeCounts, _MrXMrMatrix, @@ -926,29 +923,6 @@ def raw_weighted_counts(self, request): class DescribeBaseCubeResultMatrix(object): """Unit test suite for `cr.cube.matrix.cubemeasure.BaseCubeResultMatrix` object.""" - @pytest.mark.parametrize( - "has_means, factory_method_name", - ((True, "_means_matrix_factory"), (False, "_regular_matrix_factory")), - ) - def it_calls_the_correct_factory_method_for_appropriate_matrix_type( - self, request, cube_, dimension_, has_means, factory_method_name - ): - cube_.has_means = has_means - cube_result_matrix_ = instance_mock(request, BaseCubeResultMatrix) - factory_method = method_mock( - request, - BaseCubeResultMatrix, - factory_method_name, - 
return_value=cube_result_matrix_, - ) - - cube_result_matrix = BaseCubeResultMatrix.factory( - cube_, (dimension_, dimension_), slice_idx=71 - ) - - factory_method.assert_called_once_with(cube_, (dimension_, dimension_), 71) - assert cube_result_matrix is cube_result_matrix_ - def it_knows_its_column_proportions(self, request): property_mock( request, @@ -1005,70 +979,6 @@ def but_it_produces_zero_valued_zscores_for_a_deficient_matrix(self): assert residuals.tolist() == [[0, 0], [0, 0]] - @pytest.mark.parametrize( - "dimension_types, matrix_class_name", - ( - ((DT.MR, DT.CAT), "_MrXCatMeansMatrix"), - ((DT.CAT, DT.MR), "_CatXMrMeansMatrix"), - ((DT.CAT, DT.CAT), "_CatXCatMeansMatrix"), - ), - ) - def it_can_construct_a_means_matrix_for_a_2D_slice_to_help( - self, request, cube_, dimension_types, dimension_, matrix_class_name - ): - cube_.dimension_types = dimension_types - cube_.ndim = 2 - cube_.counts = [1, 2, 3, 4] - cube_.unweighted_counts = [5, 6, 7, 8] - MatrixCls_ = class_mock( - request, "cr.cube.matrix.cubemeasure.%s" % matrix_class_name - ) - - matrix = BaseCubeResultMatrix._means_matrix_factory( - cube_, (dimension_, dimension_), None - ) - - MatrixCls_.assert_called_once_with( - (dimension_, dimension_), [1, 2, 3, 4], [5, 6, 7, 8] - ) - assert matrix is MatrixCls_.return_value - - @pytest.mark.parametrize( - "dimension_types, matrix_class_name", - ( - ((None, DT.MR, DT.CAT), "_MrXCatMeansMatrix"), - ((None, DT.CAT, DT.MR), "_CatXMrMeansMatrix"), - ((None, DT.CAT, DT.CAT), "_CatXCatMeansMatrix"), - ), - ) - def and_it_can_construct_a_means_matrix_for_a_3D_slice_to_help( - self, request, cube_, dimension_types, dimension_, matrix_class_name - ): - cube_.dimension_types = dimension_types - cube_.ndim = 3 - cube_.counts = [None, [1, 2, 3, 4], None] - cube_.unweighted_counts = [None, [5, 6, 7, 8], None] - MatrixCls_ = class_mock( - request, "cr.cube.matrix.cubemeasure.%s" % matrix_class_name - ) - - matrix = BaseCubeResultMatrix._means_matrix_factory( - 
cube_, (dimension_, dimension_), slice_idx=1 - ) - - MatrixCls_.assert_called_once_with( - (dimension_, dimension_), [1, 2, 3, 4], [5, 6, 7, 8] - ) - assert matrix is MatrixCls_.return_value - - def but_it_raises_on_MEANS_MR_X_MR(self, cube_): - cube_.dimension_types = (DT.MR, DT.MR) - - with pytest.raises(NotImplementedError) as e: - BaseCubeResultMatrix._means_matrix_factory(cube_, None, None) - - assert str(e.value) == "MR x MR with means is not implemented" - @pytest.mark.parametrize( "dimension_types, expected_value", ( @@ -1110,7 +1020,7 @@ def it_knows_its_regular_matrix_counts_slice_to_help( cube_.dimension_types = dim_types cube_.ndim = len(dim_types) - s = BaseCubeResultMatrix._regular_matrix_counts_slice(cube_, slice_idx) + s = BaseCubeResultMatrix._regular_matrix_slice(cube_, slice_idx) assert s == expected @@ -1123,7 +1033,7 @@ def it_knows_its_regular_matrix_counts_slice_to_help( "_MrXMrMatrix", ), ) - def it_can_construct_a_regular_matrix_to_help( + def it_can_construct_a_matrix_to_help( self, request, cube_, dimension_, matrix_class_name ): cube_.dimension_types = (DT.CAT, DT.MR, DT.CAT) @@ -1143,7 +1053,7 @@ def it_can_construct_a_regular_matrix_to_help( return_value=([[1], [2]], [[3], [4]]), ) - matrix = BaseCubeResultMatrix._regular_matrix_factory( + matrix = BaseCubeResultMatrix.factory( cube_, (dimension_, dimension_), slice_idx=17 ) @@ -1172,14 +1082,14 @@ def it_knows_its_sliced_counts_to_help( _regular_matrix_counts_slice = method_mock( request, BaseCubeResultMatrix, - "_regular_matrix_counts_slice", + "_regular_matrix_slice", return_value=counts_slice, ) sliced_counts = BaseCubeResultMatrix._sliced_counts(cube_, slice_idx=23) _regular_matrix_counts_slice.assert_called_once_with(cube_, 23) - counts, unweighted, with_missing = sliced_counts + counts, unweighted, with_missing, means, sum = sliced_counts assert counts.tolist() == expected assert unweighted.tolist() == expected @@ -1372,6 +1282,22 @@ def 
it_knows_its_table_proportion_variances_to_help(self, request): np.array([[0.0, 0.0622222, 0.1155556], [0.16, 0.1955556, 0.2222222]]), ) + def it_knows_its_means(self): + cube_means = np.array([[2, 3, 1], [5, 6, 4]]) + matrix = _CatXCatMatrix(None, None, None, means=cube_means) + + assert matrix.means.tolist() == [[2, 3, 1], [5, 6, 4]] + + def but_it_raises_value_error_when_the_cube_result_does_not_contain_means(self): + matrix = _CatXCatMatrix(None, None, None, means=None) + with pytest.raises(ValueError) as e: + matrix.means + + assert ( + str(e.value) + == "`.means` is undefined for a cube-result without a means measure" + ) + class Describe_CatXMrMatrix(object): """Unit test suite for `cr.cube.matrix._CatXMrMatrix` object.""" @@ -1531,6 +1457,23 @@ def it_knows_its_table_proportion_variances_to_help(self, request): np.array([[0.0, 0.0826446, 0.1155556], [0.244898, 0.231405, 0.2222222]]), ) + def it_knows_its_means(self): + means = np.array([[[1, 6], [2, 5], [3, 4]], [[5, 3], [6, 2], [7, 1]]]) + np.testing.assert_equal( + _CatXMrMatrix(None, None, None, means=means).means, + np.array([[1, 2, 3], [5, 6, 7]]), + ) + + def but_it_raises_value_error_when_the_cube_result_does_not_contain_means(self): + matrix = _CatXMrMatrix(None, None, None, means=None) + with pytest.raises(ValueError) as e: + matrix.means + + assert ( + str(e.value) + == "`.means` is undefined for a cube-result without a means measure" + ) + class Describe_MrXCatMatrix(object): """Unit test suite for `cr.cube.matrix._MrXCatMatrix` object.""" @@ -1744,6 +1687,23 @@ def it_knows_its_table_proportion_variances_to_help(self, request): np.array([[0.0, 0.0622222, 0.1155556], [0.1038062, 0.118416, 0.1322568]]), ) + def it_knows_its_means(self): + means = np.arange(24).reshape(3, 2, 4) + np.testing.assert_equal( + _MrXCatMatrix(None, None, None, means=means).means, + np.array([[0, 1, 2, 3], [8, 9, 10, 11], [16, 17, 18, 19]]), + ) + + def 
but_it_raises_value_error_when_the_cube_result_does_not_contain_means(self): + matrix = _MrXCatMatrix(None, None, None, means=None) + with pytest.raises(ValueError) as e: + matrix.means + + assert ( + str(e.value) + == "`.means` is undefined for a cube-result without a means measure" + ) + class Describe_MrXMrMatrix(object): """Unit test suite for `cr.cube.matrix._MrXMrMatrix` object.""" @@ -2023,47 +1983,3 @@ def it_knows_its_table_proportion_variances_to_help(self, request): _MrXMrMatrix(None, weighted_counts, None)._table_proportion_variances, np.array([[0.0, 0.0826446, 0.1155556], [0.1560874, 0.16, 0.1630506]]), ) - - -# === LEGACY MEANS MATRIX OBJECTS === - - -class Describe_CatXCatMeansMatrix(object): - """Unit test suite for `cr.cube.matrix._CatXCatMeansMatrix` object.""" - - def it_knows_its_means(self): - cube_means = np.array([[2, 3, 1], [5, 6, 4]]) - matrix = _CatXCatMeansMatrix(None, cube_means, None) - - assert matrix.means.tolist() == [[2, 3, 1], [5, 6, 4]] - - def it_knows_its_weighted_counts(self): - cube_means = np.array([[3, 2, 1], [6, 5, 4]]) - matrix = _CatXCatMeansMatrix(None, cube_means, None) - - np.testing.assert_equal( - matrix.weighted_counts, - [[np.nan, np.nan, np.nan], [np.nan, np.nan, np.nan]], - ) - - -class Describe_CatXMrMeansMatrix(object): - """Unit test suite for `cr.cube.matrix._CatXMrMeansMatrix` object.""" - - def it_knows_its_means(self): - means = np.array([[[1, 6], [2, 5], [3, 4]], [[5, 3], [6, 2], [7, 1]]]) - np.testing.assert_equal( - _CatXMrMeansMatrix(None, means, None).means, - np.array([[1, 2, 3], [5, 6, 7]]), - ) - - -class Describe_MrXCatMeansMatrix(object): - """Unit test suite for `cr.cube.matrix._MrXCatMeansMatrix` object.""" - - def it_knows_its_means(self): - means = np.arange(24).reshape(3, 2, 4) - np.testing.assert_equal( - _MrXCatMeansMatrix(None, means, None).means, - np.array([[0, 1, 2, 3], [8, 9, 10, 11], [16, 17, 18, 19]]), - ) From b9d69032604bb82388f2187fbebef906fefa25d2 Mon Sep 17 00:00:00 2001 From: 
Arbitrio Date: Tue, 2 Mar 2021 15:49:03 +0100 Subject: [PATCH 04/10] test: unit test for cubemeasure sum --- tests/unit/matrix/test_cubemeasure.py | 66 +++++++++++++++++++++++---- 1 file changed, 58 insertions(+), 8 deletions(-) diff --git a/tests/unit/matrix/test_cubemeasure.py b/tests/unit/matrix/test_cubemeasure.py index 469bbcc73..0462e490e 100644 --- a/tests/unit/matrix/test_cubemeasure.py +++ b/tests/unit/matrix/test_cubemeasure.py @@ -1298,6 +1298,22 @@ def but_it_raises_value_error_when_the_cube_result_does_not_contain_means(self): == "`.means` is undefined for a cube-result without a means measure" ) + def it_knows_its_sum(self): + cube_sum = np.array([[4, 3, 1], [5, 9, 4]]) + matrix = _CatXCatMatrix(None, None, None, sum=cube_sum) + + assert matrix.sum.tolist() == [[4, 3, 1], [5, 9, 4]] + + def but_it_raises_value_error_when_the_cube_result_does_not_contain_sum(self): + matrix = _CatXCatMatrix(None, None, None, sum=None) + with pytest.raises(ValueError) as e: + matrix.sum + + assert ( + str(e.value) + == "`.sum` is undefined for a cube-result without a sum measure" + ) + class Describe_CatXMrMatrix(object): """Unit test suite for `cr.cube.matrix._CatXMrMatrix` object.""" @@ -1424,7 +1440,7 @@ def it_knows_its_zscores(self, request): "_array_type_std_res", return_value=np.array([[1, 2], [3, 4]]), ) - weighted_cube_counts = np.arange(24).reshape(3, 4, 2) + weighted_cube_counts = np.arange(24).reshape((3, 4, 2)) matrix = _CatXMrMatrix(None, weighted_cube_counts, None) zscores = matrix.zscores @@ -1451,7 +1467,7 @@ def it_knows_its_baseline_to_help(self, request): ) def it_knows_its_table_proportion_variances_to_help(self, request): - weighted_cube_counts = np.arange(12).reshape(2, 3, 2) + weighted_cube_counts = np.arange(12).reshape((2, 3, 2)) np.testing.assert_almost_equal( _CatXMrMatrix(None, weighted_cube_counts, None)._table_proportion_variances, np.array([[0.0, 0.0826446, 0.1155556], [0.244898, 0.231405, 0.2222222]]), @@ -1474,6 +1490,23 @@ def 
but_it_raises_value_error_when_the_cube_result_does_not_contain_means(self): == "`.means` is undefined for a cube-result without a means measure" ) + def it_knows_its_sum(self): + sum = np.array([[[4, 6], [2, 5], [1, 4]], [[5, 3], [9, 2], [7, 1]]]) + np.testing.assert_equal( + _CatXMrMatrix(None, None, None, sum=sum).sum, + np.array([[4, 2, 1], [5, 9, 7]]), + ) + + def but_it_raises_value_error_when_the_cube_result_does_not_contain_sum(self): + matrix = _CatXMrMatrix(None, None, None, sum=None) + with pytest.raises(ValueError) as e: + matrix.sum + + assert ( + str(e.value) + == "`.sum` is undefined for a cube-result without a sum measure" + ) + class Describe_MrXCatMatrix(object): """Unit test suite for `cr.cube.matrix._MrXCatMatrix` object.""" @@ -1650,7 +1683,7 @@ def it_knows_its_zscores(self, request): _array_type_std_res_ = method_mock( request, _MrXCatMatrix, "_array_type_std_res", return_value=[[1, 2], [3, 4]] ) - weighted_counts = np.arange(24).reshape(3, 2, 4) + weighted_counts = np.arange(24).reshape((3, 2, 4)) matrix = _MrXCatMatrix(None, weighted_counts, None) zscores = matrix.zscores @@ -1681,14 +1714,14 @@ def it_knows_its_baseline_to_help(self, request): ) def it_knows_its_table_proportion_variances_to_help(self, request): - weighted_counts = np.arange(12).reshape(2, 2, 3) + weighted_counts = np.arange(12).reshape((2, 2, 3)) np.testing.assert_almost_equal( _MrXCatMatrix(None, weighted_counts, None)._table_proportion_variances, np.array([[0.0, 0.0622222, 0.1155556], [0.1038062, 0.118416, 0.1322568]]), ) def it_knows_its_means(self): - means = np.arange(24).reshape(3, 2, 4) + means = np.arange(24).reshape((3, 2, 4)) np.testing.assert_equal( _MrXCatMatrix(None, None, None, means=means).means, np.array([[0, 1, 2, 3], [8, 9, 10, 11], [16, 17, 18, 19]]), @@ -1704,6 +1737,23 @@ def but_it_raises_value_error_when_the_cube_result_does_not_contain_means(self): == "`.means` is undefined for a cube-result without a means measure" ) + def it_knows_its_sum(self): 
+ sum = np.arange(24).reshape((3, 2, 4)) + np.testing.assert_equal( + _MrXCatMatrix(None, None, None, sum=sum).sum, + np.array([[0, 1, 2, 3], [8, 9, 10, 11], [16, 17, 18, 19]]), + ) + + def but_it_raises_value_error_when_the_cube_result_does_not_contain_sum(self): + matrix = _MrXCatMatrix(None, None, None, sum=None) + with pytest.raises(ValueError) as e: + matrix.sum + + assert ( + str(e.value) + == "`.sum` is undefined for a cube-result without a sum measure" + ) + class Describe_MrXMrMatrix(object): """Unit test suite for `cr.cube.matrix._MrXMrMatrix` object.""" @@ -1940,7 +1990,7 @@ def it_knows_its_zscores(self, request): _array_type_std_res_ = method_mock( request, _MrXMrMatrix, "_array_type_std_res", return_value=[[1, 2], [3, 4]] ) - weighted_counts = np.arange(48).reshape(3, 2, 4, 2) + weighted_counts = np.arange(48).reshape((3, 2, 4, 2)) matrix = _MrXMrMatrix(None, weighted_counts, None) zscores = matrix.zscores @@ -1977,8 +2027,8 @@ def it_knows_its_baseline_to_help(self, request): np.array([[0.5, 0.5], [0.5, 0.5]]), ) - def it_knows_its_table_proportion_variances_to_help(self, request): - weighted_counts = np.arange(24).reshape(2, 2, 3, 2) + def it_knows_its_table_proportion_variances_to_help(self): + weighted_counts = np.arange(24).reshape((2, 2, 3, 2)) np.testing.assert_almost_equal( _MrXMrMatrix(None, weighted_counts, None)._table_proportion_variances, np.array([[0.0, 0.0826446, 0.1155556], [0.1560874, 0.16, 0.1630506]]), From 5382f4980d713c877485f60a3c8a2c55fe1e30bb Mon Sep 17 00:00:00 2001 From: Arbitrio Date: Tue, 2 Mar 2021 15:58:29 +0100 Subject: [PATCH 05/10] test: mastrix assembler unit tests --- tests/unit/matrix/test_assembler.py | 31 +++++++++++++++++++++-------- 1 file changed, 23 insertions(+), 8 deletions(-) diff --git a/tests/unit/matrix/test_assembler.py b/tests/unit/matrix/test_assembler.py index bf57ce32e..c9e5f0a43 100644 --- a/tests/unit/matrix/test_assembler.py +++ b/tests/unit/matrix/test_assembler.py @@ -18,7 +18,6 @@ from 
cr.cube.matrix.cubemeasure import ( BaseCubeResultMatrix, _CatXCatMatrix, - _CatXCatMeansMatrix, ) from cr.cube.matrix.measure import ( _BaseSecondOrderMeasure, @@ -258,9 +257,8 @@ def it_knows_the_means( NanSubtotals_, _assemble_matrix_, ): - cube_.has_means = True cube_result_matrix_ = instance_mock( - request, _CatXCatMeansMatrix, means=[[1, 2], [3, 4]] + request, _CatXCatMatrix, means=[[1, 2], [3, 4]] ) _cube_result_matrix_prop_.return_value = cube_result_matrix_ NanSubtotals_.blocks.return_value = [[[3], [2]], [[4], [1]]] @@ -273,11 +271,28 @@ def it_knows_the_means( _assemble_matrix_.assert_called_once_with(assembler, [[[3], [2]], [[4], [1]]]) assert means == [[1, 2, 3], [4, 5, 6]] - def but_it_raises_when_the_cube_result_does_not_contain_means_measure(self, cube_): - cube_.has_means = False - with pytest.raises(ValueError) as e: - Assembler(cube_, None, None).means - assert str(e.value) == "cube-result does not include a means cube-measure" + def it_knows_the_sum( + self, + request, + cube_, + _cube_result_matrix_prop_, + dimensions_, + NanSubtotals_, + _assemble_matrix_, + ): + cube_result_matrix_ = instance_mock( + request, _CatXCatMatrix, sum=[[1, 2], [3, 4]] + ) + _cube_result_matrix_prop_.return_value = cube_result_matrix_ + NanSubtotals_.blocks.return_value = [[[3], [2]], [[4], [1]]] + _assemble_matrix_.return_value = [[1, 2, 3], [4, 5, 6]] + assembler = Assembler(cube_, dimensions_, None) + + sum = assembler.sum + + NanSubtotals_.blocks.assert_called_once_with([[1, 2], [3, 4]], dimensions_) + _assemble_matrix_.assert_called_once_with(assembler, [[[3], [2]], [[4], [1]]]) + assert sum == [[1, 2, 3], [4, 5, 6]] def it_knows_the_pvalues(self, request): property_mock( From 1bbf15d5bef3564ce486d504282eeca2c057c938 Mon Sep 17 00:00:00 2001 From: Arbitrio Date: Tue, 2 Mar 2021 16:21:44 +0100 Subject: [PATCH 06/10] tests: refactor unit test_cube --- tests/unit/test_cube.py | 132 ++++++++++++++++++++++------------------ 1 file changed, 73 insertions(+), 59 
deletions(-) diff --git a/tests/unit/test_cube.py b/tests/unit/test_cube.py index 739f84f4d..5d30a0d2f 100644 --- a/tests/unit/test_cube.py +++ b/tests/unit/test_cube.py @@ -5,7 +5,14 @@ import pytest import numpy as np -from cr.cube.cube import Cube, CubeSet, _BaseMeasure, _Measures, _MeanMeasure +from cr.cube.cube import ( + Cube, + CubeSet, + _BaseMeasure, + _Measures, + _MeanMeasure, + _SumMeasure, +) from cr.cube.cubepart import _Slice, _Strand, _Nub from cr.cube.dimension import AllDimensions, Dimension from cr.cube.enums import DIMENSION_TYPE as DT @@ -57,20 +64,6 @@ def it_knows_its_description(self, _cubes_prop_, cube_): assert description == "Are you male or female?" - @pytest.mark.parametrize( - ("first_cube_has_means", "expected_value"), ((True, True), (False, False)) - ) - def it_knows_whether_it_has_means( - self, first_cube_has_means, expected_value, _cubes_prop_, cube_ - ): - cube_.has_means = first_cube_has_means - _cubes_prop_.return_value = (cube_,) - cube_set = CubeSet(None, None, None, None) - - has_means = cube_set.has_means - - assert has_means == expected_value - @pytest.mark.parametrize( ("first_cube_has_w_counts", "expected_value"), ((True, True), (False, False)) ) @@ -158,11 +151,11 @@ def it_has_proper_population_fraction( np.testing.assert_almost_equal(cubeset_population_fraction, expected_value) def it_constructs_its_sequence_of_cube_objects_to_help( - self, request, Cube_, _is_numeric_mean_prop_ + self, request, Cube_, _is_numeric_measure_prop_ ): cubes_ = tuple(instance_mock(request, Cube) for _ in range(4)) Cube_.side_effect = iter(cubes_) - _is_numeric_mean_prop_.return_value = False + _is_numeric_measure_prop_.return_value = False cube_set = CubeSet( cube_responses=[{"cube": "resp-1"}, {"cube": "resp-2"}, {"cube": "resp-3"}], transforms=[{"xfrms": 1}, {"xfrms": 2}, {"xfrms": 3}], @@ -198,12 +191,12 @@ def it_constructs_its_sequence_of_cube_objects_to_help( assert cubes == cubes_[:3] def 
but_it_inflates_the_cubes_in_special_case_of_numeric_mean_payload( - self, request, Cube_, cube_, _is_numeric_mean_prop_ + self, request, Cube_, cube_, _is_numeric_measure_prop_ ): cubes_ = tuple(instance_mock(request, Cube) for _ in range(4)) cube_.inflate.side_effect = iter(cubes_) Cube_.return_value = cube_ - _is_numeric_mean_prop_.return_value = True + _is_numeric_measure_prop_.return_value = True cube_set = CubeSet( cube_responses=[{"cube": "resp-1"}, {"cube": "resp-2"}, {"cube": "resp-3"}], transforms=[{"xfrms": 1}, {"xfrms": 2}, {"xfrms": 3}], @@ -243,7 +236,7 @@ def but_it_inflates_the_cubes_in_special_case_of_numeric_mean_payload( ("is_multi_cube", "cube_0_ndim", "expected_value"), ((False, 1, False), (False, 0, False), (True, 1, False), (True, 0, True)), ) - def it_knows_whether_it_is_numeric_mean_to_help( + def it_knows_whether_it_is_numeric_measure_to_help( self, _is_multi_cube_prop_, is_multi_cube, @@ -257,14 +250,13 @@ def it_knows_whether_it_is_numeric_mean_to_help( Cube_.return_value = cube_ cube_set = CubeSet(({"cube": 0}, {"cube": 1}), None, None, None) - is_numeric_mean = cube_set._is_numeric_mean + is_numeric_mean = cube_set._is_numeric_measure assert Cube_.call_args_list == ([call({"cube": 0})] if is_multi_cube else []) assert is_numeric_mean == expected_value def it_knows_its_valid_counts_summary_to_help(self, _cubes_prop_, cube_): cube_.valid_counts_summary = np.array([1, 2, 3]) - cube_.has_means = True _cubes_prop_.return_value = (cube_,) cube_set = CubeSet(None, None, None, None) @@ -300,8 +292,8 @@ def _is_multi_cube_prop_(self, request): return property_mock(request, CubeSet, "_is_multi_cube") @pytest.fixture - def _is_numeric_mean_prop_(self, request): - return property_mock(request, CubeSet, "_is_numeric_mean") + def _is_numeric_measure_prop_(self, request): + return property_mock(request, CubeSet, "_is_numeric_measure") class DescribeCube(object): @@ -319,7 +311,18 @@ def it_provides_the_default_repr_when_enhanced_repr_fails( def 
it_can_inflate_itself(self, request): cube = Cube( - {"result": {"dimensions": [{"other": "dim"}]}}, + { + "result": { + "dimensions": [{"other": "dim"}], + "measures": { + "mean": { + "metadata": { + "references": {"alias": "mean", "name": "Mean"} + } + } + }, + } + }, cube_idx=1, transforms={"trans": "forms"}, population=1000, @@ -337,14 +340,21 @@ def it_can_inflate_itself(self, request): "result": { "dimensions": [ { - "references": {"alias": "mean", "name": "mean"}, + "references": {"alias": "mean", "name": "Mean"}, "type": { - "class": "categorical", "categories": [{"id": 1, "name": "Mean"}], + "class": "categorical", }, }, {"other": "dim"}, - ] + ], + "measures": { + "mean": { + "metadata": { + "references": {"alias": "mean", "name": "Mean"} + } + } + }, }, }, 1, @@ -455,7 +465,7 @@ def but_it_raises_on_other_cube_response_types( assert str(e.value) == expected_value @pytest.mark.parametrize( - "mean_subvariables, mean_references, expected_value", + "numeric_subvariables, num_measure_references, expected_value", ( ( ["001", "002"], @@ -486,22 +496,24 @@ def but_it_raises_on_other_cube_response_types( ) def it_knows_its_num_array_dimensions( self, - _mean_references_prop_, - _mean_subvariables_prop_, - mean_subvariables, - mean_references, + _numeric_references_prop_, + _numeric_subvariables_prop_, + numeric_subvariables, + num_measure_references, expected_value, ): - _mean_references_prop_.return_value = mean_references - _mean_subvariables_prop_.return_value = mean_subvariables + _numeric_references_prop_.return_value = num_measure_references + _numeric_subvariables_prop_.return_value = numeric_subvariables cube = Cube(None) _num_array_dimensions = cube._numeric_array_dimension assert _num_array_dimensions["type"]["elements"] == expected_value - def but_it_returns_None_when_mean_subvars_is_empty(self, _mean_subvariables_prop_): - _mean_subvariables_prop_.return_value = [] + def but_it_returns_None_when_numeric_subvars_is_empty( + self, 
_numeric_subvariables_prop_ + ): + _numeric_subvariables_prop_.return_value = [] cube = Cube(None) _num_array_dimensions = cube._numeric_array_dimension @@ -525,15 +537,15 @@ def but_it_returns_None_when_mean_subvars_is_empty(self, _mean_subvariables_prop ), ), ) - def it_knows_its_mean_subvariables( + def it_knows_its_numeric_subvariables( self, _cube_response_prop_, cube_response, expected_value ): _cube_response_prop_.return_value = cube_response cube = Cube(None) - mean_subvariables = cube._mean_subvariables + numeric_measure_subvariables = cube._numeric_measure_subvariables - assert mean_subvariables == expected_value + assert numeric_measure_subvariables == expected_value @pytest.mark.parametrize( "cube_response, expected_value", @@ -566,18 +578,18 @@ def it_knows_its_mean_subvariables( ), ), ) - def it_knows_its_mean_references( + def it_knows_its_numeric_references( self, _cube_response_prop_, cube_response, expected_value ): _cube_response_prop_.return_value = cube_response cube = Cube(None) - mean_subreferences = cube._mean_references + numeric_references = cube._numeric_measure_references - assert mean_subreferences == expected_value + assert numeric_references == expected_value @pytest.mark.parametrize( - "cube_response, cube_idx_arg, mean_subvars, num_array_dim, expected_value", + "cube_response, cube_idx_arg, numeric_subvars, num_array_dim, expected_value", ( ({}, None, [], {}, {}), ({"result": {"foo": "bar"}}, None, [], {}, {"result": {"foo": "bar"}}), @@ -608,15 +620,15 @@ def it_knows_its_cube_dict( self, cube_response, cube_idx_arg, - mean_subvars, + numeric_subvars, num_array_dim, expected_value, _cube_response_prop_, - _mean_subvariables_prop_, + _numeric_subvariables_prop_, _numeric_array_dimension_prop_, ): _cube_response_prop_.return_value = cube_response - _mean_subvariables_prop_.return_value = mean_subvars + _numeric_subvariables_prop_.return_value = numeric_subvars _numeric_array_dimension_prop_.return_value = num_array_dim cube = 
Cube(None, cube_idx=cube_idx_arg) @@ -633,12 +645,12 @@ def _cube_response_prop_(self, request): return property_mock(request, Cube, "_cube_response") @pytest.fixture - def _mean_references_prop_(self, request): - return property_mock(request, Cube, "_mean_references") + def _numeric_references_prop_(self, request): + return property_mock(request, Cube, "_numeric_measure_references") @pytest.fixture - def _mean_subvariables_prop_(self, request): - return property_mock(request, Cube, "_mean_subvariables") + def _numeric_subvariables_prop_(self, request): + return property_mock(request, Cube, "_numeric_measure_subvariables") @pytest.fixture def _numeric_array_dimension_prop_(self, request): @@ -705,26 +717,28 @@ def it_knows_if_require_array_transposition( is expected_value ) - -class Describe_MeanMeasure(object): @pytest.mark.parametrize( - "valid_counts_u, cube_idx_arg, expected_value", + "NumericMeasureCls, valid_counts_u, cube_idx_arg, expected_value", ( - ([], None, []), - ([[3, 2, 1], [2, 2, 0]], None, [[3, 2, 1], [2, 2, 0]]), - ([[3, 2, 1], [2, 2, 0]], 1, [[3, 2, 1], [2, 2, 0]]), + (_MeanMeasure, [], None, []), + (_MeanMeasure, [[3, 2, 1], [2, 2, 0]], None, [[3, 2, 1], [2, 2, 0]]), + (_MeanMeasure, [[3, 2, 1], [2, 2, 0]], 1, [[3, 2, 1], [2, 2, 0]]), + (_SumMeasure, [], None, []), + (_SumMeasure, [[3, 2, 1], [2, 2, 0]], None, [[3, 2, 1], [2, 2, 0]]), + (_SumMeasure, [[3, 2, 1], [2, 2, 0]], 1, [[3, 2, 1], [2, 2, 0]]), ), ) def it_knows_its_valid_counts_to_help( self, request, + NumericMeasureCls, valid_counts_u, cube_idx_arg, expected_value, ): _all_dimensions_ = instance_mock(request, AllDimensions) _all_dimensions_.shape = (2, 3) - _mean_measure = _MeanMeasure( + _numeric_measure = NumericMeasureCls( { "result": { "measures": {"valid_count_unweighted": {"data": valid_counts_u}} @@ -734,4 +748,4 @@ def it_knows_its_valid_counts_to_help( cube_idx_arg, ) - np.testing.assert_array_equal(_mean_measure.valid_counts, expected_value) + 
np.testing.assert_array_equal(_numeric_measure.valid_counts, expected_value) From 9e61f74efbf6e199f8b8f8fd5e4f9755b8288c88 Mon Sep 17 00:00:00 2001 From: Arbitrio Date: Tue, 2 Mar 2021 17:10:37 +0100 Subject: [PATCH 07/10] refactoring cubemeasure and uni tests --- src/cr/cube/cube.py | 1 - src/cr/cube/stripe/cubemeasure.py | 36 ++++----- tests/unit/stripe/test_cubemeasure.py | 107 +++++++++++++++++++++++++- 3 files changed, 122 insertions(+), 22 deletions(-) diff --git a/src/cr/cube/cube.py b/src/cr/cube/cube.py index fc10711c0..5eef7998b 100644 --- a/src/cr/cube/cube.py +++ b/src/cr/cube/cube.py @@ -787,7 +787,6 @@ def _shape(self): """All dimensions shape (row, col)""" # NOTE: Inverting the shape cannot be enough in future when we'll have more than # 2 dimensions in the new dim_order option. - # original_shape = self._all_dimensions.shape shape = self._all_dimensions.shape if self.requires_array_transposition: return ( diff --git a/src/cr/cube/stripe/cubemeasure.py b/src/cr/cube/stripe/cubemeasure.py index 9f88afecd..d92aa7810 100644 --- a/src/cr/cube/stripe/cubemeasure.py +++ b/src/cr/cube/stripe/cubemeasure.py @@ -26,19 +26,11 @@ def __init__(self, cube, rows_dimension, ca_as_0th, slice_idx): @lazyproperty def cube_means(self): """_BaseCubeMeans subclass object for this stripe.""" - if self._cube.means is None: - raise ValueError( - "`.means` is undefined for a cube-result without a means measure" - ) return _BaseCubeMeans.factory(self._cube, self._rows_dimension) @lazyproperty def cube_sum(self): """_BaseCubeMeans subclass object for this stripe.""" - if self._cube.sum is None: - raise ValueError( - "`.sum` is undefined for a cube-result without a sum measure" - ) return _BaseCubeSum.factory(self._cube, self._rows_dimension) @lazyproperty @@ -96,6 +88,10 @@ class _CatCubeMeans(_BaseCubeMeans): @lazyproperty def means(self): """1D np.float64 ndarray of mean for each stripe row.""" + if self._means is None: + raise ValueError( + "`.means` is undefined for a 
cube-result without a means measure" + ) return self._means @@ -108,21 +104,13 @@ class _MrCubeMeans(_BaseCubeMeans): @lazyproperty def means(self): """1D np.float64 ndarray of mean for each stripe row.""" + if self._means is None: + raise ValueError( + "`.means` is undefined for a cube-result without a means measure" + ) return self._means[:, 0] -class _NumArrCubeMeans(_BaseCubeMeans): - """Means cube-measure for an MR stripe. - - Its `.means` is a 2D ndarray with axes (rows, sel/not). - """ - - @lazyproperty - def means(self): - """1D np.float64 ndarray of mean for each stripe row.""" - return self._means - - # === SUM === @@ -147,6 +135,10 @@ class _CatCubeSum(_BaseCubeSum): @lazyproperty def sum(self): """1D np.float64 ndarray of mean for each stripe row.""" + if self._sum is None: + raise ValueError( + "`.sum` is undefined for a cube-result without a sum measure" + ) return self._sum @@ -158,6 +150,10 @@ class _MrCubeSum(_BaseCubeSum): @lazyproperty def sum(self): """1D np.float64 ndarray of mean for each stripe row.""" + if self._sum is None: + raise ValueError( + "`.sum` is undefined for a cube-result without a sum measure" + ) return self._sum[:, 0] diff --git a/tests/unit/stripe/test_cubemeasure.py b/tests/unit/stripe/test_cubemeasure.py index f0ccc93ea..092f3e84c 100644 --- a/tests/unit/stripe/test_cubemeasure.py +++ b/tests/unit/stripe/test_cubemeasure.py @@ -10,13 +10,16 @@ from cr.cube.enums import DIMENSION_TYPE as DT from cr.cube.stripe.cubemeasure import ( _BaseCubeMeans, + _BaseCubeSum, _BaseUnweightedCubeCounts, _BaseWeightedCubeCounts, _CatCubeMeans, + _CatCubeSum, _CatUnweightedCubeCounts, _CatWeightedCubeCounts, CubeMeasures, _MrCubeMeans, + _MrCubeSum, _MrUnweightedCubeCounts, _MrWeightedCubeCounts, ) @@ -42,6 +45,19 @@ def it_provides_access_to_the_cube_means_object( _BaseCubeMeans_.factory.assert_called_once_with(cube_, rows_dimension_) assert cube_means is cube_means_ + def it_provides_access_to_the_cube_sum_object( + self, request, cube_, 
rows_dimension_ + ): + cube_sum_ = instance_mock(request, _BaseCubeSum) + _BaseCubeSum_ = class_mock(request, "cr.cube.stripe.cubemeasure._BaseCubeSum") + _BaseCubeSum_.factory.return_value = cube_sum_ + cube_measures = CubeMeasures(cube_, rows_dimension_, None, None) + + cube_sum = cube_measures.cube_sum + + _BaseCubeSum_.factory.assert_called_once_with(cube_, rows_dimension_) + assert cube_sum is cube_sum_ + def it_provides_access_to_the_unweighted_cube_counts_object( self, request, cube_, rows_dimension_ ): @@ -103,7 +119,7 @@ class Describe_BaseCubeMeans(object): def it_provides_a_factory_for_constructing_unweighted_cube_count_objects( self, request, rows_dimension_type, CubeMeansCls, means ): - cube_ = instance_mock(request, Cube, counts=means) + cube_ = instance_mock(request, Cube, means=means) rows_dimension_ = instance_mock( request, Dimension, dimension_type=rows_dimension_type ) @@ -127,6 +143,16 @@ def it_knows_its_means(self): cube_means = _CatCubeMeans(None, np.array([1.1, 2.2, 3.3])) assert cube_means.means == pytest.approx([1.1, 2.2, 3.3]) + def but_it_raises_value_error_when_the_cube_result_does_not_contain_means(self): + cube_means = _CatCubeMeans(None, means=None) + with pytest.raises(ValueError) as e: + cube_means.means + + assert ( + str(e.value) + == "`.means` is undefined for a cube-result without a means measure" + ) + class Describe_MrCubeMeans(object): """Unit-test suite for `cr.cube.stripe.cubemeasure._MrCubeMeans`.""" @@ -135,6 +161,85 @@ def it_knows_its_means(self): cube_means = _MrCubeMeans(None, np.array([[1.1, 2.2], [3.3, 4.4], [5.5, 6.6]])) assert cube_means.means == pytest.approx([1.1, 3.3, 5.5]) + def but_it_raises_value_error_when_the_cube_result_does_not_contain_means(self): + cube_means = _MrCubeMeans(None, means=None) + with pytest.raises(ValueError) as e: + cube_means.means + + assert ( + str(e.value) + == "`.means` is undefined for a cube-result without a means measure" + ) + + +# === SUM === + + +class 
Describe_BaseCubeSum(object): + """Unit test suite for `cr.cube.matrix.cubemeasure._BaseCubeSum`.""" + + @pytest.mark.parametrize( + "rows_dimension_type, CubeSumCls, sum", + ( + (DT.CAT, _CatCubeSum, [1, 2, 3]), + (DT.MR, _MrCubeSum, [[1, 6], [2, 5], [3, 4]]), + ), + ) + def it_provides_a_factory_for_constructing_unweighted_cube_count_objects( + self, request, rows_dimension_type, CubeSumCls, sum + ): + cube_ = instance_mock(request, Cube, sum=sum) + rows_dimension_ = instance_mock( + request, Dimension, dimension_type=rows_dimension_type + ) + cube_sum_ = instance_mock(request, CubeSumCls) + CubeSumCls_ = class_mock( + request, + "cr.cube.stripe.cubemeasure.%s" % CubeSumCls.__name__, + return_value=cube_sum_, + ) + + cube_sum = _BaseCubeSum.factory(cube_, rows_dimension_) + + CubeSumCls_.assert_called_once_with(rows_dimension_, sum) + assert cube_sum is cube_sum_ + + +class Describe_CatCubeSum(object): + """Unit-test suite for `cr.cube.stripe.cubemeasure._CatCubeSum`.""" + + def it_knows_its_sum(self): + cube_sum = _CatCubeSum(None, np.array([1, 2, 3])) + assert cube_sum.sum == pytest.approx([1, 2, 3]) + + def but_it_raises_value_error_when_the_cube_result_does_not_contain_sum(self): + cube_sum = _CatCubeSum(None, sum=None) + with pytest.raises(ValueError) as e: + cube_sum.sum + + assert ( + str(e.value) + == "`.sum` is undefined for a cube-result without a sum measure" + ) + + +class Describe_MrCubeSum(object): + """Unit-test suite for `cr.cube.stripe.cubemeasure._MrCubeSum`.""" + + def it_knows_its_sum(self): + cube_sum = _MrCubeSum(None, np.array([[1, 2], [3, 4], [5, 6]])) + assert cube_sum.sum == pytest.approx([1, 3, 5]) + + def but_it_raises_value_error_when_the_cube_result_does_not_contain_sum(self): + cube_sum = _MrCubeSum(None, sum=None) + with pytest.raises(ValueError) as e: + cube_sum.sum + + assert ( + str(e.value) + == "`.sum` is undefined for a cube-result without a sum measure" + ) + # === UNWEIGHTED COUNTS === From 
678ee10478deabcd97bb8d452ac2e7e168ca7329 Mon Sep 17 00:00:00 2001 From: Arbitrio Date: Tue, 2 Mar 2021 17:45:48 +0100 Subject: [PATCH 08/10] add tests for missing coverage --- src/cr/cube/cube.py | 2 ++ src/cr/cube/stripe/cubemeasure.py | 7 +++++++ tests/integration/test_cube.py | 5 +++++ tests/unit/test_cube.py | 9 +++++++++ 4 files changed, 23 insertions(+) diff --git a/src/cr/cube/cube.py b/src/cr/cube/cube.py index 5eef7998b..30414d40d 100644 --- a/src/cr/cube/cube.py +++ b/src/cr/cube/cube.py @@ -643,6 +643,8 @@ def missing_count(self): """numeric representing count of missing rows in cube response.""" if self.means: return self.means.missing_count + if self.sum: + return self.sum.missing_count return self._cube_dict["result"].get("missing", 0) @lazyproperty diff --git a/src/cr/cube/stripe/cubemeasure.py b/src/cr/cube/stripe/cubemeasure.py index d92aa7810..f715b7b7f 100644 --- a/src/cr/cube/stripe/cubemeasure.py +++ b/src/cr/cube/stripe/cubemeasure.py @@ -128,6 +128,13 @@ def factory(cls, cube, rows_dimension): SumCls = _MrCubeSum if rows_dimension.dimension_type == DT.MR else _CatCubeSum return SumCls(rows_dimension, sum) + @lazyproperty + def sum(self): + """1D np.float64 ndarray of sum for each stripe row.""" + raise NotImplementedError( + "`%s` must implement `.sum`" % type(self).__name__ + ) # pragma: no cover + class _CatCubeSum(_BaseCubeSum): """Means cube-measure for a non-MR stripe.""" diff --git a/tests/integration/test_cube.py b/tests/integration/test_cube.py index 3fce0fa8b..fb075487f 100644 --- a/tests/integration/test_cube.py +++ b/tests/integration/test_cube.py @@ -174,6 +174,11 @@ def it_provides_the_means_missing_count_when_means_are_available(self): missing_count = measures.missing_count assert missing_count == 3 + def it_provides_the_means_missing_count_when_sum_are_available(self): + measures = _Measures(CR.SUM_CAT_X_MR, None) + missing_count = measures.missing_count + assert missing_count == 1 + def 
but_provides_the_general_missing_count_otherwise(self): measures = _Measures(CR.CAT_X_CAT, None) missing_count = measures.missing_count diff --git a/tests/unit/test_cube.py b/tests/unit/test_cube.py index 5d30a0d2f..b6f464201 100644 --- a/tests/unit/test_cube.py +++ b/tests/unit/test_cube.py @@ -273,6 +273,15 @@ def it_knows_its_n_reposnes_to_help(self, _cubes_prop_, cube_): assert n_responses == 6 + def it_is_subscriptable(self, _cubes_prop_, cube_): + cube_.description = "Hello" + _cubes_prop_.return_value = (cube_,) + cube_set = CubeSet(None, None, None, None) + + first_cube = cube_set[0] + + assert first_cube.description == "Hello" + # fixture components --------------------------------------------- @pytest.fixture From c39328bde31a1082ff6343a3a1620ae1f281f4a9 Mon Sep 17 00:00:00 2001 From: Arbitrio Date: Wed, 3 Mar 2021 10:26:58 +0100 Subject: [PATCH 09/10] address steve's comments --- src/cr/cube/cube.py | 153 ++++++++++++------------ src/cr/cube/enums.py | 2 +- tests/integration/test_cube.py | 32 +---- tests/integration/test_cubepart.py | 84 +++++++------ tests/integration/test_matrix.py | 25 ++-- tests/integration/test_numeric_array.py | 16 +-- tests/unit/test_cube.py | 59 ++++----- 7 files changed, 163 insertions(+), 208 deletions(-) diff --git a/src/cr/cube/cube.py b/src/cr/cube/cube.py index 30414d40d..08ced8787 100644 --- a/src/cr/cube/cube.py +++ b/src/cr/cube/cube.py @@ -14,10 +14,7 @@ from cr.cube.cubepart import CubePartition from cr.cube.dimension import AllDimensions -from cr.cube.enums import ( - DIMENSION_TYPE as DT, - QUANTITY_OF_INTEREST_MEASURES as QOI_MEASURES, -) +from cr.cube.enums import DIMENSION_TYPE as DT, NUMERIC_MEASURES from cr.cube.util import lazyproperty np.seterr(divide="ignore", invalid="ignore") @@ -47,30 +44,51 @@ def __init__(self, cube_responses, transforms, population, min_base): self._population = population self._min_base = min_base - def __getitem__(self, index): - return self._cubes[index] - @lazyproperty def 
can_show_pairwise(self): """True if all 2D cubes in a multi-cube set can provide pairwise comparison.""" - if len(self._cubes) < 2: + if len(self.cubes) < 2: return False return all( all(dt in DT.ALLOWED_PAIRWISE_TYPES for dt in cube.dimension_types[-2:]) and cube.ndim >= 2 - for cube in self._cubes[1:] + for cube in self.cubes[1:] ) + @lazyproperty + def cubes(self): + """Sequence of Cube objects containing data for this analysis.""" + + def iter_cubes(): + """Generate a Cube object for each of cube_responses. + + 0D cube-responses and 1D second-and-later cubes are "inflated" to add their + missing row dimension. + """ + for idx, cube_response in enumerate(self._cube_responses): + cube = Cube( + cube_response, + cube_idx=idx if self._is_multi_cube else None, + transforms=self._transforms_dicts[idx], + population=self._population, + mask_size=self._min_base, + ) + # --- numeric-measures cubes require inflation to restore their + # --- rows-dimension, others don't + yield cube.inflate() if self._is_numeric_measure else cube + + return tuple(iter_cubes()) + @lazyproperty def description(self): """str description of first cube in this set.""" - return self._cubes[0].description + return self.cubes[0].description @lazyproperty def has_weighted_counts(self): """True if cube-responses include a weighted-count measure.""" - return self._cubes[0].is_weighted + return self.cubes[0].is_weighted @lazyproperty def is_ca_as_0th(self): @@ -83,19 +101,19 @@ def is_ca_as_0th(self): if not self._is_multi_cube: return False # ---the rest depends on the row-var cube--- - cube = self._cubes[0] + cube = self.cubes[0] # ---True if row-var cube is CA--- return cube.dimension_types[0] == DT.CA_SUBVAR @lazyproperty def missing_count(self): """The number of missing values from first cube in this set.""" - return self._cubes[0].missing + return self.cubes[0].missing @lazyproperty def name(self): """str name of first cube in this set.""" - return self._cubes[0].name + return 
self.cubes[0].name @lazyproperty def partition_sets(self): @@ -120,7 +138,7 @@ def partition_sets(self): a _Strand and the rest being _Slice objects. Multiple partition sets only arise for a tabbook in the CA-as-0th case. """ - return tuple(zip(*(cube.partitions for cube in self._cubes))) + return tuple(zip(*(cube.partitions for cube in self.cubes))) @lazyproperty def population_fraction(self): @@ -131,41 +149,17 @@ def population_fraction(self): if the unfiltered count is zero, which would otherwise result in a divide-by-zero error. """ - return self._cubes[0].population_fraction + return self.cubes[0].population_fraction @lazyproperty def n_responses(self): """Total number of responses considered from first cube in this set.""" - return self._cubes[0].n_responses + return self.cubes[0].n_responses @lazyproperty def valid_counts_summary(self): """The valid count summary values from first cube in this set.""" - return self._cubes[0].valid_counts_summary - - @lazyproperty - def _cubes(self): - """Sequence of Cube objects containing data for this analysis.""" - - def iter_cubes(): - """Generate a Cube object for each of cube_responses. - - 0D cube-responses and 1D second-and-later cubes are "inflated" to add their - missing row dimension. 
- """ - for idx, cube_response in enumerate(self._cube_responses): - cube = Cube( - cube_response, - cube_idx=idx if self._is_multi_cube else None, - transforms=self._transforms_dicts[idx], - population=self._population, - mask_size=self._min_base, - ) - # --- numeric-measures cubes require inflation to restore their - # --- rows-dimension, others don't - yield cube.inflate() if self._is_numeric_measure else cube - - return tuple(iter_cubes()) + return self.cubes[0].valid_counts_summary @lazyproperty def _is_multi_cube(self): @@ -235,8 +229,10 @@ def __repr__(self): @lazyproperty def available_measures(self): - """Tuple of available measures in the cube response.""" - return tuple(self._cube_response.get("result", {}).get("measures", {}).keys()) + """frozenset of available measures in the cube response.""" + return frozenset( + self._cube_response.get("result", {}).get("measures", {}).keys() + ) @lazyproperty def counts(self): @@ -244,6 +240,7 @@ def counts(self): @lazyproperty def counts_with_missings(self): + """ndarray of weighted or unweighted cube counts.""" return ( self._measures.weighted_counts.raw_cube_array if self.is_weighted @@ -288,11 +285,11 @@ def inflate(self): """ cube_dict = self._cube_dict dimensions = cube_dict["result"]["dimensions"] - default = "-".join(self._numeric_measures) + default_name = "-".join(self._numeric_measures) # --- The default value in case of numeric variable is the combination of all # --- the measures expressed in the cube response. 
- alias = self._numeric_measure_references.get("alias", default) - name = self._numeric_measure_references.get("name", default) + alias = self._numeric_measure_references.get("alias", default_name) + name = self._numeric_measure_references.get("name", default_name) rows_dimension = { "references": {"alias": alias, "name": name}, "type": { @@ -316,12 +313,10 @@ def is_weighted(self): @lazyproperty def means(self): - """float64 ndarray of the cube_means if the measure exists.""" - if self._measures.means: - return self._measures.means.raw_cube_array[self._valid_idxs].astype( - np.float64 - ) - return None + """Optional float64 ndarray of the cube_means if the measure exists.""" + if self._measures.means is None: + return None + return self._measures.means.raw_cube_array[self._valid_idxs].astype(np.float64) @lazyproperty def missing(self): @@ -378,12 +373,10 @@ def population_fraction(self): @lazyproperty def sum(self): - """float64 ndarray of the cube_means if the measure exists.""" - if self._measures.sum: - return self._measures.sum.raw_cube_array[self._valid_idxs].astype( - np.float64 - ) - return None + """Optional float64 ndarray of the cube_sum if the measure exists.""" + if self._measures.sum is None: + return None + return self._measures.sum.raw_cube_array[self._valid_idxs].astype(np.float64) @lazyproperty def title(self): @@ -517,7 +510,7 @@ def _numeric_measures(self): Basically the numeric measures are the intersection between all the measures within the cube response and the defined QUANTITY_OF_INTEREST_MEASURES. """ - return list(set(self.available_measures).intersection(QOI_MEASURES)) + return list(self.available_measures.intersection(NUMERIC_MEASURES)) @lazyproperty def _measures(self): @@ -747,7 +740,7 @@ def raw_cube_array(self): response. Specifically, it includes values for missing elements, any MR_CAT dimensions, and any prunable rows and columns. 
""" - raw_cube_array = np.array(self._flat_values).flatten().reshape(self._shape) + raw_cube_array = self._flat_values.reshape(self._shape) # ---must be read-only to avoid hard-to-find bugs--- raw_cube_array.flags.writeable = False return raw_cube_array @@ -778,7 +771,7 @@ def valid_counts(self): @lazyproperty def _flat_values(self): # pragma: no cover - """Return tuple of mean values as found in cube response. + """Return 1D ndarray of values as found in cube response. This property must be implemented by each subclass. """ @@ -809,15 +802,17 @@ def missing_count(self): @lazyproperty def _flat_values(self): - """Return tuple of mean values as found in cube response. + """1D float64 ndarray of mean values as found in cube response. Mean data may include missing items represented by a dict like {'?': -1} in the cube response. These are replaced by np.nan in the returned value. """ - return tuple( - np.nan if type(x) is dict else x - for x in self._cube_dict["result"]["measures"]["mean"]["data"] + return np.array( + [ + np.nan if type(x) is dict else x + for x in self._cube_dict["result"]["measures"]["mean"]["data"] + ] ) @@ -831,14 +826,16 @@ def missing_count(self): @lazyproperty def _flat_values(self): - """Return tuple of mean values as found in cube response. - Mean data may include missing items represented by a dict like + """1D float64 ndarray of sum values as found in cube response. + Sum data may include missing items represented by a dict like {'?': -1} in the cube response. These are replaced by np.nan in the returned value. 
""" - return tuple( - np.nan if type(x) is dict else x - for x in self._cube_dict["result"]["measures"]["sum"]["data"] + return np.array( + [ + np.nan if type(x) is dict else x + for x in self._cube_dict["result"]["measures"]["sum"]["data"] + ] ) @@ -847,16 +844,16 @@ class _UnweightedCountMeasure(_BaseMeasure): @lazyproperty def _flat_values(self): - """tuple of int counts before weighting.""" + """1D int64 ndarray of counts before weighting.""" if self.valid_counts.size > 0: # ---If valid_count are expressed in the cube dict, returns its data. # ---This condition can happen in case of numeric array cube response. # ---Under this circumstances the numeric array measures will contain the # ---mean measure and a valid count measure for the unweighted counts. - return tuple( + return np.array( self._cube_dict["result"]["measures"]["valid_count_unweighted"]["data"] ) - return tuple(self._cube_dict["result"]["counts"]) + return np.array(self._cube_dict["result"]["counts"]) class _WeightedCountMeasure(_BaseMeasure): @@ -864,15 +861,15 @@ class _WeightedCountMeasure(_BaseMeasure): @lazyproperty def _flat_values(self): - """tuple of numeric counts after weighting.""" + """1D float64 ndarray of numeric counts after weighting.""" if self.valid_counts.size > 0: # ---If valid_count are expressed in the cube dict, returns its data. # ---This condition can happen in case of numeric array cube response. # ---Under this circumstances the numeric array measures will contain the # ---mean measure and a valid count measure for the unweighted counts. 
- return tuple( + return np.array( self._cube_dict["result"]["measures"]["valid_count_unweighted"]["data"] ) if self._cube_dict["result"]["measures"].get("count", {}).get("data", []): - return tuple(self._cube_dict["result"]["measures"]["count"]["data"]) - return tuple(self._cube_dict["result"]["counts"]) + return np.array(self._cube_dict["result"]["measures"]["count"]["data"]) + return np.array(self._cube_dict["result"]["counts"]) diff --git a/src/cr/cube/enums.py b/src/cr/cube/enums.py index 175db2c01..d5e0791df 100644 --- a/src/cr/cube/enums.py +++ b/src/cr/cube/enums.py @@ -82,4 +82,4 @@ class MEASURE(enum.Enum): Z_SCORE = "z_score" -QUANTITY_OF_INTEREST_MEASURES = {MEASURE.SUM.value, MEASURE.MEAN.value} +NUMERIC_MEASURES = {MEASURE.SUM.value, MEASURE.MEAN.value} diff --git a/tests/integration/test_cube.py b/tests/integration/test_cube.py index fb075487f..48d9c715c 100644 --- a/tests/integration/test_cube.py +++ b/tests/integration/test_cube.py @@ -175,9 +175,7 @@ def it_provides_the_means_missing_count_when_means_are_available(self): assert missing_count == 3 def it_provides_the_means_missing_count_when_sum_are_available(self): - measures = _Measures(CR.SUM_CAT_X_MR, None) - missing_count = measures.missing_count - assert missing_count == 1 + assert _Measures(CR.SUM_CAT_X_MR, None).missing_count == 1 def but_provides_the_general_missing_count_otherwise(self): measures = _Measures(CR.CAT_X_CAT, None) @@ -276,34 +274,6 @@ def it_handles_means_cat_hs_x_cat_hs(self): ], ) - def it_knows_if_it_has_means(self): - slice_ = Cube(CR.MEANS_CAT_HS_X_CAT_HS).partitions[0] - assert slice_.means.any() - - -class DescribeIntegrated_SumMeasure(object): - def it_provides_sum_measure_for_CAT(self): - cube = Cube(CR.CAT_SUM) - partition = cube.partitions[0] - - np.testing.assert_array_equal(partition.sum, [88.0, 77.0]) - np.testing.assert_array_equal(partition.table_base_range, [5, 5]) - - def it_provides_sum_measure_for_MR(self): - cube = Cube(CR.MR_SUM) - partition = 
cube.partitions[0] - - np.testing.assert_array_almost_equal(partition.sum, [3.0, 2.0, 2.0]) - np.testing.assert_array_almost_equal(partition.table_base_range, [3, 3]) - - def it_provides_sum_and_mean_measure_for_CAT(self): - cube = Cube(CR.NUMERIC_MEASURES_X_CAT) - partition = cube.partitions[0] - - np.testing.assert_array_almost_equal(partition.means, [2.66666667, 3.5]) - np.testing.assert_array_almost_equal(partition.sum, [8, 7]) - np.testing.assert_array_almost_equal(partition.counts, [3, 2]) - class DescribeIntegrated_UnweightedCountMeasure(object): def it_provides_access_to_its_raw_cube_array(self): diff --git a/tests/integration/test_cubepart.py b/tests/integration/test_cubepart.py index c52eb5583..69055360e 100644 --- a/tests/integration/test_cubepart.py +++ b/tests/integration/test_cubepart.py @@ -39,6 +39,12 @@ def it_provides_values_for_cat_x_cat(self): assert slice_.inserted_row_idxs == () assert slice_.is_empty is False assert slice_.name == "v4" + with pytest.raises(ValueError) as e: + slice_.means + assert ( + str(e.value) + == "`.means` is undefined for a cube-result without a means measure" + ) assert pytest.approx(slice_.population_counts) == [ [3000.333, 1200.133], [3000.333, 1800.200], @@ -62,6 +68,12 @@ def it_provides_values_for_cat_x_cat(self): assert slice_.rows_dimension_type == DT.CAT assert slice_.rows_margin.tolist() == [7, 8] assert slice_.shape == (2, 2) + with pytest.raises(ValueError) as e: + slice_.sum + assert ( + str(e.value) + == "`.sum` is undefined for a cube-result without a sum measure" + ) assert slice_.table_margin == 15 assert slice_.table_name is None assert pytest.approx(slice_.table_percentages) == [ @@ -74,19 +86,6 @@ def it_provides_values_for_cat_x_cat(self): ] assert slice_.unweighted_counts.tolist() == [[5, 2], [5, 3]] assert slice_.variable_name == "v7" - # A cube without means or sum available in the response throws an exception. 
- with pytest.raises(ValueError) as e: - slice_.means - assert ( - str(e.value) - == "`.means` is undefined for a cube-result without a means measure" - ) - with pytest.raises(ValueError) as e: - slice_.sum - assert ( - str(e.value) - == "`.sum` is undefined for a cube-result without a sum measure" - ) def it_provides_values_for_cat_hs_mt_x_cat_hs_mt(self): slice_ = Cube(CR.CAT_HS_MT_X_CAT_HS_MT, population=1000).partitions[0] @@ -331,25 +330,15 @@ def it_provides_values_for_cat_hs_x_mr(self): def it_provides_values_for_mean_cat_x_cat_hs(self): slice_ = Cube(CR.MEANS_CAT_X_CAT_HS).partitions[0] - np.testing.assert_array_almost_equal( - slice_.means, - np.array([[24.43935757, 37.32122746, np.nan, 55.48571956, 73.02427659]]), - ) - np.testing.assert_array_almost_equal(slice_.rows_margin, np.array([1500.0])) - np.testing.assert_array_almost_equal( - slice_.columns_margin, np.array([189, 395, 584, 606, 310]) - ) - - def but_it_has_no_counts_because_there_is_no_cube_count_measure(self): - slice_ = Cube(CR.MEANS_CAT_X_CAT_HS).partitions[0] - # This fixture has both cube_counts and cube_means measure, for this reason # both measures are available at cubepart level. 
- assert slice_.counts == pytest.approx(np.array([[189, 395, 584, 606, 310]])) assert slice_.means == pytest.approx( - np.array([[24.43935757, 37.32122746, np.nan, 55.48571956, 73.02427659]]), + np.array([[24.4393575, 37.3212274, np.nan, 55.4857195, 73.0242765]]), nan_ok=True, ) + assert slice_.counts == pytest.approx(np.array([[189, 395, 584, 606, 310]])) + assert slice_.rows_margin.tolist() == [1500.0] + assert slice_.columns_margin.tolist() == [189, 395, 584, 606, 310] def it_provides_values_for_mr_x_cat_hs(self): slice_ = Cube(CR.MR_X_CAT_HS_MT).partitions[0] @@ -850,11 +839,6 @@ def it_provides_values_for_univariate_cat(self): assert str(e.value) == ( "`.means` is undefined for a cube-result without a means measure" ) - with pytest.raises(ValueError) as e: - strand.sum - assert str(e.value) == ( - "`.sum` is undefined for a cube-result without a sum measure" - ) assert strand.min_base_size_mask.tolist() == [False, False] assert strand.name == "v7" assert strand.ndim == 1 @@ -872,6 +856,11 @@ def it_provides_values_for_univariate_cat(self): assert strand.scale_std_dev == pytest.approx(0.9428090) assert strand.scale_std_err == pytest.approx(0.2434322) assert strand.shape == (2,) + with pytest.raises(ValueError) as e: + strand.sum + assert str(e.value) == ( + "`.sum` is undefined for a cube-result without a sum measure" + ) assert strand.table_base_range.tolist() == [15, 15] assert strand.table_margin_range.tolist() == [15, 15] assert strand.table_name == "v7: C" @@ -908,17 +897,7 @@ def it_provides_values_for_univariate_cat_means_and_counts(self): assert strand.shape == (4,) assert strand.table_base_range.tolist() == [1628, 1628] # --- means cube that also has counts has a table-margin --- - assert strand.table_margin_range == pytest.approx( - [16029.22309748, 16029.22309748] - ) - - def it_provides_table_margin_range_for_univariate_cat_means_and_counts(self): - """The cube_mean and cube_count measures can appear together.""" - strand = 
Cube(CR.CAT_MEANS_AND_COUNTS).partitions[0] - - # for a cube with numeric measure like mean, table margin are calculated on the - # counts and not on the means. - assert strand.table_margin_range == pytest.approx([16029.223097, 16029.223097]) + assert strand.table_margin_range == pytest.approx([1500.961, 1500.961]) def it_provides_values_for_univariate_datetime(self): strand = Cube(CR.DATE, population=9001).partitions[0] @@ -1097,6 +1076,25 @@ def it_knows_when_it_is_empty(self): strand = Cube(CR.OM_SGP8334215_VN_2019_SEP_19_STRAND).partitions[0] assert strand.is_empty is True + def it_provides_sum_measure_for_CAT(self): + strand = Cube(CR.CAT_SUM).partitions[0] + + assert strand.sum == pytest.approx([88.0, 77.0]) + assert strand.table_base_range.tolist() == [5, 5] + + def it_provides_sum_measure_for_MR(self): + strand = Cube(CR.MR_SUM).partitions[0] + + assert strand.sum == pytest.approx([3.0, 2.0, 2.0]) + assert strand.table_base_range.tolist() == [3, 3] + + def it_provides_sum_and_mean_measure_for_CAT(self): + strand = Cube(CR.NUMERIC_MEASURES_X_CAT).partitions[0] + + assert strand.counts == pytest.approx([3, 2]) + assert strand.means == pytest.approx([2.66666667, 3.5]) + assert strand.sum == pytest.approx([8, 7]) + class Describe_Nub(object): """Integration-test suite for `cr.cube.cubepart._Nub` object.""" diff --git a/tests/integration/test_matrix.py b/tests/integration/test_matrix.py index b7775acd4..e83711ae1 100644 --- a/tests/integration/test_matrix.py +++ b/tests/integration/test_matrix.py @@ -984,15 +984,14 @@ def it_computes_columns_margin_for_cat_hs_x_cat_hs_hiddens_explicit_order(self): def it_computes_means_cat_x_cat_columns_margin(self): slice_ = Cube(CR.MEANS_CAT_HS_X_CAT_HS).partitions[0] - np.testing.assert_almost_equal( - slice_.columns_margin, np.array([431, 494, 294, 1219, 433]) - ) + + assert slice_.columns_margin.tolist() == [431, 494, 294, 1219, 433] def it_computes_cat_x_mr_columns_margin(self): slice_ = Cube(CR.CAT_X_MR_2).partitions[0] - 
np.testing.assert_almost_equal( - slice_.columns_margin, - np.array([31.631521, 70.7307341, 125.7591135, 366.8883914, 376.7656406]), + + assert slice_.columns_margin == pytest.approx( + [31.63152, 70.730734, 125.759113, 366.888391, 376.765640] ) def it_computes_mr_x_cat_columns_margin(self): @@ -1046,10 +1045,8 @@ def it_computes_rows_margin_for_cat_hs_x_cat_hs_hiddens_explicit_order(self): def it_computes_means_cat_x_cat_rows_margin(self): slice_ = Cube(CR.MEANS_CAT_HS_X_CAT_HS).partitions[0] - np.testing.assert_almost_equal( - slice_.rows_margin, - np.array([55, 126, 613, 710, 310, 400, 148]), - ) + + assert slice_.rows_margin.tolist() == [55, 126, 613, 710, 310, 400, 148] def it_computes_cat_x_mr_rows_margin(self): slice_ = Cube(CR.CAT_X_MR_2).partitions[0] @@ -1488,11 +1485,15 @@ def it_computes_the_sort_by_value_row_order_to_help( def it_computes_sum_cat_x_mr(self): slice_ = Cube(CR.SUM_CAT_X_MR).partitions[0] - np.testing.assert_almost_equal(slice_.sum, [[3.0, 2.0, 2.0], [0.0, 0.0, 0.0]]) + + assert slice_.sum == pytest.approx(np.array([[3.0, 2.0, 2.0], [0.0, 0.0, 0.0]])) def it_computes_sum_mr_x_cat(self): slice_ = Cube(CR.SUM_MR_X_CAT).partitions[0] - np.testing.assert_almost_equal(slice_.sum, [[3.0, 0.0], [2.0, 0.0], [2.0, 0.0]]) + + assert slice_.sum == pytest.approx( + np.array([[3.0, 0.0], [2.0, 0.0], [2.0, 0.0]]) + ) class Describe_BaseOrderHelper(object): diff --git a/tests/integration/test_numeric_array.py b/tests/integration/test_numeric_array.py index 86aed1831..0309c3701 100644 --- a/tests/integration/test_numeric_array.py +++ b/tests/integration/test_numeric_array.py @@ -10,10 +10,10 @@ from ..fixtures import NA -class TestNumericArrays(object): +class DescribeNumericArrays(object): """Test-suite for numeric-array behaviors.""" - def test_num_arr_means_scale_measures(self): + def it_provides_means_scale_measures(self): slice_ = Cube(NA.NUM_ARR_MEANS_SCALE_MEASURES).partitions[0] np.testing.assert_array_almost_equal( @@ -31,7 +31,7 @@ def 
test_num_arr_means_scale_measures(self): slice_.rows_scale_median, [3.0, 3.0, 3.0, 3.0] ) - def test_num_arr_means_grouped_by_cat(self): + def it_provides_means_for_num_array_grouped_by_cat(self): """Test means on numeric array, grouped by single categorical dimension.""" slice_ = Cube(NA.NUM_ARR_MEANS_GROUPED_BY_CAT).partitions[0] @@ -46,7 +46,7 @@ def test_num_arr_means_grouped_by_cat(self): ) np.testing.assert_almost_equal(slice_.columns_base, [[3, 2], [3, 1], [1, 1]]) - def test_num_arr_means_grouped_by_date(self): + def it_provides_means_for_num_array_grouped_by_date(self): """Test means on numeric array, grouped by single categorical dimension.""" slice_ = Cube(NA.NUM_ARR_MEANS_GROUPED_BY_DATE).partitions[0] @@ -61,7 +61,7 @@ def test_num_arr_means_grouped_by_date(self): ) np.testing.assert_almost_equal(slice_.columns_base, [[10, 9], [8, 10], [9, 10]]) - def test_num_arr_means_grouped_by_cat_weighted(self): + def it_provides_means_for_num_array_grouped_by_cat_weighted(self): """Test means on numeric array, grouped by single categorical dimension.""" slice_ = Cube(NA.NUM_ARR_MEANS_GROUPED_BY_CAT_WEIGHTED).partitions[0] @@ -77,7 +77,7 @@ def test_num_arr_means_grouped_by_cat_weighted(self): ) np.testing.assert_almost_equal(slice_.columns_base, [[3, 2], [3, 1], [1, 1]]) - def test_num_arr_means_x_mr(self): + def it_provides_means_for_num_array_x_mr(self): slice_ = Cube(NA.NUM_ARR_MEANS_X_MR).partitions[0] np.testing.assert_almost_equal( @@ -95,7 +95,7 @@ def test_num_arr_means_x_mr(self): slice_.columns_base, [[38, 14, 6, 18, 38], [38, 14, 6, 18, 38]] ) - def test_num_arr_means_no_grouping(self): + def it_provides_means_for_numeric_array_with_no_grouping(self): """Test means on no-dimensions measure of numeric array.""" strand = Cube(NA.NUM_ARR_MEANS_NO_GROUPING).partitions[0] @@ -104,7 +104,7 @@ def test_num_arr_means_no_grouping(self): assert strand.unweighted_bases.tolist() == [6, 6] assert strand.table_base_range.tolist() == [6, 6] - def 
test_num_arr_sum_grouped_by_cat(self): + def it_provides_sum_for_num_array_grouped_by_cat(self): """Test sum on numeric array, grouped by single categorical dimension.""" slice_ = Cube(NA.NUM_ARR_SUM_GROUPED_BY_CAT).partitions[0] diff --git a/tests/unit/test_cube.py b/tests/unit/test_cube.py index b6f464201..00c68b5f9 100644 --- a/tests/unit/test_cube.py +++ b/tests/unit/test_cube.py @@ -41,9 +41,9 @@ class DescribeCubeSet(object): ), ) def it_knows_whether_it_can_show_pairwise( - self, request, cubes_dimtypes, expected_value, _cubes_prop_ + self, request, cubes_dimtypes, expected_value, cubes_prop_ ): - _cubes_prop_.return_value = tuple( + cubes_prop_.return_value = tuple( instance_mock( request, Cube, dimension_types=cube_dimtypes, ndim=len(cube_dimtypes) ) @@ -55,9 +55,9 @@ def it_knows_whether_it_can_show_pairwise( assert can_show_pairwise is expected_value - def it_knows_its_description(self, _cubes_prop_, cube_): + def it_knows_its_description(self, cubes_prop_, cube_): cube_.description = "Are you male or female?" 
- _cubes_prop_.return_value = (cube_,) + cubes_prop_.return_value = (cube_,) cube_set = CubeSet(None, None, None, None) description = cube_set.description @@ -68,10 +68,10 @@ def it_knows_its_description(self, _cubes_prop_, cube_): ("first_cube_has_w_counts", "expected_value"), ((True, True), (False, False)) ) def it_knows_whether_it_has_weighted_counts( - self, first_cube_has_w_counts, expected_value, _cubes_prop_, cube_ + self, first_cube_has_w_counts, expected_value, cubes_prop_, cube_ ): cube_.is_weighted = first_cube_has_w_counts - _cubes_prop_.return_value = (cube_,) + cubes_prop_.return_value = (cube_,) cube_set = CubeSet(None, None, None, None) has_weighted_counts = cube_set.has_weighted_counts @@ -79,12 +79,10 @@ def it_knows_whether_it_has_weighted_counts( assert has_weighted_counts == expected_value @pytest.mark.parametrize(("ncubes", "expected_value"), ((2, True), (1, False))) - def it_knows_when_it_is_ca_as_0th( - self, ncubes, expected_value, _cubes_prop_, cube_ - ): + def it_knows_when_it_is_ca_as_0th(self, ncubes, expected_value, cubes_prop_, cube_): cubes_ = (cube_,) * ncubes cubes_[0].dimension_types = (DT.CA_SUBVAR,) * ncubes - _cubes_prop_.return_value = cubes_ + cubes_prop_.return_value = cubes_ cube_set = CubeSet(cubes_, None, None, None) is_ca_as_0th = cube_set.is_ca_as_0th @@ -95,19 +93,19 @@ def it_knows_when_it_is_ca_as_0th( ("first_cube_missing_count", "expected_value"), ((34, 34), (0, 0)) ) def it_knows_its_missing_count( - self, first_cube_missing_count, expected_value, _cubes_prop_, cube_ + self, first_cube_missing_count, expected_value, cubes_prop_, cube_ ): cube_.missing = first_cube_missing_count - _cubes_prop_.return_value = (cube_,) + cubes_prop_.return_value = (cube_,) cube_set = CubeSet(None, None, None, None) missing_count = cube_set.missing_count assert missing_count == expected_value - def it_knows_its_name(self, _cubes_prop_, cube_): + def it_knows_its_name(self, cubes_prop_, cube_): cube_.name = "Beverage" - 
_cubes_prop_.return_value = (cube_,) + cubes_prop_.return_value = (cube_,) cube_set = CubeSet(None, None, None, None) name = cube_set.name @@ -125,10 +123,10 @@ def it_knows_its_name(self, _cubes_prop_, cube_): ), ) def it_provides_access_to_the_partition_sets( - self, cube_partitions, expected_value, _cubes_prop_, cube_ + self, cube_partitions, expected_value, cubes_prop_, cube_ ): cube_.partitions = cube_partitions - _cubes_prop_.return_value = (cube_,) + cubes_prop_.return_value = (cube_,) cube_set = CubeSet(None, None, None, None) partition_sets = cube_set.partition_sets @@ -140,10 +138,10 @@ def it_provides_access_to_the_partition_sets( ((1.0, 1.0), (0.54, 0.54), (np.nan, np.nan)), ) def it_has_proper_population_fraction( - self, population_fraction, expected_value, cube_, _cubes_prop_ + self, population_fraction, expected_value, cube_, cubes_prop_ ): cube_.population_fraction = population_fraction - _cubes_prop_.return_value = (cube_,) + cubes_prop_.return_value = (cube_,) cube_set = CubeSet(None, None, None, None) cubeset_population_fraction = cube_set.population_fraction @@ -163,7 +161,7 @@ def it_constructs_its_sequence_of_cube_objects_to_help( min_base=10, ) - cubes = cube_set._cubes + cubes = cube_set.cubes assert Cube_.call_args_list == [ call( @@ -204,7 +202,7 @@ def but_it_inflates_the_cubes_in_special_case_of_numeric_mean_payload( min_base=10, ) - cubes = cube_set._cubes + cubes = cube_set.cubes assert Cube_.call_args_list == [ call( @@ -255,33 +253,24 @@ def it_knows_whether_it_is_numeric_measure_to_help( assert Cube_.call_args_list == ([call({"cube": 0})] if is_multi_cube else []) assert is_numeric_mean == expected_value - def it_knows_its_valid_counts_summary_to_help(self, _cubes_prop_, cube_): + def it_knows_its_valid_counts_summary_to_help(self, cubes_prop_, cube_): cube_.valid_counts_summary = np.array([1, 2, 3]) - _cubes_prop_.return_value = (cube_,) + cubes_prop_.return_value = (cube_,) cube_set = CubeSet(None, None, None, None) 
valid_counts_summary = cube_set.valid_counts_summary np.testing.assert_array_equal(valid_counts_summary, [1, 2, 3]) - def it_knows_its_n_reposnes_to_help(self, _cubes_prop_, cube_): + def it_knows_its_n_reposnes_to_help(self, cubes_prop_, cube_): cube_.n_responses = 6 - _cubes_prop_.return_value = (cube_,) + cubes_prop_.return_value = (cube_,) cube_set = CubeSet(None, None, None, None) n_responses = cube_set.n_responses assert n_responses == 6 - def it_is_subscriptable(self, _cubes_prop_, cube_): - cube_.description = "Hello" - _cubes_prop_.return_value = (cube_,) - cube_set = CubeSet(None, None, None, None) - - first_cube = cube_set[0] - - assert first_cube.description == "Hello" - # fixture components --------------------------------------------- @pytest.fixture @@ -293,8 +282,8 @@ def cube_(self, request): return instance_mock(request, Cube) @pytest.fixture - def _cubes_prop_(self, request): - return property_mock(request, CubeSet, "_cubes") + def cubes_prop_(self, request): + return property_mock(request, CubeSet, "cubes") @pytest.fixture def _is_multi_cube_prop_(self, request): From e04b7c33cf91f620ead3f47a7aafdb657fa23766 Mon Sep 17 00:00:00 2001 From: Arbitrio Date: Thu, 4 Mar 2021 14:42:16 +0100 Subject: [PATCH 10/10] refactoring sum and mean measure. 
._cube_result_matrix has been moved to the new self._assemble_matrix(self._measures..blocks) logic address steve's comments 2nd iteration --- src/cr/cube/matrix/assembler.py | 9 +- src/cr/cube/matrix/cubemeasure.py | 208 +++++-- src/cr/cube/matrix/measure.py | 38 +- src/cr/cube/stripe/cubemeasure.py | 25 +- src/cr/cube/stripe/measure.py | 16 +- tests/fixtures/mr-x-mr-means.json | 795 ++++++++++++++++++++++++++ tests/fixtures/mr-x-mr-sum.json | 795 ++++++++++++++++++++++++++ tests/integration/test_cubepart.py | 31 + tests/unit/matrix/test_assembler.py | 50 +- tests/unit/matrix/test_cubemeasure.py | 100 ---- tests/unit/stripe/test_cubemeasure.py | 16 +- 11 files changed, 1846 insertions(+), 237 deletions(-) create mode 100644 tests/fixtures/mr-x-mr-means.json create mode 100644 tests/fixtures/mr-x-mr-sum.json diff --git a/src/cr/cube/matrix/assembler.py b/src/cr/cube/matrix/assembler.py index 3beccb0fa..492470c90 100644 --- a/src/cr/cube/matrix/assembler.py +++ b/src/cr/cube/matrix/assembler.py @@ -164,9 +164,7 @@ def means(self): Raises `ValueError` if the cube-result does not include a means cube-measure. """ - return self._assemble_matrix( - NanSubtotals.blocks(self._cube_result_matrix.means, self._dimensions) - ) + return self._assemble_matrix(self._measures.means.blocks) @lazyproperty def pvalues(self): @@ -255,11 +253,10 @@ def rows_margin(self): @lazyproperty def sum(self): """2D optional np.float64 ndarray of sum for each cell. + Raises `ValueError` if the cube-result does not include a sum cube-measure. 
""" - return self._assemble_matrix( - NanSubtotals.blocks(self._cube_result_matrix.sum, self._dimensions) - ) + return self._assemble_matrix(self._measures.sum.blocks) @lazyproperty def table_base(self): diff --git a/src/cr/cube/matrix/cubemeasure.py b/src/cr/cube/matrix/cubemeasure.py index e936387e1..2b589f61e 100644 --- a/src/cr/cube/matrix/cubemeasure.py +++ b/src/cr/cube/matrix/cubemeasure.py @@ -23,6 +23,16 @@ def __init__(self, cube, dimensions, slice_idx): self._dimensions = dimensions self._slice_idx = slice_idx + @lazyproperty + def means(self): + """_BaseCubeMeans subclass object for this cube-result.""" + return _BaseCubeMeans.factory(self._cube, self._dimensions, self._slice_idx) + + @lazyproperty + def sum(self): + """_BaseCubeSums subclass object for this cube-result.""" + return _BaseCubeSums.factory(self._cube, self._dimensions, self._slice_idx) + @lazyproperty def unweighted_cube_counts(self): """_BaseUnweightedCubeCounts subclass object for this cube-result.""" @@ -68,6 +78,150 @@ def _slice_idx_expr(cls, cube, slice_idx): return np.s_[slice_idx] +# === MEANS === +class _BaseCubeMeans(_BaseCubeMeasure): + """Base class for mean cube-measure variants.""" + + def __init__(self, dimensions, means): + super(_BaseCubeMeans, self).__init__(dimensions) + self._means = means + + @classmethod + def factory(cls, cube, dimensions, slice_idx): + """Return _BaseCubeMeans subclass instance appropriate to `cube`.""" + dimension_types = cube.dimension_types[-2:] + CubeMeansCls = ( + _MrXMrCubeMeans + if dimension_types == (DT.MR, DT.MR) + else _MrXCatCubeMeans + if dimension_types[0] == DT.MR + else _CatXMrCubeMeans + if dimension_types[1] == DT.MR + else _CatXCatCubeMeans + ) + if cube.means is None: + raise ValueError( + "`.means` is undefined for a cube-result without a means measure" + ) + return CubeMeansCls( + dimensions, cube.means[cls._slice_idx_expr(cube, slice_idx)] + ) + + +class _CatXCatCubeMeans(_BaseCubeMeans): + """Means cube-measure for a slice 
with no MR dimensions.""" + + @lazyproperty + def means(self): + """2D np.float64 ndarray of means for each valid matrix cell.""" + return self._means + + +class _CatXMrCubeMeans(_BaseCubeMeans): + """Means cube-measure for a NOT_MR_X_MR slice. + + Note that the rows-dimensions need not actually be CAT. + """ + + @lazyproperty + def means(self): + """2D np.float64 ndarray of means for each valid matrix cell.""" + return self._means[:, :, 0] + + +class _MrXCatCubeMeans(_BaseCubeMeans): + """Means cube-measure for an MR_X_NOT_MR slice. + + Note that the columns-dimension need not actually be CAT. + """ + + @lazyproperty + def means(self): + """2D np.float64 ndarray of means for each valid matrix cell.""" + return self._means[:, 0, :] + + +class _MrXMrCubeMeans(_BaseCubeMeans): + """Means cube-measure for an MR_X_MR slice.""" + + @lazyproperty + def means(self): + """2D np.float64 ndarray of means for each valid matrix cell.""" + # --- indexing is: all-rows, sel-only, all-cols, sel-only --- + return self._means[:, 0, :, 0] + + +# === SUMS === +class _BaseCubeSums(_BaseCubeMeasure): + """Base class for sum cube-measure variants.""" + + def __init__(self, dimensions, sums): + super(_BaseCubeSums, self).__init__(dimensions) + self._sums = sums + + @classmethod + def factory(cls, cube, dimensions, slice_idx): + """Return _BaseCubeSums subclass instance appropriate to `cube`.""" + dimension_types = cube.dimension_types[-2:] + CubeSumsCls = ( + _MrXMrCubeSums + if dimension_types == (DT.MR, DT.MR) + else _MrXCatCubeSums + if dimension_types[0] == DT.MR + else _CatXMrCubeSums + if dimension_types[1] == DT.MR + else _CatXCatCubeSums + ) + if cube.sum is None: + raise ValueError( + "`.sum` is undefined for a cube-result without a sum measure" + ) + return CubeSumsCls(dimensions, cube.sum[cls._slice_idx_expr(cube, slice_idx)]) + + +class _CatXCatCubeSums(_BaseCubeSums): + """Sum cube-measure for a slice with no MR dimensions.""" + + @lazyproperty + def sum(self): + """2D 
np.float64 ndarray of sum for each valid matrix cell.""" + return self._sums + + +class _CatXMrCubeSums(_BaseCubeSums): + """Sum cube-measure for a NOT_MR_X_MR slice. + + Note that the rows-dimensions need not actually be CAT. + """ + + @lazyproperty + def sum(self): + """2D np.float64 ndarray of sum for each valid matrix cell.""" + return self._sums[:, :, 0] + + +class _MrXCatCubeSums(_BaseCubeSums): + """Sum cube-measure for an MR_X_NOT_MR slice. + + Note that the columns-dimension need not actually be CAT. + """ + + @lazyproperty + def sum(self): + """2D np.float64 ndarray of sum for each valid matrix cell.""" + return self._sums[:, 0, :] + + +class _MrXMrCubeSums(_BaseCubeSums): + """Sum cube-measure for an MR_X_MR slice.""" + + @lazyproperty + def sum(self): + """2D np.float64 ndarray of sum for each valid matrix cell.""" + # --- indexing is: all-rows, sel-only, all-cols, sel-only --- + return self._sums[:, 0, :, 0] + + # === UNWEIGHTED COUNTS === @@ -1042,15 +1196,6 @@ def columns_pruning_base(self): """ return np.sum(self._unweighted_counts, axis=0) - @lazyproperty - def means(self): - """2D np.float64 ndarray of mean for each valid matrix cell.""" - if self._means is None: - raise ValueError( - "`.means` is undefined for a cube-result without a means measure" - ) - return self._means - @lazyproperty def rows_base(self): """1D ndarray of np.int64 unweighted-N for each matrix row.""" @@ -1073,15 +1218,6 @@ def rows_pruning_base(self): """ return np.sum(self._unweighted_counts, axis=1) - @lazyproperty - def sum(self): - """2D np.float64 ndarray of sum for each valid matrix cell.""" - if self._sum is None: - raise ValueError( - "`.sum` is undefined for a cube-result without a sum measure" - ) - return self._sum - @lazyproperty def table_base(self): """np.int64 count of actual respondents who answered both questions. 
@@ -1234,15 +1370,6 @@ def columns_pruning_base(self): """ return np.sum(self._unweighted_counts, axis=(0, 2)) - @lazyproperty - def means(self): - """2D np.float64 ndarray of mean for each valid matrix cell.""" - if self._means is None: - raise ValueError( - "`.means` is undefined for a cube-result without a means measure" - ) - return self._means[:, :, 0] - @lazyproperty def rows_base(self): """2D np.int64 ndarray of row-wise unweighted-N for this matrix. @@ -1274,15 +1401,6 @@ def rows_pruning_base(self): """ return np.sum(self._unweighted_counts, axis=(1, 2)) - @lazyproperty - def sum(self): - """2D np.float64 ndarray of mean for each valid matrix cell.""" - if self._sum is None: - raise ValueError( - "`.sum` is undefined for a cube-result without a sum measure" - ) - return self._sum[:, :, 0] - @lazyproperty def table_base(self): """1D np.int64 unweighted N for each column of matrix. @@ -1424,15 +1542,6 @@ def columns_pruning_base(self): """ return np.sum(self._unweighted_counts, axis=(0, 1)) - @lazyproperty - def means(self): - """2D np.float64 ndarray of mean for each valid matrix cell.""" - if self._means is None: - raise ValueError( - "`.means` is undefined for a cube-result without a means measure" - ) - return self._means[:, 0, :] - @lazyproperty def rows_base(self): """1D ndarray of np.int64 unweighted-N for each matrix row. @@ -1459,15 +1568,6 @@ def rows_pruning_base(self): """ return np.sum(self._unweighted_counts, axis=(1, 2)) - @lazyproperty - def sum(self): - """2D np.float64 ndarray of sum for each valid matrix cell.""" - if self._sum is None: - raise ValueError( - "`.sum` is undefined for a cube-result without a sum measure" - ) - return self._sum[:, 0, :] - @lazyproperty def table_base(self): """1D np.int64 ndarray of unweighted N for each row of matrix. 
diff --git a/src/cr/cube/matrix/measure.py b/src/cr/cube/matrix/measure.py index 6e28cd103..8a080e555 100644 --- a/src/cr/cube/matrix/measure.py +++ b/src/cr/cube/matrix/measure.py @@ -7,7 +7,7 @@ import numpy as np from cr.cube.matrix.cubemeasure import CubeMeasures -from cr.cube.matrix.subtotals import SumSubtotals +from cr.cube.matrix.subtotals import SumSubtotals, NanSubtotals from cr.cube.util import lazyproperty @@ -45,6 +45,11 @@ def columns_pruning_base(self): """1D np.int64 ndarray of unweighted-N for each matrix column.""" return self._cube_measures.unweighted_cube_counts.columns_pruning_base + @lazyproperty + def means(self): + """_Mean measure object for this cube-result.""" + return _Mean(self._dimensions, self, self._cube_measures) + @lazyproperty def row_unweighted_bases(self): """_RowUnweightedBases measure object for this cube-result.""" @@ -60,6 +65,11 @@ def rows_pruning_base(self): """1D np.int64 ndarray of unweighted-N for each matrix row.""" return self._cube_measures.unweighted_cube_counts.rows_pruning_base + @lazyproperty + def sum(self): + """_Sum measure object for this cube-result.""" + return _Sum(self._dimensions, self, self._cube_measures) + @lazyproperty def table_unweighted_bases(self): """_TableUnweightedBases measure object for this cube-result.""" @@ -132,6 +142,10 @@ def _intersections(self): "%s must implement `._intersections`" % type(self).__name__ ) + @lazyproperty + def _means(self): + return self._cube_measures.means + @lazyproperty def _subtotal_columns(self): """2D np.int64 ndarray of inserted column proportions denominator value. @@ -152,6 +166,10 @@ def _subtotal_rows(self): "%s must implement `._subtotal_rows`" % type(self).__name__ ) + @lazyproperty + def _sum(self): + return self._cube_measures.sum + @lazyproperty def _unweighted_cube_counts(self): """_BaseUnweightedCubeCounts subclass instance for this measure.
@@ -341,6 +359,15 @@ def _subtotal_rows(self): ) +class _Mean(_BaseSecondOrderMeasure): + """Provides the mean measure for a matrix.""" + + @lazyproperty + def blocks(self): + """2D array of the four 2D "blocks" making up this measure.""" + return NanSubtotals.blocks(self._means.means, self._dimensions) + + class _RowUnweightedBases(_BaseSecondOrderMeasure): """Provides the row-unweighted-bases measure for a matrix. @@ -471,6 +498,15 @@ def _subtotal_rows(self): return SumSubtotals.subtotal_rows(self._base_values, self._dimensions) +class _Sum(_BaseSecondOrderMeasure): + """Provides the sum measure for a matrix.""" + + @lazyproperty + def blocks(self): + """2D array of the four 2D "blocks" making up this measure.""" + return NanSubtotals.blocks(self._sum.sum, self._dimensions) + + class _TableUnweightedBases(_BaseSecondOrderMeasure): """Provides the table-unweighted-bases measure for a matrix. diff --git a/src/cr/cube/stripe/cubemeasure.py b/src/cr/cube/stripe/cubemeasure.py index f715b7b7f..bbb3e24b7 100644 --- a/src/cr/cube/stripe/cubemeasure.py +++ b/src/cr/cube/stripe/cubemeasure.py @@ -31,7 +31,7 @@ def cube_means(self): @lazyproperty def cube_sum(self): """_BaseCubeMeans subclass object for this stripe.""" - return _BaseCubeSum.factory(self._cube, self._rows_dimension) + return _BaseCubeSums.factory(self._cube, self._rows_dimension) @lazyproperty def unweighted_cube_counts(self): @@ -114,11 +114,11 @@ def means(self): # === SUM === -class _BaseCubeSum(_BaseCubeMeasure): - """Base class for means cube-measure variants.""" +class _BaseCubeSums(_BaseCubeMeasure): + """Base class for sum cube-measure variants.""" def __init__(self, rows_dimension, sum): - super(_BaseCubeSum, self).__init__(rows_dimension) + super(_BaseCubeSums, self).__init__(rows_dimension) self._sum = sum @classmethod @@ -136,7 +136,7 @@ def sum(self): ) # pragma: no cover -class _CatCubeSum(_BaseCubeSum): +class _CatCubeSum(_BaseCubeSums): """Means cube-measure for a non-MR stripe.""" 
@lazyproperty @@ -149,7 +149,7 @@ def sum(self): return self._sum -class _MrCubeSum(_BaseCubeSum): +class _MrCubeSum(_BaseCubeSums): """Means cube-measure for an MR stripe. Its `.means` is a 2D ndarray with axes (rows, sel/not). """ @@ -278,23 +278,16 @@ def unweighted_counts(self): class _NumArrUnweightedCubeCounts(_BaseUnweightedCubeCounts): - """Unweighted-counts cube-measure for a non-MR stripe.""" + """Unweighted-counts cube-measure for a numeric array stripe.""" @lazyproperty def bases(self): - """1D np.int64 ndarray of table-proportion denonimator (base) for each row. - - Each row in a CAT stripe has the same base (the table-base). - """ + """1D np.int64 ndarray of table-proportion denominator for each cell.""" return self._unweighted_counts @lazyproperty def pruning_base(self): - """1D np.int64 ndarray of unweighted-N for each matrix row. - - Because this matrix has no MR dimension, this is simply the unweighted count for - each row. - """ + """1D np.int64 ndarray of unweighted-N for each matrix row.""" return self._unweighted_counts @lazyproperty diff --git a/src/cr/cube/stripe/measure.py b/src/cr/cube/stripe/measure.py index 09827c34e..bf1d6ca6c 100644 --- a/src/cr/cube/stripe/measure.py +++ b/src/cr/cube/stripe/measure.py @@ -311,31 +311,25 @@ def _weighted_counts(self): class _Sum(_BaseSecondOrderMeasure): - """Provides the means measure for a stripe. - Relies on the presence of a means cube-measure in the cube-result. + """Provides the sum measure for a stripe. + + Relies on the presence of a sum cube-measure in the cube-result. """ @lazyproperty def base_values(self): """1D np.float64 ndarray of sum for each row.""" - return self._cube_sum.sum + return self._cube_measures.cube_sum.sum @lazyproperty def subtotal_values(self): """1D ndarray of np.nan for each row-subtotal. + Sum values cannot be subtotaled and each subtotal value is unconditionally np.nan.
""" return NanSubtotals.subtotal_values(self.base_values, self._rows_dimension) - @lazyproperty - def _cube_sum(self): - """_BaseCubeMeans subclass instance for this measure. - Provides the means measures from the cube-result, encapsulating variation based - on dimensionality. - """ - return self._cube_measures.cube_sum - class _TableProportionStddevs(_BaseSecondOrderMeasure): """Provides the table-proportion standard-deviation measure for a stripe.""" diff --git a/tests/fixtures/mr-x-mr-means.json b/tests/fixtures/mr-x-mr-means.json new file mode 100644 index 000000000..75c2d21ea --- /dev/null +++ b/tests/fixtures/mr-x-mr-means.json @@ -0,0 +1,795 @@ +{ + "query": { + "measures": { + "count": { + "function": "cube_count", + "args": [] + }, + "mean": { + "function": "cube_mean", + "args": [ + { + "variable": "https://alpha.crunch.io/api/datasets/582e54b9691b4f1893f2830a076f2adc/variables/4mSyJJJr7OiwwqO2VPuXfM000002/" + } + ] + } + }, + "dimensions": [ + { + "function": "dimension", + "args": [ + { + "function": "as_selected", + "args": [ + { + "variable": "https://alpha.crunch.io/api/datasets/582e54b9691b4f1893f2830a076f2adc/variables/4mSyJJJr7OiwwqO2VPuXfM00000d/" + } + ] + }, + { + "value": "subvariables" + } + ] + }, + { + "function": "as_selected", + "args": [ + { + "variable": "https://alpha.crunch.io/api/datasets/582e54b9691b4f1893f2830a076f2adc/variables/4mSyJJJr7OiwwqO2VPuXfM00000d/" + } + ] + }, + { + "function": "dimension", + "args": [ + { + "function": "as_selected", + "args": [ + { + "variable": "https://alpha.crunch.io/api/datasets/582e54b9691b4f1893f2830a076f2adc/variables/4mSyJJJr7OiwwqO2VPuXfM00000d/" + } + ] + }, + { + "value": "subvariables" + } + ] + }, + { + "function": "as_selected", + "args": [ + { + "variable": "https://alpha.crunch.io/api/datasets/582e54b9691b4f1893f2830a076f2adc/variables/4mSyJJJr7OiwwqO2VPuXfM00000d/" + } + ] + } + ] + }, + "result": { + "dimensions": [ + { + "references": { + "subreferences": [ + { + "alias": "bool1", + 
"name": "Response #1", + "description": "bool1" + }, + { + "alias": "bool2", + "name": "Response #2", + "description": "bool2" + }, + { + "alias": "bool3", + "name": "Response #3", + "description": "bool3" + } + ], + "uniform_basis": false, + "description": "My multiple response set", + "name": "mymrset", + "alias": "mymrset" + }, + "derived": true, + "type": { + "subtype": { + "class": "variable" + }, + "elements": [ + { + "id": 1, + "value": { + "derived": false, + "references": { + "alias": "bool1", + "name": "Response #1", + "description": "bool1" + }, + "id": "0004" + }, + "missing": false + }, + { + "id": 2, + "value": { + "derived": false, + "references": { + "alias": "bool2", + "name": "Response #2", + "description": "bool2" + }, + "id": "0005" + }, + "missing": false + }, + { + "id": 3, + "value": { + "derived": false, + "references": { + "alias": "bool3", + "name": "Response #3", + "description": "bool3" + }, + "id": "0006" + }, + "missing": false + } + ], + "class": "enum" + } + }, + { + "derived": true, + "references": { + "subreferences": [ + { + "alias": "bool1", + "name": "Response #1", + "description": "bool1" + }, + { + "alias": "bool2", + "name": "Response #2", + "description": "bool2" + }, + { + "alias": "bool3", + "name": "Response #3", + "description": "bool3" + } + ], + "uniform_basis": false, + "alias": "mymrset", + "description": "My multiple response set", + "name": "mymrset" + }, + "type": { + "ordinal": false, + "subvariables": [ + "0004", + "0005", + "0006" + ], + "class": "categorical", + "categories": [ + { + "numeric_value": 1, + "selected": true, + "id": 1, + "missing": false, + "name": "Selected" + }, + { + "numeric_value": 0, + "missing": false, + "id": 0, + "name": "Other" + }, + { + "numeric_value": null, + "missing": true, + "id": -1, + "name": "No Data" + } + ] + } + }, + { + "references": { + "subreferences": [ + { + "alias": "bool1", + "name": "Response #1", + "description": "bool1" + }, + { + "alias": "bool2", + "name": 
"Response #2", + "description": "bool2" + }, + { + "alias": "bool3", + "name": "Response #3", + "description": "bool3" + } + ], + "uniform_basis": false, + "description": "My multiple response set", + "name": "mymrset", + "alias": "mymrset" + }, + "derived": true, + "type": { + "subtype": { + "class": "variable" + }, + "elements": [ + { + "id": 1, + "value": { + "derived": false, + "references": { + "alias": "bool1", + "name": "Response #1", + "description": "bool1" + }, + "id": "0004" + }, + "missing": false + }, + { + "id": 2, + "value": { + "derived": false, + "references": { + "alias": "bool2", + "name": "Response #2", + "description": "bool2" + }, + "id": "0005" + }, + "missing": false + }, + { + "id": 3, + "value": { + "derived": false, + "references": { + "alias": "bool3", + "name": "Response #3", + "description": "bool3" + }, + "id": "0006" + }, + "missing": false + } + ], + "class": "enum" + } + }, + { + "derived": true, + "references": { + "subreferences": [ + { + "alias": "bool1", + "name": "Response #1", + "description": "bool1" + }, + { + "alias": "bool2", + "name": "Response #2", + "description": "bool2" + }, + { + "alias": "bool3", + "name": "Response #3", + "description": "bool3" + } + ], + "uniform_basis": false, + "alias": "mymrset", + "description": "My multiple response set", + "name": "mymrset" + }, + "type": { + "ordinal": false, + "subvariables": [ + "0004", + "0005", + "0006" + ], + "class": "categorical", + "categories": [ + { + "numeric_value": 1, + "selected": true, + "id": 1, + "missing": false, + "name": "Selected" + }, + { + "numeric_value": 0, + "missing": false, + "id": 0, + "name": "Other" + }, + { + "numeric_value": null, + "missing": true, + "id": -1, + "name": "No Data" + } + ] + } + } + ], + "missing": 0, + "measures": { + "count": { + "data": [ + 3, + 0, + 0, + 2, + 1, + 0, + 0, + 3, + 0, + 0, + 2, + 0, + 1, + 1, + 0, + 0, + 2, + 0, + 0, + 0, + 1, + 1, + 0, + 0, + 0, + 1, + 0, + 2, + 1, + 1, + 4, + 0, + 0, + 0, + 4, + 0, + 1, + 
1, + 0, + 0, + 2, + 0, + 0, + 2, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 3, + 2, + 1, + 4, + 2, + 0, + 0, + 6, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0 + ], + "n_missing": 0, + "metadata": { + "references": {}, + "derived": true, + "type": { + "integer": true, + "missing_rules": {}, + "missing_reasons": { + "No Data": -1 + }, + "class": "numeric" + } + } + }, + "mean": { + "data": [ + { + "?": -8 + }, + { + "?": -8 + }, + { + "?": -8 + }, + { + "?": -8 + }, + { + "?": -8 + }, + { + "?": -8 + }, + { + "?": -8 + }, + { + "?": -8 + }, + { + "?": -8 + }, + { + "?": -8 + }, + 1.234, + { + "?": -8 + }, + 1.234, + { + "?": -8 + }, + { + "?": -8 + }, + { + "?": -8 + }, + 1.234, + { + "?": -8 + }, + { + "?": -8 + }, + { + "?": -8 + }, + 3.14159, + 3.14159, + { + "?": -8 + }, + { + "?": -8 + }, + { + "?": -8 + }, + 3.14159, + { + "?": -8 + }, + { + "?": -8 + }, + 1.234, + 3.14159, + 2.187795, + { + "?": -8 + }, + { + "?": -8 + }, + { + "?": -8 + }, + 2.187795, + { + "?": -8 + }, + { + "?": -8 + }, + { + "?": -8 + }, + { + "?": -8 + }, + { + "?": -8 + }, + { + "?": -8 + }, + { + "?": -8 + }, + { + "?": -8 + }, + { + "?": -8 + }, + { + "?": -8 + }, + { + "?": -8 + }, + { + "?": -8 + }, + { + "?": -8 + }, + { + "?": -8 + }, + { + "?": -8 + }, + { + "?": -8 + }, + { + "?": -8 + }, + { + "?": -8 + }, + { + "?": -8 + }, + { + "?": -8 + }, + { + "?": -8 + }, + { + "?": -8 + }, + { + "?": -8 + }, + { + "?": -8 + }, + { + "?": -8 + }, + { + "?": -8 + }, + { + "?": -8 + }, + { + "?": -8 + }, + { + "?": -8 + }, + 1.234, + 3.14159, + 2.187795, + { + "?": -8 + }, + { + "?": -8 + }, + { + "?": -8 + }, + 2.187795, + { + "?": -8 + }, + { + "?": -8 + }, + { + "?": -8 + }, + { + "?": -8 + }, + { + "?": -8 + }, + { + "?": -8 + }, + { + "?": -8 + }, + { + "?": -8 + }, + { + "?": -8 + }, + { + "?": -8 + } + ], + "n_missing": 4, + "metadata": { + "references": { + "alias": "z", + "format": { + "data": { + "digits": 2 + }, + 
"summary": { + "digits": 2 + } + }, + "description": "Numeric variable with missing value range", + "name": "z" + }, + "derived": true, + "type": { + "integer": true, + "missing_rules": {}, + "missing_reasons": { + "No Data": -1, + "NaN": -8 + }, + "class": "numeric" + } + } + } + }, + "n": 6, + "filter_stats": { + "filtered_complete": { + "unweighted": { + "selected": 6, + "other": 0, + "missing": 0 + }, + "weighted": { + "selected": 6, + "other": 0, + "missing": 0 + } + }, + "filtered": { + "unweighted": { + "selected": 6, + "other": 0, + "missing": 0 + }, + "weighted": { + "selected": 6, + "other": 0, + "missing": 0 + } + } + }, + "unfiltered": { + "unweighted_n": 6, + "weighted_n": 6 + }, + "filtered": { + "unweighted_n": 6, + "weighted_n": 6 + }, + "counts": [ + 3, + 0, + 0, + 2, + 1, + 0, + 0, + 3, + 0, + 0, + 2, + 0, + 1, + 1, + 0, + 0, + 2, + 0, + 0, + 0, + 1, + 1, + 0, + 0, + 0, + 1, + 0, + 2, + 1, + 1, + 4, + 0, + 0, + 0, + 4, + 0, + 1, + 1, + 0, + 0, + 2, + 0, + 0, + 2, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 3, + 2, + 1, + 4, + 2, + 0, + 0, + 6, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0 + ], + "element": "crunch:cube" + } +} diff --git a/tests/fixtures/mr-x-mr-sum.json b/tests/fixtures/mr-x-mr-sum.json new file mode 100644 index 000000000..e26979df3 --- /dev/null +++ b/tests/fixtures/mr-x-mr-sum.json @@ -0,0 +1,795 @@ +{ + "query": { + "measures": { + "count": { + "function": "cube_count", + "args": [] + }, + "sum": { + "function": "cube_sum", + "args": [ + { + "variable": "https://alpha.crunch.io/api/datasets/582e54b9691b4f1893f2830a076f2adc/variables/4mSyJJJr7OiwwqO2VPuXfM000002/" + } + ] + } + }, + "dimensions": [ + { + "function": "dimension", + "args": [ + { + "function": "as_selected", + "args": [ + { + "variable": "https://alpha.crunch.io/api/datasets/582e54b9691b4f1893f2830a076f2adc/variables/4mSyJJJr7OiwwqO2VPuXfM00000d/" + } + ] + }, + { + "value": "subvariables" + } + ] + }, + { 
+ "function": "as_selected", + "args": [ + { + "variable": "https://alpha.crunch.io/api/datasets/582e54b9691b4f1893f2830a076f2adc/variables/4mSyJJJr7OiwwqO2VPuXfM00000d/" + } + ] + }, + { + "function": "dimension", + "args": [ + { + "function": "as_selected", + "args": [ + { + "variable": "https://alpha.crunch.io/api/datasets/582e54b9691b4f1893f2830a076f2adc/variables/4mSyJJJr7OiwwqO2VPuXfM00000d/" + } + ] + }, + { + "value": "subvariables" + } + ] + }, + { + "function": "as_selected", + "args": [ + { + "variable": "https://alpha.crunch.io/api/datasets/582e54b9691b4f1893f2830a076f2adc/variables/4mSyJJJr7OiwwqO2VPuXfM00000d/" + } + ] + } + ] + }, + "result": { + "dimensions": [ + { + "references": { + "subreferences": [ + { + "alias": "bool1", + "name": "Response #1", + "description": "bool1" + }, + { + "alias": "bool2", + "name": "Response #2", + "description": "bool2" + }, + { + "alias": "bool3", + "name": "Response #3", + "description": "bool3" + } + ], + "uniform_basis": false, + "description": "My multiple response set", + "name": "mymrset", + "alias": "mymrset" + }, + "derived": true, + "type": { + "subtype": { + "class": "variable" + }, + "elements": [ + { + "id": 1, + "value": { + "derived": false, + "references": { + "alias": "bool1", + "name": "Response #1", + "description": "bool1" + }, + "id": "0004" + }, + "missing": false + }, + { + "id": 2, + "value": { + "derived": false, + "references": { + "alias": "bool2", + "name": "Response #2", + "description": "bool2" + }, + "id": "0005" + }, + "missing": false + }, + { + "id": 3, + "value": { + "derived": false, + "references": { + "alias": "bool3", + "name": "Response #3", + "description": "bool3" + }, + "id": "0006" + }, + "missing": false + } + ], + "class": "enum" + } + }, + { + "derived": true, + "references": { + "subreferences": [ + { + "alias": "bool1", + "name": "Response #1", + "description": "bool1" + }, + { + "alias": "bool2", + "name": "Response #2", + "description": "bool2" + }, + { + "alias": 
"bool3", + "name": "Response #3", + "description": "bool3" + } + ], + "uniform_basis": false, + "alias": "mymrset", + "description": "My multiple response set", + "name": "mymrset" + }, + "type": { + "ordinal": false, + "subvariables": [ + "0004", + "0005", + "0006" + ], + "class": "categorical", + "categories": [ + { + "numeric_value": 1, + "selected": true, + "id": 1, + "missing": false, + "name": "Selected" + }, + { + "numeric_value": 0, + "missing": false, + "id": 0, + "name": "Other" + }, + { + "numeric_value": null, + "missing": true, + "id": -1, + "name": "No Data" + } + ] + } + }, + { + "references": { + "subreferences": [ + { + "alias": "bool1", + "name": "Response #1", + "description": "bool1" + }, + { + "alias": "bool2", + "name": "Response #2", + "description": "bool2" + }, + { + "alias": "bool3", + "name": "Response #3", + "description": "bool3" + } + ], + "uniform_basis": false, + "description": "My multiple response set", + "name": "mymrset", + "alias": "mymrset" + }, + "derived": true, + "type": { + "subtype": { + "class": "variable" + }, + "elements": [ + { + "id": 1, + "value": { + "derived": false, + "references": { + "alias": "bool1", + "name": "Response #1", + "description": "bool1" + }, + "id": "0004" + }, + "missing": false + }, + { + "id": 2, + "value": { + "derived": false, + "references": { + "alias": "bool2", + "name": "Response #2", + "description": "bool2" + }, + "id": "0005" + }, + "missing": false + }, + { + "id": 3, + "value": { + "derived": false, + "references": { + "alias": "bool3", + "name": "Response #3", + "description": "bool3" + }, + "id": "0006" + }, + "missing": false + } + ], + "class": "enum" + } + }, + { + "derived": true, + "references": { + "subreferences": [ + { + "alias": "bool1", + "name": "Response #1", + "description": "bool1" + }, + { + "alias": "bool2", + "name": "Response #2", + "description": "bool2" + }, + { + "alias": "bool3", + "name": "Response #3", + "description": "bool3" + } + ], + "uniform_basis": 
false, + "alias": "mymrset", + "description": "My multiple response set", + "name": "mymrset" + }, + "type": { + "ordinal": false, + "subvariables": [ + "0004", + "0005", + "0006" + ], + "class": "categorical", + "categories": [ + { + "numeric_value": 1, + "selected": true, + "id": 1, + "missing": false, + "name": "Selected" + }, + { + "numeric_value": 0, + "missing": false, + "id": 0, + "name": "Other" + }, + { + "numeric_value": null, + "missing": true, + "id": -1, + "name": "No Data" + } + ] + } + } + ], + "missing": 0, + "measures": { + "count": { + "data": [ + 3, + 0, + 0, + 2, + 1, + 0, + 0, + 3, + 0, + 0, + 2, + 0, + 1, + 1, + 0, + 0, + 2, + 0, + 0, + 0, + 1, + 1, + 0, + 0, + 0, + 1, + 0, + 2, + 1, + 1, + 4, + 0, + 0, + 0, + 4, + 0, + 1, + 1, + 0, + 0, + 2, + 0, + 0, + 2, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 3, + 2, + 1, + 4, + 2, + 0, + 0, + 6, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0 + ], + "n_missing": 0, + "metadata": { + "references": {}, + "derived": true, + "type": { + "integer": true, + "missing_rules": {}, + "missing_reasons": { + "No Data": -1 + }, + "class": "numeric" + } + } + }, + "sum": { + "data": [ + { + "?": -8 + }, + { + "?": -8 + }, + { + "?": -8 + }, + { + "?": -8 + }, + { + "?": -8 + }, + { + "?": -8 + }, + { + "?": -8 + }, + { + "?": -8 + }, + { + "?": -8 + }, + { + "?": -8 + }, + 1, + { + "?": -8 + }, + 4, + { + "?": -8 + }, + { + "?": -8 + }, + { + "?": -8 + }, + 1, + { + "?": -8 + }, + { + "?": -8 + }, + { + "?": -8 + }, + 3, + 3, + { + "?": -8 + }, + { + "?": -8 + }, + { + "?": -8 + }, + 3, + { + "?": -8 + }, + { + "?": -8 + }, + 1, + 3, + 2, + { + "?": -8 + }, + { + "?": -8 + }, + { + "?": -8 + }, + 2, + { + "?": -8 + }, + { + "?": -8 + }, + { + "?": -8 + }, + { + "?": -8 + }, + { + "?": -8 + }, + { + "?": -8 + }, + { + "?": -8 + }, + { + "?": -8 + }, + { + "?": -8 + }, + { + "?": -8 + }, + { + "?": -8 + }, + { + "?": -8 + }, + { + "?": -8 + }, + { + "?": -8 + }, + 
{ + "?": -8 + }, + { + "?": -8 + }, + { + "?": -8 + }, + { + "?": -8 + }, + { + "?": -8 + }, + { + "?": -8 + }, + { + "?": -8 + }, + { + "?": -8 + }, + { + "?": -8 + }, + { + "?": -8 + }, + { + "?": -8 + }, + { + "?": -8 + }, + { + "?": -8 + }, + { + "?": -8 + }, + { + "?": -8 + }, + 1, + 3, + 2, + { + "?": -8 + }, + { + "?": -8 + }, + { + "?": -8 + }, + 2, + { + "?": -8 + }, + { + "?": -8 + }, + { + "?": -8 + }, + { + "?": -8 + }, + { + "?": -8 + }, + { + "?": -8 + }, + { + "?": -8 + }, + { + "?": -8 + }, + { + "?": -8 + }, + { + "?": -8 + } + ], + "n_missing": 4, + "metadata": { + "references": { + "alias": "z", + "format": { + "data": { + "digits": 2 + }, + "summary": { + "digits": 2 + } + }, + "description": "Numeric variable with missing value range", + "name": "z" + }, + "derived": true, + "type": { + "integer": true, + "missing_rules": {}, + "missing_reasons": { + "No Data": -1, + "NaN": -8 + }, + "class": "numeric" + } + } + } + }, + "n": 6, + "filter_stats": { + "filtered_complete": { + "unweighted": { + "selected": 6, + "other": 0, + "missing": 0 + }, + "weighted": { + "selected": 6, + "other": 0, + "missing": 0 + } + }, + "filtered": { + "unweighted": { + "selected": 6, + "other": 0, + "missing": 0 + }, + "weighted": { + "selected": 6, + "other": 0, + "missing": 0 + } + } + }, + "unfiltered": { + "unweighted_n": 6, + "weighted_n": 6 + }, + "filtered": { + "unweighted_n": 6, + "weighted_n": 6 + }, + "counts": [ + 3, + 0, + 0, + 2, + 1, + 0, + 0, + 3, + 0, + 0, + 2, + 0, + 1, + 1, + 0, + 0, + 2, + 0, + 0, + 0, + 1, + 1, + 0, + 0, + 0, + 1, + 0, + 2, + 1, + 1, + 4, + 0, + 0, + 0, + 4, + 0, + 1, + 1, + 0, + 0, + 2, + 0, + 0, + 2, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 3, + 2, + 1, + 4, + 2, + 0, + 0, + 6, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0 + ], + "element": "crunch:cube" + } +} diff --git a/tests/integration/test_cubepart.py b/tests/integration/test_cubepart.py index 69055360e..76aeeb896 
100644 --- a/tests/integration/test_cubepart.py +++ b/tests/integration/test_cubepart.py @@ -340,6 +340,23 @@ def it_provides_values_for_mean_cat_x_cat_hs(self): assert slice_.rows_margin.tolist() == [1500.0] assert slice_.columns_margin.tolist() == [189, 395, 584, 606, 310] + def it_provides_values_for_mr_x_mr_means(self): + slice_ = Cube(CR.MR_X_MR_MEANS).partitions[0] + + assert slice_.counts == pytest.approx( + np.array([[3, 2, 0], [2, 4, 0], [0, 0, 0]]) + ) + assert slice_.means == pytest.approx( + np.array( + [ + [np.nan, np.nan, np.nan], + [np.nan, 2.187795, np.nan], + [np.nan, np.nan, np.nan], + ] + ), + nan_ok=True, + ) + def it_provides_values_for_mr_x_cat_hs(self): slice_ = Cube(CR.MR_X_CAT_HS_MT).partitions[0] @@ -807,6 +824,20 @@ def it_knows_its_selected_categories(self): assert slice_.selected_category_labels == ("Very Favorable",) + def it_provides_sum_measure_for_mr_x_mr(self): + slice_ = Cube(CR.MR_X_MR_SUM).partitions[0] + + assert slice_.sum == pytest.approx( + np.array( + [ + [np.nan, np.nan, np.nan], + [np.nan, 2.0, np.nan], + [np.nan, np.nan, np.nan], + ] + ), + nan_ok=True, + ) + class Describe_Strand(object): """Integration-test suite for `cr.cube.cubepart._Strand` object.""" diff --git a/tests/unit/matrix/test_assembler.py b/tests/unit/matrix/test_assembler.py index c9e5f0a43..6a37c63fa 100644 --- a/tests/unit/matrix/test_assembler.py +++ b/tests/unit/matrix/test_assembler.py @@ -248,51 +248,19 @@ def it_knows_the_inserted_row_idxs(self, _row_order_prop_): _row_order_prop_.return_value = [0, 1, -2, 2, -1, 3] assert Assembler(None, None, None).inserted_row_idxs == (2, 4) - def it_knows_the_means( - self, - request, - cube_, - _cube_result_matrix_prop_, - dimensions_, - NanSubtotals_, - _assemble_matrix_, - ): - cube_result_matrix_ = instance_mock( - request, _CatXCatMatrix, means=[[1, 2], [3, 4]] + def it_knows_the_means(self, request, dimensions_): + property_mock( + request, Assembler, "means", return_value=np.array([1.2, 1.34, 3.3]) ) - 
_cube_result_matrix_prop_.return_value = cube_result_matrix_ - NanSubtotals_.blocks.return_value = [[[3], [2]], [[4], [1]]] - _assemble_matrix_.return_value = [[1, 2, 3], [4, 5, 6]] - assembler = Assembler(cube_, dimensions_, None) - - means = assembler.means - - NanSubtotals_.blocks.assert_called_once_with([[1, 2], [3, 4]], dimensions_) - _assemble_matrix_.assert_called_once_with(assembler, [[[3], [2]], [[4], [1]]]) - assert means == [[1, 2, 3], [4, 5, 6]] + assembler = Assembler(None, dimensions_, None) - def it_knows_the_sum( - self, - request, - cube_, - _cube_result_matrix_prop_, - dimensions_, - NanSubtotals_, - _assemble_matrix_, - ): - cube_result_matrix_ = instance_mock( - request, _CatXCatMatrix, sum=[[1, 2], [3, 4]] - ) - _cube_result_matrix_prop_.return_value = cube_result_matrix_ - NanSubtotals_.blocks.return_value = [[[3], [2]], [[4], [1]]] - _assemble_matrix_.return_value = [[1, 2, 3], [4, 5, 6]] - assembler = Assembler(cube_, dimensions_, None) + assert assembler.means == pytest.approx([1.2, 1.34, 3.3]) - sum = assembler.sum + def it_knows_the_sum(self, request, dimensions_): + property_mock(request, Assembler, "sum", return_value=np.array([4, 5, 6])) + assembler = Assembler(None, dimensions_, None) - NanSubtotals_.blocks.assert_called_once_with([[1, 2], [3, 4]], dimensions_) - _assemble_matrix_.assert_called_once_with(assembler, [[[3], [2]], [[4], [1]]]) - assert sum == [[1, 2, 3], [4, 5, 6]] + assert assembler.sum == pytest.approx([4, 5, 6]) def it_knows_the_pvalues(self, request): property_mock( diff --git a/tests/unit/matrix/test_cubemeasure.py b/tests/unit/matrix/test_cubemeasure.py index 0462e490e..66ffafd1e 100644 --- a/tests/unit/matrix/test_cubemeasure.py +++ b/tests/unit/matrix/test_cubemeasure.py @@ -1282,38 +1282,6 @@ def it_knows_its_table_proportion_variances_to_help(self, request): np.array([[0.0, 0.0622222, 0.1155556], [0.16, 0.1955556, 0.2222222]]), ) - def it_knows_its_means(self): - cube_means = np.array([[2, 3, 1], [5, 6, 4]]) - 
matrix = _CatXCatMatrix(None, None, None, means=cube_means) - - assert matrix.means.tolist() == [[2, 3, 1], [5, 6, 4]] - - def but_it_raises_value_error_when_the_cube_result_does_not_contain_means(self): - matrix = _CatXCatMatrix(None, None, None, means=None) - with pytest.raises(ValueError) as e: - matrix.means - - assert ( - str(e.value) - == "`.means` is undefined for a cube-result without a means measure" - ) - - def it_knows_its_sum(self): - cube_sum = np.array([[4, 3, 1], [5, 9, 4]]) - matrix = _CatXCatMatrix(None, None, None, sum=cube_sum) - - assert matrix.sum.tolist() == [[4, 3, 1], [5, 9, 4]] - - def but_it_raises_value_error_when_the_cube_result_does_not_contain_sum(self): - matrix = _CatXCatMatrix(None, None, None, sum=None) - with pytest.raises(ValueError) as e: - matrix.sum - - assert ( - str(e.value) - == "`.sum` is undefined for a cube-result without a sum measure" - ) - class Describe_CatXMrMatrix(object): """Unit test suite for `cr.cube.matrix._CatXMrMatrix` object.""" @@ -1473,40 +1441,6 @@ def it_knows_its_table_proportion_variances_to_help(self, request): np.array([[0.0, 0.0826446, 0.1155556], [0.244898, 0.231405, 0.2222222]]), ) - def it_knows_its_means(self): - means = np.array([[[1, 6], [2, 5], [3, 4]], [[5, 3], [6, 2], [7, 1]]]) - np.testing.assert_equal( - _CatXMrMatrix(None, None, None, means=means).means, - np.array([[1, 2, 3], [5, 6, 7]]), - ) - - def but_it_raises_value_error_when_the_cube_result_does_not_contain_means(self): - matrix = _CatXMrMatrix(None, None, None, means=None) - with pytest.raises(ValueError) as e: - matrix.means - - assert ( - str(e.value) - == "`.means` is undefined for a cube-result without a means measure" - ) - - def it_knows_its_sum(self): - sum = np.array([[[4, 6], [2, 5], [1, 4]], [[5, 3], [9, 2], [7, 1]]]) - np.testing.assert_equal( - _CatXMrMatrix(None, None, None, sum=sum).sum, - np.array([[4, 2, 1], [5, 9, 7]]), - ) - - def but_it_raises_value_error_when_the_cube_result_does_not_contain_sum(self): - 
matrix = _CatXMrMatrix(None, None, None, sum=None) - with pytest.raises(ValueError) as e: - matrix.sum - - assert ( - str(e.value) - == "`.sum` is undefined for a cube-result without a sum measure" - ) - class Describe_MrXCatMatrix(object): """Unit test suite for `cr.cube.matrix._MrXCatMatrix` object.""" @@ -1720,40 +1654,6 @@ def it_knows_its_table_proportion_variances_to_help(self, request): np.array([[0.0, 0.0622222, 0.1155556], [0.1038062, 0.118416, 0.1322568]]), ) - def it_knows_its_means(self): - means = np.arange(24).reshape((3, 2, 4)) - np.testing.assert_equal( - _MrXCatMatrix(None, None, None, means=means).means, - np.array([[0, 1, 2, 3], [8, 9, 10, 11], [16, 17, 18, 19]]), - ) - - def but_it_raises_value_error_when_the_cube_result_does_not_contain_means(self): - matrix = _MrXCatMatrix(None, None, None, means=None) - with pytest.raises(ValueError) as e: - matrix.means - - assert ( - str(e.value) - == "`.means` is undefined for a cube-result without a means measure" - ) - - def it_knows_its_sum(self): - sum = np.arange(24).reshape((3, 2, 4)) - np.testing.assert_equal( - _MrXCatMatrix(None, None, None, sum=sum).sum, - np.array([[0, 1, 2, 3], [8, 9, 10, 11], [16, 17, 18, 19]]), - ) - - def but_it_raises_value_error_when_the_cube_result_does_not_contain_sum(self): - matrix = _MrXCatMatrix(None, None, None, sum=None) - with pytest.raises(ValueError) as e: - matrix.sum - - assert ( - str(e.value) - == "`.sum` is undefined for a cube-result without a sum measure" - ) - class Describe_MrXMrMatrix(object): """Unit test suite for `cr.cube.matrix._MrXMrMatrix` object.""" diff --git a/tests/unit/stripe/test_cubemeasure.py b/tests/unit/stripe/test_cubemeasure.py index 092f3e84c..b804b90c6 100644 --- a/tests/unit/stripe/test_cubemeasure.py +++ b/tests/unit/stripe/test_cubemeasure.py @@ -10,7 +10,7 @@ from cr.cube.enums import DIMENSION_TYPE as DT from cr.cube.stripe.cubemeasure import ( _BaseCubeMeans, - _BaseCubeSum, + _BaseCubeSums, _BaseUnweightedCubeCounts, 
_BaseWeightedCubeCounts, _CatCubeMeans, @@ -48,14 +48,14 @@ def it_provides_access_to_the_cube_means_object( def it_provides_access_to_the_cube_sum_object( self, request, cube_, rows_dimension_ ): - cube_sum_ = instance_mock(request, _BaseCubeSum) - _BaseCubeSum_ = class_mock(request, "cr.cube.stripe.cubemeasure._BaseCubeSum") - _BaseCubeSum_.factory.return_value = cube_sum_ + cube_sum_ = instance_mock(request, _BaseCubeSums) + _BaseCubeSums_ = class_mock(request, "cr.cube.stripe.cubemeasure._BaseCubeSums") + _BaseCubeSums_.factory.return_value = cube_sum_ cube_measures = CubeMeasures(cube_, rows_dimension_, None, None) cube_sum = cube_measures.cube_sum - _BaseCubeSum_.factory.assert_called_once_with(cube_, rows_dimension_) + _BaseCubeSums_.factory.assert_called_once_with(cube_, rows_dimension_) assert cube_sum is cube_sum_ def it_provides_access_to_the_unweighted_cube_counts_object( @@ -175,8 +175,8 @@ def but_it_raises_value_error_when_the_cube_result_does_not_contain_means(self): # === SUM === -class Describe_BaseCubeSum(object): - """Unit test suite for `cr.cube.matrix.cubemeasure._BaseCubeSum`.""" +class Describe_BaseCubeSums(object): + """Unit test suite for `cr.cube.stripe.cubemeasure._BaseCubeSums`.""" @pytest.mark.parametrize( "rows_dimension_type, CubeSumCls, sum", @@ -199,7 +199,7 @@ def it_provides_a_factory_for_constructing_unweighted_cube_count_objects( return_value=cube_sum_, ) - cube_sum = _BaseCubeSum.factory(cube_, rows_dimension_) + cube_sum = _BaseCubeSums.factory(cube_, rows_dimension_) CubeSumCls_.assert_called_once_with(rows_dimension_, sum) assert cube_sum is cube_sum_