diff --git a/src/cr/cube/crunch_cube.py b/src/cr/cube/crunch_cube.py index e8b9e2d51..0e2b4eac9 100644 --- a/src/cr/cube/crunch_cube.py +++ b/src/cr/cube/crunch_cube.py @@ -2,7 +2,7 @@ """Provides the CrunchCube class. -CrunchCube is the main API class for manipulating the Crunch.io JSON cube +CrunchCube is the main API class for manipulating Crunch.io JSON cube responses. """ @@ -61,17 +61,7 @@ def __init__(self, response): So we need to check its type, and convert it to a dictionary if it's JSON, if possible. """ - try: - if not isinstance(response, dict): - response = json.loads(response) - self._cube = response.get('value', response) - except TypeError: - # If an unexpected type is provided raise descriptive exception. - if not isinstance(response, dict): - raise TypeError(( - 'Unsupported type provided: {}. ' - 'A `cube` must be JSON or `dict`.' - ).format(type(response))) + self._cube_response_arg = response def __repr__(self): """Provide text representation suitable for working at console. @@ -95,8 +85,8 @@ def __repr__(self): except Exception: return super(CrunchCube, self).__repr__() - def as_array(self, include_missing=False, weighted=True, adjusted=False, - include_transforms_for_dims=None, prune=False, margin=False): + def as_array(self, include_missing=False, weighted=True, + include_transforms_for_dims=None, prune=False): """Return `ndarray` representing cube values. Returns the tabular representation of the crunch cube. 
The returned @@ -128,9 +118,7 @@ def as_array(self, include_missing=False, weighted=True, adjusted=False, array = self._as_array( include_missing=include_missing, weighted=weighted, - adjusted=adjusted, - include_transforms_for_dims=include_transforms_for_dims, - margin=margin + include_transforms_for_dims=include_transforms_for_dims ) # ---prune array if pruning was requested--- @@ -142,18 +130,11 @@ def as_array(self, include_missing=False, weighted=True, adjusted=False, return self._drop_mr_cat_dims(array) def count(self, weighted=True): - """Get cube's count with automatic weighted/unweighted selection.""" - if weighted and self.is_weighted: - return sum( - self._cube['result']['measures'].get('count', {}).get('data') - ) - return self._cube['result']['n'] - - @lazyproperty - def counts(self): - unfiltered = self._cube['result'].get('unfiltered') - filtered = self._cube['result'].get('filtered') - return unfiltered, filtered + """Return numeric count of rows considered for cube response.""" + return ( + self._measures.weighted_n if weighted + else self._measures.unweighted_n + ) @lazyproperty def description(self): @@ -183,7 +164,7 @@ def dimensions(self): @lazyproperty def filter_annotation(self): """Get cube's filter annotation.""" - return self._cube.get('filter_names', []) + return self._cube_dict.get('filter_names', []) def get_slices(self, ca_as_0th=False): """Return list of :class:`.CubeSlice` objects. 
@@ -203,11 +184,8 @@ def get_slices(self, ca_as_0th=False): @lazyproperty def has_means(self): - """True if cube contains means data.""" - measures = self._cube.get('result', {}).get('measures') - if not measures: - return False - return measures.get('mean', None) is not None + """True if cube includes a means measure.""" + return self._measures.means is not None @lazyproperty def has_mr(self): @@ -268,15 +246,8 @@ def is_univariate_ca(self): @lazyproperty def is_weighted(self): - """Check if the cube dataset is weighted.""" - weighted = self._cube.get('query', {}).get('weight', None) is not None - weighted = weighted or self._cube.get('weight_var', None) is not None - weighted = weighted or self._cube.get('weight_url', None) is not None - weighted = weighted or ( - self._cube['result']['counts'] != - self._cube['result']['measures'].get('count', {}).get('data') - ) - return weighted + """True if cube response contains weighted data.""" + return self._measures.is_weighted def labels(self, include_missing=False, include_transforms_for_dims=False): """Gets labels for each cube's dimension. 
@@ -358,7 +329,7 @@ def hs_dims_for_den(hs_dims, axis): axis = [axis] return [dim for dim in hs_dims if dim not in axis] - table = self._data(weighted=weighted, margin=True) + table = self._counts(weighted).raw_cube_array new_axis = self._adjust_axis(axis) index = tuple( None if i in new_axis else slice(None) @@ -418,9 +389,7 @@ def hs_dims_for_den(hs_dims, axis): @lazyproperty def missing(self): """Get missing count of a cube.""" - if self.has_means: - return self._cube['result']['measures']['mean']['n_missing'] - return self._cube['result'].get('missing') + return self._measures.missing_count @lazyproperty def mr_dim_ind(self): @@ -538,15 +507,14 @@ def population_counts(self, population_size, weighted=True, @lazyproperty def population_fraction(self): - try: - unfiltered, filtered = self.counts - num = filtered.get('weighted_n') - den = unfiltered.get('weighted_n') - return num / den - except ZeroDivisionError: - return np.nan - except Exception: - return 1 + """The filtered/unfiltered ratio for cube response. + + This value is required for properly calculating population on a cube + where a filter has been applied. Returns 1.0 for an unfiltered cube. + Returns `np.nan` if the unfiltered count is zero, which would + otherwise result in a divide-by-zero error. + """ + return self._measures.population_fraction def proportions(self, axis=None, weighted=True, include_transforms_for_dims=None, include_missing=False, @@ -619,7 +587,7 @@ def hs_dims_for_den(hs_dims, axis): axis = [axis] return [dim for dim in hs_dims if dim not in axis] - table = self._data(weighted) + table = self._measure(weighted).raw_cube_array new_axis = self._adjust_axis(axis) index = tuple( None if i in new_axis else slice(None) @@ -847,7 +815,7 @@ def _all_dimensions(self): dimension-pair is suppressed). 
""" return AllDimensions( - dimension_dicts=self._cube['result']['dimensions'] + dimension_dicts=self._cube_dict['result']['dimensions'] ) def _apply_missings_and_insertions(self, res, include_transforms_for_dims, @@ -900,36 +868,24 @@ def _apply_missings_and_insertions(self, res, include_transforms_for_dims, return res[np.ix_(*new_valids)] if new_valids else res def _as_array(self, include_missing=False, get_non_selected=False, - weighted=True, adjusted=False, - include_transforms_for_dims=False, margin=False): + weighted=True, include_transforms_for_dims=False): """Get crunch cube as ndarray. Args include_missing (bool): Include rows/cols for missing values. get_non_selected (bool): Get non-selected slices for MR vars. weighted (bool): Take weighted or unweighted counts. - adjusted (bool): If adjusted, add + 1 to the resulting array. This - is needed when calculating statistical significance. include_transforms_for_dims (list): For which dims to include headings & subtotals (H&S) transformations. - margin (bool): Designates whether array is intented for margin - calculation. This essentially tells the CrunchCube to use - counts (and not means, or other measures) as the result. Returns res (ndarray): Tabular representation of crunch cube """ - res = self._apply_missings_and_insertions( - self._raw_cube_array(weighted, margin), + return self._apply_missings_and_insertions( + self._measure(weighted).raw_cube_array, include_transforms_for_dims, include_missing=include_missing ) - # ---prepare resulting array for sig-testing if requested--- - if adjusted: - res += 1 - - return res - @classmethod def _calculate_constraints_sum(cls, prop_table, prop_margin, axis): """Calculate sum of constraints (part of the standard error equation). @@ -984,6 +940,19 @@ def _calculate_std_res(self, counts, total, colsum, rowsum, slice_): def _col_direction_axis(self): return self.ndim - 2 + def _counts(self, weighted): + """Return _BaseMeasure subclass for *weighted* counts. 
+ + The return value is a _WeightedCountMeasure object if *weighted* is + True and the cube response is weighted. Otherwise it is an + _UnweightedCountMeasure object. Any means measure that may be present + is not considered. Contrast with `._measure()` below. + """ + return ( + self._measures.weighted_counts if weighted else + self._measures.unweighted_counts + ) + @staticmethod def _create_mask(res, row_prune_inds, col_prune_inds): mask_rows = np.repeat( @@ -994,16 +963,23 @@ def _create_mask(res, row_prune_inds, col_prune_inds): ) return np.logical_or(mask_rows, mask_cols) - def _data(self, weighted, margin=False): - """Get the data in non-flattened shape. - - Converts the flattened shape (original response) into non-flattened - shape (count of elements per cube dimension). E.g. for a CAT x CAT - cube, with 2 categories in each dimension (variable), we end up with - a ndarray of shape (2, 2). - """ - values = self._flat_values(weighted, margin) - return np.array(values).reshape(self._shape) + @lazyproperty + def _cube_dict(self): + """dict containing raw cube response, parsed from JSON payload.""" + try: + cube_response = self._cube_response_arg + # ---parse JSON to a dict when constructed with JSON--- + cube_dict = ( + cube_response if isinstance(cube_response, dict) + else json.loads(cube_response) + ) + # ---cube is 'value' item in a shoji response--- + return cube_dict.get('value', cube_dict) + except TypeError: + raise TypeError( + 'Unsupported type <%s> provided. Cube response must be JSON ' + '(str) or dict.' % type(self._cube_response_arg).__name__ + ) def _drop_mr_cat_dims(self, array, fix_valids=False): """Return ndarray reflecting *array* with MR_CAT dims dropped. @@ -1068,25 +1044,6 @@ def _fix_valid_indices(cls, valid_indices, insertion_index, dim): valid_indices[dim] = indices.tolist() return valid_indices - def _flat_values(self, weighted, margin=False): - """Return list of measure values as found in cube response. 
- - If *weighted* is True, weighted counts are returned if present in the - cube. Otherwise, unweighted counts are returned. If *margin* is True, - counts are returned even if mean values are present, which may be - preferred for example when calculating a margin. - """ - values = self._cube['result']['counts'] - if self.has_means and not margin: - mean = self._cube['result']['measures'].get('mean', {}) - values = mean.get('data', values) - elif weighted and self.is_weighted: - count = self._cube['result']['measures'].get('count', {}) - values = count.get('data', values) - values = [(val if not type(val) is dict else np.nan) - for val in values] - return values - def _inserted_dim_inds(self, transform_dims, axis): dim_ind = axis if self.ndim < 3 else axis + 1 if not transform_dims or dim_ind not in transform_dims: @@ -1177,6 +1134,32 @@ def _margin_pruned_indices(margin, inserted_ind, axis): return pruned_ind + def _measure(self, weighted): + """_BaseMeasure subclass representing primary measure for this cube. + + If the cube response includes a means measure, the return value is + means. Otherwise it is counts, with the choice between weighted or + unweighted determined by *weighted*. + + Note that weighted counts are provided on an "as-available" basis. + When *weighted* is True and the cube response is not weighted, + unweighted counts are returned. + """ + return ( + self._measures.means if self._measures.means is not None else + self._measures.weighted_counts if weighted else + self._measures.unweighted_counts + ) + + @lazyproperty + def _measures(self): + """_Measures object for this cube. + + Provides access to unweighted counts, and weighted counts and/or means + when available. + """ + return _Measures(self._cube_dict, self._all_dimensions) + def _prune_3d_body(self, res, transforms): """Return masked array where mask indicates pruned vectors. 
@@ -1373,26 +1356,6 @@ def _pruning_base(self, axis=None, hs_dims=None): include_transforms_for_dims=hs_dims, ) - def _raw_cube_array(self, weighted, margin): - """Return ndarray of measure values from cube-response. - - The shape of the ndarray mirrors the shape of the (raw) cube - response. Specifically, in includes values for missing elements, any - MR_CAT dimensions, and any prunable rows and columns. - - The choice among available measures in the cube response is - determined by *weighted* and *margin*, according to the same rules as - `._flat_values()`. - """ - return ( - np.array(self._flat_values(weighted, margin)) - .reshape(self._all_dimensions.shape) - ) - - @lazyproperty - def _shape(self): - return tuple([dim.shape for dim in self._all_dimensions]) - def _update_result(self, result, insertions, dimension_index, valid_indices): """Insert subtotals into resulting ndarray.""" @@ -1409,3 +1372,188 @@ def _update_result(self, result, insertions, dimension_index, ) ) return result, valid_indices + + +class _Measures(object): + """Provides access to measures contained in cube response.""" + + def __init__(self, cube_dict, all_dimensions): + self._cube_dict = cube_dict + self._all_dimensions = all_dimensions + + @lazyproperty + def is_weighted(self): + """True if weights have been applied to the measure(s) for this cube. + + Unweighted counts are available for all cubes. Weighting applies to + any other measures provided by the cube. + """ + cube_dict = self._cube_dict + if cube_dict.get('query', {}).get('weight') is not None: + return True + if cube_dict.get('weight_var') is not None: + return True + if cube_dict.get('weight_url') is not None: + return True + unweighted_counts = cube_dict['result']['counts'] + count_data = ( + cube_dict['result']['measures'].get('count', {}).get('data') + ) + if unweighted_counts != count_data: + return True + return False + + @lazyproperty + def means(self): + """_MeanMeasure object providing access to means values. 
+ + None when the cube response does not contain a mean measure. + """ + mean_measure_dict = ( + self._cube_dict + .get('result', {}) + .get('measures', {}) + .get('mean') + ) + if mean_measure_dict is None: + return None + return _MeanMeasure(self._cube_dict, self._all_dimensions) + + @lazyproperty + def missing_count(self): + """numeric representing count of missing rows in cube response.""" + if self.means: + return self.means.missing_count + return self._cube_dict['result'].get('missing', 0) + + @lazyproperty + def population_fraction(self): + """The filtered/unfiltered ratio for cube response. + + This value is required for properly calculating population on a cube + where a filter has been applied. Returns 1.0 for an unfiltered cube. + Returns `np.nan` if the unfiltered count is zero, which would + otherwise result in a divide-by-zero error. + """ + numerator = ( + self._cube_dict['result'] + .get('filtered', {}) + .get('weighted_n') + ) + denominator = ( + self._cube_dict['result'] + .get('unfiltered', {}) + .get('weighted_n') + ) + try: + return numerator / denominator + except ZeroDivisionError: + return np.nan + except Exception: + return 1.0 + + @lazyproperty + def unweighted_counts(self): + """_UnweightedCountMeasure object for this cube. + + This object provides access to unweighted counts for this cube, + whether or not the cube contains weighted counts. + """ + return _UnweightedCountMeasure(self._cube_dict, self._all_dimensions) + + @lazyproperty + def unweighted_n(self): + """int count of actual rows represented by query response.""" + return self._cube_dict['result']['n'] + + @lazyproperty + def weighted_counts(self): + """_WeightedCountMeasure object for this cube. + + This object provides access to weighted counts for this cube, if + available. If the cube response is not weighted, the + _UnweightedCountMeasure object for this cube is returned. 
+ """ + if not self.is_weighted: + return self.unweighted_counts + return _WeightedCountMeasure(self._cube_dict, self._all_dimensions) + + @lazyproperty + def weighted_n(self): + """float count of returned rows adjusted for weighting.""" + if not self.is_weighted: + return float(self.unweighted_n) + return float( + sum(self._cube_dict['result']['measures']['count']['data']) + ) + + +class _BaseMeasure(object): + """Base class for measure objects.""" + + def __init__(self, cube_dict, all_dimensions): + self._cube_dict = cube_dict + self._all_dimensions = all_dimensions + + @lazyproperty + def raw_cube_array(self): + """Return read-only ndarray of measure values from cube-response. + + The shape of the ndarray mirrors the shape of the (raw) cube + response. Specifically, it includes values for missing elements, any + MR_CAT dimensions, and any prunable rows and columns. + """ + array = np.array(self._flat_values).reshape(self._all_dimensions.shape) + # ---must be read-only to avoid hard-to-find bugs--- + array.flags.writeable = False + return array + + @lazyproperty + def _flat_values(self): + """Return tuple of measure values as found in cube response. + + This property must be implemented by each subclass. + """ + raise NotImplementedError('must be implemented by each subclass') + + +class _MeanMeasure(_BaseMeasure): + """Statistical mean values from a cube-response.""" + + @lazyproperty + def missing_count(self): + """numeric representing count of missing rows reflected in response.""" + return ( + self._cube_dict['result']['measures']['mean'].get('n_missing', 0) + ) + + @lazyproperty + def _flat_values(self): + """Return tuple of mean values as found in cube response. + + Mean data may include missing items represented by a dict like + {'?': -1} in the cube response. These are replaced by np.nan in the + returned value. 
+ """ + return tuple( + np.nan if type(x) is dict else x + for x in self._cube_dict['result']['measures']['mean']['data'] + ) + + +class _UnweightedCountMeasure(_BaseMeasure): + """Unweighted counts for cube.""" + + @lazyproperty + def _flat_values(self): + """tuple of int counts before weighting.""" + return tuple(self._cube_dict['result']['counts']) + + +class _WeightedCountMeasure(_BaseMeasure): + """Weighted counts for cube.""" + + @lazyproperty + def _flat_values(self): + """tuple of numeric counts after weighting.""" + return tuple(self._cube_dict['result']['measures']['count']['data']) diff --git a/tests/fixtures/cat-x-cat-mean-wgtd.json b/tests/fixtures/cat-x-cat-mean-wgtd.json new file mode 100644 index 000000000..ffbf0dd9d --- /dev/null +++ b/tests/fixtures/cat-x-cat-mean-wgtd.json @@ -0,0 +1,379 @@ +{ + "query": { + "dimensions": [ + { + "variable": "https://alpha.crunch.io/api/datasets/aefd8b1483754d639b46f5ee8f6d7821/variables/000000/" + }, + { + "variable": "https://alpha.crunch.io/api/datasets/aefd8b1483754d639b46f5ee8f6d7821/variables/000004/" + } + ], + "measures": { + "count": { + "args": [], + "function": "cube_count" + }, + "mean": { + "args": [ + { + "variable": "https://alpha.crunch.io/api/datasets/aefd8b1483754d639b46f5ee8f6d7821/variables/00000c/" + } + ], + "function": "cube_mean" + } + }, + "weight": "https://alpha.crunch.io/api/datasets/aefd8b1483754d639b46f5ee8f6d7821/variables/0af6163c3e694b9b80071ed691fd4f28/" + }, + "query_environment": { + "filter": [] + }, + "result": { + "counts": [ + 156, + 129, + 0, + 0, + 0, + 197, + 199, + 0, + 0, + 0, + 101, + 141, + 0, + 0, + 0, + 2, + 4, + 0, + 0, + 0, + 26, + 42, + 0, + 0, + 0, + 3, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0 + ], + "dimensions": [ + { + "derived": false, + "references": { + "alias": "ShutdownBlame", + "description": "If President Obama and the Republicans in Congress do not reach a budget agreement in time to avoid a shutdown of the federal 
government, who do you think will more to blame--President Obama or the Republican Congress?", + "name": "ShutdownBlame", + "view": { + "column_width": null, + "include_missing": true, + "show_counts": false, + "show_numeric_values": false, + "transforms": { + "insertions": [ + { + "anchor": 5, + "args": [ + 5, + 4 + ], + "function": "subtotal", + "name": "test new" + } + ] + } + } + }, + "type": { + "categories": [ + { + "id": 1, + "missing": false, + "name": "President Obama", + "numeric_value": 1 + }, + { + "id": 2, + "missing": false, + "name": "Republicans in Congress", + "numeric_value": 2 + }, + { + "id": 3, + "missing": false, + "name": "Both", + "numeric_value": 3 + }, + { + "id": 4, + "missing": false, + "name": "Neither", + "numeric_value": 4 + }, + { + "id": 5, + "missing": false, + "name": "Not sure", + "numeric_value": 5 + }, + { + "id": 8, + "missing": true, + "name": "Skipped", + "numeric_value": 8 + }, + { + "id": 9, + "missing": true, + "name": "Not Asked", + "numeric_value": 9 + }, + { + "id": -1, + "missing": true, + "name": "No Data", + "numeric_value": null + } + ], + "class": "categorical", + "ordinal": false + } + }, + { + "derived": false, + "references": { + "alias": "Gender", + "description": "Are you male or female?", + "name": "Gender" + }, + "type": { + "categories": [ + { + "id": 1, + "missing": false, + "name": "Male", + "numeric_value": 1 + }, + { + "id": 2, + "missing": false, + "name": "Female", + "numeric_value": 2 + }, + { + "id": 8, + "missing": true, + "name": "Skipped", + "numeric_value": 8 + }, + { + "id": 9, + "missing": true, + "name": "Not Asked", + "numeric_value": 9 + }, + { + "id": -1, + "missing": true, + "name": "No Data", + "numeric_value": null + } + ], + "class": "categorical", + "ordinal": false + } + } + ], + "element": "crunch:cube", + "measures": { + "count": { + "data": [ + 160.82474226804146, + 125.24271844660174, + 0, + 0, + 0, + 203.0927835051553, + 193.20388349514528, + 0, + 0, + 0, + 104.12371134020616, 
+ 136.89320388349492, + 0, + 0, + 0, + 2.061855670103093, + 3.883495145631068, + 0, + 0, + 0, + 26.804123711340207, + 40.776699029126206, + 0, + 0, + 0, + 3.0927835051546397, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0 + ], + "metadata": { + "derived": true, + "references": {}, + "type": { + "class": "numeric", + "integer": false, + "missing_reasons": { + "No Data": -1 + }, + "missing_rules": {} + } + }, + "n_missing": 3 + }, + "mean": { + "data": [ + 52.78205128205122, + 49.9069767441861, + { + "?": -8 + }, + { + "?": -8 + }, + { + "?": -8 + }, + 50.43654822335009, + 48.20100502512572, + { + "?": -8 + }, + { + "?": -8 + }, + { + "?": -8 + }, + 51.5643564356436, + 47.602836879432715, + { + "?": -8 + }, + { + "?": -8 + }, + { + "?": -8 + }, + 58, + 29, + { + "?": -8 + }, + { + "?": -8 + }, + { + "?": -8 + }, + 37.53846153846155, + 39.45238095238095, + { + "?": -8 + }, + { + "?": -8 + }, + { + "?": -8 + }, + 36.666666666666664, + { + "?": -8 + }, + { + "?": -8 + }, + { + "?": -8 + }, + { + "?": -8 + }, + { + "?": -8 + }, + { + "?": -8 + }, + { + "?": -8 + }, + { + "?": -8 + }, + { + "?": -8 + }, + { + "?": -8 + }, + { + "?": -8 + }, + { + "?": -8 + }, + { + "?": -8 + }, + { + "?": -8 + } + ], + "metadata": { + "derived": true, + "references": {}, + "type": { + "class": "numeric", + "integer": false, + "missing_reasons": { + "NaN": -8, + "No Data": -1 + }, + "missing_rules": {} + } + }, + "n_missing": 3 + } + }, + "missing": 3, + "n": 1000 + } +} diff --git a/tests/fixtures/cat-x-cat-wgtd.json b/tests/fixtures/cat-x-cat-wgtd.json new file mode 100644 index 000000000..89781b25a --- /dev/null +++ b/tests/fixtures/cat-x-cat-wgtd.json @@ -0,0 +1,232 @@ +{ + "result": { + "counts": [ + 32, + 85, + 171, + 114, + 70, + 13, + 0, + 0, + 0, + 40, + 97, + 205, + 106, + 40, + 27, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0 + ], + "dimensions": [ + { + 
"derived": false, + "references": { + "alias": "Gender", + "description": "Are you male or female?", + "name": "Gender" + }, + "type": { + "categories": [ + { + "id": 1, + "missing": false, + "name": "Male", + "numeric_value": 1 + }, + { + "id": 2, + "missing": false, + "name": "Female", + "numeric_value": 2 + }, + { + "id": 8, + "missing": true, + "name": "Skipped", + "numeric_value": 8 + }, + { + "id": 9, + "missing": true, + "name": "Not Asked", + "numeric_value": 9 + }, + { + "id": -1, + "missing": true, + "name": "No Data", + "numeric_value": null + } + ], + "class": "categorical", + "ordinal": false + } + }, + { + "derived": false, + "references": { + "alias": "RespondentIdeology", + "description": "In general, how would you describe your own political viewpoint?", + "name": "RespondentIdeology" + }, + "type": { + "categories": [ + { + "id": 1, + "missing": false, + "name": "Very liberal", + "numeric_value": 1 + }, + { + "id": 2, + "missing": false, + "name": "Liberal", + "numeric_value": 2 + }, + { + "id": 3, + "missing": false, + "name": "Moderate", + "numeric_value": 3 + }, + { + "id": 4, + "missing": false, + "name": "Conservative", + "numeric_value": 4 + }, + { + "id": 5, + "missing": false, + "name": "Very Conservative", + "numeric_value": 5 + }, + { + "id": 6, + "missing": false, + "name": "Not sure", + "numeric_value": 6 + }, + { + "id": 8, + "missing": true, + "name": "Skipped", + "numeric_value": 8 + }, + { + "id": 9, + "missing": true, + "name": "Not Asked", + "numeric_value": 9 + }, + { + "id": -1, + "missing": true, + "name": "No Data", + "numeric_value": null + } + ], + "class": "categorical", + "ordinal": false + } + } + ], + "element": "crunch:cube", + "measures": { + "count": { + "data": [ + 32.9, + 87.6, + 176.2, + 117.5, + 72.1, + 13.4, + 0, + 0, + 0, + 38.8, + 94.1, + 199.0128, + 102.9, + 38.8305, + 26.2135, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 
0, + 0, + 0, + 0 + ], + "metadata": { + "derived": true, + "references": {}, + "type": { + "class": "numeric", + "integer": false, + "missing_reasons": { + "No Data": -1 + }, + "missing_rules": {} + } + }, + "n_missing": 0 + } + }, + "missing": 0, + "n": 1000 + } +} diff --git a/tests/fixtures/cat-x-cat-x-cat-wgtd.json b/tests/fixtures/cat-x-cat-x-cat-wgtd.json new file mode 100644 index 000000000..32180fa02 --- /dev/null +++ b/tests/fixtures/cat-x-cat-x-cat-wgtd.json @@ -0,0 +1,1038 @@ +{ + "query": { + "dimensions": [ + { + "variable": "/api/datasets/123/variables/000004/" + }, + { + "variable": "/api/datasets/123/variables/000000/" + }, + { + "variable": "/api/datasets/123/variables/000001/" + } + ], + "measures": { + "count": { + "args": [], + "function": "cube_count" + } + }, + "weight": "w" + }, + "result": { + "counts": [ + 3, + 3, + 11, + 15, + 13, + 3, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 3, + 10, + 19, + 19, + 14, + 3, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 
0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0 + ], + "dimensions": [ + { + "derived": false, + "references": { + "alias": "Gender", + "description": "Are you male or female?", + "name": "Gender" + }, + "type": { + "categories": [ + { + "id": 1, + "missing": false, + "name": "Male", + "numeric_value": 1 + }, + { + "id": 2, + "missing": false, + "name": "Female", + "numeric_value": 2 + }, + { + "id": 8, + "missing": true, + "name": "Skipped", + "numeric_value": 8 + }, + { + "id": 9, + "missing": true, + "name": "Not Asked", + "numeric_value": 9 + }, + { + "id": -1, + "missing": true, + "name": "No Data", + "numeric_value": null + } + ], + "class": "categorical", + "ordinal": false + } + }, + { + "derived": false, + "references": { + "alias": "ShutdownBlame", + "description": "If President Obama and the Republicans in Congress do not reach a budget agreement in time to avoid a shutdown of the federal government, who do you think will more to blame--President Obama or the Republican Congress?", + "name": "ShutdownBlame", + "view": { + "column_width": null, + "include_missing": false, + "show_counts": false, + "show_numeric_values": false + } + }, + "type": { + "categories": [ + { + 
"id": 1, + "missing": false, + "name": "President Obama", + "numeric_value": 1 + }, + { + "id": 2, + "missing": false, + "name": "Republicans in Congress", + "numeric_value": 2 + }, + { + "id": 3, + "missing": false, + "name": "Both", + "numeric_value": 3 + }, + { + "id": 4, + "missing": false, + "name": "Neither", + "numeric_value": 4 + }, + { + "id": 5, + "missing": true, + "name": "Not sure", + "numeric_value": 5 + }, + { + "id": 8, + "missing": true, + "name": "Skipped", + "numeric_value": 8 + }, + { + "id": 9, + "missing": true, + "name": "Not Asked", + "numeric_value": 9 + }, + { + "id": -1, + "missing": true, + "name": "No Data", + "numeric_value": null + } + ], + "class": "categorical", + "ordinal": false + } + }, + { + "derived": false, + "references": { + "alias": "RespondentIdeology", + "description": "In general, how would you describe your own political viewpoint?", + "is_subvar": true, + "name": "RespondentIdeology" + }, + "type": { + "categories": [ + { + "id": 1, + "missing": false, + "name": "Very liberal", + "numeric_value": 1 + }, + { + "id": 2, + "missing": false, + "name": "Liberal", + "numeric_value": 2 + }, + { + "id": 3, + "missing": false, + "name": "Moderate", + "numeric_value": 3 + }, + { + "id": 4, + "missing": false, + "name": "Conservative", + "numeric_value": 4 + }, + { + "id": 5, + "missing": false, + "name": "Very Conservative", + "numeric_value": 5 + }, + { + "id": 6, + "missing": false, + "name": "Not sure", + "numeric_value": 6 + }, + { + "id": 8, + "missing": true, + "name": "Skipped", + "numeric_value": 8 + }, + { + "id": 9, + "missing": true, + "name": "Not Asked", + "numeric_value": 9 + }, + { + "id": -1, + "missing": true, + "name": "No Data", + "numeric_value": null + }, + { + "id": 100, + "missing": false, + "name": "Very conservative", + "numeric_value": 5 + } + ], + "class": "categorical", + "ordinal": false + } + } + ], + "element": "crunch:cube", + "measures": { + "count": { + "data": [ + 1, + 4, + 41, + 65, + 43, + 2, 
+ 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 2, + 10, + 39, + 49, + 24, + 5, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0 + ], + "metadata": { + 
"derived": true, + "references": {}, + "type": { + "class": "numeric", + "integer": true, + "missing_reasons": { + "No Data": -1 + }, + "missing_rules": {} + } + }, + "n_missing": 0 + } + }, + "missing": 0, + "n": 285 + }, + "weight_var": "the_weight" +} diff --git a/tests/fixtures/scale_means/__init__.py b/tests/fixtures/scale_means/__init__.py deleted file mode 100644 index f0ffedf3d..000000000 --- a/tests/fixtures/scale_means/__init__.py +++ /dev/null @@ -1,22 +0,0 @@ -import os -from functools import partial - -from cr.cube.util import load_fixture - -CUBES_DIR = os.path.dirname(os.path.abspath(__file__)) - - -def _load(cube_file): - load = partial(load_fixture, CUBES_DIR) - return load(cube_file) - - -CA_CAT_X_ITEMS = _load('ca-cat-x-items.json') -CA_ITEMS_X_CAT = _load('ca-items-x-cat.json') -CA_X_MR = _load('ca-x-mr.json') -CAT_X_CA_CAT_X_ITEMS = _load('cat-x-ca-cat-x-items.json') -CAT_X_CAT = _load('cat-x-cat.json') -CAT_X_MR = _load('cat-x-mr.json') -MR_X_CAT = _load('mr-x-cat.json') -UNIVARIATE_CAT = _load('univariate-cat.json') -CAT_X_CAT_SM_MARGIN = _load('cat-x-cat-sm-margin.json') diff --git a/tests/integration/test_crunch_cube.py b/tests/integration/test_crunch_cube.py index 978d38bd4..3d4a16664 100644 --- a/tests/integration/test_crunch_cube.py +++ b/tests/integration/test_crunch_cube.py @@ -2,8 +2,12 @@ import numpy as np import pytest -from cr.cube.crunch_cube import CrunchCube +from cr.cube.crunch_cube import ( + CrunchCube, _MeanMeasure, _Measures, _UnweightedCountMeasure, + _WeightedCountMeasure +) from cr.cube.enum import DIMENSION_TYPE as DT +from cr.cube.measures.index import Index from cr.cube.util import compress_pruned from ..fixtures import CR # ---mnemonic: CR = 'cube-response'--- @@ -76,11 +80,188 @@ def dimensions_fixture(self, request): return cube_response, expected_dimension_types +class DescribeIntegrated_Measures(object): + + def it_knows_when_its_measures_are_weighted(self, is_weighted_fixture): + cube_dict, expected_value = 
is_weighted_fixture + measures = _Measures(cube_dict, None) + + is_weighted = measures.is_weighted + + assert is_weighted == expected_value + + def it_provides_access_to_the_mean_measure(self): + cube_dict = CR.CAT_X_CAT_MEAN_WGTD + measures = _Measures(cube_dict, None) + + means = measures.means + + assert type(means).__name__ == '_MeanMeasure' + + def but_only_when_the_cube_response_contains_means(self): + cube_dict = CR.CAT_X_CAT + measures = _Measures(cube_dict, None) + + means = measures.means + + assert means is None + + def it_provides_the_means_missing_count_when_means_are_available(self): + measures = _Measures(CR.CAT_X_CAT_MEAN_WGTD, None) + missing_count = measures.missing_count + assert missing_count == 3 + + def but_provides_the_general_missing_count_otherwise(self): + measures = _Measures(CR.CAT_X_CAT, None) + missing_count = measures.missing_count + assert missing_count == 5 + + def it_knows_the_population_fraction(self, pop_frac_fixture): + cube_dict, expected_value = pop_frac_fixture + measures = _Measures(cube_dict, None) + + population_fraction = measures.population_fraction + + assert population_fraction == expected_value + + def it_provides_access_to_the_unweighted_count_measure(self): + measures = _Measures(None, None) + + unweighted_counts = measures.unweighted_counts + + assert type(unweighted_counts).__name__ == '_UnweightedCountMeasure' + + def it_knows_the_unweighted_n(self): + measures = _Measures(CR.CAT_X_CAT, None) + unweighted_n = measures.unweighted_n + assert unweighted_n == 20 + + def it_provides_access_to_the_weighted_count_measure( + self, wgtd_counts_fixture): + cube_dict, expected_type_name = wgtd_counts_fixture + measures = _Measures(cube_dict, None) + + weighted_counts = measures.weighted_counts + + assert type(weighted_counts).__name__ == expected_type_name + + def it_knows_the_weighted_n(self, wgtd_n_fixture): + cube_dict, expected_value = wgtd_n_fixture + measures = _Measures(cube_dict, None) + + weighted_n = 
measures.weighted_n + + assert round(weighted_n, 3) == expected_value + + # fixtures ------------------------------------------------------- + + @pytest.fixture(params=[ + # ---has {'query': {'weight': url}}--- + (CR.ADMIT_X_GENDER_WEIGHTED, True), + # ---has {'weight_var': weight_name_str}--- + (CR.CAT_X_CAT_X_CAT_WGTD, True), + # ---unweighted_counts == measure_count_data--- + (CR.ADMIT_X_DEPT_UNWEIGHTED, False), + ]) + def is_weighted_fixture(self, request): + cube_response, expected_value = request.param + cube_dict = cube_response.get('value', cube_response) + return cube_dict, expected_value + + @pytest.fixture(params=[ + # ---filtered case--- + (CR.CAT_X_CAT_FILT, 0.254), + # ---unfiltered case--- + (CR.CAT_X_CAT, 1.0), + ]) + def pop_frac_fixture(self, request): + cube_dict, expected_value = request.param + return cube_dict, expected_value + + @pytest.fixture(params=[ + # ---weighted case--- + (CR.CAT_X_CAT_WGTD, '_WeightedCountMeasure'), + # ---unweighted case--- + (CR.CAT_X_CAT, '_UnweightedCountMeasure'), + ]) + def wgtd_counts_fixture(self, request): + cube_dict, expected_type_name = request.param + return cube_dict, expected_type_name + + @pytest.fixture(params=[ + # ---weighted case--- + (CR.CAT_X_CAT_WGTD, 999.557), + # ---unweighted case--- + (CR.CAT_X_CAT, 20.0), + ]) + def wgtd_n_fixture(self, request): + cube_dict, expected_type = request.param + return cube_dict, expected_type + + +class DescribeIntegrated_MeanMeasure(object): + + def it_provides_access_to_its_raw_cube_array(self): + cube_dict = CR.CAT_X_CAT_MEAN_WGTD + cube = CrunchCube(cube_dict) + measure = _MeanMeasure(cube_dict, cube._all_dimensions) + + raw_cube_array = measure.raw_cube_array + + np.testing.assert_array_almost_equal( + raw_cube_array, + [[52.78205128, 49.90697674, np.nan, np.nan, np.nan], + [50.43654822, 48.20100503, np.nan, np.nan, np.nan], + [51.56435644, 47.60283688, np.nan, np.nan, np.nan], + [58.0, 29.0, np.nan, np.nan, np.nan], + [37.53846154, 39.45238095, np.nan, 
np.nan, np.nan], + [36.66666667, np.nan, np.nan, np.nan, np.nan], + [np.nan, np.nan, np.nan, np.nan, np.nan], + [np.nan, np.nan, np.nan, np.nan, np.nan]] + ) + + +class DescribeIntegrated_UnweightedCountMeasure(object): + + def it_provides_access_to_its_raw_cube_array(self): + cube_dict = CR.CAT_X_CAT + cube = CrunchCube(cube_dict) + measure = _UnweightedCountMeasure(cube_dict, cube._all_dimensions) + + raw_cube_array = measure.raw_cube_array + + np.testing.assert_array_almost_equal( + raw_cube_array, + [[5, 3, 2, 0], + [5, 2, 3, 0], + [0, 0, 0, 0]] + ) + + +class DescribeIntegrated_WeightedCountMeasure(object): + + def it_provides_access_to_its_raw_cube_array(self): + cube_dict = CR.CAT_X_CAT_WGTD + cube = CrunchCube(cube_dict) + measure = _WeightedCountMeasure(cube_dict, cube._all_dimensions) + + raw_cube_array = measure.raw_cube_array + + np.testing.assert_array_almost_equal( + raw_cube_array, + [[32.9, 87.6, 176.2, 117.5, 72.1, 13.4, 0.0, 0.0, 0.0], + [38.8, 94.1, 199.0128, 102.9, 38.8305, 26.2135, 0.0, 0.0, 0.0], + [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], + [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], + [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]] + ) + + class TestCrunchCube(TestCase): def test_crunch_cube_loads_data(self): cube = CrunchCube(CR.CAT_X_CAT) - cube_dict = cube._cube + cube_dict = cube._cube_dict self.assertEqual(cube_dict, CR.CAT_X_CAT) def test_as_array_univariate_cat_exclude_missing(self): @@ -89,36 +270,18 @@ def test_as_array_univariate_cat_exclude_missing(self): actual = cube.as_array() np.testing.assert_array_equal(actual, expected) - def test_as_array_univariate_cat_exclude_missing_adjusted(self): - cube = CrunchCube(CR.UNIVARIATE_CATEGORICAL) - expected = np.array([11, 6]) - actual = cube.as_array(adjusted=True) - np.testing.assert_array_equal(actual, expected) - def test_as_array_numeric(self): cube = CrunchCube(CR.VOTER_REGISTRATION) expected = np.array([885, 105, 10]) actual = cube.as_array() 
np.testing.assert_array_equal(actual, expected) - def test_as_array_numeric_adjusted(self): - cube = CrunchCube(CR.VOTER_REGISTRATION) - expected = np.array([886, 106, 11]) - actual = cube.as_array(adjusted=True) - np.testing.assert_array_equal(actual, expected) - def test_as_array_datetime(self): cube = CrunchCube(CR.SIMPLE_DATETIME) expected = np.array([1, 1, 1, 1]) actual = cube.as_array() np.testing.assert_array_equal(actual, expected) - def test_as_array_datetime_adjusted(self): - cube = CrunchCube(CR.SIMPLE_DATETIME) - expected = np.array([2, 2, 2, 2]) - actual = cube.as_array(adjusted=True) - np.testing.assert_array_equal(actual, expected) - def test_as_array_text(self): cube = CrunchCube(CR.SIMPLE_TEXT) expected = np.array([1, 1, 1, 1, 1, 1]) @@ -134,15 +297,6 @@ def test_as_array_cat_x_cat_exclude_missing(self): actual = cube.as_array() np.testing.assert_array_equal(actual, expected) - def test_as_array_cat_x_cat_exclude_missing_adjusted(self): - cube = CrunchCube(CR.CAT_X_CAT) - expected = np.array([ - [6, 3], - [6, 4], - ]) - actual = cube.as_array(adjusted=True) - np.testing.assert_array_equal(actual, expected) - def test_as_array_cat_x_cat_unweighted(self): cube = CrunchCube(CR.CAT_X_CAT) expected = np.array([ @@ -1024,7 +1178,7 @@ def test_econ_x_ideology_index_by_col(self): 1.32339565, ], ]) - actual = cube.index() + actual = Index.data(cube, weighted=True, prune=False) np.testing.assert_almost_equal(actual, expected) def test_econ_x_ideology_index_by_row(self): @@ -1047,7 +1201,7 @@ def test_econ_x_ideology_index_by_row(self): 1.32339565, ], ]) - actual = cube.index() + actual = Index.data(cube, weighted=True, prune=False) np.testing.assert_almost_equal(actual, expected) def test_fruit_x_pets_proportions_by_cell(self): @@ -1347,7 +1501,6 @@ def test_cat_x_cat_props_by_cell_prune_cols(self): for i, actual in enumerate(pruned): np.testing.assert_array_equal(pruned[i], pruned_expected[i]) - @pytest.mark.filterwarnings('ignore:DeprecationWarning') def 
test_cat_x_cat_index_by_col_prune_cols(self): cube = CrunchCube(CR.CAT_X_CAT_WITH_EMPTY_COLS) expected = np.array([ @@ -1358,7 +1511,7 @@ def test_cat_x_cat_index_by_col_prune_cols(self): [0., 1.16666667, np.nan, 1.16666667], [0., 1.75, np.nan, 0.] ]) - actual = cube.index(prune=False) + actual = Index.data(cube, weighted=True, prune=False) # Assert index without pruning np.testing.assert_almost_equal(actual, expected) @@ -1369,7 +1522,7 @@ def test_cat_x_cat_index_by_col_prune_cols(self): [0., 1.16666667, 1.16666667], [0., 1.75, 0.] ]) - table = cube.index(prune=True) + table = Index.data(cube, weighted=True, prune=True) # Assert index witih pruning actual = table[:, ~table.mask.all(axis=0)][~table.mask.all(axis=1), :] np.testing.assert_almost_equal(actual, expected) diff --git a/tests/integration/test_headers_and_subtotals.py b/tests/integration/test_headers_and_subtotals.py index 5bbfce4f4..7b03e24eb 100644 --- a/tests/integration/test_headers_and_subtotals.py +++ b/tests/integration/test_headers_and_subtotals.py @@ -400,18 +400,6 @@ def test_ca_margin_with_hs(self): actual = cube.margin(include_transforms_for_dims=[0, 1], axis=1) np.testing.assert_almost_equal(actual, expected) - def test_count_unweighted(self): - cube = CrunchCube(CR.ADMIT_X_GENDER_WEIGHTED) - expected = 4526 - actual = cube.count(weighted=False) - self.assertEqual(actual, expected) - - def test_count_weighted(self): - cube = CrunchCube(CR.ADMIT_X_GENDER_WEIGHTED) - expected = 4451.955438803242 - actual = cube.count(weighted=True) - self.assertEqual(actual, expected) - def test_hs_with_anchor_on_zero_position_labels(self): cube = CrunchCube(CR.ECON_US_PROBLEM_X_BIGGER_PROBLEM) expected = [ diff --git a/tests/integration/test_index.py b/tests/integration/test_index.py index 55e84805d..178338909 100644 --- a/tests/integration/test_index.py +++ b/tests/integration/test_index.py @@ -5,6 +5,7 @@ from unittest import TestCase from cr.cube.crunch_cube import CrunchCube +from cr.cube.measures.index 
import Index from ..fixtures import CR @@ -21,7 +22,7 @@ def test_mr_x_cat_index(self): [1.14466510106092, 0.8606566846476], [0.99292572005336, 1.0068293374540] ]) - actual = cube.index() + actual = Index.data(cube, weighted=True, prune=False) np.testing.assert_almost_equal(actual, expected) def test_cat_x_mr_index(self): @@ -30,7 +31,7 @@ def test_cat_x_mr_index(self): [0.8571429, 1.1152941, 0.9610984], [1.0769231, 0.9466231, 1.019037], ]) - actual = cube.index() + actual = Index.data(cube, weighted=True, prune=False) np.testing.assert_almost_equal(actual, expected) def test_mr_x_mr_index(self): @@ -40,7 +41,7 @@ def test_mr_x_mr_index(self): [0.8529411764705883, 1.0000000000000000, 0.763157894736842], [1.1176470588235294, 1.310344827586207, 1.0000000000000000] ]) - actual = cube.index() + actual = Index.data(cube, weighted=True, prune=False) np.testing.assert_almost_equal(actual, expected) def test_ca_items_x_ca_cat_x_cat_index(self): @@ -63,7 +64,7 @@ def test_ca_items_x_ca_cat_x_cat_index(self): [0., 2., np.nan, np.nan, np.nan], [np.nan, np.nan, np.nan, np.nan, np.nan]], ]) - actual = cube.index() + actual = Index.data(cube, weighted=True, prune=False) np.testing.assert_almost_equal(actual, expected) # Test pruning mask @@ -83,5 +84,5 @@ def test_ca_items_x_ca_cat_x_cat_index(self): [False, False, True, True, True], [True, True, True, True, True]], ]) - actual = cube.index(prune=True).mask + actual = Index.data(cube, weighted=True, prune=True).mask np.testing.assert_array_equal(actual, expected) diff --git a/tests/integration/test_multiple_response.py b/tests/integration/test_multiple_response.py index d256c959f..ee43daaad 100644 --- a/tests/integration/test_multiple_response.py +++ b/tests/integration/test_multiple_response.py @@ -3,6 +3,7 @@ import pytest from cr.cube.crunch_cube import CrunchCube +from cr.cube.measures.index import Index from ..fixtures import CR @@ -280,7 +281,7 @@ def test_cat_x_mr_index_by_row(): [.8571429, 1.1152941, .9610984], 
[1.0769231, .9466231, 1.019037], ]) - actual = cube.index() + actual = Index.data(cube, weighted=True, prune=False) np.testing.assert_almost_equal(actual, expected) @@ -290,7 +291,7 @@ def test_cat_x_mr_index_by_cell(): [.8571429, 1.1152941, .9610984], [1.0769231, .9466231, 1.019037], ]) - actual = cube.index() + actual = Index.data(cube, weighted=True, prune=False) np.testing.assert_almost_equal(actual, expected) @@ -300,7 +301,7 @@ def test_cat_x_mr_index_by_col(): [.8571429, 1.1152941, .9610984], [1.0769231, .9466231, 1.019037], ]) - actual = cube.index() + actual = Index.data(cube, weighted=True, prune=False) np.testing.assert_almost_equal(actual, expected) @@ -308,7 +309,7 @@ def test_cat_x_mr_index_by_col(): def test_cat_x_mr_index_bad_direction(): cube = CrunchCube(CR.CAT_X_MR) with pytest.raises(ValueError): - cube.index() + Index.data(cube, weighted=True, prune=False) def test_mr_x_single_wave(): diff --git a/tests/unit/test_crunch_cube.py b/tests/unit/test_crunch_cube.py index cdf6a1385..e6487e34f 100644 --- a/tests/unit/test_crunch_cube.py +++ b/tests/unit/test_crunch_cube.py @@ -7,28 +7,40 @@ from unittest import TestCase -from cr.cube.crunch_cube import CrunchCube +from cr.cube.crunch_cube import ( + _BaseMeasure, CrunchCube, _MeanMeasure, _Measures, + _UnweightedCountMeasure, _WeightedCountMeasure +) from cr.cube.dimension import AllDimensions, _ApparentDimensions, Dimension from cr.cube.enum import DIMENSION_TYPE as DT from ..unitutil import ( - class_mock, instance_mock, method_mock, Mock, patch, property_mock + class_mock, function_mock, instance_mock, method_mock, Mock, patch, + property_mock ) class DescribeCrunchCube(object): def it_provides_a_default_repr(self): - cube = CrunchCube({}) + cube = CrunchCube(None) repr_ = repr(cube) assert repr_.startswith('