From 6722cdae132c8d7886325feea3fa27144bdddcbb Mon Sep 17 00:00:00 2001 From: Steve Canny Date: Fri, 2 Nov 2018 16:48:52 -0700 Subject: [PATCH 01/30] rfctr: rename CrunchCube._cube to _cube_dict --- src/cr/cube/crunch_cube.py | 51 ++++++++++++++++----------- tests/integration/test_crunch_cube.py | 2 +- 2 files changed, 31 insertions(+), 22 deletions(-) diff --git a/src/cr/cube/crunch_cube.py b/src/cr/cube/crunch_cube.py index e8b9e2d51..dbb321e6b 100644 --- a/src/cr/cube/crunch_cube.py +++ b/src/cr/cube/crunch_cube.py @@ -64,7 +64,7 @@ def __init__(self, response): try: if not isinstance(response, dict): response = json.loads(response) - self._cube = response.get('value', response) + self._cube_dict = response.get('value', response) except TypeError: # If an unexpected type is provided raise descriptive exception. if not isinstance(response, dict): @@ -145,14 +145,16 @@ def count(self, weighted=True): """Get cube's count with automatic weighted/unweighted selection.""" if weighted and self.is_weighted: return sum( - self._cube['result']['measures'].get('count', {}).get('data') + self._cube_dict['result']['measures'] + .get('count', {}) + .get('data') ) - return self._cube['result']['n'] + return self._cube_dict['result']['n'] @lazyproperty def counts(self): - unfiltered = self._cube['result'].get('unfiltered') - filtered = self._cube['result'].get('filtered') + unfiltered = self._cube_dict['result'].get('unfiltered') + filtered = self._cube_dict['result'].get('filtered') return unfiltered, filtered @lazyproperty @@ -183,7 +185,7 @@ def dimensions(self): @lazyproperty def filter_annotation(self): """Get cube's filter annotation.""" - return self._cube.get('filter_names', []) + return self._cube_dict.get('filter_names', []) def get_slices(self, ca_as_0th=False): """Return list of :class:`.CubeSlice` objects. 
@@ -204,7 +206,7 @@ def get_slices(self, ca_as_0th=False): @lazyproperty def has_means(self): """True if cube contains means data.""" - measures = self._cube.get('result', {}).get('measures') + measures = self._cube_dict.get('result', {}).get('measures') if not measures: return False return measures.get('mean', None) is not None @@ -268,15 +270,22 @@ def is_univariate_ca(self): @lazyproperty def is_weighted(self): - """Check if the cube dataset is weighted.""" - weighted = self._cube.get('query', {}).get('weight', None) is not None - weighted = weighted or self._cube.get('weight_var', None) is not None - weighted = weighted or self._cube.get('weight_url', None) is not None - weighted = weighted or ( - self._cube['result']['counts'] != - self._cube['result']['measures'].get('count', {}).get('data') + """True if cube response contains weighted data.""" + if self._cube_dict.get('query', {}).get('weight') is not None: + return True + if self._cube_dict.get('weight_var') is not None: + return True + if self._cube_dict.get('weight_url') is not None: + return True + unweighted_counts = self._cube_dict['result']['counts'] + weighted_counts = ( + self._cube_dict['result']['measures'] + .get('count', {}) + .get('data') ) - return weighted + if unweighted_counts != weighted_counts: + return True + return False def labels(self, include_missing=False, include_transforms_for_dims=False): """Gets labels for each cube's dimension. @@ -419,8 +428,8 @@ def hs_dims_for_den(hs_dims, axis): def missing(self): """Get missing count of a cube.""" if self.has_means: - return self._cube['result']['measures']['mean']['n_missing'] - return self._cube['result'].get('missing') + return self._cube_dict['result']['measures']['mean']['n_missing'] + return self._cube_dict['result'].get('missing') @lazyproperty def mr_dim_ind(self): @@ -847,7 +856,7 @@ def _all_dimensions(self): dimension-pair is suppressed). 
""" return AllDimensions( - dimension_dicts=self._cube['result']['dimensions'] + dimension_dicts=self._cube_dict['result']['dimensions'] ) def _apply_missings_and_insertions(self, res, include_transforms_for_dims, @@ -1076,12 +1085,12 @@ def _flat_values(self, weighted, margin=False): counts are returned even if mean values are present, which may be preferred for example when calculating a margin. """ - values = self._cube['result']['counts'] + values = self._cube_dict['result']['counts'] if self.has_means and not margin: - mean = self._cube['result']['measures'].get('mean', {}) + mean = self._cube_dict['result']['measures'].get('mean', {}) values = mean.get('data', values) elif weighted and self.is_weighted: - count = self._cube['result']['measures'].get('count', {}) + count = self._cube_dict['result']['measures'].get('count', {}) values = count.get('data', values) values = [(val if not type(val) is dict else np.nan) for val in values] diff --git a/tests/integration/test_crunch_cube.py b/tests/integration/test_crunch_cube.py index 978d38bd4..3b76c5b11 100644 --- a/tests/integration/test_crunch_cube.py +++ b/tests/integration/test_crunch_cube.py @@ -80,7 +80,7 @@ class TestCrunchCube(TestCase): def test_crunch_cube_loads_data(self): cube = CrunchCube(CR.CAT_X_CAT) - cube_dict = cube._cube + cube_dict = cube._cube_dict self.assertEqual(cube_dict, CR.CAT_X_CAT) def test_as_array_univariate_cat_exclude_missing(self): From 130a01d4e81229f830461f5ffaa5606e50cdfb7c Mon Sep 17 00:00:00 2001 From: Steve Canny Date: Fri, 2 Nov 2018 17:48:07 -0700 Subject: [PATCH 02/30] rfctr: remove real-work from initializer * Move work of resolving type of cube-response and parsing JSON to a helper method (._cube_dict). * Change all instances in unit tests of `CrunchCube({})` to `CrunchCube(None)`. This is a mild case, but shows the main reason to avoid real work in the constructor, because all unit tests have to satisfy the arguments required by the real-work. 
--- src/cr/cube/crunch_cube.py | 30 +++++++++++------- tests/unit/test_crunch_cube.py | 58 +++++++++++++++++----------------- 2 files changed, 48 insertions(+), 40 deletions(-) diff --git a/src/cr/cube/crunch_cube.py b/src/cr/cube/crunch_cube.py index dbb321e6b..dffe63372 100644 --- a/src/cr/cube/crunch_cube.py +++ b/src/cr/cube/crunch_cube.py @@ -61,17 +61,7 @@ def __init__(self, response): So we need to check its type, and convert it to a dictionary if it's JSON, if possible. """ - try: - if not isinstance(response, dict): - response = json.loads(response) - self._cube_dict = response.get('value', response) - except TypeError: - # If an unexpected type is provided raise descriptive exception. - if not isinstance(response, dict): - raise TypeError(( - 'Unsupported type provided: {}. ' - 'A `cube` must be JSON or `dict`.' - ).format(type(response))) + self._cube_response_arg = response def __repr__(self): """Provide text representation suitable for working at console. @@ -1003,6 +993,24 @@ def _create_mask(res, row_prune_inds, col_prune_inds): ) return np.logical_or(mask_rows, mask_cols) + @lazyproperty + def _cube_dict(self): + """dict containing raw cube response, parsed from JSON payload.""" + try: + cube_response = self._cube_response_arg + # ---parse JSON to a dict when constructed with JSON--- + cube_dict = ( + cube_response if isinstance(cube_response, dict) + else json.loads(cube_response) + ) + # ---cube is 'value' item in a shoji response--- + return cube_dict.get('value', cube_dict) + except TypeError: + raise TypeError( + 'Unsupported type <%s> provided. Cube response must be JSON ' + '(str) or dict.' % type(self._cube_response_arg).__name__ + ) + def _data(self, weighted, margin=False): """Get the data in non-flattened shape. 
diff --git a/tests/unit/test_crunch_cube.py b/tests/unit/test_crunch_cube.py index cdf6a1385..6f651c670 100644 --- a/tests/unit/test_crunch_cube.py +++ b/tests/unit/test_crunch_cube.py @@ -19,7 +19,7 @@ class DescribeCrunchCube(object): def it_provides_a_default_repr(self): - cube = CrunchCube({}) + cube = CrunchCube(None) repr_ = repr(cube) assert repr_.startswith(' Date: Fri, 2 Nov 2018 20:50:54 -0700 Subject: [PATCH 03/30] test: remove deprecated cube.index() calls Get rid of warning-spew that occurred on test runs from calling deprecated CrunchCube.index() in tests. --- tests/integration/test_crunch_cube.py | 10 +++++----- tests/integration/test_index.py | 11 ++++++----- tests/integration/test_multiple_response.py | 9 +++++---- tests/unit/test_crunch_cube.py | 19 ++++++++++++++++++- 4 files changed, 34 insertions(+), 15 deletions(-) diff --git a/tests/integration/test_crunch_cube.py b/tests/integration/test_crunch_cube.py index 3b76c5b11..221daa3ff 100644 --- a/tests/integration/test_crunch_cube.py +++ b/tests/integration/test_crunch_cube.py @@ -4,6 +4,7 @@ from cr.cube.crunch_cube import CrunchCube from cr.cube.enum import DIMENSION_TYPE as DT +from cr.cube.measures.index import Index from cr.cube.util import compress_pruned from ..fixtures import CR # ---mnemonic: CR = 'cube-response'--- @@ -1024,7 +1025,7 @@ def test_econ_x_ideology_index_by_col(self): 1.32339565, ], ]) - actual = cube.index() + actual = Index.data(cube, weighted=True, prune=False) np.testing.assert_almost_equal(actual, expected) def test_econ_x_ideology_index_by_row(self): @@ -1047,7 +1048,7 @@ def test_econ_x_ideology_index_by_row(self): 1.32339565, ], ]) - actual = cube.index() + actual = Index.data(cube, weighted=True, prune=False) np.testing.assert_almost_equal(actual, expected) def test_fruit_x_pets_proportions_by_cell(self): @@ -1347,7 +1348,6 @@ def test_cat_x_cat_props_by_cell_prune_cols(self): for i, actual in enumerate(pruned): np.testing.assert_array_equal(pruned[i], 
pruned_expected[i]) - @pytest.mark.filterwarnings('ignore:DeprecationWarning') def test_cat_x_cat_index_by_col_prune_cols(self): cube = CrunchCube(CR.CAT_X_CAT_WITH_EMPTY_COLS) expected = np.array([ @@ -1358,7 +1358,7 @@ def test_cat_x_cat_index_by_col_prune_cols(self): [0., 1.16666667, np.nan, 1.16666667], [0., 1.75, np.nan, 0.] ]) - actual = cube.index(prune=False) + actual = Index.data(cube, weighted=True, prune=False) # Assert index without pruning np.testing.assert_almost_equal(actual, expected) @@ -1369,7 +1369,7 @@ def test_cat_x_cat_index_by_col_prune_cols(self): [0., 1.16666667, 1.16666667], [0., 1.75, 0.] ]) - table = cube.index(prune=True) + table = Index.data(cube, weighted=True, prune=True) # Assert index witih pruning actual = table[:, ~table.mask.all(axis=0)][~table.mask.all(axis=1), :] np.testing.assert_almost_equal(actual, expected) diff --git a/tests/integration/test_index.py b/tests/integration/test_index.py index 55e84805d..178338909 100644 --- a/tests/integration/test_index.py +++ b/tests/integration/test_index.py @@ -5,6 +5,7 @@ from unittest import TestCase from cr.cube.crunch_cube import CrunchCube +from cr.cube.measures.index import Index from ..fixtures import CR @@ -21,7 +22,7 @@ def test_mr_x_cat_index(self): [1.14466510106092, 0.8606566846476], [0.99292572005336, 1.0068293374540] ]) - actual = cube.index() + actual = Index.data(cube, weighted=True, prune=False) np.testing.assert_almost_equal(actual, expected) def test_cat_x_mr_index(self): @@ -30,7 +31,7 @@ def test_cat_x_mr_index(self): [0.8571429, 1.1152941, 0.9610984], [1.0769231, 0.9466231, 1.019037], ]) - actual = cube.index() + actual = Index.data(cube, weighted=True, prune=False) np.testing.assert_almost_equal(actual, expected) def test_mr_x_mr_index(self): @@ -40,7 +41,7 @@ def test_mr_x_mr_index(self): [0.8529411764705883, 1.0000000000000000, 0.763157894736842], [1.1176470588235294, 1.310344827586207, 1.0000000000000000] ]) - actual = cube.index() + actual = Index.data(cube, 
weighted=True, prune=False) np.testing.assert_almost_equal(actual, expected) def test_ca_items_x_ca_cat_x_cat_index(self): @@ -63,7 +64,7 @@ def test_ca_items_x_ca_cat_x_cat_index(self): [0., 2., np.nan, np.nan, np.nan], [np.nan, np.nan, np.nan, np.nan, np.nan]], ]) - actual = cube.index() + actual = Index.data(cube, weighted=True, prune=False) np.testing.assert_almost_equal(actual, expected) # Test pruning mask @@ -83,5 +84,5 @@ def test_ca_items_x_ca_cat_x_cat_index(self): [False, False, True, True, True], [True, True, True, True, True]], ]) - actual = cube.index(prune=True).mask + actual = Index.data(cube, weighted=True, prune=True).mask np.testing.assert_array_equal(actual, expected) diff --git a/tests/integration/test_multiple_response.py b/tests/integration/test_multiple_response.py index d256c959f..ee43daaad 100644 --- a/tests/integration/test_multiple_response.py +++ b/tests/integration/test_multiple_response.py @@ -3,6 +3,7 @@ import pytest from cr.cube.crunch_cube import CrunchCube +from cr.cube.measures.index import Index from ..fixtures import CR @@ -280,7 +281,7 @@ def test_cat_x_mr_index_by_row(): [.8571429, 1.1152941, .9610984], [1.0769231, .9466231, 1.019037], ]) - actual = cube.index() + actual = Index.data(cube, weighted=True, prune=False) np.testing.assert_almost_equal(actual, expected) @@ -290,7 +291,7 @@ def test_cat_x_mr_index_by_cell(): [.8571429, 1.1152941, .9610984], [1.0769231, .9466231, 1.019037], ]) - actual = cube.index() + actual = Index.data(cube, weighted=True, prune=False) np.testing.assert_almost_equal(actual, expected) @@ -300,7 +301,7 @@ def test_cat_x_mr_index_by_col(): [.8571429, 1.1152941, .9610984], [1.0769231, .9466231, 1.019037], ]) - actual = cube.index() + actual = Index.data(cube, weighted=True, prune=False) np.testing.assert_almost_equal(actual, expected) @@ -308,7 +309,7 @@ def test_cat_x_mr_index_by_col(): def test_cat_x_mr_index_bad_direction(): cube = CrunchCube(CR.CAT_X_MR) with pytest.raises(ValueError): - 
cube.index() + Index.data(cube, weighted=True, prune=False) def test_mr_x_single_wave(): diff --git a/tests/unit/test_crunch_cube.py b/tests/unit/test_crunch_cube.py index 6f651c670..657a2a79e 100644 --- a/tests/unit/test_crunch_cube.py +++ b/tests/unit/test_crunch_cube.py @@ -12,7 +12,8 @@ from cr.cube.enum import DIMENSION_TYPE as DT from ..unitutil import ( - class_mock, instance_mock, method_mock, Mock, patch, property_mock + class_mock, function_mock, instance_mock, method_mock, Mock, patch, + property_mock ) @@ -65,6 +66,22 @@ def it_knows_when_it_has_an_mr_dimension( assert has_mr is expected_value + def it_has_a_deprecated_index_method_that_forwards_to_Index_data( + self, request): + Index_ = class_mock(request, 'cr.cube.crunch_cube.Index') + index_ = Index_.data.return_value + warn_ = function_mock(request, 'cr.cube.crunch_cube.warnings.warn') + cube = CrunchCube(None) + + index = cube.index() + + warn_.assert_called_once_with( + "CrunchCube.index() is deprecated. Use CubeSlice.index_table().", + DeprecationWarning + ) + Index_.data.assert_called_once_with(cube, True, False) + assert index is index_ + def it_can_adjust_an_axis_to_help( self, request, adjust_fixture, dimensions_prop_): dimension_types, axis_cases = adjust_fixture From b2dce84899eae6b93490c6719ed099f791c45df9 Mon Sep 17 00:00:00 2001 From: Steve Canny Date: Sun, 4 Nov 2018 20:21:16 -0800 Subject: [PATCH 04/30] rfctr: remove 'adjusted' arg from ._as_array() There are no callers using the optional `adjusted` argument. Having such an argument conflates two separate operations. If adjustment is required, it can easily be performed by the caller on the end-result of as_array(). 
--- src/cr/cube/crunch_cube.py | 15 ++++----------- tests/integration/test_crunch_cube.py | 27 --------------------------- 2 files changed, 4 insertions(+), 38 deletions(-) diff --git a/src/cr/cube/crunch_cube.py b/src/cr/cube/crunch_cube.py index dffe63372..755b5b93b 100644 --- a/src/cr/cube/crunch_cube.py +++ b/src/cr/cube/crunch_cube.py @@ -85,7 +85,7 @@ def __repr__(self): except Exception: return super(CrunchCube, self).__repr__() - def as_array(self, include_missing=False, weighted=True, adjusted=False, + def as_array(self, include_missing=False, weighted=True, include_transforms_for_dims=None, prune=False, margin=False): """Return `ndarray` representing cube values. @@ -118,7 +118,6 @@ def as_array(self, include_missing=False, weighted=True, adjusted=False, array = self._as_array( include_missing=include_missing, weighted=weighted, - adjusted=adjusted, include_transforms_for_dims=include_transforms_for_dims, margin=margin ) @@ -899,8 +898,8 @@ def _apply_missings_and_insertions(self, res, include_transforms_for_dims, return res[np.ix_(*new_valids)] if new_valids else res def _as_array(self, include_missing=False, get_non_selected=False, - weighted=True, adjusted=False, - include_transforms_for_dims=False, margin=False): + weighted=True, include_transforms_for_dims=False, + margin=False): """Get crunch cube as ndarray. Args @@ -917,18 +916,12 @@ def _as_array(self, include_missing=False, get_non_selected=False, Returns res (ndarray): Tabular representation of crunch cube """ - res = self._apply_missings_and_insertions( + return self._apply_missings_and_insertions( self._raw_cube_array(weighted, margin), include_transforms_for_dims, include_missing=include_missing ) - # ---prepare resulting array for sig-testing if requested--- - if adjusted: - res += 1 - - return res - @classmethod def _calculate_constraints_sum(cls, prop_table, prop_margin, axis): """Calculate sum of constraints (part of the standard error equation). 
diff --git a/tests/integration/test_crunch_cube.py b/tests/integration/test_crunch_cube.py index 221daa3ff..b86390dbb 100644 --- a/tests/integration/test_crunch_cube.py +++ b/tests/integration/test_crunch_cube.py @@ -90,36 +90,18 @@ def test_as_array_univariate_cat_exclude_missing(self): actual = cube.as_array() np.testing.assert_array_equal(actual, expected) - def test_as_array_univariate_cat_exclude_missing_adjusted(self): - cube = CrunchCube(CR.UNIVARIATE_CATEGORICAL) - expected = np.array([11, 6]) - actual = cube.as_array(adjusted=True) - np.testing.assert_array_equal(actual, expected) - def test_as_array_numeric(self): cube = CrunchCube(CR.VOTER_REGISTRATION) expected = np.array([885, 105, 10]) actual = cube.as_array() np.testing.assert_array_equal(actual, expected) - def test_as_array_numeric_adjusted(self): - cube = CrunchCube(CR.VOTER_REGISTRATION) - expected = np.array([886, 106, 11]) - actual = cube.as_array(adjusted=True) - np.testing.assert_array_equal(actual, expected) - def test_as_array_datetime(self): cube = CrunchCube(CR.SIMPLE_DATETIME) expected = np.array([1, 1, 1, 1]) actual = cube.as_array() np.testing.assert_array_equal(actual, expected) - def test_as_array_datetime_adjusted(self): - cube = CrunchCube(CR.SIMPLE_DATETIME) - expected = np.array([2, 2, 2, 2]) - actual = cube.as_array(adjusted=True) - np.testing.assert_array_equal(actual, expected) - def test_as_array_text(self): cube = CrunchCube(CR.SIMPLE_TEXT) expected = np.array([1, 1, 1, 1, 1, 1]) @@ -135,15 +117,6 @@ def test_as_array_cat_x_cat_exclude_missing(self): actual = cube.as_array() np.testing.assert_array_equal(actual, expected) - def test_as_array_cat_x_cat_exclude_missing_adjusted(self): - cube = CrunchCube(CR.CAT_X_CAT) - expected = np.array([ - [6, 3], - [6, 4], - ]) - actual = cube.as_array(adjusted=True) - np.testing.assert_array_equal(actual, expected) - def test_as_array_cat_x_cat_unweighted(self): cube = CrunchCube(CR.CAT_X_CAT) expected = np.array([ From 
648eced6e2e94a613717a4fe16d2493e24081e7c Mon Sep 17 00:00:00 2001 From: Steve Canny Date: Mon, 5 Nov 2018 13:45:48 -0800 Subject: [PATCH 05/30] cube: add xfails to drive _Measures TDD --- src/cr/cube/crunch_cube.py | 4 + tests/fixtures/cat-x-cat-mean-wgtd.json | 379 ++++++++ tests/fixtures/cat-x-cat-wgtd.json | 232 +++++ tests/fixtures/cat-x-cat-x-cat-wgtd.json | 1038 ++++++++++++++++++++++ tests/integration/test_crunch_cube.py | 131 ++- 5 files changed, 1783 insertions(+), 1 deletion(-) create mode 100644 tests/fixtures/cat-x-cat-mean-wgtd.json create mode 100644 tests/fixtures/cat-x-cat-wgtd.json create mode 100644 tests/fixtures/cat-x-cat-x-cat-wgtd.json diff --git a/src/cr/cube/crunch_cube.py b/src/cr/cube/crunch_cube.py index 755b5b93b..b15076f38 100644 --- a/src/cr/cube/crunch_cube.py +++ b/src/cr/cube/crunch_cube.py @@ -1419,3 +1419,7 @@ def _update_result(self, result, insertions, dimension_index, ) ) return result, valid_indices + + +class _Measures(object): + """Provides access to measures contained in cube response.""" diff --git a/tests/fixtures/cat-x-cat-mean-wgtd.json b/tests/fixtures/cat-x-cat-mean-wgtd.json new file mode 100644 index 000000000..ffbf0dd9d --- /dev/null +++ b/tests/fixtures/cat-x-cat-mean-wgtd.json @@ -0,0 +1,379 @@ +{ + "query": { + "dimensions": [ + { + "variable": "https://alpha.crunch.io/api/datasets/aefd8b1483754d639b46f5ee8f6d7821/variables/000000/" + }, + { + "variable": "https://alpha.crunch.io/api/datasets/aefd8b1483754d639b46f5ee8f6d7821/variables/000004/" + } + ], + "measures": { + "count": { + "args": [], + "function": "cube_count" + }, + "mean": { + "args": [ + { + "variable": "https://alpha.crunch.io/api/datasets/aefd8b1483754d639b46f5ee8f6d7821/variables/00000c/" + } + ], + "function": "cube_mean" + } + }, + "weight": "https://alpha.crunch.io/api/datasets/aefd8b1483754d639b46f5ee8f6d7821/variables/0af6163c3e694b9b80071ed691fd4f28/" + }, + "query_environment": { + "filter": [] + }, + "result": { + "counts": [ + 156, + 
129, + 0, + 0, + 0, + 197, + 199, + 0, + 0, + 0, + 101, + 141, + 0, + 0, + 0, + 2, + 4, + 0, + 0, + 0, + 26, + 42, + 0, + 0, + 0, + 3, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0 + ], + "dimensions": [ + { + "derived": false, + "references": { + "alias": "ShutdownBlame", + "description": "If President Obama and the Republicans in Congress do not reach a budget agreement in time to avoid a shutdown of the federal government, who do you think will more to blame--President Obama or the Republican Congress?", + "name": "ShutdownBlame", + "view": { + "column_width": null, + "include_missing": true, + "show_counts": false, + "show_numeric_values": false, + "transforms": { + "insertions": [ + { + "anchor": 5, + "args": [ + 5, + 4 + ], + "function": "subtotal", + "name": "test new" + } + ] + } + } + }, + "type": { + "categories": [ + { + "id": 1, + "missing": false, + "name": "President Obama", + "numeric_value": 1 + }, + { + "id": 2, + "missing": false, + "name": "Republicans in Congress", + "numeric_value": 2 + }, + { + "id": 3, + "missing": false, + "name": "Both", + "numeric_value": 3 + }, + { + "id": 4, + "missing": false, + "name": "Neither", + "numeric_value": 4 + }, + { + "id": 5, + "missing": false, + "name": "Not sure", + "numeric_value": 5 + }, + { + "id": 8, + "missing": true, + "name": "Skipped", + "numeric_value": 8 + }, + { + "id": 9, + "missing": true, + "name": "Not Asked", + "numeric_value": 9 + }, + { + "id": -1, + "missing": true, + "name": "No Data", + "numeric_value": null + } + ], + "class": "categorical", + "ordinal": false + } + }, + { + "derived": false, + "references": { + "alias": "Gender", + "description": "Are you male or female?", + "name": "Gender" + }, + "type": { + "categories": [ + { + "id": 1, + "missing": false, + "name": "Male", + "numeric_value": 1 + }, + { + "id": 2, + "missing": false, + "name": "Female", + "numeric_value": 2 + }, + { + "id": 8, + "missing": true, + "name": "Skipped", + "numeric_value": 8 + 
}, + { + "id": 9, + "missing": true, + "name": "Not Asked", + "numeric_value": 9 + }, + { + "id": -1, + "missing": true, + "name": "No Data", + "numeric_value": null + } + ], + "class": "categorical", + "ordinal": false + } + } + ], + "element": "crunch:cube", + "measures": { + "count": { + "data": [ + 160.82474226804146, + 125.24271844660174, + 0, + 0, + 0, + 203.0927835051553, + 193.20388349514528, + 0, + 0, + 0, + 104.12371134020616, + 136.89320388349492, + 0, + 0, + 0, + 2.061855670103093, + 3.883495145631068, + 0, + 0, + 0, + 26.804123711340207, + 40.776699029126206, + 0, + 0, + 0, + 3.0927835051546397, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0 + ], + "metadata": { + "derived": true, + "references": {}, + "type": { + "class": "numeric", + "integer": false, + "missing_reasons": { + "No Data": -1 + }, + "missing_rules": {} + } + }, + "n_missing": 3 + }, + "mean": { + "data": [ + 52.78205128205122, + 49.9069767441861, + { + "?": -8 + }, + { + "?": -8 + }, + { + "?": -8 + }, + 50.43654822335009, + 48.20100502512572, + { + "?": -8 + }, + { + "?": -8 + }, + { + "?": -8 + }, + 51.5643564356436, + 47.602836879432715, + { + "?": -8 + }, + { + "?": -8 + }, + { + "?": -8 + }, + 58, + 29, + { + "?": -8 + }, + { + "?": -8 + }, + { + "?": -8 + }, + 37.53846153846155, + 39.45238095238095, + { + "?": -8 + }, + { + "?": -8 + }, + { + "?": -8 + }, + 36.666666666666664, + { + "?": -8 + }, + { + "?": -8 + }, + { + "?": -8 + }, + { + "?": -8 + }, + { + "?": -8 + }, + { + "?": -8 + }, + { + "?": -8 + }, + { + "?": -8 + }, + { + "?": -8 + }, + { + "?": -8 + }, + { + "?": -8 + }, + { + "?": -8 + }, + { + "?": -8 + }, + { + "?": -8 + } + ], + "metadata": { + "derived": true, + "references": {}, + "type": { + "class": "numeric", + "integer": false, + "missing_reasons": { + "NaN": -8, + "No Data": -1 + }, + "missing_rules": {} + } + }, + "n_missing": 3 + } + }, + "missing": 3, + "n": 1000 + } +} diff --git a/tests/fixtures/cat-x-cat-wgtd.json 
b/tests/fixtures/cat-x-cat-wgtd.json new file mode 100644 index 000000000..89781b25a --- /dev/null +++ b/tests/fixtures/cat-x-cat-wgtd.json @@ -0,0 +1,232 @@ +{ + "result": { + "counts": [ + 32, + 85, + 171, + 114, + 70, + 13, + 0, + 0, + 0, + 40, + 97, + 205, + 106, + 40, + 27, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0 + ], + "dimensions": [ + { + "derived": false, + "references": { + "alias": "Gender", + "description": "Are you male or female?", + "name": "Gender" + }, + "type": { + "categories": [ + { + "id": 1, + "missing": false, + "name": "Male", + "numeric_value": 1 + }, + { + "id": 2, + "missing": false, + "name": "Female", + "numeric_value": 2 + }, + { + "id": 8, + "missing": true, + "name": "Skipped", + "numeric_value": 8 + }, + { + "id": 9, + "missing": true, + "name": "Not Asked", + "numeric_value": 9 + }, + { + "id": -1, + "missing": true, + "name": "No Data", + "numeric_value": null + } + ], + "class": "categorical", + "ordinal": false + } + }, + { + "derived": false, + "references": { + "alias": "RespondentIdeology", + "description": "In general, how would you describe your own political viewpoint?", + "name": "RespondentIdeology" + }, + "type": { + "categories": [ + { + "id": 1, + "missing": false, + "name": "Very liberal", + "numeric_value": 1 + }, + { + "id": 2, + "missing": false, + "name": "Liberal", + "numeric_value": 2 + }, + { + "id": 3, + "missing": false, + "name": "Moderate", + "numeric_value": 3 + }, + { + "id": 4, + "missing": false, + "name": "Conservative", + "numeric_value": 4 + }, + { + "id": 5, + "missing": false, + "name": "Very Conservative", + "numeric_value": 5 + }, + { + "id": 6, + "missing": false, + "name": "Not sure", + "numeric_value": 6 + }, + { + "id": 8, + "missing": true, + "name": "Skipped", + "numeric_value": 8 + }, + { + "id": 9, + "missing": true, + "name": "Not Asked", + "numeric_value": 9 + }, + { + "id": -1, 
+ "missing": true, + "name": "No Data", + "numeric_value": null + } + ], + "class": "categorical", + "ordinal": false + } + } + ], + "element": "crunch:cube", + "measures": { + "count": { + "data": [ + 32.9, + 87.6, + 176.2, + 117.5, + 72.1, + 13.4, + 0, + 0, + 0, + 38.8, + 94.1, + 199.0128, + 102.9, + 38.8305, + 26.2135, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0 + ], + "metadata": { + "derived": true, + "references": {}, + "type": { + "class": "numeric", + "integer": false, + "missing_reasons": { + "No Data": -1 + }, + "missing_rules": {} + } + }, + "n_missing": 0 + } + }, + "missing": 0, + "n": 1000 + } +} diff --git a/tests/fixtures/cat-x-cat-x-cat-wgtd.json b/tests/fixtures/cat-x-cat-x-cat-wgtd.json new file mode 100644 index 000000000..32180fa02 --- /dev/null +++ b/tests/fixtures/cat-x-cat-x-cat-wgtd.json @@ -0,0 +1,1038 @@ +{ + "query": { + "dimensions": [ + { + "variable": "/api/datasets/123/variables/000004/" + }, + { + "variable": "/api/datasets/123/variables/000000/" + }, + { + "variable": "/api/datasets/123/variables/000001/" + } + ], + "measures": { + "count": { + "args": [], + "function": "cube_count" + } + }, + "weight": "w" + }, + "result": { + "counts": [ + 3, + 3, + 11, + 15, + 13, + 3, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 3, + 10, + 19, + 19, + 14, + 3, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, 
+ 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0 + ], + "dimensions": [ + { + "derived": false, + "references": { + "alias": "Gender", + "description": "Are you male or female?", + "name": "Gender" + }, + "type": { + "categories": [ + { + "id": 1, + "missing": false, + "name": "Male", + "numeric_value": 1 + }, + { + "id": 2, + "missing": false, + "name": "Female", + "numeric_value": 2 + }, + { + "id": 8, + "missing": true, + "name": "Skipped", + "numeric_value": 8 + }, + { + "id": 9, + "missing": true, + "name": "Not Asked", + "numeric_value": 9 + }, + { + "id": -1, + "missing": true, + "name": "No Data", + "numeric_value": null + } + ], + "class": "categorical", + "ordinal": false + } + }, + { + "derived": false, + "references": { + 
"alias": "ShutdownBlame", + "description": "If President Obama and the Republicans in Congress do not reach a budget agreement in time to avoid a shutdown of the federal government, who do you think will more to blame--President Obama or the Republican Congress?", + "name": "ShutdownBlame", + "view": { + "column_width": null, + "include_missing": false, + "show_counts": false, + "show_numeric_values": false + } + }, + "type": { + "categories": [ + { + "id": 1, + "missing": false, + "name": "President Obama", + "numeric_value": 1 + }, + { + "id": 2, + "missing": false, + "name": "Republicans in Congress", + "numeric_value": 2 + }, + { + "id": 3, + "missing": false, + "name": "Both", + "numeric_value": 3 + }, + { + "id": 4, + "missing": false, + "name": "Neither", + "numeric_value": 4 + }, + { + "id": 5, + "missing": true, + "name": "Not sure", + "numeric_value": 5 + }, + { + "id": 8, + "missing": true, + "name": "Skipped", + "numeric_value": 8 + }, + { + "id": 9, + "missing": true, + "name": "Not Asked", + "numeric_value": 9 + }, + { + "id": -1, + "missing": true, + "name": "No Data", + "numeric_value": null + } + ], + "class": "categorical", + "ordinal": false + } + }, + { + "derived": false, + "references": { + "alias": "RespondentIdeology", + "description": "In general, how would you describe your own political viewpoint?", + "is_subvar": true, + "name": "RespondentIdeology" + }, + "type": { + "categories": [ + { + "id": 1, + "missing": false, + "name": "Very liberal", + "numeric_value": 1 + }, + { + "id": 2, + "missing": false, + "name": "Liberal", + "numeric_value": 2 + }, + { + "id": 3, + "missing": false, + "name": "Moderate", + "numeric_value": 3 + }, + { + "id": 4, + "missing": false, + "name": "Conservative", + "numeric_value": 4 + }, + { + "id": 5, + "missing": false, + "name": "Very Conservative", + "numeric_value": 5 + }, + { + "id": 6, + "missing": false, + "name": "Not sure", + "numeric_value": 6 + }, + { + "id": 8, + "missing": true, + "name": 
"Skipped", + "numeric_value": 8 + }, + { + "id": 9, + "missing": true, + "name": "Not Asked", + "numeric_value": 9 + }, + { + "id": -1, + "missing": true, + "name": "No Data", + "numeric_value": null + }, + { + "id": 100, + "missing": false, + "name": "Very conservative", + "numeric_value": 5 + } + ], + "class": "categorical", + "ordinal": false + } + } + ], + "element": "crunch:cube", + "measures": { + "count": { + "data": [ + 1, + 4, + 41, + 65, + 43, + 2, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 2, + 10, + 39, + 49, + 24, + 5, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 
0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0 + ], + "metadata": { + "derived": true, + "references": {}, + "type": { + "class": "numeric", + "integer": true, + "missing_reasons": { + "No Data": -1 + }, + "missing_rules": {} + } + }, + "n_missing": 0 + } + }, + "missing": 0, + "n": 285 + }, + "weight_var": "the_weight" +} diff --git a/tests/integration/test_crunch_cube.py b/tests/integration/test_crunch_cube.py index b86390dbb..f16dc4bbd 100644 --- a/tests/integration/test_crunch_cube.py +++ b/tests/integration/test_crunch_cube.py @@ -2,7 +2,7 @@ import numpy as np import pytest -from cr.cube.crunch_cube import CrunchCube +from cr.cube.crunch_cube import CrunchCube, _Measures from cr.cube.enum import DIMENSION_TYPE as DT from cr.cube.measures.index import Index from cr.cube.util import compress_pruned @@ -77,6 +77,135 @@ def dimensions_fixture(self, request): return cube_response, expected_dimension_types +class DescribeIntegrated_Measures(object): + + @pytest.mark.xfail(reason='WIP', strict=True) + def it_knows_when_its_measures_are_weighted(self, is_weighted_fixture): + cube_dict, expected_value = is_weighted_fixture + measures = _Measures(cube_dict, None) + + is_weighted = measures.is_weighted + + assert is_weighted == expected_value + + @pytest.mark.xfail(reason='WIP', strict=True) + def it_provides_access_to_the_mean_measure(self): + cube_dict = CR.CAT_X_CAT_MEAN_WGTD + measures = _Measures(cube_dict, None) + + means = measures.means + + assert type(means).__name__ == '_MeanMeasure' + + @pytest.mark.xfail(reason='WIP', strict=True) + def 
but_only_when_the_cube_response_contains_means(self): + cube_dict = CR.CAT_X_CAT + measures = _Measures(cube_dict, None) + + means = measures.means + + assert means is None + + @pytest.mark.xfail(reason='WIP', strict=True) + def it_provides_the_means_missing_count_when_means_are_available(self): + measures = _Measures(CR.CAT_X_CAT_MEAN_WGTD, None) + missing_count = measures.missing_count + assert missing_count == 3 + + @pytest.mark.xfail(reason='WIP', strict=True) + def but_provides_the_general_missing_count_otherwise(self): + measures = _Measures(CR.CAT_X_CAT, None) + missing_count = measures.missing_count + assert missing_count == 5 + + @pytest.mark.xfail(reason='WIP', strict=True) + def it_knows_the_population_fraction(self, pop_frac_fixture): + cube_dict, expected_value = pop_frac_fixture + measures = _Measures(cube_dict, None) + + population_fraction = measures.population_fraction + + assert population_fraction == expected_value + + @pytest.mark.xfail(reason='WIP', strict=True) + def it_provides_access_to_the_unweighted_count_measure(self): + measures = _Measures(None, None) + + unweighted_counts = measures.unweighted_counts + + assert type(unweighted_counts).__name__ == '_UnweightedCountMeasure' + + @pytest.mark.xfail(reason='WIP', strict=True) + def it_knows_the_unweighted_n(self): + measures = _Measures(CR.CAT_X_CAT, None) + unweighted_n = measures.unweighted_n + assert unweighted_n == 20 + + @pytest.mark.xfail(reason='WIP', strict=True) + def it_provides_access_to_the_weighted_count_measure( + self, wgtd_counts_fixture): + cube_dict, expected_type_name = wgtd_counts_fixture + measures = _Measures(cube_dict, None) + + weighted_counts = measures.weighted_counts + + assert type(weighted_counts).__name__ == expected_type_name + + @pytest.mark.xfail(reason='WIP', strict=True) + def it_knows_the_weighted_n(self, wgtd_n_fixture): + cube_dict, expected_value = wgtd_n_fixture + measures = _Measures(cube_dict, None) + + weighted_n = measures.weighted_n + + assert 
round(weighted_n, 3) == expected_value + + # fixtures ------------------------------------------------------- + + @pytest.fixture(params=[ + # ---has {'query': {'weight': url}}--- + (CR.ADMIT_X_GENDER_WEIGHTED, True), + # ---has {'weight_var': weight_name_str}--- + (CR.CAT_X_CAT_X_CAT_WGTD, True), + # ---unweighted_counts == measure_count_data--- + (CR.ADMIT_X_DEPT_UNWEIGHTED, False), + ]) + def is_weighted_fixture(self, request): + cube_response, expected_value = request.param + cube_dict = cube_response.get('value', cube_response) + return cube_dict, expected_value + + @pytest.fixture(params=[ + # ---filtered case--- + (CR.CAT_X_CAT_FILT, 0.254), + # ---unfiltered case--- + (CR.CAT_X_CAT, 1.0), + ]) + def pop_frac_fixture(self, request): + cube_dict, expected_value = request.param + return cube_dict, expected_value + + @pytest.fixture(params=[ + # ---weighted case--- + (CR.CAT_X_CAT_WGTD, '_WeightedCountMeasure'), + # ---unweighted case--- + (CR.CAT_X_CAT, '_UnweightedCountMeasure'), + ]) + def wgtd_counts_fixture(self, request): + cube_dict, expected_type_name = request.param + return cube_dict, expected_type_name + + @pytest.fixture(params=[ + # ---weighted case--- + (CR.CAT_X_CAT_WGTD, 999.557), + # ---unweighted case--- + (CR.CAT_X_CAT, 20.0), + ]) + def wgtd_n_fixture(self, request): + cube_dict, expected_type = request.param + return cube_dict, expected_type + + class TestCrunchCube(TestCase): def test_crunch_cube_loads_data(self): From 63cd83920093375fe79256dae0efaa5f50ee6230 Mon Sep 17 00:00:00 2001 From: Steve Canny Date: Mon, 5 Nov 2018 17:46:01 -0800 Subject: [PATCH 06/30] cube: add _Measures.is_weighted --- src/cr/cube/crunch_cube.py | 26 +++++++++++++++++++++++ tests/integration/test_crunch_cube.py | 1 - tests/unit/test_crunch_cube.py | 30 ++++++++++++++++++++++++++- 3 files changed, 55 insertions(+), 2 deletions(-) diff --git a/src/cr/cube/crunch_cube.py b/src/cr/cube/crunch_cube.py index b15076f38..d3a43d44d 100644 --- a/src/cr/cube/crunch_cube.py 
+++ b/src/cr/cube/crunch_cube.py @@ -1423,3 +1423,29 @@ def _update_result(self, result, insertions, dimension_index, class _Measures(object): """Provides access to measures contained in cube response.""" + + def __init__(self, cube_dict, all_dimensions): + self._cube_dict = cube_dict + self._all_dimensions = all_dimensions + + @lazyproperty + def is_weighted(self): + """True if weights have been applied to the measure(s) for this cube. + + Unweighted counts are available for all cubes. Weighting applies to + any other measures provided by the cube. + """ + cube_dict = self._cube_dict + if cube_dict.get('query', {}).get('weight') is not None: + return True + if cube_dict.get('weight_var') is not None: + return True + if cube_dict.get('weight_url') is not None: + return True + unweighted_counts = cube_dict['result']['counts'] + count_data = ( + cube_dict['result']['measures'].get('count', {}).get('data') + ) + if unweighted_counts != count_data: + return True + return False diff --git a/tests/integration/test_crunch_cube.py b/tests/integration/test_crunch_cube.py index f16dc4bbd..711a67c5a 100644 --- a/tests/integration/test_crunch_cube.py +++ b/tests/integration/test_crunch_cube.py @@ -79,7 +79,6 @@ def dimensions_fixture(self, request): class DescribeIntegrated_Measures(object): - @pytest.mark.xfail(reason='WIP', strict=True) def it_knows_when_its_measures_are_weighted(self, is_weighted_fixture): cube_dict, expected_value = is_weighted_fixture measures = _Measures(cube_dict, None) diff --git a/tests/unit/test_crunch_cube.py b/tests/unit/test_crunch_cube.py index 657a2a79e..eb9c89122 100644 --- a/tests/unit/test_crunch_cube.py +++ b/tests/unit/test_crunch_cube.py @@ -7,7 +7,7 @@ from unittest import TestCase -from cr.cube.crunch_cube import CrunchCube +from cr.cube.crunch_cube import CrunchCube, _Measures from cr.cube.dimension import AllDimensions, _ApparentDimensions, Dimension from cr.cube.enum import DIMENSION_TYPE as DT @@ -290,6 +290,34 @@ def 
mr_dim_ind_prop_(self, request): return property_mock(request, CrunchCube, 'mr_dim_ind') +class Describe_Measures(object): + + def it_knows_when_a_measure_is_weighted(self, is_weighted_fixture): + cube_dict, expected_value = is_weighted_fixture + measures = _Measures(cube_dict, None) + + is_weighted = measures.is_weighted + + assert is_weighted is expected_value + + # fixtures ------------------------------------------------------- + + @pytest.fixture(params=[ + ({'query': {'weight': 'https://x'}}, True), + ({'weight_var': 'weight'}, True), + ({'weight_url': 'https://y'}, True), + ({'result': { + 'counts': [1, 2, 3], + 'measures': {'count': {'data': [2, 4, 6]}}}}, True), + ({'result': { + 'counts': [1, 2, 3], + 'measures': {'count': {'data': [1, 2, 3]}}}}, False), + ]) + def is_weighted_fixture(self, request): + cube_dict, expected_value = request.param + return cube_dict, expected_value + + # pylint: disable=invalid-name, no-self-use, protected-access @patch('cr.cube.crunch_cube.CrunchCube.get_slices', lambda x: None) class TestCrunchCube(TestCase): From 4067b5fede29b9639af286e93ed4c13877c5d53a Mon Sep 17 00:00:00 2001 From: Steve Canny Date: Mon, 5 Nov 2018 18:01:26 -0800 Subject: [PATCH 07/30] cube: add _Measures.means --- src/cr/cube/crunch_cube.py | 30 ++++++++++++++++++++++++- tests/integration/test_crunch_cube.py | 2 -- tests/unit/test_crunch_cube.py | 32 ++++++++++++++++++++++++++- 3 files changed, 60 insertions(+), 4 deletions(-) diff --git a/src/cr/cube/crunch_cube.py b/src/cr/cube/crunch_cube.py index d3a43d44d..4db976c4f 100644 --- a/src/cr/cube/crunch_cube.py +++ b/src/cr/cube/crunch_cube.py @@ -2,7 +2,7 @@ """Provides the CrunchCube class. -CrunchCube is the main API class for manipulating the Crunch.io JSON cube +CrunchCube is the main API class for manipulating Crunch.io JSON cube responses. 
""" @@ -1449,3 +1449,31 @@ def is_weighted(self): if unweighted_counts != count_data: return True return False + + @lazyproperty + def means(self): + """_MeanMeasure object providing access to means values. + + None when the cube response does not contain a mean measure. + """ + mean_measure_dict = ( + self._cube_dict + .get('result', {}) + .get('measures', {}) + .get('mean') + ) + if mean_measure_dict is None: + return None + return _MeanMeasure(self._cube_dict, self._all_dimensions) + + +class _BaseMeasure(object): + """Base class for measure objects.""" + + def __init__(self, cube_dict, all_dimensions): + self._cube_dict = cube_dict + self._all_dimensions = all_dimensions + + +class _MeanMeasure(_BaseMeasure): + """Statistical mean values from a cube-response.""" diff --git a/tests/integration/test_crunch_cube.py b/tests/integration/test_crunch_cube.py index 711a67c5a..d1933db37 100644 --- a/tests/integration/test_crunch_cube.py +++ b/tests/integration/test_crunch_cube.py @@ -87,7 +87,6 @@ def it_knows_when_its_measures_are_weighted(self, is_weighted_fixture): assert is_weighted == expected_value - @pytest.mark.xfail(reason='WIP', strict=True) def it_provides_access_to_the_mean_measure(self): cube_dict = CR.CAT_X_CAT_MEAN_WGTD measures = _Measures(cube_dict, None) @@ -96,7 +95,6 @@ def it_provides_access_to_the_mean_measure(self): assert type(means).__name__ == '_MeanMeasure' - @pytest.mark.xfail(reason='WIP', strict=True) def but_only_when_the_cube_response_contains_means(self): cube_dict = CR.CAT_X_CAT measures = _Measures(cube_dict, None) diff --git a/tests/unit/test_crunch_cube.py b/tests/unit/test_crunch_cube.py index eb9c89122..9cb4905df 100644 --- a/tests/unit/test_crunch_cube.py +++ b/tests/unit/test_crunch_cube.py @@ -7,7 +7,7 @@ from unittest import TestCase -from cr.cube.crunch_cube import CrunchCube, _Measures +from cr.cube.crunch_cube import CrunchCube, _MeanMeasure, _Measures from cr.cube.dimension import AllDimensions, _ApparentDimensions, 
Dimension from cr.cube.enum import DIMENSION_TYPE as DT @@ -300,6 +300,22 @@ def it_knows_when_a_measure_is_weighted(self, is_weighted_fixture): assert is_weighted is expected_value + def it_provides_access_to_the_means_measure( + self, _MeanMeasure_, mean_measure_, all_dimensions_): + cube_dict = {'result': {'measures': {'mean': {}}}} + _MeanMeasure_.return_value = mean_measure_ + measures = _Measures(cube_dict, all_dimensions_) + + means = measures.means + + _MeanMeasure_.assert_called_once_with(cube_dict, all_dimensions_) + assert means is mean_measure_ + + def but_only_when_the_cube_has_a_mean_measure(self): + measures = _Measures({'result': {'measures': {}}}, None) + means = measures.means + assert means is None + # fixtures ------------------------------------------------------- @pytest.fixture(params=[ @@ -317,6 +333,20 @@ def is_weighted_fixture(self, request): cube_dict, expected_value = request.param return cube_dict, expected_value + # fixture components --------------------------------------------- + + @pytest.fixture + def all_dimensions_(self, request): + return instance_mock(request, AllDimensions) + + @pytest.fixture + def _MeanMeasure_(self, request): + return class_mock(request, 'cr.cube.crunch_cube._MeanMeasure') + + @pytest.fixture + def mean_measure_(self, request): + return instance_mock(request, _MeanMeasure) + # pylint: disable=invalid-name, no-self-use, protected-access @patch('cr.cube.crunch_cube.CrunchCube.get_slices', lambda x: None) From 6e1b177715d786bbff19d237732a89d3b12e3211 Mon Sep 17 00:00:00 2001 From: Steve Canny Date: Mon, 5 Nov 2018 18:33:14 -0800 Subject: [PATCH 08/30] cube: add _Measures.missing_count --- src/cr/cube/crunch_cube.py | 12 ++++++++++++ tests/integration/test_crunch_cube.py | 1 - tests/unit/test_crunch_cube.py | 24 ++++++++++++++++++++++++ 3 files changed, 36 insertions(+), 1 deletion(-) diff --git a/src/cr/cube/crunch_cube.py b/src/cr/cube/crunch_cube.py index 4db976c4f..d3b4d3f30 100644 --- 
a/src/cr/cube/crunch_cube.py +++ b/src/cr/cube/crunch_cube.py @@ -1466,6 +1466,13 @@ def means(self): return None return _MeanMeasure(self._cube_dict, self._all_dimensions) + @lazyproperty + def missing_count(self): + """numeric representing count of missing rows in cube response.""" + if self.means: + return self.means.missing_count + return self._cube_dict['result'].get('missing', 0) + class _BaseMeasure(object): """Base class for measure objects.""" @@ -1477,3 +1484,8 @@ def __init__(self, cube_dict, all_dimensions): class _MeanMeasure(_BaseMeasure): """Statistical mean values from a cube-response.""" + + @lazyproperty + def missing_count(self): + """numeric representing count of missing rows reflected in response.""" + raise NotImplementedError diff --git a/tests/integration/test_crunch_cube.py b/tests/integration/test_crunch_cube.py index d1933db37..61674b137 100644 --- a/tests/integration/test_crunch_cube.py +++ b/tests/integration/test_crunch_cube.py @@ -109,7 +109,6 @@ def it_provides_the_means_missing_count_when_means_are_available(self): missing_count = measures.missing_count assert missing_count == 3 - @pytest.mark.xfail(reason='WIP', strict=True) def but_provides_the_general_missing_count_otherwise(self): measures = _Measures(CR.CAT_X_CAT, None) missing_count = measures.missing_count diff --git a/tests/unit/test_crunch_cube.py b/tests/unit/test_crunch_cube.py index 9cb4905df..a1dcf101b 100644 --- a/tests/unit/test_crunch_cube.py +++ b/tests/unit/test_crunch_cube.py @@ -316,6 +316,15 @@ def but_only_when_the_cube_has_a_mean_measure(self): means = measures.means assert means is None + def it_knows_the_missing_count(self, missing_count_fixture, means_prop_): + means, cube_dict, expected_value = missing_count_fixture + means_prop_.return_value = means + measures = _Measures(cube_dict, None) + + missing_count = measures.missing_count + + assert missing_count == expected_value + # fixtures ------------------------------------------------------- 
@pytest.fixture(params=[ @@ -333,6 +342,17 @@ def is_weighted_fixture(self, request): cube_dict, expected_value = request.param return cube_dict, expected_value + @pytest.fixture(params=[ + ({}, True, 37), + ({'result': {'missing': 42}}, False, 42), + ({'result': {}}, False, 0), + ]) + def missing_count_fixture(self, request, mean_measure_): + cube_dict, has_means, expected_value = request.param + mean_measure_.missing_count = expected_value if has_means else None + means = mean_measure_ if has_means else None + return means, cube_dict, expected_value + # fixture components --------------------------------------------- @pytest.fixture @@ -347,6 +367,10 @@ def _MeanMeasure_(self, request): def mean_measure_(self, request): return instance_mock(request, _MeanMeasure) + @pytest.fixture + def means_prop_(self, request): + return property_mock(request, _Measures, 'means') + # pylint: disable=invalid-name, no-self-use, protected-access @patch('cr.cube.crunch_cube.CrunchCube.get_slices', lambda x: None) From 16c112dce8764872e09dd1bc9f0a4d089d9002a4 Mon Sep 17 00:00:00 2001 From: Steve Canny Date: Mon, 5 Nov 2018 18:50:28 -0800 Subject: [PATCH 09/30] cube: add _MeanMeasure.missing_count --- src/cr/cube/crunch_cube.py | 4 +++- tests/integration/test_crunch_cube.py | 1 - tests/unit/test_crunch_cube.py | 21 +++++++++++++++++++++ 3 files changed, 24 insertions(+), 2 deletions(-) diff --git a/src/cr/cube/crunch_cube.py b/src/cr/cube/crunch_cube.py index d3b4d3f30..5c6a87f72 100644 --- a/src/cr/cube/crunch_cube.py +++ b/src/cr/cube/crunch_cube.py @@ -1488,4 +1488,6 @@ class _MeanMeasure(_BaseMeasure): @lazyproperty def missing_count(self): """numeric representing count of missing rows reflected in response.""" - raise NotImplementedError + return ( + self._cube_dict['result']['measures']['mean'].get('n_missing', 0) + ) diff --git a/tests/integration/test_crunch_cube.py b/tests/integration/test_crunch_cube.py index 61674b137..76c9c418d 100644 --- 
a/tests/integration/test_crunch_cube.py +++ b/tests/integration/test_crunch_cube.py @@ -103,7 +103,6 @@ def but_only_when_the_cube_response_contains_means(self): assert means is None - @pytest.mark.xfail(reason='WIP', strict=True) def it_provides_the_means_missing_count_when_means_are_available(self): measures = _Measures(CR.CAT_X_CAT_MEAN_WGTD, None) missing_count = measures.missing_count diff --git a/tests/unit/test_crunch_cube.py b/tests/unit/test_crunch_cube.py index a1dcf101b..159f70cef 100644 --- a/tests/unit/test_crunch_cube.py +++ b/tests/unit/test_crunch_cube.py @@ -372,6 +372,27 @@ def means_prop_(self, request): return property_mock(request, _Measures, 'means') +class Describe_MeanMeasure(object): + + def it_knows_the_missing_count(self, missing_count_fixture): + cube_dict, expected_value = missing_count_fixture + mean_measure = _MeanMeasure(cube_dict, None) + + missing_count = mean_measure.missing_count + + assert missing_count == expected_value + + # fixtures ------------------------------------------------------- + + @pytest.fixture(params=[ + ({'result': {'measures': {'mean': {}}}}, 0), + ({'result': {'measures': {'mean': {'n_missing': 42}}}}, 42), + ]) + def missing_count_fixture(self, request): + cube_dict, expected_value = request.param + return cube_dict, expected_value + + # pylint: disable=invalid-name, no-self-use, protected-access @patch('cr.cube.crunch_cube.CrunchCube.get_slices', lambda x: None) class TestCrunchCube(TestCase): From 7e87398b7862a69883449aa620743edf346d71b6 Mon Sep 17 00:00:00 2001 From: Steve Canny Date: Mon, 5 Nov 2018 18:57:01 -0800 Subject: [PATCH 10/30] cube: add _Measures.population_fraction --- src/cr/cube/crunch_cube.py | 26 ++++++++++++++++++++++++++ tests/integration/test_crunch_cube.py | 1 - tests/unit/test_crunch_cube.py | 24 ++++++++++++++++++++++++ 3 files changed, 50 insertions(+), 1 deletion(-) diff --git a/src/cr/cube/crunch_cube.py b/src/cr/cube/crunch_cube.py index 5c6a87f72..6a31a7912 100644 --- 
a/src/cr/cube/crunch_cube.py +++ b/src/cr/cube/crunch_cube.py @@ -1473,6 +1473,32 @@ def missing_count(self): return self.means.missing_count return self._cube_dict['result'].get('missing', 0) + @lazyproperty + def population_fraction(self): + """The filtered/unfiltered ratio for cube response. + + This value is required for properly calculating population on a cube + where a filter has been applied. Returns 1.0 for an unfiltered cube. + Returns `np.nan` if the unfiltered count is zero, which would + otherwise result in a divide-by-zero error. + """ + numerator = ( + self._cube_dict['result'] + .get('filtered', {}) + .get('weighted_n') + ) + denominator = ( + self._cube_dict['result'] + .get('unfiltered', {}) + .get('weighted_n') + ) + try: + return numerator / denominator + except ZeroDivisionError: + return np.nan + except Exception: + return 1.0 + class _BaseMeasure(object): """Base class for measure objects.""" diff --git a/tests/integration/test_crunch_cube.py b/tests/integration/test_crunch_cube.py index 76c9c418d..3f311d5ed 100644 --- a/tests/integration/test_crunch_cube.py +++ b/tests/integration/test_crunch_cube.py @@ -113,7 +113,6 @@ def but_provides_the_general_missing_count_otherwise(self): missing_count = measures.missing_count assert missing_count == 5 - @pytest.mark.xfail(reason='WIP', strict=True) def it_knows_the_population_fraction(self, pop_frac_fixture): cube_dict, expected_value = pop_frac_fixture measures = _Measures(cube_dict, None) diff --git a/tests/unit/test_crunch_cube.py b/tests/unit/test_crunch_cube.py index 159f70cef..c987075c4 100644 --- a/tests/unit/test_crunch_cube.py +++ b/tests/unit/test_crunch_cube.py @@ -325,6 +325,15 @@ def it_knows_the_missing_count(self, missing_count_fixture, means_prop_): assert missing_count == expected_value + def it_knows_the_population_fraction(self, pop_frac_fixture): + cube_dict, expected_value = pop_frac_fixture + measures = _Measures(cube_dict, None) + + population_fraction = 
measures.population_fraction + + # ---works for np.nan, which doesn't equal itself--- + assert population_fraction in (expected_value,) + # fixtures ------------------------------------------------------- @pytest.fixture(params=[ @@ -353,6 +362,21 @@ def missing_count_fixture(self, request, mean_measure_): means = mean_measure_ if has_means else None return means, cube_dict, expected_value + @pytest.fixture(params=[ + ({'result': {}}, 1.0), + ({'result': { + 'filtered': {'weighted_n': 21}, + 'unfiltered': {'weighted_n': 42}}}, 0.5), + ({'result': { + 'filtered': {'weighted_n': 0}, + 'unfiltered': {'weighted_n': 0}}}, np.nan), + ({'result': {'filtered': {'weighted_n': 43}}}, 1.0), + ({'result': {'unfiltered': {'weighted_n': 44}}}, 1.0), + ]) + def pop_frac_fixture(self, request): + cube_dict, expected_value = request.param + return cube_dict, expected_value + # fixture components --------------------------------------------- @pytest.fixture From 263fe2dd04e547561836d7cb0ef5922931e5dee3 Mon Sep 17 00:00:00 2001 From: Steve Canny Date: Mon, 5 Nov 2018 19:01:32 -0800 Subject: [PATCH 11/30] cube: add _Measures.unweighted_counts --- src/cr/cube/crunch_cube.py | 13 +++++++++++++ tests/integration/test_crunch_cube.py | 1 - tests/unit/test_crunch_cube.py | 28 ++++++++++++++++++++++++++- 3 files changed, 40 insertions(+), 2 deletions(-) diff --git a/src/cr/cube/crunch_cube.py b/src/cr/cube/crunch_cube.py index 6a31a7912..3433345f5 100644 --- a/src/cr/cube/crunch_cube.py +++ b/src/cr/cube/crunch_cube.py @@ -1499,6 +1499,15 @@ def population_fraction(self): except Exception: return 1.0 + @lazyproperty + def unweighted_counts(self): + """_UnweightedCountMeasure object for this cube. + + This object provides access to unweighted counts for this cube, + whether or not the cube contains weighted counts. 
+ """ + return _UnweightedCountMeasure(self._cube_dict, self._all_dimensions) + class _BaseMeasure(object): """Base class for measure objects.""" @@ -1517,3 +1526,7 @@ def missing_count(self): return ( self._cube_dict['result']['measures']['mean'].get('n_missing', 0) ) + + +class _UnweightedCountMeasure(_BaseMeasure): + """Unweighted counts for cube.""" diff --git a/tests/integration/test_crunch_cube.py b/tests/integration/test_crunch_cube.py index 3f311d5ed..f7fdeb1f7 100644 --- a/tests/integration/test_crunch_cube.py +++ b/tests/integration/test_crunch_cube.py @@ -121,7 +121,6 @@ def it_knows_the_population_fraction(self, pop_frac_fixture): assert population_fraction == expected_value - @pytest.mark.xfail(reason='WIP', strict=True) def it_provides_access_to_the_unweighted_count_measure(self): measures = _Measures(None, None) diff --git a/tests/unit/test_crunch_cube.py b/tests/unit/test_crunch_cube.py index c987075c4..b23f48f62 100644 --- a/tests/unit/test_crunch_cube.py +++ b/tests/unit/test_crunch_cube.py @@ -7,7 +7,9 @@ from unittest import TestCase -from cr.cube.crunch_cube import CrunchCube, _MeanMeasure, _Measures +from cr.cube.crunch_cube import ( + CrunchCube, _MeanMeasure, _Measures, _UnweightedCountMeasure +) from cr.cube.dimension import AllDimensions, _ApparentDimensions, Dimension from cr.cube.enum import DIMENSION_TYPE as DT @@ -334,6 +336,20 @@ def it_knows_the_population_fraction(self, pop_frac_fixture): # ---works for np.nan, which doesn't equal itself--- assert population_fraction in (expected_value,) + def it_provides_access_to_the_unweighted_count_measure( + self, _UnweightedCountMeasure_, unweighted_count_measure_, + all_dimensions_): + cube_dict = {'cube': 'dict'} + _UnweightedCountMeasure_.return_value = unweighted_count_measure_ + measures = _Measures(cube_dict, all_dimensions_) + + unweighted_counts = measures.unweighted_counts + + _UnweightedCountMeasure_.assert_called_once_with( + cube_dict, all_dimensions_ + ) + assert unweighted_counts 
is unweighted_count_measure_ + # fixtures ------------------------------------------------------- @pytest.fixture(params=[ @@ -395,6 +411,16 @@ def mean_measure_(self, request): def means_prop_(self, request): return property_mock(request, _Measures, 'means') + @pytest.fixture + def _UnweightedCountMeasure_(self, request): + return class_mock( + request, 'cr.cube.crunch_cube._UnweightedCountMeasure' + ) + + @pytest.fixture + def unweighted_count_measure_(self, request): + return instance_mock(request, _UnweightedCountMeasure) + class Describe_MeanMeasure(object): From c32dea411386342d418b74570c199ac8bf4c49de Mon Sep 17 00:00:00 2001 From: Steve Canny Date: Mon, 5 Nov 2018 20:04:04 -0800 Subject: [PATCH 12/30] cube: add _Measures.unweighted_n --- src/cr/cube/crunch_cube.py | 5 +++++ tests/integration/test_crunch_cube.py | 1 - tests/unit/test_crunch_cube.py | 8 ++++++++ 3 files changed, 13 insertions(+), 1 deletion(-) diff --git a/src/cr/cube/crunch_cube.py b/src/cr/cube/crunch_cube.py index 3433345f5..4ec8b26ed 100644 --- a/src/cr/cube/crunch_cube.py +++ b/src/cr/cube/crunch_cube.py @@ -1508,6 +1508,11 @@ def unweighted_counts(self): """ return _UnweightedCountMeasure(self._cube_dict, self._all_dimensions) + @lazyproperty + def unweighted_n(self): + """int count of actual rows represented by query response.""" + return self._cube_dict['result']['n'] + class _BaseMeasure(object): """Base class for measure objects.""" diff --git a/tests/integration/test_crunch_cube.py b/tests/integration/test_crunch_cube.py index f7fdeb1f7..537340801 100644 --- a/tests/integration/test_crunch_cube.py +++ b/tests/integration/test_crunch_cube.py @@ -128,7 +128,6 @@ def it_provides_access_to_the_unweighted_count_measure(self): assert type(unweighted_counts).__name__ == '_UnweightedCountMeasure' - @pytest.mark.xfail(reason='WIP', strict=True) def it_knows_the_unweighted_n(self): measures = _Measures(CR.CAT_X_CAT, None) unweighted_n = measures.unweighted_n diff --git 
a/tests/unit/test_crunch_cube.py b/tests/unit/test_crunch_cube.py index b23f48f62..26d8af705 100644 --- a/tests/unit/test_crunch_cube.py +++ b/tests/unit/test_crunch_cube.py @@ -350,6 +350,14 @@ def it_provides_access_to_the_unweighted_count_measure( ) assert unweighted_counts is unweighted_count_measure_ + def it_knows_the_unweighted_n(self): + cube_dict = {'result': {'n': 121}} + measures = _Measures(cube_dict, None) + + unweighted_n = measures.unweighted_n + + assert unweighted_n == 121 + # fixtures ------------------------------------------------------- @pytest.fixture(params=[ From f1075b99b96c42480b914dc39b053eb48f742e49 Mon Sep 17 00:00:00 2001 From: Steve Canny Date: Mon, 5 Nov 2018 20:12:31 -0800 Subject: [PATCH 13/30] cube: add _Measures.weighted_counts --- src/cr/cube/crunch_cube.py | 16 +++++++++ tests/integration/test_crunch_cube.py | 1 - tests/unit/test_crunch_cube.py | 47 ++++++++++++++++++++++++++- 3 files changed, 62 insertions(+), 2 deletions(-) diff --git a/src/cr/cube/crunch_cube.py b/src/cr/cube/crunch_cube.py index 4ec8b26ed..dcf730b79 100644 --- a/src/cr/cube/crunch_cube.py +++ b/src/cr/cube/crunch_cube.py @@ -1513,6 +1513,18 @@ def unweighted_n(self): """int count of actual rows represented by query response.""" return self._cube_dict['result']['n'] + @lazyproperty + def weighted_counts(self): + """_WeightedCountMeasure object for this cube. + + This object provides access to weighted counts for this cube, if + available. If the cube response is not weighted, the + _UnweightedCountMeasure object for this cube is returned. 
+ """ + if not self.is_weighted: + return self.unweighted_counts + return _WeightedCountMeasure(self._cube_dict, self._all_dimensions) + class _BaseMeasure(object): """Base class for measure objects.""" @@ -1535,3 +1547,7 @@ def missing_count(self): class _UnweightedCountMeasure(_BaseMeasure): """Unweighted counts for cube.""" + + +class _WeightedCountMeasure(_BaseMeasure): + """Weighted counts for cube.""" diff --git a/tests/integration/test_crunch_cube.py b/tests/integration/test_crunch_cube.py index 537340801..f9a52d318 100644 --- a/tests/integration/test_crunch_cube.py +++ b/tests/integration/test_crunch_cube.py @@ -133,7 +133,6 @@ def it_knows_the_unweighted_n(self): unweighted_n = measures.unweighted_n assert unweighted_n == 20 - @pytest.mark.xfail(reason='WIP', strict=True) def it_provides_access_to_the_weighted_count_measure( self, wgtd_counts_fixture): cube_dict, expected_type_name = wgtd_counts_fixture diff --git a/tests/unit/test_crunch_cube.py b/tests/unit/test_crunch_cube.py index 26d8af705..81cc85b07 100644 --- a/tests/unit/test_crunch_cube.py +++ b/tests/unit/test_crunch_cube.py @@ -8,7 +8,8 @@ from unittest import TestCase from cr.cube.crunch_cube import ( - CrunchCube, _MeanMeasure, _Measures, _UnweightedCountMeasure + CrunchCube, _MeanMeasure, _Measures, _UnweightedCountMeasure, + _WeightedCountMeasure ) from cr.cube.dimension import AllDimensions, _ApparentDimensions, Dimension from cr.cube.enum import DIMENSION_TYPE as DT @@ -358,6 +359,32 @@ def it_knows_the_unweighted_n(self): assert unweighted_n == 121 + def it_provides_access_to_the_weighted_count_measure( + self, is_weighted_prop_, _WeightedCountMeasure_, + weighted_count_measure_, all_dimensions_): + cube_dict = {'cube': 'dict'} + is_weighted_prop_.return_value = True + _WeightedCountMeasure_.return_value = weighted_count_measure_ + measures = _Measures(cube_dict, all_dimensions_) + + weighted_counts = measures.weighted_counts + + _WeightedCountMeasure_.assert_called_once_with( + 
cube_dict, all_dimensions_ + ) + assert weighted_counts is weighted_count_measure_ + + def but_it_returns_unweighted_count_measure_when_cube_is_not_weighted( + self, is_weighted_prop_, unweighted_counts_prop_, + unweighted_count_measure_): + is_weighted_prop_.return_value = False + unweighted_counts_prop_.return_value = unweighted_count_measure_ + measures = _Measures(None, None) + + weighted_counts = measures.weighted_counts + + assert weighted_counts is unweighted_count_measure_ + # fixtures ------------------------------------------------------- @pytest.fixture(params=[ @@ -407,6 +434,10 @@ def pop_frac_fixture(self, request): def all_dimensions_(self, request): return instance_mock(request, AllDimensions) + @pytest.fixture + def is_weighted_prop_(self, request): + return property_mock(request, _Measures, 'is_weighted') + @pytest.fixture def _MeanMeasure_(self, request): return class_mock(request, 'cr.cube.crunch_cube._MeanMeasure') @@ -429,6 +460,20 @@ def _UnweightedCountMeasure_(self, request): def unweighted_count_measure_(self, request): return instance_mock(request, _UnweightedCountMeasure) + @pytest.fixture + def unweighted_counts_prop_(self, request): + return property_mock(request, _Measures, 'unweighted_counts') + + @pytest.fixture + def _WeightedCountMeasure_(self, request): + return class_mock( + request, 'cr.cube.crunch_cube._WeightedCountMeasure' + ) + + @pytest.fixture + def weighted_count_measure_(self, request): + return instance_mock(request, _WeightedCountMeasure) + class Describe_MeanMeasure(object): From c2e6f974934e4c8fcafee8a9713b92d6184fbf7a Mon Sep 17 00:00:00 2001 From: Steve Canny Date: Mon, 5 Nov 2018 20:17:52 -0800 Subject: [PATCH 14/30] cube: add _Measures.weighted_n --- src/cr/cube/crunch_cube.py | 9 +++++++++ tests/integration/test_crunch_cube.py | 1 - tests/unit/test_crunch_cube.py | 23 +++++++++++++++++++++++ 3 files changed, 32 insertions(+), 1 deletion(-) diff --git a/src/cr/cube/crunch_cube.py b/src/cr/cube/crunch_cube.py 
index dcf730b79..bdd3616c7 100644 --- a/src/cr/cube/crunch_cube.py +++ b/src/cr/cube/crunch_cube.py @@ -1525,6 +1525,15 @@ def weighted_counts(self): return self.unweighted_counts return _WeightedCountMeasure(self._cube_dict, self._all_dimensions) + @lazyproperty + def weighted_n(self): + """float count of returned rows adjusted for weighting.""" + if not self.is_weighted: + return float(self.unweighted_n) + return float( + sum(self._cube_dict['result']['measures']['count']['data']) + ) + class _BaseMeasure(object): """Base class for measure objects.""" diff --git a/tests/integration/test_crunch_cube.py b/tests/integration/test_crunch_cube.py index f9a52d318..c7a8fcc5a 100644 --- a/tests/integration/test_crunch_cube.py +++ b/tests/integration/test_crunch_cube.py @@ -142,7 +142,6 @@ def it_provides_access_to_the_weighted_count_measure( assert type(weighted_counts).__name__ == expected_type_name - @pytest.mark.xfail(reason='WIP', strict=True) def it_knows_the_weighted_n(self, wgtd_n_fixture): cube_dict, expected_value = wgtd_n_fixture measures = _Measures(cube_dict, None) diff --git a/tests/unit/test_crunch_cube.py b/tests/unit/test_crunch_cube.py index 81cc85b07..cbbe121a6 100644 --- a/tests/unit/test_crunch_cube.py +++ b/tests/unit/test_crunch_cube.py @@ -385,6 +385,14 @@ def but_it_returns_unweighted_count_measure_when_cube_is_not_weighted( assert weighted_counts is unweighted_count_measure_ + def it_knows_the_weighted_n(self, weighted_n_fixture): + cube_dict, is_weighted, expected_value = weighted_n_fixture + measures = _Measures(cube_dict, None) + + weighted_n = measures.weighted_n + + assert weighted_n == expected_value + # fixtures ------------------------------------------------------- @pytest.fixture(params=[ @@ -428,6 +436,17 @@ def pop_frac_fixture(self, request): cube_dict, expected_value = request.param return cube_dict, expected_value + @pytest.fixture(params=[ + ({}, False, 24.0), + ({'result': {'measures': {'count': {'data': [7, 9]}}}}, True, 16.0), + 
]) + def weighted_n_fixture(self, request, unweighted_n_prop_, + is_weighted_prop_): + cube_dict, is_weighted, expected_value = request.param + is_weighted_prop_.return_value = is_weighted + unweighted_n_prop_.return_value = 24 + return cube_dict, is_weighted, expected_value + # fixture components --------------------------------------------- @pytest.fixture @@ -464,6 +483,10 @@ def unweighted_count_measure_(self, request): def unweighted_counts_prop_(self, request): return property_mock(request, _Measures, 'unweighted_counts') + @pytest.fixture + def unweighted_n_prop_(self, request): + return property_mock(request, _Measures, 'unweighted_n') + @pytest.fixture def _WeightedCountMeasure_(self, request): return class_mock( From e8ad5157f7a55ca46d844820fa68f05fb03e22b7 Mon Sep 17 00:00:00 2001 From: Steve Canny Date: Mon, 5 Nov 2018 20:22:48 -0800 Subject: [PATCH 15/30] tdd: add xfails for _BaseMeasure subclasses --- tests/integration/test_crunch_cube.py | 66 ++++++++++++++++++++++++++- 1 file changed, 65 insertions(+), 1 deletion(-) diff --git a/tests/integration/test_crunch_cube.py b/tests/integration/test_crunch_cube.py index c7a8fcc5a..8fc932232 100644 --- a/tests/integration/test_crunch_cube.py +++ b/tests/integration/test_crunch_cube.py @@ -2,7 +2,10 @@ import numpy as np import pytest -from cr.cube.crunch_cube import CrunchCube, _Measures +from cr.cube.crunch_cube import ( + CrunchCube, _MeanMeasure, _Measures, _UnweightedCountMeasure, + _WeightedCountMeasure +) from cr.cube.enum import DIMENSION_TYPE as DT from cr.cube.measures.index import Index from cr.cube.util import compress_pruned @@ -196,6 +199,67 @@ def wgtd_n_fixture(self, request): return cube_dict, expected_type +class DescribeIntegrated_MeanMeasure(object): + + @pytest.mark.xfail(reason='WIP', strict=True) + def it_provides_access_to_its_raw_cube_array(self): + cube_dict = CR.CAT_X_CAT_MEAN_WGTD + cube = CrunchCube(cube_dict) + measure = _MeanMeasure(cube_dict, cube._all_dimensions) + + 
raw_cube_array = measure.raw_cube_array + + np.testing.assert_array_almost_equal( + raw_cube_array, + [[52.78205128, 49.90697674, np.nan, np.nan, np.nan], + [50.43654822, 48.20100503, np.nan, np.nan, np.nan], + [51.56435644, 47.60283688, np.nan, np.nan, np.nan], + [58.0, 29.0, np.nan, np.nan, np.nan], + [37.53846154, 39.45238095, np.nan, np.nan, np.nan], + [36.66666667, np.nan, np.nan, np.nan, np.nan], + [np.nan, np.nan, np.nan, np.nan, np.nan], + [np.nan, np.nan, np.nan, np.nan, np.nan]] + ) + + +class DescribeIntegrated_UnweightedCountMeasure(object): + + @pytest.mark.xfail(reason='WIP', strict=True) + def it_provides_access_to_its_raw_cube_array(self): + cube_dict = CR.CAT_X_CAT + cube = CrunchCube(cube_dict) + measure = _UnweightedCountMeasure(cube_dict, cube._all_dimensions) + + raw_cube_array = measure.raw_cube_array + + np.testing.assert_array_almost_equal( + raw_cube_array, + [[5, 3, 2, 0], + [5, 2, 3, 0], + [0, 0, 0, 0]] + ) + + +class DescribeIntegrated_WeightedCountMeasure(object): + + @pytest.mark.xfail(reason='WIP', strict=True) + def it_provides_access_to_its_raw_cube_array(self): + cube_dict = CR.CAT_X_CAT_WGTD + cube = CrunchCube(cube_dict) + measure = _WeightedCountMeasure(cube_dict, cube._all_dimensions) + + raw_cube_array = measure.raw_cube_array + + np.testing.assert_array_almost_equal( + raw_cube_array, + [[32.9, 87.6, 176.2, 117.5, 72.1, 13.4, 0.0, 0.0, 0.0], + [38.8, 94.1, 199.0128, 102.9, 38.8305, 26.2135, 0.0, 0.0, 0.0], + [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], + [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], + [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]] + ) + + class TestCrunchCube(TestCase): def test_crunch_cube_loads_data(self): From 3a933bd820d8596e3de09c48202a3388c6b30411 Mon Sep 17 00:00:00 2001 From: Steve Canny Date: Mon, 5 Nov 2018 20:29:44 -0800 Subject: [PATCH 16/30] cube: add _BaseMeasure.raw_cube_array --- src/cr/cube/crunch_cube.py | 21 +++++++++++++++++ tests/unit/test_crunch_cube.py | 43 
++++++++++++++++++++++++++++++++-- 2 files changed, 62 insertions(+), 2 deletions(-) diff --git a/src/cr/cube/crunch_cube.py b/src/cr/cube/crunch_cube.py index bdd3616c7..f70073181 100644 --- a/src/cr/cube/crunch_cube.py +++ b/src/cr/cube/crunch_cube.py @@ -1542,6 +1542,27 @@ def __init__(self, cube_dict, all_dimensions): self._cube_dict = cube_dict self._all_dimensions = all_dimensions + @lazyproperty + def raw_cube_array(self): + """Return read-only ndarray of measure values from cube-response. + + The shape of the ndarray mirrors the shape of the (raw) cube + response. Specifically, it includes values for missing elements, any + MR_CAT dimensions, and any prunable rows and columns. + """ + array = np.array(self._flat_values).reshape(self._all_dimensions.shape) + # ---must be read-only to avoid hard-to-find bugs--- + array.flags.writeable = False + return array + + @lazyproperty + def _flat_values(self): + """Return tuple of measure values as found in cube response. + + This property must be implemented by each subclass. 
+ """ + raise NotImplementedError('must be implemented by each subclass') + class _MeanMeasure(_BaseMeasure): """Statistical mean values from a cube-response.""" diff --git a/tests/unit/test_crunch_cube.py b/tests/unit/test_crunch_cube.py index cbbe121a6..862a502ec 100644 --- a/tests/unit/test_crunch_cube.py +++ b/tests/unit/test_crunch_cube.py @@ -8,8 +8,8 @@ from unittest import TestCase from cr.cube.crunch_cube import ( - CrunchCube, _MeanMeasure, _Measures, _UnweightedCountMeasure, - _WeightedCountMeasure + _BaseMeasure, CrunchCube, _MeanMeasure, _Measures, + _UnweightedCountMeasure, _WeightedCountMeasure ) from cr.cube.dimension import AllDimensions, _ApparentDimensions, Dimension from cr.cube.enum import DIMENSION_TYPE as DT @@ -498,6 +498,45 @@ def weighted_count_measure_(self, request): return instance_mock(request, _WeightedCountMeasure) +class Describe_BaseMeasure(object): + + def it_provides_access_to_the_raw_cube_array( + self, _flat_values_prop_, all_dimensions_): + _flat_values_prop_.return_value = ( + 1, 2, 3, 4, 5, 6, 7, 8, 9, 2, 4, 6, 8, 0, 1, 3, 5, 7 + ) + all_dimensions_.shape = (2, 3, 3) + base_measure = _BaseMeasure(None, all_dimensions_) + + raw_cube_array = base_measure.raw_cube_array + + np.testing.assert_array_equal( + raw_cube_array, + np.array([[[1, 2, 3], [4, 5, 6], [7, 8, 9]], + [[2, 4, 6], [8, 0, 1], [3, 5, 7]]]) + ) + assert raw_cube_array.flags.writeable is False + + def it_requires_each_subclass_to_implement_flat_values(self): + base_measure = _BaseMeasure(None, None) + + with pytest.raises(NotImplementedError) as pt_exc_info: + base_measure._flat_values + + exception = pt_exc_info.value + assert str(exception) == 'must be implemented by each subclass' + + # fixture components --------------------------------------------- + + @pytest.fixture + def all_dimensions_(self, request): + return instance_mock(request, AllDimensions) + + @pytest.fixture + def _flat_values_prop_(self, request): + return property_mock(request, _BaseMeasure, 
'_flat_values') + + class Describe_MeanMeasure(object): def it_knows_the_missing_count(self, missing_count_fixture): From ee91586870a65c2fc0abef8b1c3b0db11e5751c4 Mon Sep 17 00:00:00 2001 From: Steve Canny Date: Mon, 5 Nov 2018 20:39:57 -0800 Subject: [PATCH 17/30] cube: add _MeanMeasure._flat_values --- src/cr/cube/crunch_cube.py | 13 +++++++++++++ tests/integration/test_crunch_cube.py | 1 - tests/unit/test_crunch_cube.py | 12 ++++++++++++ 3 files changed, 25 insertions(+), 1 deletion(-) diff --git a/src/cr/cube/crunch_cube.py b/src/cr/cube/crunch_cube.py index f70073181..ed3bf64a3 100644 --- a/src/cr/cube/crunch_cube.py +++ b/src/cr/cube/crunch_cube.py @@ -1574,6 +1574,19 @@ def missing_count(self): self._cube_dict['result']['measures']['mean'].get('n_missing', 0) ) + @lazyproperty + def _flat_values(self): + """Return tuple of mean values as found in cube response. + + Mean data may include missing items represented by a dict like + {'?': -1} in the cube response. These are replaced by np.nan in the + returned value. 
+ """ + return tuple( + np.nan if type(x) is dict else x + for x in self._cube_dict['result']['measures']['mean']['data'] + ) + class _UnweightedCountMeasure(_BaseMeasure): """Unweighted counts for cube.""" diff --git a/tests/integration/test_crunch_cube.py b/tests/integration/test_crunch_cube.py index 8fc932232..185645dac 100644 --- a/tests/integration/test_crunch_cube.py +++ b/tests/integration/test_crunch_cube.py @@ -201,7 +201,6 @@ def wgtd_n_fixture(self, request): class DescribeIntegrated_MeanMeasure(object): - @pytest.mark.xfail(reason='WIP', strict=True) def it_provides_access_to_its_raw_cube_array(self): cube_dict = CR.CAT_X_CAT_MEAN_WGTD cube = CrunchCube(cube_dict) diff --git a/tests/unit/test_crunch_cube.py b/tests/unit/test_crunch_cube.py index 862a502ec..d60593743 100644 --- a/tests/unit/test_crunch_cube.py +++ b/tests/unit/test_crunch_cube.py @@ -547,6 +547,18 @@ def it_knows_the_missing_count(self, missing_count_fixture): assert missing_count == expected_value + def it_parses_the_flat_values_to_help(self): + cube_dict = { + 'result': {'measures': {'mean': { + 'data': [1, 2, {'?': -1}, 4] + }}} + } + mean_measure = _MeanMeasure(cube_dict, None) + + flat_values = mean_measure._flat_values + + assert flat_values == (1, 2, np.nan, 4) + # fixtures ------------------------------------------------------- @pytest.fixture(params=[ From 9387d758f000f1398e1e464dcc96f32586dfa011 Mon Sep 17 00:00:00 2001 From: Steve Canny Date: Mon, 5 Nov 2018 20:51:11 -0800 Subject: [PATCH 18/30] cube: add _UnweightedCountMeasure._flat_values --- src/cr/cube/crunch_cube.py | 5 +++++ tests/integration/test_crunch_cube.py | 1 - tests/unit/test_crunch_cube.py | 11 +++++++++++ 3 files changed, 16 insertions(+), 1 deletion(-) diff --git a/src/cr/cube/crunch_cube.py b/src/cr/cube/crunch_cube.py index ed3bf64a3..a04e28a5e 100644 --- a/src/cr/cube/crunch_cube.py +++ b/src/cr/cube/crunch_cube.py @@ -1591,6 +1591,11 @@ def _flat_values(self): class _UnweightedCountMeasure(_BaseMeasure): 
"""Unweighted counts for cube.""" + @lazyproperty + def _flat_values(self): + """tuple of int counts before weighting.""" + return tuple(self._cube_dict['result']['counts']) + class _WeightedCountMeasure(_BaseMeasure): """Weighted counts for cube.""" diff --git a/tests/integration/test_crunch_cube.py b/tests/integration/test_crunch_cube.py index 185645dac..9cf83cf63 100644 --- a/tests/integration/test_crunch_cube.py +++ b/tests/integration/test_crunch_cube.py @@ -223,7 +223,6 @@ def it_provides_access_to_its_raw_cube_array(self): class DescribeIntegrated_UnweightedCountMeasure(object): - @pytest.mark.xfail(reason='WIP', strict=True) def it_provides_access_to_its_raw_cube_array(self): cube_dict = CR.CAT_X_CAT cube = CrunchCube(cube_dict) diff --git a/tests/unit/test_crunch_cube.py b/tests/unit/test_crunch_cube.py index d60593743..b0384b28b 100644 --- a/tests/unit/test_crunch_cube.py +++ b/tests/unit/test_crunch_cube.py @@ -570,6 +570,17 @@ def missing_count_fixture(self, request): return cube_dict, expected_value +class Describe_UnweightedCountMeasure(object): + + def it_parses_the_flat_values_to_help(self): + cube_dict = {'result': {'counts': [1, 2, 3, 4]}} + unweighted_count_measure = _UnweightedCountMeasure(cube_dict, None) + + flat_values = unweighted_count_measure._flat_values + + assert flat_values == (1, 2, 3, 4) + + # pylint: disable=invalid-name, no-self-use, protected-access @patch('cr.cube.crunch_cube.CrunchCube.get_slices', lambda x: None) class TestCrunchCube(TestCase): From 4de0e30a8ad287d874dc65408746ae0496e6141f Mon Sep 17 00:00:00 2001 From: Steve Canny Date: Mon, 5 Nov 2018 20:55:24 -0800 Subject: [PATCH 19/30] cube: add _WeightedCountMeasure._flat_values --- src/cr/cube/crunch_cube.py | 5 +++++ tests/integration/test_crunch_cube.py | 1 - tests/unit/test_crunch_cube.py | 15 +++++++++++++++ 3 files changed, 20 insertions(+), 1 deletion(-) diff --git a/src/cr/cube/crunch_cube.py b/src/cr/cube/crunch_cube.py index a04e28a5e..517c2417c 100644 --- 
a/src/cr/cube/crunch_cube.py +++ b/src/cr/cube/crunch_cube.py @@ -1599,3 +1599,8 @@ def _flat_values(self): class _WeightedCountMeasure(_BaseMeasure): """Weighted counts for cube.""" + + @lazyproperty + def _flat_values(self): + """tuple of numeric counts after weighting.""" + return tuple(self._cube_dict['result']['measures']['count']['data']) diff --git a/tests/integration/test_crunch_cube.py b/tests/integration/test_crunch_cube.py index 9cf83cf63..3d4a16664 100644 --- a/tests/integration/test_crunch_cube.py +++ b/tests/integration/test_crunch_cube.py @@ -240,7 +240,6 @@ def it_provides_access_to_its_raw_cube_array(self): class DescribeIntegrated_WeightedCountMeasure(object): - @pytest.mark.xfail(reason='WIP', strict=True) def it_provides_access_to_its_raw_cube_array(self): cube_dict = CR.CAT_X_CAT_WGTD cube = CrunchCube(cube_dict) diff --git a/tests/unit/test_crunch_cube.py b/tests/unit/test_crunch_cube.py index b0384b28b..289dfd416 100644 --- a/tests/unit/test_crunch_cube.py +++ b/tests/unit/test_crunch_cube.py @@ -581,6 +581,21 @@ def it_parses_the_flat_values_to_help(self): assert flat_values == (1, 2, 3, 4) +class Describe_WeightedCountMeasure(object): + + def it_parses_the_flat_values_to_help(self): + cube_dict = { + 'result': {'measures': {'count': { + 'data': [1.1, 2.2, 3.3, 4.4] + }}} + } + weighted_count_measure = _WeightedCountMeasure(cube_dict, None) + + flat_values = weighted_count_measure._flat_values + + assert flat_values == (1.1, 2.2, 3.3, 4.4) + + # pylint: disable=invalid-name, no-self-use, protected-access @patch('cr.cube.crunch_cube.CrunchCube.get_slices', lambda x: None) class TestCrunchCube(TestCase): From f642554dddcd2507a30489c3cf909f77be1502b0 Mon Sep 17 00:00:00 2001 From: Steve Canny Date: Mon, 5 Nov 2018 21:10:34 -0800 Subject: [PATCH 20/30] rfctr: reimplement CrunchCube.count() Remove redundant (copies) integration tests from test_headers_and_subtotals.py --- src/cr/cube/crunch_cube.py | 22 +++-- tests/integration/test_crunch_cube.py 
| 2 + .../integration/test_headers_and_subtotals.py | 12 --- tests/unit/test_crunch_cube.py | 99 +++++-------------- 4 files changed, 43 insertions(+), 92 deletions(-) diff --git a/src/cr/cube/crunch_cube.py b/src/cr/cube/crunch_cube.py index 517c2417c..f5e2b148e 100644 --- a/src/cr/cube/crunch_cube.py +++ b/src/cr/cube/crunch_cube.py @@ -131,14 +131,11 @@ def as_array(self, include_missing=False, weighted=True, return self._drop_mr_cat_dims(array) def count(self, weighted=True): - """Get cube's count with automatic weighted/unweighted selection.""" - if weighted and self.is_weighted: - return sum( - self._cube_dict['result']['measures'] - .get('count', {}) - .get('data') - ) - return self._cube_dict['result']['n'] + """Return numeric count of rows considered for cube response.""" + return ( + self._measures.weighted_n if weighted + else self._measures.unweighted_n + ) @lazyproperty def counts(self): @@ -1187,6 +1184,15 @@ def _margin_pruned_indices(margin, inserted_ind, axis): return pruned_ind + @lazyproperty + def _measures(self): + """_Measures object for this cube. + + Provides access to unweighted counts, and weighted counts and/or means + when available. + """ + raise NotImplementedError + def _prune_3d_body(self, res, transforms): """Return masked array where mask indicates pruned vectors. 
diff --git a/tests/integration/test_crunch_cube.py b/tests/integration/test_crunch_cube.py index 3d4a16664..1078ecd68 100644 --- a/tests/integration/test_crunch_cube.py +++ b/tests/integration/test_crunch_cube.py @@ -1146,12 +1146,14 @@ def test_pets_array_margin_by_col_not_allowed_across_items(self): with self.assertRaises(ValueError): cube.margin(axis=0) + @pytest.mark.xfail(reason='WIP', strict=True) def test_count_unweighted(self): cube = CrunchCube(CR.ADMIT_X_GENDER_WEIGHTED) expected = 4526 actual = cube.count(weighted=False) self.assertEqual(actual, expected) + @pytest.mark.xfail(reason='WIP', strict=True) def test_count_weighted(self): cube = CrunchCube(CR.ADMIT_X_GENDER_WEIGHTED) expected = 4451.955438803242 diff --git a/tests/integration/test_headers_and_subtotals.py b/tests/integration/test_headers_and_subtotals.py index 5bbfce4f4..7b03e24eb 100644 --- a/tests/integration/test_headers_and_subtotals.py +++ b/tests/integration/test_headers_and_subtotals.py @@ -400,18 +400,6 @@ def test_ca_margin_with_hs(self): actual = cube.margin(include_transforms_for_dims=[0, 1], axis=1) np.testing.assert_almost_equal(actual, expected) - def test_count_unweighted(self): - cube = CrunchCube(CR.ADMIT_X_GENDER_WEIGHTED) - expected = 4526 - actual = cube.count(weighted=False) - self.assertEqual(actual, expected) - - def test_count_weighted(self): - cube = CrunchCube(CR.ADMIT_X_GENDER_WEIGHTED) - expected = 4451.955438803242 - actual = cube.count(weighted=True) - self.assertEqual(actual, expected) - def test_hs_with_anchor_on_zero_position_labels(self): cube = CrunchCube(CR.ECON_US_PROBLEM_X_BIGGER_PROBLEM) expected = [ diff --git a/tests/unit/test_crunch_cube.py b/tests/unit/test_crunch_cube.py index 289dfd416..a954f6fa2 100644 --- a/tests/unit/test_crunch_cube.py +++ b/tests/unit/test_crunch_cube.py @@ -27,6 +27,14 @@ def it_provides_a_default_repr(self): repr_ = repr(cube) assert repr_.startswith(' Date: Mon, 5 Nov 2018 21:26:12 -0800 Subject: [PATCH 21/30] cube: add 
CrunchCube._measures --- src/cr/cube/crunch_cube.py | 2 +- tests/integration/test_crunch_cube.py | 2 -- tests/unit/test_crunch_cube.py | 17 +++++++++++++++++ 3 files changed, 18 insertions(+), 3 deletions(-) diff --git a/src/cr/cube/crunch_cube.py b/src/cr/cube/crunch_cube.py index f5e2b148e..90f43c00d 100644 --- a/src/cr/cube/crunch_cube.py +++ b/src/cr/cube/crunch_cube.py @@ -1191,7 +1191,7 @@ def _measures(self): Provides access to unweighted counts, and weighted counts and/or means when available. """ - raise NotImplementedError + return _Measures(self._cube_dict, self._all_dimensions) def _prune_3d_body(self, res, transforms): """Return masked array where mask indicates pruned vectors. diff --git a/tests/integration/test_crunch_cube.py b/tests/integration/test_crunch_cube.py index 1078ecd68..3d4a16664 100644 --- a/tests/integration/test_crunch_cube.py +++ b/tests/integration/test_crunch_cube.py @@ -1146,14 +1146,12 @@ def test_pets_array_margin_by_col_not_allowed_across_items(self): with self.assertRaises(ValueError): cube.margin(axis=0) - @pytest.mark.xfail(reason='WIP', strict=True) def test_count_unweighted(self): cube = CrunchCube(CR.ADMIT_X_GENDER_WEIGHTED) expected = 4526 actual = cube.count(weighted=False) self.assertEqual(actual, expected) - @pytest.mark.xfail(reason='WIP', strict=True) def test_count_weighted(self): cube = CrunchCube(CR.ADMIT_X_GENDER_WEIGHTED) expected = 4451.955438803242 diff --git a/tests/unit/test_crunch_cube.py b/tests/unit/test_crunch_cube.py index a954f6fa2..af725dae1 100644 --- a/tests/unit/test_crunch_cube.py +++ b/tests/unit/test_crunch_cube.py @@ -156,6 +156,19 @@ def it_knows_whether_an_axis_is_marginable_to_help( axis_is_marginable = cube._is_axis_allowed(axis) assert axis_is_marginable is expected_value + def it_provides_access_to_its_Measures_object_to_help( + self, _all_dimensions_prop_, all_dimensions_, _Measures_, + measures_): + cube_dict = {'cube': 'dict'} + _all_dimensions_prop_.return_value = all_dimensions_ + 
_Measures_.return_value = measures_ + cube = CrunchCube(cube_dict) + + measures = cube._measures + + _Measures_.assert_called_once_with(cube_dict, all_dimensions_) + assert measures is measures_ + # fixtures ------------------------------------------------------- @pytest.fixture(params=[ @@ -307,6 +320,10 @@ def dimensions_prop_(self, request): def _is_axis_allowed_(self, request): return method_mock(request, CrunchCube, '_is_axis_allowed') + @pytest.fixture + def _Measures_(self, request): + return class_mock(request, 'cr.cube.crunch_cube._Measures') + @pytest.fixture def measures_(self, request): return instance_mock(request, _Measures) From f083e75f9796c8a07a279947be7defaf0021be20 Mon Sep 17 00:00:00 2001 From: Steve Canny Date: Mon, 5 Nov 2018 21:37:55 -0800 Subject: [PATCH 22/30] rfctr: reimplement CrunchCube.has_means --- src/cr/cube/crunch_cube.py | 7 ++----- tests/unit/test_crunch_cube.py | 31 ++++++++++++++++--------------- 2 files changed, 18 insertions(+), 20 deletions(-) diff --git a/src/cr/cube/crunch_cube.py b/src/cr/cube/crunch_cube.py index 90f43c00d..2cdd44aca 100644 --- a/src/cr/cube/crunch_cube.py +++ b/src/cr/cube/crunch_cube.py @@ -191,11 +191,8 @@ def get_slices(self, ca_as_0th=False): @lazyproperty def has_means(self): - """True if cube contains means data.""" - measures = self._cube_dict.get('result', {}).get('measures') - if not measures: - return False - return measures.get('mean', None) is not None + """True if cube includes a means measure.""" + return self._measures.means is not None @lazyproperty def has_mr(self): diff --git a/tests/unit/test_crunch_cube.py b/tests/unit/test_crunch_cube.py index af725dae1..708104c32 100644 --- a/tests/unit/test_crunch_cube.py +++ b/tests/unit/test_crunch_cube.py @@ -59,9 +59,12 @@ def it_knows_the_types_of_its_dimension(self, request, dimensions_prop_): assert dim_types == (DT.CAT, DT.CA_SUBVAR, DT.MR, DT.MR_CAT) - def it_knows_when_it_contains_means_data(self, has_means_fixture): - cube_response, 
expected_value = has_means_fixture - cube = CrunchCube(cube_response) + def it_knows_when_it_contains_means_data( + self, has_means_fixture, _measures_prop_, measures_): + means, expected_value = has_means_fixture + _measures_prop_.return_value = measures_ + measures_.means = means + cube = CrunchCube(None) has_means = cube.has_means @@ -276,13 +279,13 @@ def count_fixture(self, request, _measures_prop_, measures_): return weighted, expected_value @pytest.fixture(params=[ - ({'result': {}}, False), - ({'result': {'measures': {}}}, False), - ({'result': {'measures': {'mean': {}}}}, True), + (True, True), + (False, False), ]) - def has_means_fixture(self, request): - cube_response, expected_value = request.param - return cube_response, expected_value + def has_means_fixture(self, request, mean_measure_): + has_mean_measure, expected_value = request.param + means = mean_measure_ if has_mean_measure else None + return means, expected_value @pytest.fixture(params=[ (None, False), @@ -320,6 +323,10 @@ def dimensions_prop_(self, request): def _is_axis_allowed_(self, request): return method_mock(request, CrunchCube, '_is_axis_allowed') + @pytest.fixture + def mean_measure_(self, request): + return instance_mock(request, _MeanMeasure) + @pytest.fixture def _Measures_(self, request): return class_mock(request, 'cr.cube.crunch_cube._Measures') @@ -813,12 +820,6 @@ def test_is_weighted_invoked(self): actual = cube.is_weighted assert actual == 'fake_val' - @patch('cr.cube.crunch_cube.CrunchCube.has_means', 'fake_val') - def test_has_means_invoked(self): - cube = CrunchCube(None) - actual = cube.has_means - assert actual == 'fake_val' - def test_margin_pruned_indices_without_insertions(self): table = np.array([0, 1, 0, 2, 3, 4]) expected = np.array([True, False, True, False, False, False]) From 440063870ac50f56ba5a9fdca3fe3de853e6c992 Mon Sep 17 00:00:00 2001 From: Steve Canny Date: Mon, 5 Nov 2018 21:43:06 -0800 Subject: [PATCH 23/30] rfctr: reimplement CrunchCube.is_weighted 
--- src/cr/cube/crunch_cube.py | 16 +--------------- tests/unit/test_crunch_cube.py | 25 +++++++++++++++++++------ 2 files changed, 20 insertions(+), 21 deletions(-) diff --git a/src/cr/cube/crunch_cube.py b/src/cr/cube/crunch_cube.py index 2cdd44aca..5441e004c 100644 --- a/src/cr/cube/crunch_cube.py +++ b/src/cr/cube/crunch_cube.py @@ -254,21 +254,7 @@ def is_univariate_ca(self): @lazyproperty def is_weighted(self): """True if cube response contains weighted data.""" - if self._cube_dict.get('query', {}).get('weight') is not None: - return True - if self._cube_dict.get('weight_var') is not None: - return True - if self._cube_dict.get('weight_url') is not None: - return True - unweighted_counts = self._cube_dict['result']['counts'] - weighted_counts = ( - self._cube_dict['result']['measures'] - .get('count', {}) - .get('data') - ) - if unweighted_counts != weighted_counts: - return True - return False + return self._measures.is_weighted def labels(self, include_missing=False, include_transforms_for_dims=False): """Gets labels for each cube's dimension. 
diff --git a/tests/unit/test_crunch_cube.py b/tests/unit/test_crunch_cube.py index 708104c32..8ec65a46a 100644 --- a/tests/unit/test_crunch_cube.py +++ b/tests/unit/test_crunch_cube.py @@ -96,6 +96,17 @@ def it_has_a_deprecated_index_method_that_forwards_to_Index_data( Index_.data.assert_called_once_with(cube, True, False) assert index is index_ + def it_knows_when_it_contains_weighted_data( + self, is_weighted_fixture, _measures_prop_, measures_): + is_weighted, expected_value = is_weighted_fixture + _measures_prop_.return_value = measures_ + measures_.is_weighted = is_weighted + cube = CrunchCube(None) + + is_weighted = cube.is_weighted + + assert is_weighted is expected_value + def it_can_adjust_an_axis_to_help( self, request, adjust_fixture, dimensions_prop_): dimension_types, axis_cases = adjust_fixture @@ -297,6 +308,14 @@ def has_mr_fixture(self, request): mr_dim_indices, expected_value = request.param return mr_dim_indices, expected_value + @pytest.fixture(params=[ + (True, True), + (False, False), + ]) + def is_weighted_fixture(self, request): + is_weighted, expected_value = request.param + return is_weighted, expected_value + # fixture components --------------------------------------------- @pytest.fixture @@ -814,12 +833,6 @@ def test_test_filter_annotation(self): actual = CrunchCube(mock_cube).filter_annotation self.assertEqual(actual, expected) - @patch('cr.cube.crunch_cube.CrunchCube.is_weighted', 'fake_val') - def test_is_weighted_invoked(self): - cube = CrunchCube(None) - actual = cube.is_weighted - assert actual == 'fake_val' - def test_margin_pruned_indices_without_insertions(self): table = np.array([0, 1, 0, 2, 3, 4]) expected = np.array([True, False, True, False, False, False]) From 76a6bca2dbced1b8ada9d40d02f6a28ebc761bec Mon Sep 17 00:00:00 2001 From: Steve Canny Date: Mon, 5 Nov 2018 21:47:57 -0800 Subject: [PATCH 24/30] rfctr: reimplement CrunchCube.missing --- src/cr/cube/crunch_cube.py | 4 +--- tests/unit/test_crunch_cube.py | 33 
+++++++++------------------------ 2 files changed, 10 insertions(+), 27 deletions(-) diff --git a/src/cr/cube/crunch_cube.py b/src/cr/cube/crunch_cube.py index 5441e004c..eaca4e3bd 100644 --- a/src/cr/cube/crunch_cube.py +++ b/src/cr/cube/crunch_cube.py @@ -396,9 +396,7 @@ def hs_dims_for_den(hs_dims, axis): @lazyproperty def missing(self): """Get missing count of a cube.""" - if self.has_means: - return self._cube_dict['result']['measures']['mean']['n_missing'] - return self._cube_dict['result'].get('missing') + return self._measures.missing_count @lazyproperty def mr_dim_ind(self): diff --git a/tests/unit/test_crunch_cube.py b/tests/unit/test_crunch_cube.py index 8ec65a46a..164e0acca 100644 --- a/tests/unit/test_crunch_cube.py +++ b/tests/unit/test_crunch_cube.py @@ -107,6 +107,15 @@ def it_knows_when_it_contains_weighted_data( assert is_weighted is expected_value + def it_knows_its_missing_count(self, _measures_prop_, measures_): + _measures_prop_.return_value = measures_ + measures_.missing_count = 36 + cube = CrunchCube(None) + + missing = cube.missing + + assert missing == 36 + def it_can_adjust_an_axis_to_help( self, request, adjust_fixture, dimensions_prop_): dimension_types, axis_cases = adjust_fixture @@ -753,14 +762,6 @@ def test_description_with_one_dimension(self, mock_dims): actual = cube.description self.assertEqual(actual, expected) - @patch('cr.cube.crunch_cube.CrunchCube.has_means', False) - def test_missing_when_there_are_none(self): - fake_cube = {'result': {}} - cube = CrunchCube(fake_cube) - expected = None - actual = cube.missing - self.assertEqual(actual, expected) - def test_fix_valid_indices_subsequent(self): initial_indices = [[1, 2, 3]] insertion_index = 2 @@ -811,22 +812,6 @@ def test_has_description(self, mock_dims): actual = CrunchCube(None).description self.assertEqual(actual, expected) - @patch('cr.cube.crunch_cube.CrunchCube.has_means', False) - def test_missing(self): - missing = Mock() - fake_cube = {'result': {'missing': 
missing}} - expected = missing - actual = CrunchCube(fake_cube).missing - self.assertEqual(actual, expected) - - @patch('cr.cube.crunch_cube.CrunchCube.has_means', True) - def test_missing_with_means(self): - missing = Mock() - fake_cube = {'result': {'measures': {'mean': {'n_missing': missing}}}} - expected = missing - actual = CrunchCube(fake_cube).missing - self.assertEqual(actual, expected) - def test_test_filter_annotation(self): mock_cube = {'filter_names': Mock()} expected = mock_cube['filter_names'] From 1da96ceedcc4b4a625b7a3fa461424c4cc18acd4 Mon Sep 17 00:00:00 2001 From: Steve Canny Date: Mon, 5 Nov 2018 21:52:49 -0800 Subject: [PATCH 25/30] rfctr: reimplement CrunchCube.population_fraction --- src/cr/cube/crunch_cube.py | 23 +++++-------- tests/unit/test_crunch_cube.py | 61 +++++----------------------------- 2 files changed, 17 insertions(+), 67 deletions(-) diff --git a/src/cr/cube/crunch_cube.py b/src/cr/cube/crunch_cube.py index eaca4e3bd..67f030148 100644 --- a/src/cr/cube/crunch_cube.py +++ b/src/cr/cube/crunch_cube.py @@ -137,12 +137,6 @@ def count(self, weighted=True): else self._measures.unweighted_n ) - @lazyproperty - def counts(self): - unfiltered = self._cube_dict['result'].get('unfiltered') - filtered = self._cube_dict['result'].get('filtered') - return unfiltered, filtered - @lazyproperty def description(self): """Return the description of the cube.""" @@ -514,15 +508,14 @@ def population_counts(self, population_size, weighted=True, @lazyproperty def population_fraction(self): - try: - unfiltered, filtered = self.counts - num = filtered.get('weighted_n') - den = unfiltered.get('weighted_n') - return num / den - except ZeroDivisionError: - return np.nan - except Exception: - return 1 + """The filtered/unfiltered ratio for cube response. + + This value is required for properly calculating population on a cube + where a filter has been applied. Returns 1.0 for an unfiltered cube. 
+ Returns `np.nan` if the unfiltered count is zero, which would + otherwise result in a divide-by-zero error. + """ + return self._measures.population_fraction def proportions(self, axis=None, weighted=True, include_transforms_for_dims=None, include_missing=False, diff --git a/tests/unit/test_crunch_cube.py b/tests/unit/test_crunch_cube.py index 164e0acca..0bf625408 100644 --- a/tests/unit/test_crunch_cube.py +++ b/tests/unit/test_crunch_cube.py @@ -116,6 +116,15 @@ def it_knows_its_missing_count(self, _measures_prop_, measures_): assert missing == 36 + def it_knows_its_population_fraction(self, _measures_prop_, measures_): + _measures_prop_.return_value = measures_ + measures_.population_fraction = 0.42 + cube = CrunchCube(None) + + population_fraction = cube.population_fraction + + assert population_fraction == 0.42 + def it_can_adjust_an_axis_to_help( self, request, adjust_fixture, dimensions_prop_): dimension_types, axis_cases = adjust_fixture @@ -712,22 +721,6 @@ def test_calculate_constraints_sum_raises_value_error_for_bad_axis(self): with self.assertRaises(ValueError): CrunchCube._calculate_constraints_sum(Mock(), Mock(), 2) - def test_cube_counts(self): - cube = CrunchCube({'result': {}}) - assert cube.counts == (None, None) - - fake_count = Mock() - cube = CrunchCube({'result': {'unfiltered': fake_count}}) - assert cube.counts == (fake_count, None) - - cube = CrunchCube({'result': {'filtered': fake_count}}) - assert cube.counts == (None, fake_count) - - cube = CrunchCube( - {'result': {'unfiltered': fake_count, 'filtered': fake_count}} - ) - assert cube.counts == (fake_count, fake_count) - @patch('cr.cube.crunch_cube.CrunchCube.dimensions', None) def test_name_with_no_dimensions(self): fake_cube = {} @@ -867,39 +860,3 @@ def test_inserted_inds(self, mock_inserted_hs_indices, mock_inserted_hs_indices.assert_not_called() actual = cc._inserted_dim_inds([0], 0) mock_inserted_hs_indices.assert_called_once() - - def test_population_fraction(self): - # Assert 
fraction is 1 when none of the counts are specified - cc = CrunchCube(None) - actual = cc.population_fraction - assert actual == 1 - - # Assert fraction is 1 when only some counts are specified - cc = CrunchCube({'result': {'unfiltered': {'unweighted_n': 10}}}) - assert cc.population_fraction == 1 - cc = CrunchCube({'result': {'unfiltered': {'weighted_n': 10}}}) - assert cc.population_fraction == 1 - cc = CrunchCube({'result': {'unfiltered': { - 'weighted_n': 10, 'unweighted_n': 10}}}) - assert cc.population_fraction == 1 - cc = CrunchCube({'result': {'filtered': { - 'weighted_n': 10, 'unweighted_n': 10}}}) - assert cc.population_fraction == 1 - - # Assert fraction is calculated when correct counts are specified - cc = CrunchCube({ - 'result': { - 'filtered': {'weighted_n': 5}, - 'unfiltered': {'weighted_n': 10}, - } - }) - assert cc.population_fraction == 0.5 - - # Assert fraction is NaN, when denominator is zero - cc = CrunchCube({ - 'result': { - 'filtered': {'weighted_n': 5}, - 'unfiltered': {'weighted_n': 0}, - } - }) - assert np.isnan(cc.population_fraction) From bcc1144e3590e7ec228677b814063f95fb286960 Mon Sep 17 00:00:00 2001 From: Steve Canny Date: Tue, 6 Nov 2018 10:31:56 -0800 Subject: [PATCH 26/30] cube: add CrunchCube._measure() --- src/cr/cube/crunch_cube.py | 17 +++++++++++++ tests/unit/test_crunch_cube.py | 46 ++++++++++++++++++++++++++++++++++ 2 files changed, 63 insertions(+) diff --git a/src/cr/cube/crunch_cube.py b/src/cr/cube/crunch_cube.py index 67f030148..58b212d32 100644 --- a/src/cr/cube/crunch_cube.py +++ b/src/cr/cube/crunch_cube.py @@ -1158,6 +1158,23 @@ def _margin_pruned_indices(margin, inserted_ind, axis): return pruned_ind + def _measure(self, weighted): + """_BaseMeasure subclass representing primary measure for this cube. + + If the cube response includes a means measure, the return value is + means. Otherwise it is counts, with the choice between weighted or + unweighted determined by *weighted*. 
+ + Note that weighted counts are provided on an "as-available" basis. + When *weighted* is True and the cube response is not weighted, + unweighted counts are returned. + """ + return ( + self._measures.means if self._measures.means is not None else + self._measures.weighted_counts if weighted else + self._measures.unweighted_counts + ) + @lazyproperty def _measures(self): """_Measures object for this cube. diff --git a/tests/unit/test_crunch_cube.py b/tests/unit/test_crunch_cube.py index 0bf625408..fb9f2ac6d 100644 --- a/tests/unit/test_crunch_cube.py +++ b/tests/unit/test_crunch_cube.py @@ -188,6 +188,16 @@ def it_knows_whether_an_axis_is_marginable_to_help( axis_is_marginable = cube._is_axis_allowed(axis) assert axis_is_marginable is expected_value + def it_selects_the_best_match_measure_to_help( + self, measure_fixture, _measures_prop_): + weighted, measures_, expected_measure_ = measure_fixture + _measures_prop_.return_value = measures_ + cube = CrunchCube(None) + + measure = cube._measure(weighted) + + assert measure is expected_measure_ + def it_provides_access_to_its_Measures_object_to_help( self, _all_dimensions_prop_, all_dimensions_, _Measures_, measures_): @@ -326,6 +336,34 @@ def has_mr_fixture(self, request): mr_dim_indices, expected_value = request.param return mr_dim_indices, expected_value + @pytest.fixture(params=[ + (True, False, False, '_MeanMeasure'), + (True, False, True, '_MeanMeasure'), + (True, True, False, '_MeanMeasure'), + (True, True, True, '_MeanMeasure'), + (False, False, False, '_UnweightedCountMeasure'), + (False, False, True, '_UnweightedCountMeasure'), + (False, True, False, '_UnweightedCountMeasure'), + (False, True, True, '_WeightedCountMeasure'), + ]) + def measure_fixture(self, request, measures_, weighted_count_measure_, + unweighted_count_measure_, mean_measure_): + has_means, weighted, is_weighted, expected_type = request.param + # --weighted indicates the caller has requested that weighted values + # --be used by passing 
(weighted=True) to method. + measures_.means = mean_measure_ if has_means else None + measures_.weighted_counts = ( + weighted_count_measure_ if is_weighted else + unweighted_count_measure_ + ) + measures_.unweighted_counts = unweighted_count_measure_ + expected_measure = { + '_MeanMeasure': mean_measure_, + '_UnweightedCountMeasure': unweighted_count_measure_, + '_WeightedCountMeasure': weighted_count_measure_ + }[expected_type] + return weighted, measures_, expected_measure + @pytest.fixture(params=[ (True, True), (False, False), @@ -380,6 +418,14 @@ def _measures_prop_(self, request): def mr_dim_ind_prop_(self, request): return property_mock(request, CrunchCube, 'mr_dim_ind') + @pytest.fixture + def unweighted_count_measure_(self, request): + return instance_mock(request, _UnweightedCountMeasure) + + @pytest.fixture + def weighted_count_measure_(self, request): + return instance_mock(request, _WeightedCountMeasure) + class Describe_Measures(object): From 05b6c154e7a50893196d1a88eb92a53bd350e31a Mon Sep 17 00:00:00 2001 From: Steve Canny Date: Tue, 6 Nov 2018 10:53:25 -0800 Subject: [PATCH 27/30] rfctr: remove `margin` param from _as_array() * Remove unused `margin` parameter from CrunchCube.as_array(). If in future such a return value is required it should be provided using a distinct method rather than overloading `.as_array()`. * Remove now unnecessary `margin` parameter from CrunchCube._as_array(). Change call for raw_cube_array to use measure object returned by new `._measure()` method. * Remove now-dead `.raw_cube_array()` method. 
--- src/cr/cube/crunch_cube.py | 31 ++++--------------------------- 1 file changed, 4 insertions(+), 27 deletions(-) diff --git a/src/cr/cube/crunch_cube.py b/src/cr/cube/crunch_cube.py index 58b212d32..e1a5982ed 100644 --- a/src/cr/cube/crunch_cube.py +++ b/src/cr/cube/crunch_cube.py @@ -86,7 +86,7 @@ def __repr__(self): return super(CrunchCube, self).__repr__() def as_array(self, include_missing=False, weighted=True, - include_transforms_for_dims=None, prune=False, margin=False): + include_transforms_for_dims=None, prune=False): """Return `ndarray` representing cube values. Returns the tabular representation of the crunch cube. The returned @@ -118,8 +118,7 @@ def as_array(self, include_missing=False, weighted=True, array = self._as_array( include_missing=include_missing, weighted=weighted, - include_transforms_for_dims=include_transforms_for_dims, - margin=margin + include_transforms_for_dims=include_transforms_for_dims ) # ---prune array if pruning was requested--- @@ -869,26 +868,20 @@ def _apply_missings_and_insertions(self, res, include_transforms_for_dims, return res[np.ix_(*new_valids)] if new_valids else res def _as_array(self, include_missing=False, get_non_selected=False, - weighted=True, include_transforms_for_dims=False, - margin=False): + weighted=True, include_transforms_for_dims=False): """Get crunch cube as ndarray. Args include_missing (bool): Include rows/cols for missing values. get_non_selected (bool): Get non-selected slices for MR vars. weighted (bool): Take weighted or unweighted counts. - adjusted (bool): If adjusted, add + 1 to the resulting array. This - is needed when calculating statistical significance. include_transforms_for_dims (list): For which dims to include headings & subtotals (H&S) transformations. - margin (bool): Designates whether array is intented for margin - calculation. This essentially tells the CrunchCube to use - counts (and not means, or other measures) as the result. 
Returns res (ndarray): Tabular representation of crunch cube """ return self._apply_missings_and_insertions( - self._raw_cube_array(weighted, margin), + self._measure(weighted).raw_cube_array, include_transforms_for_dims, include_missing=include_missing ) @@ -1380,22 +1373,6 @@ def _pruning_base(self, axis=None, hs_dims=None): include_transforms_for_dims=hs_dims, ) - def _raw_cube_array(self, weighted, margin): - """Return ndarray of measure values from cube-response. - - The shape of the ndarray mirrors the shape of the (raw) cube - response. Specifically, in includes values for missing elements, any - MR_CAT dimensions, and any prunable rows and columns. - - The choice among available measures in the cube response is - determined by *weighted* and *margin*, according to the same rules as - `._flat_values()`. - """ - return ( - np.array(self._flat_values(weighted, margin)) - .reshape(self._all_dimensions.shape) - ) - @lazyproperty def _shape(self): return tuple([dim.shape for dim in self._all_dimensions]) From 195470e6cee765ef5ea45d3068e19d8053f86f73 Mon Sep 17 00:00:00 2001 From: Steve Canny Date: Tue, 6 Nov 2018 10:43:30 -0800 Subject: [PATCH 28/30] cube: add CrunchCube._counts() --- src/cr/cube/crunch_cube.py | 13 +++++++++++++ tests/unit/test_crunch_cube.py | 32 ++++++++++++++++++++++++++++++++ 2 files changed, 45 insertions(+) diff --git a/src/cr/cube/crunch_cube.py b/src/cr/cube/crunch_cube.py index e1a5982ed..bf0cb4f28 100644 --- a/src/cr/cube/crunch_cube.py +++ b/src/cr/cube/crunch_cube.py @@ -940,6 +940,19 @@ def _calculate_std_res(self, counts, total, colsum, rowsum, slice_): def _col_direction_axis(self): return self.ndim - 2 + def _counts(self, weighted): + """Return _BaseMeasure subclass for *weighted* counts. + + The return value is a _WeightedCountMeasure object if *weighted* is + True and the cube response is weighted. Otherwise it is an + _UnweightedCountMeasure object. Any means measure that may be present + is not considered. 
Contrast with `._measure()` below. + """ + return ( + self._measures.weighted_counts if weighted else + self._measures.unweighted_counts + ) + @staticmethod def _create_mask(res, row_prune_inds, col_prune_inds): mask_rows = np.repeat( diff --git a/tests/unit/test_crunch_cube.py b/tests/unit/test_crunch_cube.py index fb9f2ac6d..e6487e34f 100644 --- a/tests/unit/test_crunch_cube.py +++ b/tests/unit/test_crunch_cube.py @@ -188,6 +188,16 @@ def it_knows_whether_an_axis_is_marginable_to_help( axis_is_marginable = cube._is_axis_allowed(axis) assert axis_is_marginable is expected_value + def it_selects_the_best_match_counts_measure_to_help( + self, counts_fixture, _measures_prop_): + weighted, measures_, expected_measure_ = counts_fixture + _measures_prop_.return_value = measures_ + cube = CrunchCube(None) + + measure = cube._counts(weighted) + + assert measure is expected_measure_ + def it_selects_the_best_match_measure_to_help( self, measure_fixture, _measures_prop_): weighted, measures_, expected_measure_ = measure_fixture @@ -317,6 +327,28 @@ def count_fixture(self, request, _measures_prop_, measures_): measures_.weighted_n = 48.732 return weighted, expected_value + @pytest.fixture(params=[ + (False, False, '_UnweightedCountMeasure'), + (False, True, '_UnweightedCountMeasure'), + (True, False, '_UnweightedCountMeasure'), + (True, True, '_WeightedCountMeasure'), + ]) + def counts_fixture(self, request, measures_, weighted_count_measure_, + unweighted_count_measure_): + weighted, is_weighted, expected_type = request.param + # --weighted indicates the caller has requested that weighted values + # --be used by passing (weighted=True) to method. 
+ measures_.weighted_counts = ( + weighted_count_measure_ if is_weighted else + unweighted_count_measure_ + ) + measures_.unweighted_counts = unweighted_count_measure_ + expected_measure = { + '_UnweightedCountMeasure': unweighted_count_measure_, + '_WeightedCountMeasure': weighted_count_measure_ + }[expected_type] + return weighted, measures_, expected_measure + @pytest.fixture(params=[ (True, True), (False, False), From 7068262ce5733725adc5245ac66ee3315f574c77 Mon Sep 17 00:00:00 2001 From: Steve Canny Date: Tue, 6 Nov 2018 11:04:25 -0800 Subject: [PATCH 29/30] rfctr: replace CrunchCube._data() * Replace calls to CrunchCube._data() with calls to ._counts() or ._measure() depending on context, using the .raw_cube_array property of the returned measure object to access the appropriate array formerly provided by ._data(). * Remove CrunchCube._data() and its helpers `._flat_values()` and `._shape` as they are now dead code. --- src/cr/cube/crunch_cube.py | 38 ++------------------------------------ 1 file changed, 2 insertions(+), 36 deletions(-) diff --git a/src/cr/cube/crunch_cube.py b/src/cr/cube/crunch_cube.py index bf0cb4f28..0e2b4eac9 100644 --- a/src/cr/cube/crunch_cube.py +++ b/src/cr/cube/crunch_cube.py @@ -329,7 +329,7 @@ def hs_dims_for_den(hs_dims, axis): axis = [axis] return [dim for dim in hs_dims if dim not in axis] - table = self._data(weighted=weighted, margin=True) + table = self._counts(weighted).raw_cube_array new_axis = self._adjust_axis(axis) index = tuple( None if i in new_axis else slice(None) @@ -587,7 +587,7 @@ def hs_dims_for_den(hs_dims, axis): axis = [axis] return [dim for dim in hs_dims if dim not in axis] - table = self._data(weighted) + table = self._measure(weighted).raw_cube_array new_axis = self._adjust_axis(axis) index = tuple( None if i in new_axis else slice(None) @@ -981,17 +981,6 @@ def _cube_dict(self): '(str) or dict.' 
% type(self._cube_response_arg).__name__ ) - def _data(self, weighted, margin=False): - """Get the data in non-flattened shape. - - Converts the flattened shape (original response) into non-flattened - shape (count of elements per cube dimension). E.g. for a CAT x CAT - cube, with 2 categories in each dimension (variable), we end up with - a ndarray of shape (2, 2). - """ - values = self._flat_values(weighted, margin) - return np.array(values).reshape(self._shape) - def _drop_mr_cat_dims(self, array, fix_valids=False): """Return ndarray reflecting *array* with MR_CAT dims dropped. @@ -1055,25 +1044,6 @@ def _fix_valid_indices(cls, valid_indices, insertion_index, dim): valid_indices[dim] = indices.tolist() return valid_indices - def _flat_values(self, weighted, margin=False): - """Return list of measure values as found in cube response. - - If *weighted* is True, weighted counts are returned if present in the - cube. Otherwise, unweighted counts are returned. If *margin* is True, - counts are returned even if mean values are present, which may be - preferred for example when calculating a margin. 
- """ - values = self._cube_dict['result']['counts'] - if self.has_means and not margin: - mean = self._cube_dict['result']['measures'].get('mean', {}) - values = mean.get('data', values) - elif weighted and self.is_weighted: - count = self._cube_dict['result']['measures'].get('count', {}) - values = count.get('data', values) - values = [(val if not type(val) is dict else np.nan) - for val in values] - return values - def _inserted_dim_inds(self, transform_dims, axis): dim_ind = axis if self.ndim < 3 else axis + 1 if not transform_dims or dim_ind not in transform_dims: @@ -1386,10 +1356,6 @@ def _pruning_base(self, axis=None, hs_dims=None): include_transforms_for_dims=hs_dims, ) - @lazyproperty - def _shape(self): - return tuple([dim.shape for dim in self._all_dimensions]) - def _update_result(self, result, insertions, dimension_index, valid_indices): """Insert subtotals into resulting ndarray.""" From 6e10548f20f91f8c6f53a7afbbcfb0e361d98cd8 Mon Sep 17 00:00:00 2001 From: Steve Canny Date: Tue, 6 Nov 2018 11:31:11 -0800 Subject: [PATCH 30/30] test: scrub overall test coverage * Add `make unit-coverage` task to Makefile that shows our unit test coverage is pretty spotty, indicating an over-reliance on integration tests (which do not test fine-grained behaviors). 
--- tests/fixtures/scale_means/__init__.py | 22 ---------------------- 1 file changed, 22 deletions(-) delete mode 100644 tests/fixtures/scale_means/__init__.py diff --git a/tests/fixtures/scale_means/__init__.py b/tests/fixtures/scale_means/__init__.py deleted file mode 100644 index f0ffedf3d..000000000 --- a/tests/fixtures/scale_means/__init__.py +++ /dev/null @@ -1,22 +0,0 @@ -import os -from functools import partial - -from cr.cube.util import load_fixture - -CUBES_DIR = os.path.dirname(os.path.abspath(__file__)) - - -def _load(cube_file): - load = partial(load_fixture, CUBES_DIR) - return load(cube_file) - - -CA_CAT_X_ITEMS = _load('ca-cat-x-items.json') -CA_ITEMS_X_CAT = _load('ca-items-x-cat.json') -CA_X_MR = _load('ca-x-mr.json') -CAT_X_CA_CAT_X_ITEMS = _load('cat-x-ca-cat-x-items.json') -CAT_X_CAT = _load('cat-x-cat.json') -CAT_X_MR = _load('cat-x-mr.json') -MR_X_CAT = _load('mr-x-cat.json') -UNIVARIATE_CAT = _load('univariate-cat.json') -CAT_X_CAT_SM_MARGIN = _load('cat-x-cat-sm-margin.json')