From 6fb7e6263a1ef42b75dbd0f2c914ba2057c7d214 Mon Sep 17 00:00:00 2001 From: Slobodan Ilic Date: Tue, 30 Jan 2018 09:41:02 +0100 Subject: [PATCH 1/7] Refactor shape fixing into public method - Extract '_fix_shape' call from '_as_array' (internal) to 'as_array' (API) - Enable other methods that use '_as_array' to preserve information about the true shape of the table - This allows the numpy.sum (in particular the axis parameter) to operate on all dimensions consistently (no dimensions are lost while processing) - Only flatten dimensions when actually returning data from cr.cube --- src/cr/cube/crunch_cube.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/cr/cube/crunch_cube.py b/src/cr/cube/crunch_cube.py index 07f7403e9..88aa99dc2 100644 --- a/src/cr/cube/crunch_cube.py +++ b/src/cr/cube/crunch_cube.py @@ -797,13 +797,14 @@ def as_array(self, include_missing=False, weighted=True, adjusted=False, [0, 0, 0, 0], ]) ''' - return self._as_array( + array = self._as_array( include_missing=include_missing, weighted=weighted, adjusted=adjusted, include_transforms_for_dims=include_transforms_for_dims, prune=prune, ) + return self._fix_shape(array) def margin(self, axis=None, weighted=True, include_transforms_for_dims=None, prune=False): From 9b4f811c53ee0390947f39f7a84d744a6a379e31 Mon Sep 17 00:00:00 2001 From: Slobodan Ilic Date: Tue, 30 Jan 2018 10:16:04 +0100 Subject: [PATCH 2/7] Add tests for 3D cube margins (CA x (single) CAT) --- src/cr/cube/crunch_cube.py | 7 +- tests/integration/fixtures/__init__.py | 1 + .../fixtures/cubes/ca-x-single-cat.json | 343 ++++++++++++++++++ tests/integration/test_crunch_cube.py | 19 + 4 files changed, 369 insertions(+), 1 deletion(-) create mode 100644 tests/integration/fixtures/cubes/ca-x-single-cat.json diff --git a/src/cr/cube/crunch_cube.py b/src/cr/cube/crunch_cube.py index 88aa99dc2..f4b23956e 100644 --- a/src/cr/cube/crunch_cube.py +++ b/src/cr/cube/crunch_cube.py @@ -566,7 +566,12 @@ def _margin(self, axis=None, weighted=True, adjusted=False, ) array = self._inflate_dim(array, axis) - res = np.sum(array, axis) + if axis and not isinstance(axis, tuple) and axis > len(array.shape) - 1: + # Handle special case when a dimension is lost due to a + # single element. + res = array + else: + res = np.sum(array, axis) if prune: # Remove values if 0 or np.nan diff --git a/tests/integration/fixtures/__init__.py b/tests/integration/fixtures/__init__.py index 20d8a8f09..811e88b64 100644 --- a/tests/integration/fixtures/__init__.py +++ b/tests/integration/fixtures/__init__.py @@ -125,3 +125,4 @@ FIXT_FRUIT_X_PETS_HS_TOP_BOTTOM = load_fixture( CUBES_DIR, 'fruit-x-pets-hs-top-bottom.json' ) +FIXT_CA_X_SINGLE_CAT = load_fixture(CUBES_DIR, 'ca-x-single-cat.json') diff --git a/tests/integration/fixtures/cubes/ca-x-single-cat.json b/tests/integration/fixtures/cubes/ca-x-single-cat.json new file mode 100644 index 000000000..8395f05cd --- /dev/null +++ b/tests/integration/fixtures/cubes/ca-x-single-cat.json @@ -0,0 +1,343 @@ +{ + "element": "shoji:view", + "self": "https://alpha.crunch.io/api/datasets/0063c146f72945b7a54dbe7edb35e15c/cube/?filter=%5B%5D&query=%7B%22dimensions%22:%5B%7B%22each%22:%22https:%2F%2Falpha.crunch.io%2Fapi%2Fdatasets%2F0063c146f72945b7a54dbe7edb35e15c%2Fvariables%2F00000b%2F%22%7D,%7B%22variable%22:%22https:%2F%2Falpha.crunch.io%2Fapi%2Fdatasets%2F0063c146f72945b7a54dbe7edb35e15c%2Fvariables%2F00000b%2F%22%7D,%7B%22variable%22:%22https:%2F%2Falpha.crunch.io%2Fapi%2Fdatasets%2F0063c146f72945b7a54dbe7edb35e15c%2Fvariables%2F3dc48bd3ebb74966ab55de5f69e47561%2F%22%7D%5D,%22measures%22:%7B%22count%22:%7B%22function%22:%22cube_count%22,%22args%22:%5B%5D%7D%7D,%22weight%22:null%7D", + "value": { + "query": { + "measures": { + "count": { + "function": "cube_count", + "args": [] + } + }, + "dimensions": [ + { + "each": "https://alpha.crunch.io/api/datasets/0063c146f72945b7a54dbe7edb35e15c/variables/00000b/" + }, + { + "variable": "https://alpha.crunch.io/api/datasets/0063c146f72945b7a54dbe7edb35e15c/variables/00000b/" + }, + { + "variable": "https://alpha.crunch.io/api/datasets/0063c146f72945b7a54dbe7edb35e15c/variables/3dc48bd3ebb74966ab55de5f69e47561/" + } + ], + "weight": null + }, + "query_environment": { + "filter": [] + }, + "result": { + "dimensions": [ + { + "derived": true, + "references": { + "alias": "pets_array", + "subreferences": [ + { + "alias": "a_cat", + "name": "cat", + "description": "a_cat" + }, + { + "alias": "a_dog", + "name": "dog", + "description": "a_dog" + }, + { + "alias": "a_wombat", + "name": "wombat", + "description": "a_wombat" + } + ], + "is_dichotomous": false, + "description": "Pets Array", + "name": "pets_array" + }, + "type": { + "subtype": { + "class": "variable" + }, + "elements": [ + { + "id": 1, + "value": { + "derived": false, + "references": { + "alias": "a_cat", + "name": "cat", + "description": "a_cat" + }, + "id": "0003", + "type": { + "ordinal": false, + "class": "categorical", + "categories": [ + { + "numeric_value": 1, + "missing": false, + "id": 1, + "name": "not selected" + }, + { + "numeric_value": 2, + "missing": false, + "id": 2, + "name": "selected" + }, + { + "numeric_value": null, + "missing": true, + "id": -1, + "name": "No Data" + } + ] + } + }, + "missing": false + }, + { + "id": 2, + "value": { + "derived": false, + "references": { + "alias": "a_dog", + "name": "dog", + "description": "a_dog" + }, + "id": "0004", + "type": { + "ordinal": false, + "class": "categorical", + "categories": [ + { + "numeric_value": 1, + "missing": false, + "id": 1, + "name": "not selected" + }, + { + "numeric_value": 2, + "missing": false, + "id": 2, + "name": "selected" + }, + { + "numeric_value": null, + "missing": true, + "id": -1, + "name": "No Data" + } + ] + } + }, + "missing": false + }, + { + "id": 3, + "value": { + "derived": false, + "references": { + "alias": "a_wombat", + "name": "wombat", + "description": "a_wombat" + }, + "id": "0005", + "type": { + "ordinal": false, + "class": "categorical", + "categories": [ + { + "numeric_value": 1, + "missing": false, + "id": 1, + "name": "not selected" + }, + { + "numeric_value": 2, + "missing": false, + "id": 2, + "name": "selected" + }, + { + "numeric_value": null, + "missing": true, + "id": -1, + "name": "No Data" + } + ] + } + }, + "missing": false + } + ], + "class": "enum" + } + }, + { + "derived": false, + "references": { + "alias": "pets_array", + "is_dichotomous": false, + "description": "Pets Array", + "name": "pets_array", + "subreferences": [ + { + "alias": "a_cat", + "name": "cat", + "description": "a_cat" + }, + { + "alias": "a_dog", + "name": "dog", + "description": "a_dog" + }, + { + "alias": "a_wombat", + "name": "wombat", + "description": "a_wombat" + } + ] + }, + "type": { + "ordinal": false, + "subvariables": [ + "0003", + "0004", + "0005" + ], + "class": "categorical", + "categories": [ + { + "numeric_value": 1, + "missing": false, + "id": 1, + "name": "not selected" + }, + { + "numeric_value": 2, + "missing": false, + "id": 2, + "name": "selected" + }, + { + "numeric_value": null, + "missing": true, + "id": -1, + "name": "No Data" + } + ] + } + }, + { + "derived": true, + "references": { + "alias": "Fruit Single CAT", + "name": "Fruit Single CAT", + "description": "Fruits" + }, + "type": { + "ordinal": false, + "class": "categorical", + "categories": [ + { + "numeric_value": null, + "missing": false, + "id": 1, + "name": "rambutan" + }, + { + "numeric_value": null, + "missing": true, + "id": 2, + "name": "satsuma" + }, + { + "numeric_value": null, + "missing": true, + "id": -1, + "name": "No Data" + } + ] + } + } + ], + "missing": 70, + "measures": { + "count": { + "data": [ + 13, + 32, + 0, + 12, + 22, + 0, + 8, + 13, + 0, + 16, + 24, + 0, + 12, + 28, + 0, + 5, + 15, + 0, + 11, + 21, + 0, + 12, + 26, + 0, + 10, + 20, + 0 + ], + "n_missing": 70, + "metadata": { + "references": {}, + "derived": true, + "type": { + "integer": true, + "missing_rules": {}, + "missing_reasons": { + "No Data": -1 + }, + "class": "numeric" + } + } + } + }, + "element": "crunch:cube", + "counts": [ + 13, + 32, + 0, + 12, + 22, + 0, + 8, + 13, + 0, + 16, + 24, + 0, + 12, + 28, + 0, + 5, + 15, + 0, + 11, + 21, + 0, + 12, + 26, + 0, + 10, + 20, + 0 + ], + "n": 100 + } + } +} diff --git a/tests/integration/test_crunch_cube.py b/tests/integration/test_crunch_cube.py index edda18e5c..b669353b2 100644 --- a/tests/integration/test_crunch_cube.py +++ b/tests/integration/test_crunch_cube.py @@ -43,6 +43,7 @@ from .fixtures import FIXT_SELECTED_3_WAY_2 from .fixtures import FIXT_SELECTED_3_WAY from .fixtures import FIXT_SINGLE_CAT_MEANS +from .fixtures import FIXT_CA_X_SINGLE_CAT from cr.cube.crunch_cube import CrunchCube @@ -2013,3 +2014,21 @@ def test_ca_with_single_cat_pruning(self): expected = np.array([79, 80, 70]) actual = cube.as_array(weighted=False, prune=True) np.testing.assert_almost_equal(actual, expected) + + def test_ca_x_single_cat_col_margins(test): + cube = CrunchCube(FIXT_CA_X_SINGLE_CAT) + expected = np.array([25, 28, 23]) + # Axis equals to 1, because col direction in 3D cube is 1 (and not 0). + # It operates on the 0th dimension of each slice (which is 1st + # dimension of the cube). + actual = cube.margin(axis=1) + np.testing.assert_array_equal(actual, expected) + + def test_ca_x_single_cat_row_margins(test): + cube = CrunchCube(FIXT_CA_X_SINGLE_CAT) + expected = np.array([[13, 12], [16, 12], [11, 12]]) + # Axis equals to 2, because col direction in 3D cube is 2 (and not 1). + # It operates on the 1st dimension of each slice (which is 2nd + # dimension of the cube). + actual = cube.margin(axis=2) + np.testing.assert_array_equal(actual, expected) From 6d88e7faaa528affd6a25c008f30193912d61314 Mon Sep 17 00:00:00 2001 From: Slobodan Ilic Date: Tue, 30 Jan 2018 15:35:24 +0100 Subject: [PATCH 3/7] Add test for 3D cube (CA x (single) CAT) proportions by cell --- tests/integration/test_crunch_cube.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/tests/integration/test_crunch_cube.py b/tests/integration/test_crunch_cube.py index b669353b2..14aa58add 100644 --- a/tests/integration/test_crunch_cube.py +++ b/tests/integration/test_crunch_cube.py @@ -2032,3 +2032,10 @@ def test_ca_x_single_cat_row_margins(test): # dimension of the cube). actual = cube.margin(axis=2) np.testing.assert_array_equal(actual, expected) + + def test_ca_x_single_cat_cell_margins(test): + cube = CrunchCube(FIXT_CA_X_SINGLE_CAT) + expected = np.array([25, 28, 23]) + # Axis equals to (1, 2), because the total is calculated for each slice. + actual = cube.margin(axis=(1, 2)) + np.testing.assert_array_equal(actual, expected) From da7a8fc772c8242a5a6ffb0a4c33b73404e7fc7b Mon Sep 17 00:00:00 2001 From: Slobodan Ilic Date: Tue, 30 Jan 2018 15:35:48 +0100 Subject: [PATCH 4/7] Fix edge case for flattened 3D cube (proportions by cell) - When a CAT dimension of a CA x CAT contains only 1 element - The result table drops one dimension - The axis parameter (1, 2) needs to be converted to 1 --- src/cr/cube/crunch_cube.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/cr/cube/crunch_cube.py b/src/cr/cube/crunch_cube.py index f4b23956e..4b8227e7c 100644 --- a/src/cr/cube/crunch_cube.py +++ b/src/cr/cube/crunch_cube.py @@ -570,6 +570,8 @@ def _margin(self, axis=None, weighted=True, adjusted=False, # Handle special case when a dimension is lost due to a # single element. res = array + elif axis == (1, 2) and len(array.shape) <= 2: + res = np.sum(array, 1) else: res = np.sum(array, axis) From f61011a15df0d75e6ba8eb681304bb4974a82855 Mon Sep 17 00:00:00 2001 From: Slobodan Ilic Date: Wed, 31 Jan 2018 09:11:02 +0100 Subject: [PATCH 5/7] Update tests for edge case 3D cube, and fix shape of proportions --- src/cr/cube/crunch_cube.py | 7 +++-- tests/integration/test_crunch_cube.py | 45 ++++++++++++++++++++++++++- tests/unit/test_crunch_cube.py | 13 ++++++-- 3 files changed, 58 insertions(+), 7 deletions(-) diff --git a/src/cr/cube/crunch_cube.py b/src/cr/cube/crunch_cube.py index 4b8227e7c..800dd28ca 100644 --- a/src/cr/cube/crunch_cube.py +++ b/src/cr/cube/crunch_cube.py @@ -649,8 +649,9 @@ def _proportions(self, axis=None, weighted=True, adjusted=False, elif axis == 2: margin = margin[:, :, np.newaxis] - array = self.as_array( - include_missing=include_missing, weighted=weighted, + array = self._as_array( + include_missing=include_missing, + weighted=weighted, adjusted=adjusted, include_transforms_for_dims=include_transforms_for_dims, prune=prune, @@ -663,7 +664,7 @@ def _proportions(self, axis=None, weighted=True, adjusted=False, len(getattr(margin, 'shape', [])) == 1): margin = margin[:, np.newaxis, np.newaxis] - return array / margin + return self._fix_shape(array / margin) # Properties diff --git a/tests/integration/test_crunch_cube.py b/tests/integration/test_crunch_cube.py index 14aa58add..d79706d71 100644 --- a/tests/integration/test_crunch_cube.py +++ b/tests/integration/test_crunch_cube.py @@ -2013,11 +2013,54 @@ def test_ca_with_single_cat_pruning(self): # pruning, which is not the responsibility of cr.cube. expected = np.array([79, 80, 70]) actual = cube.as_array(weighted=False, prune=True) + np.testing.assert_array_equal(actual, expected) + + def test_ca_x_single_cat_counts(test): + cube = CrunchCube(FIXT_CA_X_SINGLE_CAT) + expected = np.array([[13, 12], [16, 12], [11, 12]]) + actual = cube.as_array() + np.testing.assert_array_equal(actual, expected) + + def test_ca_x_single_cat_props_by_col(test): + cube = CrunchCube(FIXT_CA_X_SINGLE_CAT) + expected = np.array([ + [0.52, 0.48], + [0.57142857, 0.42857143], + [0.47826087, 0.52173913], + ]) + # Col direction is 1 (and not 0) because of 3D cube. + actual = cube.proportions(axis=1) + np.testing.assert_almost_equal(actual, expected) + + def test_ca_x_single_cat_props_by_row(test): + cube = CrunchCube(FIXT_CA_X_SINGLE_CAT) + expected = np.array([ + [1., 1.], + [1., 1.], + [1., 1.], + ]) + # Col direction is 2 (and not 1) because of 3D cube. + actual = cube.proportions(axis=2) + np.testing.assert_almost_equal(actual, expected) + + def test_ca_x_single_cat_props_by_cell(test): + cube = CrunchCube(FIXT_CA_X_SINGLE_CAT) + expected = np.array([ + [0.52, 0.48], + [0.57142857, 0.42857143], + [0.47826087, 0.52173913], + ]) + # Col direction is (1, 2) because 3D cube (total per slice). + actual = cube.proportions(axis=(1, 2)) np.testing.assert_almost_equal(actual, expected) def test_ca_x_single_cat_col_margins(test): cube = CrunchCube(FIXT_CA_X_SINGLE_CAT) - expected = np.array([25, 28, 23]) + expected = np.array([ + [25], + [28], + [23], + ]) # Axis equals to 1, because col direction in 3D cube is 1 (and not 0). # It operates on the 0th dimension of each slice (which is 1st # dimension of the cube). diff --git a/tests/unit/test_crunch_cube.py b/tests/unit/test_crunch_cube.py index e33de2b43..65e2603ce 100644 --- a/tests/unit/test_crunch_cube.py +++ b/tests/unit/test_crunch_cube.py @@ -137,13 +137,20 @@ def test_fix_valid_indices_zero_position(self): self.assertEqual(actual, expected) @patch('cr.cube.crunch_cube.CrunchCube.has_mr', False) - @patch('cr.cube.crunch_cube.CrunchCube.as_array') + @patch('cr.cube.crunch_cube.CrunchCube._fix_shape') + @patch('cr.cube.crunch_cube.CrunchCube._as_array') @patch('cr.cube.crunch_cube.CrunchCube.is_double_mr') @patch('cr.cube.crunch_cube.CrunchCube._margin') - def test_transform_param_propagation(self, mock_margin, - mock_is_double_mr, mock_as_array): + def test_transform_param_propagation( + self, + mock_margin, + mock_is_double_mr, + mock_as_array, + mock_fix_shape + ): mock_margin.return_value = 1 # Prevent 'proportions' from crashing mock_is_double_mr.return_value = False + mock_fix_shape.return_value = False mock_as_array.return_value = 0 cube = CrunchCube({}) From f119cd8073614c181f46332f7c3a9f07fb40f1d9 Mon Sep 17 00:00:00 2001 From: Slobodan Ilic Date: Thu, 1 Feb 2018 18:27:29 +0100 Subject: [PATCH 6/7] Remove obsolete _fix_shape after refactor --- src/cr/cube/crunch_cube.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/cr/cube/crunch_cube.py b/src/cr/cube/crunch_cube.py index 800dd28ca..40437337a 100644 --- a/src/cr/cube/crunch_cube.py +++ b/src/cr/cube/crunch_cube.py @@ -290,7 +290,7 @@ def _as_array(self, include_missing=False, get_non_selected=False, get_non_selected) res = np.array(values).reshape(shape) res = self._transform(res, include_transforms_for_dims, valid_indices) - res = self._fix_shape(res) + (1 if adjusted else 0) + res = res + adjusted if prune and not self.has_mr: return self._prune(res, include_transforms_for_dims) From 38364647bc5f37f64b91fedefdcfc50f4a5a03c3 Mon Sep 17 00:00:00 2001 From: Slobodan Ilic Date: Thu, 1 Feb 2018 19:35:51 +0100 Subject: [PATCH 7/7] Address PR comments --- src/cr/cube/crunch_cube.py | 3 --- src/cr/cube/dimension.py | 25 ++++++++++--------------- 2 files changed, 10 insertions(+), 18 deletions(-) diff --git a/src/cr/cube/crunch_cube.py b/src/cr/cube/crunch_cube.py index 40437337a..f5de609d3 100644 --- a/src/cr/cube/crunch_cube.py +++ b/src/cr/cube/crunch_cube.py @@ -240,10 +240,8 @@ def _transform(self, res, include_transforms_for_dims, valid_indices): ind_insertion = indices['anchor_ind'] + ind_offset if i == 0: value = sum(res[ind_subtotal_elements]) - # res = np.insert(res, ind_insertion + 1, value, axis=i) else: value = np.sum(res[:, ind_subtotal_elements], axis=1) - # res = np.insert(res, ind_insertion + 1, value, axis=i) insertions.append((ind_insertion, value)) for j, (ind_insertion, value) in enumerate(insertions): @@ -548,7 +546,6 @@ def _inflate_dim(self, array, axis): def _margin(self, axis=None, weighted=True, adjusted=False, include_transforms_for_dims=None, prune=False): - '''Calculate cube margin.''' # MR margins are calculated differently, so they need a separate method # for them. A good example of this is the rcrunch functionality. diff --git a/src/cr/cube/dimension.py b/src/cr/cube/dimension.py index 6cf7c3968..a0f081d7c 100644 --- a/src/cr/cube/dimension.py +++ b/src/cr/cube/dimension.py @@ -99,21 +99,17 @@ def _elements(self): @property def hs_indices(self): '''Headers and Subtotals indices.''' + elms = self._elements + indices = [{ - 'anchor_ind': [ - i - for (i, el) in enumerate(self._elements) - if el['id'] == subtotal.anchor - ][0] - if (subtotal.anchor in [el['id'] for el in self._elements]) else - subtotal.anchor, - # -1, - 'inds': [ - i - for (i, el) in enumerate(self._elements) - if el['id'] in subtotal.args - ], - } for subtotal in self.subtotals] + 'anchor_ind': ( + [i for (i, el) in enumerate(elms) if el['id'] == st.anchor][0] + if (st.anchor in [el['id'] for el in elms]) else + st.anchor + ), + 'inds': [i for (i, el) in enumerate(elms) if el['id'] in st.args], + } for st in self.subtotals] + return indices @property @@ -179,7 +175,6 @@ def labels(self, include_missing=False, include_transforms=False, ind_insert = next(( i for (i, x) in enumerate(labels_with_cat_ids) if x.get('id') == subtotal.anchor - # ), -1) + 1 ), subtotal.anchor) if ind_insert == 'top': ind_insert = 0