From af57370c47b232295bed465816f9b31c4fa49e74 Mon Sep 17 00:00:00 2001 From: Slobodan Ilic Date: Tue, 24 Jul 2018 14:58:13 +0200 Subject: [PATCH 1/3] Failing test for CA(it) x CA(cat) x CAT index --- tests/integration/fixtures/__init__.py | 1 + .../cubes/ca-items-x-ca-cat-x-cat.json | 657 ++++++++++++++++++ tests/integration/test_index.py | 44 ++ 3 files changed, 702 insertions(+) create mode 100644 tests/integration/fixtures/cubes/ca-items-x-ca-cat-x-cat.json diff --git a/tests/integration/fixtures/__init__.py b/tests/integration/fixtures/__init__.py index 39ef11e01..2fd8f344f 100644 --- a/tests/integration/fixtures/__init__.py +++ b/tests/integration/fixtures/__init__.py @@ -140,3 +140,4 @@ def _load(cube_file): HUFFPOST_ACTIONS_X_HOUSEHOLD = _load('huffpost-actions-x-household.json') GENDER_X_WEIGHT = _load('gender-x-weight.json') CAT_X_CAT_PRUNING_HS = _load('cat-x-cat-pruning-hs.json') +CA_ITEMS_X_CA_CAT_X_CAT = _load('ca-items-x-ca-cat-x-cat.json') diff --git a/tests/integration/fixtures/cubes/ca-items-x-ca-cat-x-cat.json b/tests/integration/fixtures/cubes/ca-items-x-ca-cat-x-cat.json new file mode 100644 index 000000000..8cfb81544 --- /dev/null +++ b/tests/integration/fixtures/cubes/ca-items-x-ca-cat-x-cat.json @@ -0,0 +1,657 @@ +{ + "element": "shoji:view", + "self": "https://app.crunch.io/api/datasets/1433e16a07474ec1b1b3c0fbb0acd3c8/cube/?filter=%5B%5D&query=%7B%22dimensions%22:%5B%7B%22each%22:%22https:%2F%2Fapp.crunch.io%2Fapi%2Fdatasets%2F1433e16a07474ec1b1b3c0fbb0acd3c8%2Fvariables%2F00000c%2F%22%7D,%7B%22variable%22:%22https:%2F%2Fapp.crunch.io%2Fapi%2Fdatasets%2F1433e16a07474ec1b1b3c0fbb0acd3c8%2Fvariables%2F00000c%2F%22%7D,%7B%22variable%22:%22https:%2F%2Fapp.crunch.io%2Fapi%2Fdatasets%2F1433e16a07474ec1b1b3c0fbb0acd3c8%2Fvariables%2F000000%2F%22%7D%5D,%22measures%22:%7B%22count%22:%7B%22function%22:%22cube_count%22,%22args%22:%5B%5D%7D%7D,%22weight%22:%22https:%2F%2Fapp.crunch.io%2Fapi%2Fdatasets%2F1433e16a07474ec1b1b3c0fbb0acd3c8%2Fvariables%2F88908b0a6be04128be2fb1d69d0a0bcf%2F%22%7D", + "value": { + "query": { + "measures": { + "count": { + "function": "cube_count", + "args": [] + } + }, + "dimensions": [ + { + "each": "https://app.crunch.io/api/datasets/1433e16a07474ec1b1b3c0fbb0acd3c8/variables/00000c/" + }, + { + "variable": "https://app.crunch.io/api/datasets/1433e16a07474ec1b1b3c0fbb0acd3c8/variables/00000c/" + }, + { + "variable": "https://app.crunch.io/api/datasets/1433e16a07474ec1b1b3c0fbb0acd3c8/variables/000000/" + } + ], + "weight": "https://app.crunch.io/api/datasets/1433e16a07474ec1b1b3c0fbb0acd3c8/variables/88908b0a6be04128be2fb1d69d0a0bcf/" + }, + "query_environment": { + "filter": [] + }, + "result": { + "dimensions": [ + { + "derived": true, + "references": { + "description": null, + "subreferences": [ + { + "alias": "ca_subvar_1", + "name": "ca_subvar_1", + "description": null + }, + { + "alias": "ca_subvar_2", + "name": "ca_subvar_2", + "description": null + }, + { + "alias": "ca_subvar_3", + "name": "ca_subvar_3", + "description": null + } + ], + "name": "categorical_array", + "alias": "categorical_array", + "is_dichotomous": false, + "view": { + "show_counts": false, + "transform": { + "insertions": [ + { + "function": "subtotal", + "args": [ + 1, + 2 + ], + "name": "TOP AB", + "anchor": "top" + }, + { + "function": "subtotal", + "args": [ + 2, + 3 + ], + "name": "MID BC", + "anchor": 2 + }, + { + "function": "subtotal", + "args": [ + 4, + 3 + ], + "name": "BOT CD", + "anchor": "bottom" + } + ] + }, + "include_missing": false, + "column_width": null + } + }, + "type": { + "subtype": { + "class": "variable" + }, + "elements": [ + { + "id": 1, + "value": { + "derived": false, + "references": { + "alias": "ca_subvar_1", + "name": "ca_subvar_1", + "description": null + }, + "id": "0007", + "type": { + "ordinal": false, + "class": "categorical", + "categories": [ + { + "numeric_value": null, + "missing": false, + "id": 1, + "name": "a" + }, + { + "numeric_value": null, + "missing": false, + "id": 2, + "name": "b" + }, + { + "numeric_value": null, + "missing": false, + "id": 3, + "name": "c" + }, + { + "numeric_value": null, + "missing": false, + "id": 4, + "name": "d" + }, + { + "numeric_value": null, + "missing": true, + "id": -1, + "name": "No Data" + } + ] + } + }, + "missing": false + }, + { + "id": 2, + "value": { + "derived": false, + "references": { + "alias": "ca_subvar_2", + "name": "ca_subvar_2", + "description": null + }, + "id": "0008", + "type": { + "ordinal": false, + "class": "categorical", + "categories": [ + { + "numeric_value": null, + "missing": false, + "id": 1, + "name": "a" + }, + { + "numeric_value": null, + "missing": false, + "id": 2, + "name": "b" + }, + { + "numeric_value": null, + "missing": false, + "id": 3, + "name": "c" + }, + { + "numeric_value": null, + "missing": false, + "id": 4, + "name": "d" + }, + { + "numeric_value": null, + "missing": true, + "id": -1, + "name": "No Data" + } + ] + } + }, + "missing": false + }, + { + "id": 3, + "value": { + "derived": false, + "references": { + "alias": "ca_subvar_3", + "name": "ca_subvar_3", + "description": null + }, + "id": "0009", + "type": { + "ordinal": false, + "class": "categorical", + "categories": [ + { + "numeric_value": null, + "missing": false, + "id": 1, + "name": "a" + }, + { + "numeric_value": null, + "missing": false, + "id": 2, + "name": "b" + }, + { + "numeric_value": null, + "missing": false, + "id": 3, + "name": "c" + }, + { + "numeric_value": null, + "missing": false, + "id": 4, + "name": "d" + }, + { + "numeric_value": null, + "missing": true, + "id": -1, + "name": "No Data" + } + ] + } + }, + "missing": false + } + ], + "class": "enum" + } + }, + { + "derived": false, + "references": { + "description": null, + "subreferences": [ + { + "alias": "ca_subvar_1", + "name": "ca_subvar_1", + "description": null + }, + { + "alias": "ca_subvar_2", + "name": "ca_subvar_2", + "description": null + }, + { + "alias": "ca_subvar_3", + "name": "ca_subvar_3", + "description": null + } + ], + "name": "categorical_array", + "alias": "categorical_array", + "is_dichotomous": false, + "view": { + "show_counts": false, + "transform": { + "insertions": [ + { + "function": "subtotal", + "args": [ + 1, + 2 + ], + "name": "TOP AB", + "anchor": "top" + }, + { + "function": "subtotal", + "args": [ + 2, + 3 + ], + "name": "MID BC", + "anchor": 2 + }, + { + "function": "subtotal", + "args": [ + 4, + 3 + ], + "name": "BOT CD", + "anchor": "bottom" + } + ] + }, + "include_missing": false, + "column_width": null + } + }, + "type": { + "ordinal": false, + "subvariables": [ + "0007", + "0008", + "0009" + ], + "class": "categorical", + "categories": [ + { + "numeric_value": null, + "missing": false, + "id": 1, + "name": "a" + }, + { + "numeric_value": null, + "missing": false, + "id": 2, + "name": "b" + }, + { + "numeric_value": null, + "missing": false, + "id": 3, + "name": "c" + }, + { + "numeric_value": null, + "missing": false, + "id": 4, + "name": "d" + }, + { + "numeric_value": null, + "missing": true, + "id": -1, + "name": "No Data" + } + ] + } + }, + { + "derived": false, + "references": { + "alias": "x", + "view": { + "show_counts": false, + "show_numeric_values": false, + "transform": { + "insertions": [ + { + "name": "test bottom heading only", + "anchor": "bottom" + } + ] + }, + "include_missing": false, + "column_width": null + }, + "description": "Numeric variable with value labels", + "name": "x" + }, + "type": { + "ordinal": false, + "class": "categorical", + "categories": [ + { + "numeric_value": 1, + "missing": false, + "id": 1, + "name": "red" + }, + { + "numeric_value": 2, + "missing": false, + "id": 2, + "name": "green" + }, + { + "numeric_value": 3, + "missing": false, + "id": 3, + "name": "blue" + }, + { + "numeric_value": 4, + "missing": false, + "id": 4, + "name": "4" + }, + { + "numeric_value": 8, + "missing": true, + "id": 8, + "name": "8" + }, + { + "numeric_value": 9, + "missing": false, + "id": 9, + "name": "9" + }, + { + "numeric_value": null, + "missing": true, + "id": -1, + "name": "No Data" + } + ] + } + } + ], + "missing": 1, + "measures": { + "count": { + "data": [ + 1.0, + 1.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 1.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 1.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 1.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 1.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0 + ], + "n_missing": 1, + "metadata": { + "references": {}, + "derived": true, + "type": { + "integer": false, + "missing_rules": {}, + "missing_reasons": { + "No Data": -1 + }, + "class": "numeric" + } + } + } + }, + "element": "crunch:cube", + "counts": [ + 1, + 1, + 0, + 0, + 1, + 0, + 0, + 0, + 0, + 1, + 1, + 0, + 1, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 1, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 1, + 0, + 1, + 1, + 0, + 0, + 0, + 0, + 1, + 0, + 0, + 1, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 1, + 0, + 0, + 1, + 0, + 0, + 0, + 0, + 1, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 1, + 0, + 1, + 1, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0 + ], + "n": 6 + } + } +} \ No newline at end of file diff --git a/tests/integration/test_index.py b/tests/integration/test_index.py index 5dea8190e..a5b8f6075 100644 --- a/tests/integration/test_index.py +++ b/tests/integration/test_index.py @@ -7,6 +7,7 @@ from .fixtures import PETS_X_PETS from .fixtures import SELECTED_CROSSTAB_4 from .fixtures import CAT_X_MR_X_MR +from .fixtures import CA_ITEMS_X_CA_CAT_X_CAT class TestIndex(TestCase): @@ -48,3 +49,46 @@ def test_cat_mr_x_mr_index(self): expected = np.array([]) actual = cube.index() np.testing.assert_almost_equal(actual, expected) + + def test_ca_items_x_ca_cat_x_cat_index(self): + cube = CrunchCube(CA_ITEMS_X_CA_CAT_X_CAT) + + # Test index values + expected = np.array([ + [[ 1., 1., np.nan, np.nan, np.nan], + [np.nan, np.nan, np.nan, np.nan, np.nan], + [np.nan, np.nan, np.nan, np.nan, np.nan], + [np.nan, np.nan, np.nan, np.nan, np.nan]], + + [[ 2., 0., np.nan, np.nan, np.nan], + [ 0., 2., np.nan, np.nan, np.nan], + [np.nan, np.nan, np.nan, np.nan, np.nan], + [np.nan, np.nan, np.nan, np.nan, np.nan]], + + [[np.nan, np.nan, np.nan, np.nan, np.nan], + [ 2., 0., np.nan, np.nan, np.nan], + [ 0., 2., np.nan, np.nan, np.nan], + [np.nan, np.nan, np.nan, np.nan, np.nan]], + ]) + actual = cube.index() + np.testing.assert_almost_equal(actual, expected) + + # Test pruning mask + expected = np.array([ + [[ False, False, True, True, True], + [True, True, True, True, True], + [True, True, True, True, True], + [True, True, True, True, True]], + + [[ False, False, True, True, True], + [ False, False, True, True, True], + [True, True, True, True, True], + [True, True, True, True, True]], + + [[True, True, True, True, True], + [ False, False, True, True, True], + [ False, False, True, True, True], + [True, True, True, True, True]], + ]) + actual = cube.index(prune=True).mask + np.testing.assert_array_equal(actual, expected) From 1ddc60a3cc0a3ae2a69631bb1bcf5215058cabdb Mon Sep 17 00:00:00 2001 From: Slobodan Ilic Date: Tue, 24 Jul 2018 14:58:51 +0200 Subject: [PATCH 2/3] Enable slicing for index --- src/cr/cube/measures/index.py | 29 +++++++++++++++++++---------- 1 file changed, 19 insertions(+), 10 deletions(-) diff --git a/src/cr/cube/measures/index.py b/src/cr/cube/measures/index.py index 47e9194e4..b6fe58e0f 100644 --- a/src/cr/cube/measures/index.py +++ b/src/cr/cube/measures/index.py @@ -32,16 +32,25 @@ def prune(self): @lazyproperty def data(self): '''Return table index by margin.''' - if self.cube.has_mr: - return self._mr_index() - margin = ( - self.cube.margin(axis=0, weighted=self.weighted, prune=self.prune) / - self.cube.margin(weighted=self.weighted, prune=self.prune) - ) - proportions = self.cube.proportions( - axis=1, weighted=self.weighted, prune=self.prune - ) - return proportions / margin + result = [] + for slice_ in self.cube.slices: + if self.cube.has_mr: + return self._mr_index() + margin = ( + slice_.margin(axis=0, weighted=self.weighted, prune=self.prune) / + slice_.margin(weighted=self.weighted, prune=self.prune) + ) + proportions = slice_.proportions( + axis=1, weighted=self.weighted, prune=self.prune + ) + result.append(proportions / margin) + + if len(result) == 1 and self.cube.ndim < 3: + result = result[0] + else: + result = np.ma.masked_array(result) if self.prune else np.array(result) + + return result def _mr_index(self): # mr by mr From 50ce06bf9982cfc04305eec44956b9f8f438e745 Mon Sep 17 00:00:00 2001 From: Slobodan Ilic Date: Thu, 9 Aug 2018 13:25:41 +0200 Subject: [PATCH 3/3] Fix mask application to slice indices --- src/cr/cube/measures/index.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/cr/cube/measures/index.py b/src/cr/cube/measures/index.py index b6fe58e0f..f7a56ef8c 100644 --- a/src/cr/cube/measures/index.py +++ b/src/cr/cube/measures/index.py @@ -48,7 +48,11 @@ def data(self): if len(result) == 1 and self.cube.ndim < 3: result = result[0] else: - result = np.ma.masked_array(result) if self.prune else np.array(result) + if self.prune: + mask = np.array([slice_.mask for slice_ in result]) + result = np.ma.masked_array(result, mask) + else: + result = np.array(result) return result