From d6a4e05ec0d48277bd2dfb228eb95cad68c2fbbc Mon Sep 17 00:00:00 2001 From: Slobodan Ilic Date: Mon, 9 Jul 2018 16:20:32 +0200 Subject: [PATCH 01/14] [#154065274]: Edge case for CAT x MR --- src/cr/cube/measures/scale_means.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/src/cr/cube/measures/scale_means.py b/src/cr/cube/measures/scale_means.py index b48b5f35c..25db2c8a6 100644 --- a/src/cr/cube/measures/scale_means.py +++ b/src/cr/cube/measures/scale_means.py @@ -61,9 +61,14 @@ def contents_inds(self): ] def _inner_prod(self, contents, values): - if len(contents.shape) == 3 and self._cube.ca_dim_ind == 0: + inflate_values = ( + self._cube.ca_dim_ind == 0 + if len(contents.shape) == 3 else + self._cube.mr_dim_ind == 1 + ) + if inflate_values: values = values[:, np.newaxis] try: return contents * values - except: + except ValueError: return contents * values[:, np.newaxis] From d30005bb619585d490153832577d82dd47362b11 Mon Sep 17 00:00:00 2001 From: Slobodan Ilic Date: Mon, 9 Jul 2018 16:44:03 +0200 Subject: [PATCH 02/14] [#154065274]: Fix axis in scale means --- src/cr/cube/measures/scale_means.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/src/cr/cube/measures/scale_means.py b/src/cr/cube/measures/scale_means.py index 25db2c8a6..fc5242707 100644 --- a/src/cr/cube/measures/scale_means.py +++ b/src/cr/cube/measures/scale_means.py @@ -19,7 +19,8 @@ def data(self): contents = self._inner_prod(table, self.values) if self._cube.has_mr and not self._cube.is_double_mr: - axis = 1 - self._cube.mr_dim_ind + # axis = 1 - self._cube.mr_dim_ind + axis = self._cube.dim_types.index('categorical') return np.sum(contents, axis) / np.sum(table, axis) if self.valid_inds.all(): @@ -45,7 +46,9 @@ def values(self): return np.array([ dim.values for dim in self._cube.dimensions if dim.values and any(~np.isnan(dim.values)) - ][int(len(self._cube.dimensions) > 2)]) + ][:1][-1]) + # TODO: Refactor this indexing hack + # 
][int(len(self._cube.dimensions) > 2)]) @lazyproperty def valid_inds(self): From f947c1403f937d3092ee6ed9a69eb798d255e820 Mon Sep 17 00:00:00 2001 From: Slobodan Ilic Date: Mon, 9 Jul 2018 18:09:42 +0200 Subject: [PATCH 03/14] [#154065274]: Implement CA index in slices properly --- src/cr/cube/cube_slice.py | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/src/cr/cube/cube_slice.py b/src/cr/cube/cube_slice.py index 15622170b..c51cb0cd1 100644 --- a/src/cr/cube/cube_slice.py +++ b/src/cr/cube/cube_slice.py @@ -153,6 +153,21 @@ def has_ca(self): ''' return 'categorical_array' in self.dim_types + @property + def ca_dim_ind(self): + index = self._cube.ca_dim_ind + if index is None: + return None + + if self.ndim == 3: + if index == 0: + # If tab dim is items, slices are not + return None + return index - 1 + + # If 2D - just return it + return index + @property def mr_dim_ind(self): '''Get the correct index of the MR dimension in the cube slice.''' From d93f934fd18d5ddfd63d3e0f681393e8385b77e9 Mon Sep 17 00:00:00 2001 From: Slobodan Ilic Date: Mon, 9 Jul 2018 18:32:44 +0200 Subject: [PATCH 04/14] [#154065274]: Cover scale means edge case for CA --- src/cr/cube/measures/scale_means.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/cr/cube/measures/scale_means.py b/src/cr/cube/measures/scale_means.py index fc5242707..cd5343a17 100644 --- a/src/cr/cube/measures/scale_means.py +++ b/src/cr/cube/measures/scale_means.py @@ -65,9 +65,9 @@ def contents_inds(self): def _inner_prod(self, contents, values): inflate_values = ( - self._cube.ca_dim_ind == 0 - if len(contents.shape) == 3 else - self._cube.mr_dim_ind == 1 + self._cube.ca_dim_ind == 0 and len(contents.shape) == 3 or + self._cube.mr_dim_ind == 1 or + self._cube.ca_dim_ind == 1 and len(contents.shape) < 3 ) if inflate_values: values = values[:, np.newaxis] From 0314cfd1721df99a498ab85d82521cbd41392fa5 Mon Sep 17 00:00:00 2001 From: Slobodan Ilic Date: Tue, 10 Jul 2018 
00:08:06 +0200 Subject: [PATCH 05/14] [#154065274]: Added means tests, fix CA cases --- src/cr/cube/measures/scale_means.py | 18 +- tests/integration/fixtures/cubes/__init__.py | 0 .../fixtures/cubes/scale_means/__init__.py | 21 + .../cubes/scale_means/ca-cat-x-items.json | 513 ++++++ .../cubes/scale_means/ca-items-x-cat.json | 513 ++++++ .../fixtures/cubes/scale_means/ca-x-mr.json | 1390 +++++++++++++++++ .../scale_means/cat-x-ca-cat-x-items.json | 1033 ++++++++++++ .../fixtures/cubes/scale_means/cat-x-cat.json | 308 ++++ .../fixtures/cubes/scale_means/cat-x-mr.json | 539 +++++++ .../fixtures/cubes/scale_means/mr-x-cat.json | 539 +++++++ .../cubes/scale_means/univariate-cat.json | 140 ++ tests/integration/test_scale_means.py | 81 + 12 files changed, 5086 insertions(+), 9 deletions(-) create mode 100644 tests/integration/fixtures/cubes/__init__.py create mode 100644 tests/integration/fixtures/cubes/scale_means/__init__.py create mode 100644 tests/integration/fixtures/cubes/scale_means/ca-cat-x-items.json create mode 100644 tests/integration/fixtures/cubes/scale_means/ca-items-x-cat.json create mode 100644 tests/integration/fixtures/cubes/scale_means/ca-x-mr.json create mode 100644 tests/integration/fixtures/cubes/scale_means/cat-x-ca-cat-x-items.json create mode 100644 tests/integration/fixtures/cubes/scale_means/cat-x-cat.json create mode 100644 tests/integration/fixtures/cubes/scale_means/cat-x-mr.json create mode 100644 tests/integration/fixtures/cubes/scale_means/mr-x-cat.json create mode 100644 tests/integration/fixtures/cubes/scale_means/univariate-cat.json create mode 100644 tests/integration/test_scale_means.py diff --git a/src/cr/cube/measures/scale_means.py b/src/cr/cube/measures/scale_means.py index cd5343a17..f6132fb30 100644 --- a/src/cr/cube/measures/scale_means.py +++ b/src/cr/cube/measures/scale_means.py @@ -20,15 +20,16 @@ def data(self): if self._cube.has_mr and not self._cube.is_double_mr: # axis = 1 - self._cube.mr_dim_ind + # axis = 
self._cube.dim_types[-2:].index('categorical') axis = self._cube.dim_types.index('categorical') return np.sum(contents, axis) / np.sum(table, axis) if self.valid_inds.all(): return np.sum(contents, self.axis) / self._cube.margin(self.axis) - else: - num = np.sum(contents[self.contents_inds], self.axis) - den = np.sum(table[self.contents_inds], self.axis) - return num / den + + num = np.sum(contents[self.contents_inds], self.axis) + den = np.sum(table[self.contents_inds], self.axis) + return num / den @lazyproperty def axis(self): @@ -44,11 +45,9 @@ def axis(self): def values(self): '''Get num values for means calculation.''' return np.array([ - dim.values for dim in self._cube.dimensions + dim.values for dim in self._cube.dimensions[-2:] if dim.values and any(~np.isnan(dim.values)) - ][:1][-1]) - # TODO: Refactor this indexing hack - # ][int(len(self._cube.dimensions) > 2)]) + ][0]) @lazyproperty def valid_inds(self): @@ -67,7 +66,8 @@ def _inner_prod(self, contents, values): inflate_values = ( self._cube.ca_dim_ind == 0 and len(contents.shape) == 3 or self._cube.mr_dim_ind == 1 or - self._cube.ca_dim_ind == 1 and len(contents.shape) < 3 + self._cube.ca_dim_ind == 1 and len(contents.shape) < 3 or + self._cube.ca_dim_ind == 2 and len(contents.shape) == 3 ) if inflate_values: values = values[:, np.newaxis] diff --git a/tests/integration/fixtures/cubes/__init__.py b/tests/integration/fixtures/cubes/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/tests/integration/fixtures/cubes/scale_means/__init__.py b/tests/integration/fixtures/cubes/scale_means/__init__.py new file mode 100644 index 000000000..5f9f9debe --- /dev/null +++ b/tests/integration/fixtures/cubes/scale_means/__init__.py @@ -0,0 +1,21 @@ +import os +from functools import partial + +from cr.cube.utils import load_fixture + +CUBES_DIR = os.path.dirname(os.path.abspath(__file__)) + + +def _load(cube_file): + load = partial(load_fixture, CUBES_DIR) + return load(cube_file) + + 
+CA_CAT_X_ITEMS = _load('ca-cat-x-items.json') +CA_ITEMS_X_CAT = _load('ca-items-x-cat.json') +CA_X_MR = _load('ca-x-mr.json') +CAT_X_CA_CAT_X_ITEMS = _load('cat-x-ca-cat-x-items.json') +CAT_X_CAT = _load('cat-x-cat.json') +CAT_X_MR = _load('cat-x-mr.json') +MR_X_CAT = _load('mr-x-cat.json') +UNIVARIATE_CAT = _load('univariate-cat.json') diff --git a/tests/integration/fixtures/cubes/scale_means/ca-cat-x-items.json b/tests/integration/fixtures/cubes/scale_means/ca-cat-x-items.json new file mode 100644 index 000000000..140149c78 --- /dev/null +++ b/tests/integration/fixtures/cubes/scale_means/ca-cat-x-items.json @@ -0,0 +1,513 @@ +{ + "element": "shoji:view", + "self": "https://app.crunch.io/api/datasets/ddc1b8a25c454689911d3d7a59c97aee/cube/?filter=%5B%5D&query=%7B%22dimensions%22:%5B%7B%22variable%22:%22https:%2F%2Fapp.crunch.io%2Fapi%2Fdatasets%2Fddc1b8a25c454689911d3d7a59c97aee%2Fvariables%2F0000d1%2F%22%7D,%7B%22each%22:%22https:%2F%2Fapp.crunch.io%2Fapi%2Fdatasets%2Fddc1b8a25c454689911d3d7a59c97aee%2Fvariables%2F0000d1%2F%22%7D%5D,%22measures%22:%7B%22count%22:%7B%22function%22:%22cube_count%22,%22args%22:%5B%5D%7D%7D,%22weight%22:null%7D", + "value": { + "query": { + "measures": { + "count": { + "function": "cube_count", + "args": [] + } + }, + "dimensions": [ + { + "variable": "https://app.crunch.io/api/datasets/ddc1b8a25c454689911d3d7a59c97aee/variables/0000d1/" + }, + { + "each": "https://app.crunch.io/api/datasets/ddc1b8a25c454689911d3d7a59c97aee/variables/0000d1/" + } + ], + "weight": null + }, + "query_environment": { + "filter": [] + }, + "result": { + "dimensions": [ + { + "references": { + "subreferences": [ + { + "alias": "stalin", + "name": "იოსებ სტალინი (Stalin)", + "description": "cabatt_1" + }, + { + "alias": "trotsky", + "name": "Лев Троцкий (Trotsky)", + "description": "cabatt_6" + }, + { + "alias": "kalinin", + "name": "Михаи́л Ива́нович Кали́нин (Kalinin)", + "description": "cabatt_3" + }, + { + "alias": "lenin", + "name": "Владимир Ленин 
(Lenin)", + "description": "cabatt_7" + } + ], + "notes": "A categorical array variable where the missing categories are interspersed throughout the non-missing categories", + "alias": "bolshevik_hair", + "description": "In your opinion, each of these famous Bolshevik's facial hair is...", + "name": "Bolshevik facial hair" + }, + "derived": false, + "type": { + "ordinal": false, + "subvariables": [ + "0007", + "0008", + "0009", + "000a" + ], + "class": "categorical", + "categories": [ + { + "numeric_value": 32766, + "selected": false, + "id": 32766, + "missing": true, + "name": "skipped" + }, + { + "numeric_value": 32767, + "selected": false, + "id": 32767, + "missing": true, + "name": "not asked" + }, + { + "numeric_value": 1, + "selected": false, + "id": 1, + "missing": false, + "name": "Too much" + }, + { + "numeric_value": 5, + "selected": false, + "id": 5, + "missing": false, + "name": "Too little" + }, + { + "numeric_value": 0, + "selected": false, + "id": 0, + "missing": false, + "name": "The right amount" + }, + { + "numeric_value": 4, + "selected": false, + "id": 4, + "missing": false, + "name": "Not sure" + }, + { + "numeric_value": null, + "selected": false, + "id": -1, + "missing": true, + "name": "No Data" + } + ] + } + }, + { + "derived": true, + "references": { + "subreferences": [ + { + "alias": "stalin", + "name": "იოსებ სტალინი (Stalin)", + "description": "cabatt_1" + }, + { + "alias": "trotsky", + "name": "Лев Троцкий (Trotsky)", + "description": "cabatt_6" + }, + { + "alias": "kalinin", + "name": "Михаи́л Ива́нович Кали́нин (Kalinin)", + "description": "cabatt_3" + }, + { + "alias": "lenin", + "name": "Владимир Ленин (Lenin)", + "description": "cabatt_7" + } + ], + "notes": "A categorical array variable where the missing categories are interspersed throughout the non-missing categories", + "description": "In your opinion, each of these famous Bolshevik's facial hair is...", + "name": "Bolshevik facial hair", + "alias": "bolshevik_hair" + }, + 
"type": { + "subtype": { + "class": "variable" + }, + "elements": [ + { + "id": 1, + "value": { + "derived": false, + "references": { + "alias": "stalin", + "name": "იოსებ სტალინი (Stalin)", + "description": "cabatt_1" + }, + "id": "0007", + "type": { + "ordinal": false, + "class": "categorical", + "categories": [ + { + "numeric_value": 32766, + "selected": false, + "id": 32766, + "missing": true, + "name": "skipped" + }, + { + "numeric_value": 32767, + "selected": false, + "id": 32767, + "missing": true, + "name": "not asked" + }, + { + "numeric_value": 1, + "selected": false, + "id": 1, + "missing": false, + "name": "Too much" + }, + { + "numeric_value": 5, + "selected": false, + "id": 5, + "missing": false, + "name": "Too little" + }, + { + "numeric_value": 0, + "selected": false, + "id": 0, + "missing": false, + "name": "The right amount" + }, + { + "numeric_value": 4, + "selected": false, + "id": 4, + "missing": false, + "name": "Not sure" + }, + { + "numeric_value": null, + "selected": false, + "id": -1, + "missing": true, + "name": "No Data" + } + ] + } + }, + "missing": false + }, + { + "id": 2, + "value": { + "derived": false, + "references": { + "alias": "trotsky", + "name": "Лев Троцкий (Trotsky)", + "description": "cabatt_6" + }, + "id": "0008", + "type": { + "ordinal": false, + "class": "categorical", + "categories": [ + { + "numeric_value": 32766, + "selected": false, + "id": 32766, + "missing": true, + "name": "skipped" + }, + { + "numeric_value": 32767, + "selected": false, + "id": 32767, + "missing": true, + "name": "not asked" + }, + { + "numeric_value": 1, + "selected": false, + "id": 1, + "missing": false, + "name": "Too much" + }, + { + "numeric_value": 5, + "selected": false, + "id": 5, + "missing": false, + "name": "Too little" + }, + { + "numeric_value": 0, + "selected": false, + "id": 0, + "missing": false, + "name": "The right amount" + }, + { + "numeric_value": 4, + "selected": false, + "id": 4, + "missing": false, + "name": "Not sure" + 
}, + { + "numeric_value": null, + "selected": false, + "id": -1, + "missing": true, + "name": "No Data" + } + ] + } + }, + "missing": false + }, + { + "id": 3, + "value": { + "derived": false, + "references": { + "alias": "kalinin", + "name": "Михаи́л Ива́нович Кали́нин (Kalinin)", + "description": "cabatt_3" + }, + "id": "0009", + "type": { + "ordinal": false, + "class": "categorical", + "categories": [ + { + "numeric_value": 32766, + "selected": false, + "id": 32766, + "missing": true, + "name": "skipped" + }, + { + "numeric_value": 32767, + "selected": false, + "id": 32767, + "missing": true, + "name": "not asked" + }, + { + "numeric_value": 1, + "selected": false, + "id": 1, + "missing": false, + "name": "Too much" + }, + { + "numeric_value": 5, + "selected": false, + "id": 5, + "missing": false, + "name": "Too little" + }, + { + "numeric_value": 0, + "selected": false, + "id": 0, + "missing": false, + "name": "The right amount" + }, + { + "numeric_value": 4, + "selected": false, + "id": 4, + "missing": false, + "name": "Not sure" + }, + { + "numeric_value": null, + "selected": false, + "id": -1, + "missing": true, + "name": "No Data" + } + ] + } + }, + "missing": false + }, + { + "id": 4, + "value": { + "derived": false, + "references": { + "alias": "lenin", + "name": "Владимир Ленин (Lenin)", + "description": "cabatt_7" + }, + "id": "000a", + "type": { + "ordinal": false, + "class": "categorical", + "categories": [ + { + "numeric_value": 32766, + "selected": false, + "id": 32766, + "missing": true, + "name": "skipped" + }, + { + "numeric_value": 32767, + "selected": false, + "id": 32767, + "missing": true, + "name": "not asked" + }, + { + "numeric_value": 1, + "selected": false, + "id": 1, + "missing": false, + "name": "Too much" + }, + { + "numeric_value": 5, + "selected": false, + "id": 5, + "missing": false, + "name": "Too little" + }, + { + "numeric_value": 0, + "selected": false, + "id": 0, + "missing": false, + "name": "The right amount" + }, + { + 
"numeric_value": 4, + "selected": false, + "id": 4, + "missing": false, + "name": "Not sure" + }, + { + "numeric_value": null, + "selected": false, + "id": -1, + "missing": true, + "name": "No Data" + } + ] + } + }, + "missing": false + } + ], + "class": "enum" + } + } + ], + "missing": 2, + "measures": { + "count": { + "data": [ + 13, + 122, + 19, + 36, + 0, + 0, + 0, + 0, + 841, + 68, + 67, + 50, + 112, + 785, + 894, + 873, + 426, + 487, + 437, + 452, + 270, + 200, + 245, + 251, + 0, + 0, + 0, + 0 + ], + "n_missing": 2, + "metadata": { + "references": {}, + "derived": true, + "type": { + "integer": true, + "missing_rules": {}, + "missing_reasons": { + "No Data": -1 + }, + "class": "numeric" + } + } + } + }, + "element": "crunch:cube", + "counts": [ + 13, + 122, + 19, + 36, + 0, + 0, + 0, + 0, + 841, + 68, + 67, + 50, + 112, + 785, + 894, + 873, + 426, + 487, + 437, + 452, + 270, + 200, + 245, + 251, + 0, + 0, + 0, + 0 + ], + "n": 1662 + } + } +} \ No newline at end of file diff --git a/tests/integration/fixtures/cubes/scale_means/ca-items-x-cat.json b/tests/integration/fixtures/cubes/scale_means/ca-items-x-cat.json new file mode 100644 index 000000000..0e634cb5a --- /dev/null +++ b/tests/integration/fixtures/cubes/scale_means/ca-items-x-cat.json @@ -0,0 +1,513 @@ +{ + "element": "shoji:view", + "self": "https://app.crunch.io/api/datasets/ddc1b8a25c454689911d3d7a59c97aee/cube/?filter=%5B%5D&query=%7B%22dimensions%22:%5B%7B%22each%22:%22https:%2F%2Fapp.crunch.io%2Fapi%2Fdatasets%2Fddc1b8a25c454689911d3d7a59c97aee%2Fvariables%2F0000d1%2F%22%7D,%7B%22variable%22:%22https:%2F%2Fapp.crunch.io%2Fapi%2Fdatasets%2Fddc1b8a25c454689911d3d7a59c97aee%2Fvariables%2F0000d1%2F%22%7D%5D,%22measures%22:%7B%22count%22:%7B%22function%22:%22cube_count%22,%22args%22:%5B%5D%7D%7D,%22weight%22:null%7D", + "value": { + "query": { + "measures": { + "count": { + "function": "cube_count", + "args": [] + } + }, + "dimensions": [ + { + "each": 
"https://app.crunch.io/api/datasets/ddc1b8a25c454689911d3d7a59c97aee/variables/0000d1/" + }, + { + "variable": "https://app.crunch.io/api/datasets/ddc1b8a25c454689911d3d7a59c97aee/variables/0000d1/" + } + ], + "weight": null + }, + "query_environment": { + "filter": [] + }, + "result": { + "dimensions": [ + { + "derived": true, + "references": { + "subreferences": [ + { + "alias": "stalin", + "name": "იოსებ სტალინი (Stalin)", + "description": "cabatt_1" + }, + { + "alias": "trotsky", + "name": "Лев Троцкий (Trotsky)", + "description": "cabatt_6" + }, + { + "alias": "kalinin", + "name": "Михаи́л Ива́нович Кали́нин (Kalinin)", + "description": "cabatt_3" + }, + { + "alias": "lenin", + "name": "Владимир Ленин (Lenin)", + "description": "cabatt_7" + } + ], + "notes": "A categorical array variable where the missing categories are interspersed throughout the non-missing categories", + "description": "In your opinion, each of these famous Bolshevik's facial hair is...", + "name": "Bolshevik facial hair", + "alias": "bolshevik_hair" + }, + "type": { + "subtype": { + "class": "variable" + }, + "elements": [ + { + "id": 1, + "value": { + "derived": false, + "references": { + "alias": "stalin", + "name": "იოსებ სტალინი (Stalin)", + "description": "cabatt_1" + }, + "id": "0007", + "type": { + "ordinal": false, + "class": "categorical", + "categories": [ + { + "numeric_value": 32766, + "selected": false, + "id": 32766, + "missing": true, + "name": "skipped" + }, + { + "numeric_value": 32767, + "selected": false, + "id": 32767, + "missing": true, + "name": "not asked" + }, + { + "numeric_value": 1, + "selected": false, + "id": 1, + "missing": false, + "name": "Too much" + }, + { + "numeric_value": 5, + "selected": false, + "id": 5, + "missing": false, + "name": "Too little" + }, + { + "numeric_value": 0, + "selected": false, + "id": 0, + "missing": false, + "name": "The right amount" + }, + { + "numeric_value": 4, + "selected": false, + "id": 4, + "missing": false, + "name": 
"Not sure" + }, + { + "numeric_value": null, + "selected": false, + "id": -1, + "missing": true, + "name": "No Data" + } + ] + } + }, + "missing": false + }, + { + "id": 2, + "value": { + "derived": false, + "references": { + "alias": "trotsky", + "name": "Лев Троцкий (Trotsky)", + "description": "cabatt_6" + }, + "id": "0008", + "type": { + "ordinal": false, + "class": "categorical", + "categories": [ + { + "numeric_value": 32766, + "selected": false, + "id": 32766, + "missing": true, + "name": "skipped" + }, + { + "numeric_value": 32767, + "selected": false, + "id": 32767, + "missing": true, + "name": "not asked" + }, + { + "numeric_value": 1, + "selected": false, + "id": 1, + "missing": false, + "name": "Too much" + }, + { + "numeric_value": 5, + "selected": false, + "id": 5, + "missing": false, + "name": "Too little" + }, + { + "numeric_value": 0, + "selected": false, + "id": 0, + "missing": false, + "name": "The right amount" + }, + { + "numeric_value": 4, + "selected": false, + "id": 4, + "missing": false, + "name": "Not sure" + }, + { + "numeric_value": null, + "selected": false, + "id": -1, + "missing": true, + "name": "No Data" + } + ] + } + }, + "missing": false + }, + { + "id": 3, + "value": { + "derived": false, + "references": { + "alias": "kalinin", + "name": "Михаи́л Ива́нович Кали́нин (Kalinin)", + "description": "cabatt_3" + }, + "id": "0009", + "type": { + "ordinal": false, + "class": "categorical", + "categories": [ + { + "numeric_value": 32766, + "selected": false, + "id": 32766, + "missing": true, + "name": "skipped" + }, + { + "numeric_value": 32767, + "selected": false, + "id": 32767, + "missing": true, + "name": "not asked" + }, + { + "numeric_value": 1, + "selected": false, + "id": 1, + "missing": false, + "name": "Too much" + }, + { + "numeric_value": 5, + "selected": false, + "id": 5, + "missing": false, + "name": "Too little" + }, + { + "numeric_value": 0, + "selected": false, + "id": 0, + "missing": false, + "name": "The right amount" + 
}, + { + "numeric_value": 4, + "selected": false, + "id": 4, + "missing": false, + "name": "Not sure" + }, + { + "numeric_value": null, + "selected": false, + "id": -1, + "missing": true, + "name": "No Data" + } + ] + } + }, + "missing": false + }, + { + "id": 4, + "value": { + "derived": false, + "references": { + "alias": "lenin", + "name": "Владимир Ленин (Lenin)", + "description": "cabatt_7" + }, + "id": "000a", + "type": { + "ordinal": false, + "class": "categorical", + "categories": [ + { + "numeric_value": 32766, + "selected": false, + "id": 32766, + "missing": true, + "name": "skipped" + }, + { + "numeric_value": 32767, + "selected": false, + "id": 32767, + "missing": true, + "name": "not asked" + }, + { + "numeric_value": 1, + "selected": false, + "id": 1, + "missing": false, + "name": "Too much" + }, + { + "numeric_value": 5, + "selected": false, + "id": 5, + "missing": false, + "name": "Too little" + }, + { + "numeric_value": 0, + "selected": false, + "id": 0, + "missing": false, + "name": "The right amount" + }, + { + "numeric_value": 4, + "selected": false, + "id": 4, + "missing": false, + "name": "Not sure" + }, + { + "numeric_value": null, + "selected": false, + "id": -1, + "missing": true, + "name": "No Data" + } + ] + } + }, + "missing": false + } + ], + "class": "enum" + } + }, + { + "references": { + "subreferences": [ + { + "alias": "stalin", + "name": "იოსებ სტალინი (Stalin)", + "description": "cabatt_1" + }, + { + "alias": "trotsky", + "name": "Лев Троцкий (Trotsky)", + "description": "cabatt_6" + }, + { + "alias": "kalinin", + "name": "Михаи́л Ива́нович Кали́нин (Kalinin)", + "description": "cabatt_3" + }, + { + "alias": "lenin", + "name": "Владимир Ленин (Lenin)", + "description": "cabatt_7" + } + ], + "notes": "A categorical array variable where the missing categories are interspersed throughout the non-missing categories", + "alias": "bolshevik_hair", + "description": "In your opinion, each of these famous Bolshevik's facial hair is...", + 
"name": "Bolshevik facial hair" + }, + "derived": false, + "type": { + "ordinal": false, + "subvariables": [ + "0007", + "0008", + "0009", + "000a" + ], + "class": "categorical", + "categories": [ + { + "numeric_value": 32766, + "selected": false, + "id": 32766, + "missing": true, + "name": "skipped" + }, + { + "numeric_value": 32767, + "selected": false, + "id": 32767, + "missing": true, + "name": "not asked" + }, + { + "numeric_value": 1, + "selected": false, + "id": 1, + "missing": false, + "name": "Too much" + }, + { + "numeric_value": 5, + "selected": false, + "id": 5, + "missing": false, + "name": "Too little" + }, + { + "numeric_value": 0, + "selected": false, + "id": 0, + "missing": false, + "name": "The right amount" + }, + { + "numeric_value": 4, + "selected": false, + "id": 4, + "missing": false, + "name": "Not sure" + }, + { + "numeric_value": null, + "selected": false, + "id": -1, + "missing": true, + "name": "No Data" + } + ] + } + } + ], + "missing": 2, + "measures": { + "count": { + "data": [ + 13, + 0, + 841, + 112, + 426, + 270, + 0, + 122, + 0, + 68, + 785, + 487, + 200, + 0, + 19, + 0, + 67, + 894, + 437, + 245, + 0, + 36, + 0, + 50, + 873, + 452, + 251, + 0 + ], + "n_missing": 2, + "metadata": { + "references": {}, + "derived": true, + "type": { + "integer": true, + "missing_rules": {}, + "missing_reasons": { + "No Data": -1 + }, + "class": "numeric" + } + } + } + }, + "element": "crunch:cube", + "counts": [ + 13, + 0, + 841, + 112, + 426, + 270, + 0, + 122, + 0, + 68, + 785, + 487, + 200, + 0, + 19, + 0, + 67, + 894, + 437, + 245, + 0, + 36, + 0, + 50, + 873, + 452, + 251, + 0 + ], + "n": 1662 + } + } +} \ No newline at end of file diff --git a/tests/integration/fixtures/cubes/scale_means/ca-x-mr.json b/tests/integration/fixtures/cubes/scale_means/ca-x-mr.json new file mode 100644 index 000000000..770bd1128 --- /dev/null +++ b/tests/integration/fixtures/cubes/scale_means/ca-x-mr.json @@ -0,0 +1,1390 @@ +{ + "element": "shoji:view", + "self": 
"https://app.crunch.io/api/datasets/ddc1b8a25c454689911d3d7a59c97aee/cube/?filter=%5B%5D&query=%7B%22dimensions%22:%5B%7B%22each%22:%22https:%2F%2Fapp.crunch.io%2Fapi%2Fdatasets%2Fddc1b8a25c454689911d3d7a59c97aee%2Fvariables%2F0000d1%2F%22%7D,%7B%22variable%22:%22https:%2F%2Fapp.crunch.io%2Fapi%2Fdatasets%2Fddc1b8a25c454689911d3d7a59c97aee%2Fvariables%2F0000d1%2F%22%7D,%7B%22each%22:%22https:%2F%2Fapp.crunch.io%2Fapi%2Fdatasets%2Fddc1b8a25c454689911d3d7a59c97aee%2Fvariables%2F0000dd%2F%22%7D,%7B%22function%22:%22as_selected%22,%22args%22:%5B%7B%22variable%22:%22https:%2F%2Fapp.crunch.io%2Fapi%2Fdatasets%2Fddc1b8a25c454689911d3d7a59c97aee%2Fvariables%2F0000dd%2F%22%7D%5D%7D%5D,%22measures%22:%7B%22count%22:%7B%22function%22:%22cube_count%22,%22args%22:%5B%5D%7D%7D,%22weight%22:null%7D", + "value": { + "query": { + "measures": { + "count": { + "function": "cube_count", + "args": [] + } + }, + "dimensions": [ + { + "each": "https://app.crunch.io/api/datasets/ddc1b8a25c454689911d3d7a59c97aee/variables/0000d1/" + }, + { + "variable": "https://app.crunch.io/api/datasets/ddc1b8a25c454689911d3d7a59c97aee/variables/0000d1/" + }, + { + "each": "https://app.crunch.io/api/datasets/ddc1b8a25c454689911d3d7a59c97aee/variables/0000dd/" + }, + { + "function": "as_selected", + "args": [ + { + "variable": "https://app.crunch.io/api/datasets/ddc1b8a25c454689911d3d7a59c97aee/variables/0000dd/" + } + ] + } + ], + "weight": null + }, + "query_environment": { + "filter": [] + }, + "result": { + "dimensions": [ + { + "derived": true, + "references": { + "subreferences": [ + { + "alias": "stalin", + "name": "იოსებ სტალინი (Stalin)", + "description": "cabatt_1" + }, + { + "alias": "trotsky", + "name": "Лев Троцкий (Trotsky)", + "description": "cabatt_6" + }, + { + "alias": "kalinin", + "name": "Михаи́л Ива́нович Кали́нин (Kalinin)", + "description": "cabatt_3" + }, + { + "alias": "lenin", + "name": "Владимир Ленин (Lenin)", + "description": "cabatt_7" + } + ], + "notes": "A categorical array 
variable where the missing categories are interspersed throughout the non-missing categories", + "description": "In your opinion, each of these famous Bolshevik's facial hair is...", + "name": "Bolshevik facial hair", + "alias": "bolshevik_hair" + }, + "type": { + "subtype": { + "class": "variable" + }, + "elements": [ + { + "id": 1, + "value": { + "derived": false, + "references": { + "alias": "stalin", + "name": "იოსებ სტალინი (Stalin)", + "description": "cabatt_1" + }, + "id": "0007", + "type": { + "ordinal": false, + "class": "categorical", + "categories": [ + { + "numeric_value": 32766, + "selected": false, + "id": 32766, + "missing": true, + "name": "skipped" + }, + { + "numeric_value": 32767, + "selected": false, + "id": 32767, + "missing": true, + "name": "not asked" + }, + { + "numeric_value": 1, + "selected": false, + "id": 1, + "missing": false, + "name": "Too much" + }, + { + "numeric_value": 5, + "selected": false, + "id": 5, + "missing": false, + "name": "Too little" + }, + { + "numeric_value": 0, + "selected": false, + "id": 0, + "missing": false, + "name": "The right amount" + }, + { + "numeric_value": 4, + "selected": false, + "id": 4, + "missing": false, + "name": "Not sure" + }, + { + "numeric_value": null, + "selected": false, + "id": -1, + "missing": true, + "name": "No Data" + } + ] + } + }, + "missing": false + }, + { + "id": 2, + "value": { + "derived": false, + "references": { + "alias": "trotsky", + "name": "Лев Троцкий (Trotsky)", + "description": "cabatt_6" + }, + "id": "0008", + "type": { + "ordinal": false, + "class": "categorical", + "categories": [ + { + "numeric_value": 32766, + "selected": false, + "id": 32766, + "missing": true, + "name": "skipped" + }, + { + "numeric_value": 32767, + "selected": false, + "id": 32767, + "missing": true, + "name": "not asked" + }, + { + "numeric_value": 1, + "selected": false, + "id": 1, + "missing": false, + "name": "Too much" + }, + { + "numeric_value": 5, + "selected": false, + "id": 5, + 
"missing": false, + "name": "Too little" + }, + { + "numeric_value": 0, + "selected": false, + "id": 0, + "missing": false, + "name": "The right amount" + }, + { + "numeric_value": 4, + "selected": false, + "id": 4, + "missing": false, + "name": "Not sure" + }, + { + "numeric_value": null, + "selected": false, + "id": -1, + "missing": true, + "name": "No Data" + } + ] + } + }, + "missing": false + }, + { + "id": 3, + "value": { + "derived": false, + "references": { + "alias": "kalinin", + "name": "Михаи́л Ива́нович Кали́нин (Kalinin)", + "description": "cabatt_3" + }, + "id": "0009", + "type": { + "ordinal": false, + "class": "categorical", + "categories": [ + { + "numeric_value": 32766, + "selected": false, + "id": 32766, + "missing": true, + "name": "skipped" + }, + { + "numeric_value": 32767, + "selected": false, + "id": 32767, + "missing": true, + "name": "not asked" + }, + { + "numeric_value": 1, + "selected": false, + "id": 1, + "missing": false, + "name": "Too much" + }, + { + "numeric_value": 5, + "selected": false, + "id": 5, + "missing": false, + "name": "Too little" + }, + { + "numeric_value": 0, + "selected": false, + "id": 0, + "missing": false, + "name": "The right amount" + }, + { + "numeric_value": 4, + "selected": false, + "id": 4, + "missing": false, + "name": "Not sure" + }, + { + "numeric_value": null, + "selected": false, + "id": -1, + "missing": true, + "name": "No Data" + } + ] + } + }, + "missing": false + }, + { + "id": 4, + "value": { + "derived": false, + "references": { + "alias": "lenin", + "name": "Владимир Ленин (Lenin)", + "description": "cabatt_7" + }, + "id": "000a", + "type": { + "ordinal": false, + "class": "categorical", + "categories": [ + { + "numeric_value": 32766, + "selected": false, + "id": 32766, + "missing": true, + "name": "skipped" + }, + { + "numeric_value": 32767, + "selected": false, + "id": 32767, + "missing": true, + "name": "not asked" + }, + { + "numeric_value": 1, + "selected": false, + "id": 1, + "missing": 
false, + "name": "Too much" + }, + { + "numeric_value": 5, + "selected": false, + "id": 5, + "missing": false, + "name": "Too little" + }, + { + "numeric_value": 0, + "selected": false, + "id": 0, + "missing": false, + "name": "The right amount" + }, + { + "numeric_value": 4, + "selected": false, + "id": 4, + "missing": false, + "name": "Not sure" + }, + { + "numeric_value": null, + "selected": false, + "id": -1, + "missing": true, + "name": "No Data" + } + ] + } + }, + "missing": false + } + ], + "class": "enum" + } + }, + { + "references": { + "subreferences": [ + { + "alias": "stalin", + "name": "იოსებ სტალინი (Stalin)", + "description": "cabatt_1" + }, + { + "alias": "trotsky", + "name": "Лев Троцкий (Trotsky)", + "description": "cabatt_6" + }, + { + "alias": "kalinin", + "name": "Михаи́л Ива́нович Кали́нин (Kalinin)", + "description": "cabatt_3" + }, + { + "alias": "lenin", + "name": "Владимир Ленин (Lenin)", + "description": "cabatt_7" + } + ], + "notes": "A categorical array variable where the missing categories are interspersed throughout the non-missing categories", + "alias": "bolshevik_hair", + "description": "In your opinion, each of these famous Bolshevik's facial hair is...", + "name": "Bolshevik facial hair" + }, + "derived": false, + "type": { + "ordinal": false, + "subvariables": [ + "0007", + "0008", + "0009", + "000a" + ], + "class": "categorical", + "categories": [ + { + "numeric_value": 32766, + "selected": false, + "id": 32766, + "missing": true, + "name": "skipped" + }, + { + "numeric_value": 32767, + "selected": false, + "id": 32767, + "missing": true, + "name": "not asked" + }, + { + "numeric_value": 1, + "selected": false, + "id": 1, + "missing": false, + "name": "Too much" + }, + { + "numeric_value": 5, + "selected": false, + "id": 5, + "missing": false, + "name": "Too little" + }, + { + "numeric_value": 0, + "selected": false, + "id": 0, + "missing": false, + "name": "The right amount" + }, + { + "numeric_value": 4, + "selected": false, 
+ "id": 4, + "missing": false, + "name": "Not sure" + }, + { + "numeric_value": null, + "selected": false, + "id": -1, + "missing": true, + "name": "No Data" + } + ] + } + }, + { + "derived": true, + "references": { + "subreferences": [ + { + "alias": "eurasia", + "name": "Eurasia", + "description": "union_hhold_1" + }, + { + "alias": "oceania", + "name": "Oceania", + "description": "union_hhold_2" + }, + { + "alias": "eastasia", + "name": "Eastasia", + "description": "union_hhold_3" + }, + { + "alias": "disputed", + "name": "Disputed" + } + ], + "notes": "A multiple response variable, where one item has no responses", + "alias": "1984_countries", + "description": "Which of the following countries from 1984 would you live in? (select all that apply)", + "name": "Countries from 1984" + }, + "type": { + "subtype": { + "class": "variable" + }, + "elements": [ + { + "id": 1, + "value": { + "derived": false, + "references": { + "alias": "eurasia", + "name": "Eurasia", + "description": "union_hhold_1" + }, + "id": "00c5", + "type": { + "ordinal": false, + "class": "categorical", + "categories": [ + { + "numeric_value": 1, + "selected": true, + "id": 1, + "missing": false, + "name": "Selected" + }, + { + "numeric_value": 0, + "missing": false, + "id": 0, + "name": "Other" + }, + { + "numeric_value": null, + "missing": true, + "id": -1, + "name": "No Data" + } + ] + } + }, + "missing": false + }, + { + "id": 2, + "value": { + "derived": false, + "references": { + "alias": "oceania", + "name": "Oceania", + "description": "union_hhold_2" + }, + "id": "00c6", + "type": { + "ordinal": false, + "class": "categorical", + "categories": [ + { + "numeric_value": 1, + "selected": true, + "id": 1, + "missing": false, + "name": "Selected" + }, + { + "numeric_value": 0, + "missing": false, + "id": 0, + "name": "Other" + }, + { + "numeric_value": null, + "missing": true, + "id": -1, + "name": "No Data" + } + ] + } + }, + "missing": false + }, + { + "id": 3, + "value": { + "derived": 
false, + "references": { + "alias": "eastasia", + "name": "Eastasia", + "description": "union_hhold_3" + }, + "id": "00c7", + "type": { + "ordinal": false, + "class": "categorical", + "categories": [ + { + "numeric_value": 1, + "selected": true, + "id": 1, + "missing": false, + "name": "Selected" + }, + { + "numeric_value": 0, + "missing": false, + "id": 0, + "name": "Other" + }, + { + "numeric_value": null, + "missing": true, + "id": -1, + "name": "No Data" + } + ] + } + }, + "missing": false + }, + { + "id": 4, + "value": { + "derived": false, + "references": { + "alias": "disputed", + "name": "Disputed" + }, + "id": "de5314cea98b44eb9c243a86e06d1476", + "type": { + "ordinal": false, + "class": "categorical", + "categories": [ + { + "numeric_value": 1, + "selected": true, + "id": 1, + "missing": false, + "name": "Selected" + }, + { + "numeric_value": 0, + "missing": false, + "id": 0, + "name": "Other" + }, + { + "numeric_value": null, + "missing": true, + "id": -1, + "name": "No Data" + } + ] + } + }, + "missing": false + } + ], + "class": "enum" + } + }, + { + "references": { + "subreferences": [ + { + "alias": "eurasia", + "name": "Eurasia", + "description": "union_hhold_1" + }, + { + "alias": "oceania", + "name": "Oceania", + "description": "union_hhold_2" + }, + { + "alias": "eastasia", + "name": "Eastasia", + "description": "union_hhold_3" + }, + { + "alias": "disputed", + "name": "Disputed" + } + ], + "notes": "A multiple response variable, where one item has no responses", + "description": "Which of the following countries from 1984 would you live in? 
(select all that apply)", + "name": "Countries from 1984", + "alias": "1984_countries" + }, + "derived": true, + "type": { + "ordinal": false, + "subvariables": [ + "00c5", + "00c6", + "00c7", + "de5314cea98b44eb9c243a86e06d1476" + ], + "class": "categorical", + "categories": [ + { + "numeric_value": 1, + "selected": true, + "id": 1, + "missing": false, + "name": "Selected" + }, + { + "numeric_value": 0, + "missing": false, + "id": 0, + "name": "Other" + }, + { + "numeric_value": null, + "missing": true, + "id": -1, + "name": "No Data" + } + ] + } + } + ], + "missing": 2, + "measures": { + "count": { + "data": [ + 2, + 11, + 0, + 1, + 12, + 0, + 10, + 3, + 0, + 0, + 13, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 85, + 756, + 0, + 73, + 768, + 0, + 692, + 149, + 0, + 0, + 841, + 0, + 10, + 102, + 0, + 19, + 93, + 0, + 83, + 29, + 0, + 0, + 112, + 0, + 34, + 392, + 0, + 27, + 399, + 0, + 368, + 58, + 0, + 0, + 426, + 0, + 12, + 258, + 0, + 21, + 249, + 0, + 236, + 34, + 0, + 0, + 270, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 17, + 105, + 0, + 20, + 102, + 0, + 86, + 36, + 0, + 0, + 122, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 8, + 60, + 0, + 8, + 60, + 0, + 52, + 16, + 0, + 0, + 68, + 0, + 74, + 711, + 0, + 68, + 717, + 0, + 651, + 134, + 0, + 0, + 785, + 0, + 34, + 453, + 0, + 38, + 449, + 0, + 417, + 70, + 0, + 0, + 487, + 0, + 10, + 190, + 0, + 7, + 193, + 0, + 183, + 17, + 0, + 0, + 200, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 2, + 17, + 0, + 3, + 16, + 0, + 14, + 5, + 0, + 0, + 19, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 8, + 59, + 0, + 17, + 50, + 0, + 43, + 24, + 0, + 0, + 67, + 0, + 83, + 811, + 0, + 74, + 820, + 0, + 745, + 149, + 0, + 0, + 894, + 0, + 39, + 398, + 0, + 32, + 405, + 0, + 368, + 69, + 0, + 0, + 437, + 0, + 11, + 234, + 0, + 15, + 230, + 0, + 219, + 26, + 0, + 0, + 245, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, 
+ 0, + 0, + 0, + 0, + 7, + 29, + 0, + 6, + 30, + 0, + 23, + 13, + 0, + 0, + 36, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 7, + 43, + 0, + 5, + 45, + 0, + 39, + 11, + 0, + 0, + 50, + 0, + 90, + 783, + 0, + 75, + 798, + 0, + 717, + 156, + 0, + 0, + 873, + 0, + 33, + 419, + 0, + 37, + 415, + 0, + 384, + 68, + 0, + 0, + 452, + 0, + 6, + 245, + 0, + 18, + 233, + 0, + 226, + 25, + 0, + 0, + 251, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0 + ], + "n_missing": 2, + "metadata": { + "references": {}, + "derived": true, + "type": { + "integer": true, + "missing_rules": {}, + "missing_reasons": { + "No Data": -1 + }, + "class": "numeric" + } + } + } + }, + "element": "crunch:cube", + "counts": [ + 2, + 11, + 0, + 1, + 12, + 0, + 10, + 3, + 0, + 0, + 13, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 85, + 756, + 0, + 73, + 768, + 0, + 692, + 149, + 0, + 0, + 841, + 0, + 10, + 102, + 0, + 19, + 93, + 0, + 83, + 29, + 0, + 0, + 112, + 0, + 34, + 392, + 0, + 27, + 399, + 0, + 368, + 58, + 0, + 0, + 426, + 0, + 12, + 258, + 0, + 21, + 249, + 0, + 236, + 34, + 0, + 0, + 270, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 17, + 105, + 0, + 20, + 102, + 0, + 86, + 36, + 0, + 0, + 122, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 8, + 60, + 0, + 8, + 60, + 0, + 52, + 16, + 0, + 0, + 68, + 0, + 74, + 711, + 0, + 68, + 717, + 0, + 651, + 134, + 0, + 0, + 785, + 0, + 34, + 453, + 0, + 38, + 449, + 0, + 417, + 70, + 0, + 0, + 487, + 0, + 10, + 190, + 0, + 7, + 193, + 0, + 183, + 17, + 0, + 0, + 200, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 2, + 17, + 0, + 3, + 16, + 0, + 14, + 5, + 0, + 0, + 19, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 8, + 59, + 0, + 17, + 50, + 0, + 43, + 24, + 0, + 0, + 67, + 0, + 83, + 811, + 0, + 74, + 820, + 0, + 745, + 149, + 0, + 0, + 894, + 0, + 39, + 398, + 0, + 32, + 405, + 0, + 368, + 69, + 0, + 0, + 
437, + 0, + 11, + 234, + 0, + 15, + 230, + 0, + 219, + 26, + 0, + 0, + 245, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 7, + 29, + 0, + 6, + 30, + 0, + 23, + 13, + 0, + 0, + 36, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 7, + 43, + 0, + 5, + 45, + 0, + 39, + 11, + 0, + 0, + 50, + 0, + 90, + 783, + 0, + 75, + 798, + 0, + 717, + 156, + 0, + 0, + 873, + 0, + 33, + 419, + 0, + 37, + 415, + 0, + 384, + 68, + 0, + 0, + 452, + 0, + 6, + 245, + 0, + 18, + 233, + 0, + 226, + 25, + 0, + 0, + 251, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0 + ], + "n": 1662 + } + } +} \ No newline at end of file diff --git a/tests/integration/fixtures/cubes/scale_means/cat-x-ca-cat-x-items.json b/tests/integration/fixtures/cubes/scale_means/cat-x-ca-cat-x-items.json new file mode 100644 index 000000000..8cbed103d --- /dev/null +++ b/tests/integration/fixtures/cubes/scale_means/cat-x-ca-cat-x-items.json @@ -0,0 +1,1033 @@ +{ + "element": "shoji:view", + "self": "https://app.crunch.io/api/datasets/ddc1b8a25c454689911d3d7a59c97aee/cube/?filter=%5B%5D&query=%7B%22dimensions%22:%5B%7B%22variable%22:%22https:%2F%2Fapp.crunch.io%2Fapi%2Fdatasets%2Fddc1b8a25c454689911d3d7a59c97aee%2Fvariables%2F000026%2F%22%7D,%7B%22variable%22:%22https:%2F%2Fapp.crunch.io%2Fapi%2Fdatasets%2Fddc1b8a25c454689911d3d7a59c97aee%2Fvariables%2F0000d1%2F%22%7D,%7B%22each%22:%22https:%2F%2Fapp.crunch.io%2Fapi%2Fdatasets%2Fddc1b8a25c454689911d3d7a59c97aee%2Fvariables%2F0000d1%2F%22%7D%5D,%22measures%22:%7B%22count%22:%7B%22function%22:%22cube_count%22,%22args%22:%5B%5D%7D%7D,%22weight%22:null%7D", + "value": { + "query": { + "measures": { + "count": { + "function": "cube_count", + "args": [] + } + }, + "dimensions": [ + { + "variable": "https://app.crunch.io/api/datasets/ddc1b8a25c454689911d3d7a59c97aee/variables/000026/" + }, + { + "variable": "https://app.crunch.io/api/datasets/ddc1b8a25c454689911d3d7a59c97aee/variables/0000d1/" + }, + { + "each": 
"https://app.crunch.io/api/datasets/ddc1b8a25c454689911d3d7a59c97aee/variables/0000d1/" + } + ], + "weight": null + }, + "query_environment": { + "filter": [] + }, + "result": { + "dimensions": [ + { + "references": { + "alias": "pasta", + "notes": "A categorical variable", + "name": "Shapes of pasta", + "description": "The geometry of pasta" + }, + "derived": false, + "type": { + "ordinal": false, + "class": "categorical", + "categories": [ + { + "numeric_value": 1, + "missing": false, + "id": 1, + "name": "Bucatini" + }, + { + "numeric_value": 2, + "missing": false, + "id": 2, + "name": "Chitarra" + }, + { + "numeric_value": 0, + "missing": false, + "id": 0, + "name": "Boccoli" + }, + { + "numeric_value": 4, + "missing": false, + "id": 4, + "name": "Orecchiette" + }, + { + "numeric_value": 5, + "missing": false, + "id": 5, + "name": "Quadrefiore" + }, + { + "numeric_value": 6, + "missing": false, + "id": 6, + "name": "Fileja" + }, + { + "numeric_value": 32766, + "missing": true, + "id": 32766, + "name": "Skipped" + }, + { + "numeric_value": 32767, + "missing": true, + "id": 32767, + "name": "Not asked" + }, + { + "numeric_value": null, + "missing": true, + "id": -1, + "name": "No Data" + } + ] + } + }, + { + "references": { + "subreferences": [ + { + "alias": "stalin", + "name": "იოსებ სტალინი (Stalin)", + "description": "cabatt_1" + }, + { + "alias": "trotsky", + "name": "Лев Троцкий (Trotsky)", + "description": "cabatt_6" + }, + { + "alias": "kalinin", + "name": "Михаи́л Ива́нович Кали́нин (Kalinin)", + "description": "cabatt_3" + }, + { + "alias": "lenin", + "name": "Владимир Ленин (Lenin)", + "description": "cabatt_7" + } + ], + "notes": "A categorical array variable where the missing categories are interspersed throughout the non-missing categories", + "alias": "bolshevik_hair", + "description": "In your opinion, each of these famous Bolshevik's facial hair is...", + "name": "Bolshevik facial hair" + }, + "derived": false, + "type": { + "ordinal": false, + 
"subvariables": [ + "0007", + "0008", + "0009", + "000a" + ], + "class": "categorical", + "categories": [ + { + "numeric_value": 32766, + "selected": false, + "id": 32766, + "missing": true, + "name": "skipped" + }, + { + "numeric_value": 32767, + "selected": false, + "id": 32767, + "missing": true, + "name": "not asked" + }, + { + "numeric_value": 1, + "selected": false, + "id": 1, + "missing": false, + "name": "Too much" + }, + { + "numeric_value": 5, + "selected": false, + "id": 5, + "missing": false, + "name": "Too little" + }, + { + "numeric_value": 0, + "selected": false, + "id": 0, + "missing": false, + "name": "The right amount" + }, + { + "numeric_value": 4, + "selected": false, + "id": 4, + "missing": false, + "name": "Not sure" + }, + { + "numeric_value": null, + "selected": false, + "id": -1, + "missing": true, + "name": "No Data" + } + ] + } + }, + { + "derived": true, + "references": { + "subreferences": [ + { + "alias": "stalin", + "name": "იოსებ სტალინი (Stalin)", + "description": "cabatt_1" + }, + { + "alias": "trotsky", + "name": "Лев Троцкий (Trotsky)", + "description": "cabatt_6" + }, + { + "alias": "kalinin", + "name": "Михаи́л Ива́нович Кали́нин (Kalinin)", + "description": "cabatt_3" + }, + { + "alias": "lenin", + "name": "Владимир Ленин (Lenin)", + "description": "cabatt_7" + } + ], + "notes": "A categorical array variable where the missing categories are interspersed throughout the non-missing categories", + "description": "In your opinion, each of these famous Bolshevik's facial hair is...", + "name": "Bolshevik facial hair", + "alias": "bolshevik_hair" + }, + "type": { + "subtype": { + "class": "variable" + }, + "elements": [ + { + "id": 1, + "value": { + "derived": false, + "references": { + "alias": "stalin", + "name": "იოსებ სტალინი (Stalin)", + "description": "cabatt_1" + }, + "id": "0007", + "type": { + "ordinal": false, + "class": "categorical", + "categories": [ + { + "numeric_value": 32766, + "selected": false, + "id": 32766, + 
"missing": true, + "name": "skipped" + }, + { + "numeric_value": 32767, + "selected": false, + "id": 32767, + "missing": true, + "name": "not asked" + }, + { + "numeric_value": 1, + "selected": false, + "id": 1, + "missing": false, + "name": "Too much" + }, + { + "numeric_value": 5, + "selected": false, + "id": 5, + "missing": false, + "name": "Too little" + }, + { + "numeric_value": 0, + "selected": false, + "id": 0, + "missing": false, + "name": "The right amount" + }, + { + "numeric_value": 4, + "selected": false, + "id": 4, + "missing": false, + "name": "Not sure" + }, + { + "numeric_value": null, + "selected": false, + "id": -1, + "missing": true, + "name": "No Data" + } + ] + } + }, + "missing": false + }, + { + "id": 2, + "value": { + "derived": false, + "references": { + "alias": "trotsky", + "name": "Лев Троцкий (Trotsky)", + "description": "cabatt_6" + }, + "id": "0008", + "type": { + "ordinal": false, + "class": "categorical", + "categories": [ + { + "numeric_value": 32766, + "selected": false, + "id": 32766, + "missing": true, + "name": "skipped" + }, + { + "numeric_value": 32767, + "selected": false, + "id": 32767, + "missing": true, + "name": "not asked" + }, + { + "numeric_value": 1, + "selected": false, + "id": 1, + "missing": false, + "name": "Too much" + }, + { + "numeric_value": 5, + "selected": false, + "id": 5, + "missing": false, + "name": "Too little" + }, + { + "numeric_value": 0, + "selected": false, + "id": 0, + "missing": false, + "name": "The right amount" + }, + { + "numeric_value": 4, + "selected": false, + "id": 4, + "missing": false, + "name": "Not sure" + }, + { + "numeric_value": null, + "selected": false, + "id": -1, + "missing": true, + "name": "No Data" + } + ] + } + }, + "missing": false + }, + { + "id": 3, + "value": { + "derived": false, + "references": { + "alias": "kalinin", + "name": "Михаи́л Ива́нович Кали́нин (Kalinin)", + "description": "cabatt_3" + }, + "id": "0009", + "type": { + "ordinal": false, + "class": 
"categorical", + "categories": [ + { + "numeric_value": 32766, + "selected": false, + "id": 32766, + "missing": true, + "name": "skipped" + }, + { + "numeric_value": 32767, + "selected": false, + "id": 32767, + "missing": true, + "name": "not asked" + }, + { + "numeric_value": 1, + "selected": false, + "id": 1, + "missing": false, + "name": "Too much" + }, + { + "numeric_value": 5, + "selected": false, + "id": 5, + "missing": false, + "name": "Too little" + }, + { + "numeric_value": 0, + "selected": false, + "id": 0, + "missing": false, + "name": "The right amount" + }, + { + "numeric_value": 4, + "selected": false, + "id": 4, + "missing": false, + "name": "Not sure" + }, + { + "numeric_value": null, + "selected": false, + "id": -1, + "missing": true, + "name": "No Data" + } + ] + } + }, + "missing": false + }, + { + "id": 4, + "value": { + "derived": false, + "references": { + "alias": "lenin", + "name": "Владимир Ленин (Lenin)", + "description": "cabatt_7" + }, + "id": "000a", + "type": { + "ordinal": false, + "class": "categorical", + "categories": [ + { + "numeric_value": 32766, + "selected": false, + "id": 32766, + "missing": true, + "name": "skipped" + }, + { + "numeric_value": 32767, + "selected": false, + "id": 32767, + "missing": true, + "name": "not asked" + }, + { + "numeric_value": 1, + "selected": false, + "id": 1, + "missing": false, + "name": "Too much" + }, + { + "numeric_value": 5, + "selected": false, + "id": 5, + "missing": false, + "name": "Too little" + }, + { + "numeric_value": 0, + "selected": false, + "id": 0, + "missing": false, + "name": "The right amount" + }, + { + "numeric_value": 4, + "selected": false, + "id": 4, + "missing": false, + "name": "Not sure" + }, + { + "numeric_value": null, + "selected": false, + "id": -1, + "missing": true, + "name": "No Data" + } + ] + } + }, + "missing": false + } + ], + "class": "enum" + } + } + ], + "missing": 6, + "measures": { + "count": { + "data": [ + 0, + 6, + 1, + 0, + 0, + 0, + 0, + 0, + 28, + 
13, + 14, + 13, + 6, + 20, + 24, + 25, + 17, + 14, + 13, + 15, + 4, + 2, + 3, + 2, + 0, + 0, + 0, + 0, + 2, + 13, + 1, + 3, + 0, + 0, + 0, + 0, + 64, + 9, + 10, + 6, + 16, + 63, + 73, + 75, + 36, + 30, + 27, + 27, + 8, + 11, + 15, + 15, + 0, + 0, + 0, + 0, + 3, + 37, + 6, + 10, + 0, + 0, + 0, + 0, + 350, + 19, + 16, + 11, + 38, + 338, + 372, + 348, + 131, + 150, + 143, + 149, + 94, + 72, + 79, + 98, + 0, + 0, + 0, + 0, + 1, + 26, + 2, + 11, + 0, + 0, + 0, + 0, + 163, + 11, + 13, + 6, + 24, + 149, + 176, + 175, + 82, + 98, + 82, + 90, + 40, + 26, + 37, + 28, + 0, + 0, + 0, + 0, + 4, + 21, + 4, + 6, + 0, + 0, + 0, + 0, + 177, + 11, + 9, + 10, + 22, + 169, + 194, + 192, + 148, + 177, + 161, + 160, + 50, + 23, + 33, + 33, + 0, + 0, + 0, + 0, + 3, + 17, + 5, + 6, + 0, + 0, + 0, + 0, + 58, + 5, + 5, + 4, + 6, + 45, + 53, + 57, + 12, + 18, + 10, + 10, + 71, + 65, + 77, + 73, + 0, + 0, + 0, + 0, + 0, + 2, + 0, + 0, + 0, + 0, + 0, + 0, + 1, + 0, + 0, + 0, + 0, + 1, + 2, + 1, + 0, + 0, + 1, + 1, + 3, + 1, + 1, + 2, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0 + ], + "n_missing": 6, + "metadata": { + "references": {}, + "derived": true, + "type": { + "integer": true, + "missing_rules": {}, + "missing_reasons": { + "No Data": -1 + }, + "class": "numeric" + } + } + } + }, + "element": "crunch:cube", + "counts": [ + 0, + 6, + 1, + 0, + 0, + 0, + 0, + 0, + 28, + 13, + 14, + 13, + 6, + 20, + 24, + 25, + 17, + 14, + 13, + 15, + 4, + 2, + 3, + 2, + 0, + 0, + 0, + 0, + 2, + 13, + 1, + 3, + 0, + 0, + 0, + 0, + 64, + 9, + 10, + 6, + 16, + 63, + 73, + 75, + 36, + 30, + 27, + 27, + 8, + 11, + 15, + 15, + 0, + 0, + 0, + 0, + 3, + 37, + 6, + 10, + 0, + 0, + 0, + 0, + 350, + 19, + 16, + 11, + 38, + 338, + 372, + 348, + 131, + 150, + 143, + 
149, + 94, + 72, + 79, + 98, + 0, + 0, + 0, + 0, + 1, + 26, + 2, + 11, + 0, + 0, + 0, + 0, + 163, + 11, + 13, + 6, + 24, + 149, + 176, + 175, + 82, + 98, + 82, + 90, + 40, + 26, + 37, + 28, + 0, + 0, + 0, + 0, + 4, + 21, + 4, + 6, + 0, + 0, + 0, + 0, + 177, + 11, + 9, + 10, + 22, + 169, + 194, + 192, + 148, + 177, + 161, + 160, + 50, + 23, + 33, + 33, + 0, + 0, + 0, + 0, + 3, + 17, + 5, + 6, + 0, + 0, + 0, + 0, + 58, + 5, + 5, + 4, + 6, + 45, + 53, + 57, + 12, + 18, + 10, + 10, + 71, + 65, + 77, + 73, + 0, + 0, + 0, + 0, + 0, + 2, + 0, + 0, + 0, + 0, + 0, + 0, + 1, + 0, + 0, + 0, + 0, + 1, + 2, + 1, + 0, + 0, + 1, + 1, + 3, + 1, + 1, + 2, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0 + ], + "n": 1662 + } + } +} \ No newline at end of file diff --git a/tests/integration/fixtures/cubes/scale_means/cat-x-cat.json b/tests/integration/fixtures/cubes/scale_means/cat-x-cat.json new file mode 100644 index 000000000..25a79a6be --- /dev/null +++ b/tests/integration/fixtures/cubes/scale_means/cat-x-cat.json @@ -0,0 +1,308 @@ +{ + "element": "shoji:view", + "self": "https://app.crunch.io/api/datasets/ddc1b8a25c454689911d3d7a59c97aee/cube/?filter=%7B%22filter%22:%22https:%2F%2Fapp.crunch.io%2Fapi%2Fdatasets%2Fddc1b8a25c454689911d3d7a59c97aee%2Ffilters%2F06b4deb17519405ebe4581dd8dbe9eba%2F%22%7D&query=%7B%22dimensions%22:%5B%7B%22variable%22:%22https:%2F%2Fapp.crunch.io%2Fapi%2Fdatasets%2Fddc1b8a25c454689911d3d7a59c97aee%2Fvariables%2F000026%2F%22%7D,%7B%22variable%22:%22https:%2F%2Fapp.crunch.io%2Fapi%2Fdatasets%2Fddc1b8a25c454689911d3d7a59c97aee%2Fvariables%2F000067%2F%22%7D%5D,%22measures%22:%7B%22count%22:%7B%22function%22:%22cube_count%22,%22args%22:%5B%5D%7D%7D,%22weight%22:null%7D", + "value": { + "query": { + "measures": { 
+ "count": { + "function": "cube_count", + "args": [] + } + }, + "dimensions": [ + { + "variable": "https://app.crunch.io/api/datasets/ddc1b8a25c454689911d3d7a59c97aee/variables/000026/" + }, + { + "variable": "https://app.crunch.io/api/datasets/ddc1b8a25c454689911d3d7a59c97aee/variables/000067/" + } + ], + "weight": null + }, + "query_environment": { + "filter": [ + "https://app.crunch.io/api/datasets/ddc1b8a25c454689911d3d7a59c97aee/filters/06b4deb17519405ebe4581dd8dbe9eba/" + ] + }, + "result": { + "dimensions": [ + { + "references": { + "alias": "pasta", + "notes": "A categorical variable", + "name": "Shapes of pasta", + "description": "The geometry of pasta" + }, + "derived": false, + "type": { + "ordinal": false, + "class": "categorical", + "categories": [ + { + "numeric_value": 1, + "missing": false, + "id": 1, + "name": "Bucatini" + }, + { + "numeric_value": 2, + "missing": false, + "id": 2, + "name": "Chitarra" + }, + { + "numeric_value": 0, + "missing": false, + "id": 0, + "name": "Boccoli" + }, + { + "numeric_value": 4, + "missing": false, + "id": 4, + "name": "Orecchiette" + }, + { + "numeric_value": 5, + "missing": false, + "id": 5, + "name": "Quadrefiore" + }, + { + "numeric_value": 6, + "missing": false, + "id": 6, + "name": "Fileja" + }, + { + "numeric_value": 32766, + "missing": true, + "id": 32766, + "name": "Skipped" + }, + { + "numeric_value": 32767, + "missing": true, + "id": 32767, + "name": "Not asked" + }, + { + "numeric_value": null, + "missing": true, + "id": -1, + "name": "No Data" + } + ] + } + }, + { + "references": { + "alias": "food_groups", + "notes": "A categorical variable where the missing categories are interspersed throughout the non-missing categories", + "name": "Food groups", + "description": "Four of the five USDA food groups" + }, + "derived": false, + "type": { + "ordinal": false, + "class": "categorical", + "categories": [ + { + "numeric_value": 0, + "missing": false, + "id": 0, + "name": "Vegetables" + }, + { + 
"numeric_value": 32766, + "missing": true, + "id": 32766, + "name": "Don't know" + }, + { + "numeric_value": 2, + "missing": false, + "id": 2, + "name": "Fruit" + }, + { + "numeric_value": 5, + "missing": false, + "id": 5, + "name": "Grain" + }, + { + "numeric_value": null, + "missing": true, + "id": -1, + "name": "No Data" + }, + { + "numeric_value": 4, + "missing": false, + "id": 4, + "name": "Meat" + }, + { + "numeric_value": 32767, + "missing": true, + "id": 32767, + "name": "Not asked" + } + ] + } + } + ], + "missing": 10, + "measures": { + "count": { + "data": [ + 31, + 0, + 8, + 3, + 0, + 12, + 0, + 31, + 0, + 45, + 27, + 0, + 21, + 0, + 156, + 3, + 217, + 129, + 0, + 105, + 0, + 66, + 0, + 101, + 59, + 0, + 80, + 0, + 92, + 1, + 68, + 47, + 0, + 188, + 0, + 49, + 2, + 51, + 25, + 0, + 23, + 0, + 3, + 0, + 1, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0 + ], + "n_missing": 10, + "metadata": { + "references": {}, + "derived": true, + "type": { + "integer": true, + "missing_rules": {}, + "missing_reasons": { + "No Data": -1 + }, + "class": "numeric" + } + } + } + }, + "element": "crunch:cube", + "counts": [ + 31, + 0, + 8, + 3, + 0, + 12, + 0, + 31, + 0, + 45, + 27, + 0, + 21, + 0, + 156, + 3, + 217, + 129, + 0, + 105, + 0, + 66, + 0, + 101, + 59, + 0, + 80, + 0, + 92, + 1, + 68, + 47, + 0, + 188, + 0, + 49, + 2, + 51, + 25, + 0, + 23, + 0, + 3, + 0, + 1, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0 + ], + "n": 1644 + } + } +} \ No newline at end of file diff --git a/tests/integration/fixtures/cubes/scale_means/cat-x-mr.json b/tests/integration/fixtures/cubes/scale_means/cat-x-mr.json new file mode 100644 index 000000000..6fcae828b --- /dev/null +++ b/tests/integration/fixtures/cubes/scale_means/cat-x-mr.json @@ -0,0 +1,539 @@ +{ + "element": "shoji:view", + "self": 
"https://app.crunch.io/api/datasets/ddc1b8a25c454689911d3d7a59c97aee/cube/?filter=%7B%22filter%22:%22https:%2F%2Fapp.crunch.io%2Fapi%2Fdatasets%2Fddc1b8a25c454689911d3d7a59c97aee%2Ffilters%2F06b4deb17519405ebe4581dd8dbe9eba%2F%22%7D&query=%7B%22dimensions%22:%5B%7B%22variable%22:%22https:%2F%2Fapp.crunch.io%2Fapi%2Fdatasets%2Fddc1b8a25c454689911d3d7a59c97aee%2Fvariables%2F000067%2F%22%7D,%7B%22each%22:%22https:%2F%2Fapp.crunch.io%2Fapi%2Fdatasets%2Fddc1b8a25c454689911d3d7a59c97aee%2Fvariables%2F0000dd%2F%22%7D,%7B%22function%22:%22as_selected%22,%22args%22:%5B%7B%22variable%22:%22https:%2F%2Fapp.crunch.io%2Fapi%2Fdatasets%2Fddc1b8a25c454689911d3d7a59c97aee%2Fvariables%2F0000dd%2F%22%7D%5D%7D%5D,%22measures%22:%7B%22count%22:%7B%22function%22:%22cube_count%22,%22args%22:%5B%5D%7D%7D,%22weight%22:null%7D", + "value": { + "query": { + "measures": { + "count": { + "function": "cube_count", + "args": [] + } + }, + "dimensions": [ + { + "variable": "https://app.crunch.io/api/datasets/ddc1b8a25c454689911d3d7a59c97aee/variables/000067/" + }, + { + "each": "https://app.crunch.io/api/datasets/ddc1b8a25c454689911d3d7a59c97aee/variables/0000dd/" + }, + { + "function": "as_selected", + "args": [ + { + "variable": "https://app.crunch.io/api/datasets/ddc1b8a25c454689911d3d7a59c97aee/variables/0000dd/" + } + ] + } + ], + "weight": null + }, + "query_environment": { + "filter": [ + "https://app.crunch.io/api/datasets/ddc1b8a25c454689911d3d7a59c97aee/filters/06b4deb17519405ebe4581dd8dbe9eba/" + ] + }, + "result": { + "dimensions": [ + { + "references": { + "alias": "food_groups", + "notes": "A categorical variable where the missing categories are interspersed throughout the non-missing categories", + "name": "Food groups", + "description": "Four of the five USDA food groups" + }, + "derived": false, + "type": { + "ordinal": false, + "class": "categorical", + "categories": [ + { + "numeric_value": 0, + "missing": false, + "id": 0, + "name": "Vegetables" + }, + { + "numeric_value": 
32766, + "missing": true, + "id": 32766, + "name": "Don't know" + }, + { + "numeric_value": 2, + "missing": false, + "id": 2, + "name": "Fruit" + }, + { + "numeric_value": 5, + "missing": false, + "id": 5, + "name": "Grain" + }, + { + "numeric_value": null, + "missing": true, + "id": -1, + "name": "No Data" + }, + { + "numeric_value": 4, + "missing": false, + "id": 4, + "name": "Meat" + }, + { + "numeric_value": 32767, + "missing": true, + "id": 32767, + "name": "Not asked" + } + ] + } + }, + { + "derived": true, + "references": { + "subreferences": [ + { + "alias": "eurasia", + "name": "Eurasia", + "description": "union_hhold_1" + }, + { + "alias": "oceania", + "name": "Oceania", + "description": "union_hhold_2" + }, + { + "alias": "eastasia", + "name": "Eastasia", + "description": "union_hhold_3" + }, + { + "alias": "disputed", + "name": "Disputed" + } + ], + "notes": "A multiple response variable, where one item has no responses", + "alias": "1984_countries", + "description": "Which of the following countries from 1984 would you live in? 
(select all that apply)", + "name": "Countries from 1984" + }, + "type": { + "subtype": { + "class": "variable" + }, + "elements": [ + { + "id": 1, + "value": { + "derived": false, + "references": { + "alias": "eurasia", + "name": "Eurasia", + "description": "union_hhold_1" + }, + "id": "00c5", + "type": { + "ordinal": false, + "class": "categorical", + "categories": [ + { + "numeric_value": 1, + "selected": true, + "id": 1, + "missing": false, + "name": "Selected" + }, + { + "numeric_value": 0, + "missing": false, + "id": 0, + "name": "Other" + }, + { + "numeric_value": null, + "missing": true, + "id": -1, + "name": "No Data" + } + ] + } + }, + "missing": false + }, + { + "id": 2, + "value": { + "derived": false, + "references": { + "alias": "oceania", + "name": "Oceania", + "description": "union_hhold_2" + }, + "id": "00c6", + "type": { + "ordinal": false, + "class": "categorical", + "categories": [ + { + "numeric_value": 1, + "selected": true, + "id": 1, + "missing": false, + "name": "Selected" + }, + { + "numeric_value": 0, + "missing": false, + "id": 0, + "name": "Other" + }, + { + "numeric_value": null, + "missing": true, + "id": -1, + "name": "No Data" + } + ] + } + }, + "missing": false + }, + { + "id": 3, + "value": { + "derived": false, + "references": { + "alias": "eastasia", + "name": "Eastasia", + "description": "union_hhold_3" + }, + "id": "00c7", + "type": { + "ordinal": false, + "class": "categorical", + "categories": [ + { + "numeric_value": 1, + "selected": true, + "id": 1, + "missing": false, + "name": "Selected" + }, + { + "numeric_value": 0, + "missing": false, + "id": 0, + "name": "Other" + }, + { + "numeric_value": null, + "missing": true, + "id": -1, + "name": "No Data" + } + ] + } + }, + "missing": false + }, + { + "id": 4, + "value": { + "derived": false, + "references": { + "alias": "disputed", + "name": "Disputed" + }, + "id": "de5314cea98b44eb9c243a86e06d1476", + "type": { + "ordinal": false, + "class": "categorical", + "categories": [ 
+ { + "numeric_value": 1, + "selected": true, + "id": 1, + "missing": false, + "name": "Selected" + }, + { + "numeric_value": 0, + "missing": false, + "id": 0, + "name": "Other" + }, + { + "numeric_value": null, + "missing": true, + "id": -1, + "name": "No Data" + } + ] + } + }, + "missing": false + } + ], + "class": "enum" + } + }, + { + "references": { + "subreferences": [ + { + "alias": "eurasia", + "name": "Eurasia", + "description": "union_hhold_1" + }, + { + "alias": "oceania", + "name": "Oceania", + "description": "union_hhold_2" + }, + { + "alias": "eastasia", + "name": "Eastasia", + "description": "union_hhold_3" + }, + { + "alias": "disputed", + "name": "Disputed" + } + ], + "notes": "A multiple response variable, where one item has no responses", + "description": "Which of the following countries from 1984 would you live in? (select all that apply)", + "name": "Countries from 1984", + "alias": "1984_countries" + }, + "derived": true, + "type": { + "ordinal": false, + "subvariables": [ + "00c5", + "00c6", + "00c7", + "de5314cea98b44eb9c243a86e06d1476" + ], + "class": "categorical", + "categories": [ + { + "numeric_value": 1, + "selected": true, + "id": 1, + "missing": false, + "name": "Selected" + }, + { + "numeric_value": 0, + "missing": false, + "id": 0, + "name": "Other" + }, + { + "numeric_value": null, + "missing": true, + "id": -1, + "name": "No Data" + } + ] + } + } + ], + "missing": 6, + "measures": { + "count": { + "data": [ + 39, + 389, + 0, + 34, + 394, + 0, + 357, + 71, + 0, + 0, + 428, + 0, + 0, + 6, + 0, + 0, + 6, + 0, + 5, + 1, + 0, + 0, + 6, + 0, + 44, + 447, + 0, + 36, + 455, + 0, + 415, + 76, + 0, + 0, + 491, + 0, + 24, + 266, + 0, + 29, + 261, + 0, + 241, + 49, + 0, + 0, + 290, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 35, + 394, + 0, + 24, + 405, + 0, + 371, + 58, + 0, + 0, + 429, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0 + ], + "n_missing": 6, + "metadata": { + "references": {}, + 
"derived": true, + "type": { + "integer": true, + "missing_rules": {}, + "missing_reasons": { + "No Data": -1 + }, + "class": "numeric" + } + } + } + }, + "element": "crunch:cube", + "counts": [ + 39, + 389, + 0, + 34, + 394, + 0, + 357, + 71, + 0, + 0, + 428, + 0, + 0, + 6, + 0, + 0, + 6, + 0, + 5, + 1, + 0, + 0, + 6, + 0, + 44, + 447, + 0, + 36, + 455, + 0, + 415, + 76, + 0, + 0, + 491, + 0, + 24, + 266, + 0, + 29, + 261, + 0, + 241, + 49, + 0, + 0, + 290, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 35, + 394, + 0, + 24, + 405, + 0, + 371, + 58, + 0, + 0, + 429, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0 + ], + "n": 1644 + } + } +} \ No newline at end of file diff --git a/tests/integration/fixtures/cubes/scale_means/mr-x-cat.json b/tests/integration/fixtures/cubes/scale_means/mr-x-cat.json new file mode 100644 index 000000000..8b685878f --- /dev/null +++ b/tests/integration/fixtures/cubes/scale_means/mr-x-cat.json @@ -0,0 +1,539 @@ +{ + "element": "shoji:view", + "self": "https://app.crunch.io/api/datasets/ddc1b8a25c454689911d3d7a59c97aee/cube/?filter=%7B%22filter%22:%22https:%2F%2Fapp.crunch.io%2Fapi%2Fdatasets%2Fddc1b8a25c454689911d3d7a59c97aee%2Ffilters%2F06b4deb17519405ebe4581dd8dbe9eba%2F%22%7D&query=%7B%22dimensions%22:%5B%7B%22each%22:%22https:%2F%2Fapp.crunch.io%2Fapi%2Fdatasets%2Fddc1b8a25c454689911d3d7a59c97aee%2Fvariables%2F0000dd%2F%22%7D,%7B%22function%22:%22as_selected%22,%22args%22:%5B%7B%22variable%22:%22https:%2F%2Fapp.crunch.io%2Fapi%2Fdatasets%2Fddc1b8a25c454689911d3d7a59c97aee%2Fvariables%2F0000dd%2F%22%7D%5D%7D,%7B%22variable%22:%22https:%2F%2Fapp.crunch.io%2Fapi%2Fdatasets%2Fddc1b8a25c454689911d3d7a59c97aee%2Fvariables%2F000067%2F%22%7D%5D,%22measures%22:%7B%22count%22:%7B%22function%22:%22cube_count%22,%22args%22:%5B%5D%7D%7D,%22weight%22:null%7D", + "value": { + "query": { + "measures": { + "count": { + "function": "cube_count", + "args": [] + } + }, + "dimensions": [ + { + "each": 
"https://app.crunch.io/api/datasets/ddc1b8a25c454689911d3d7a59c97aee/variables/0000dd/" + }, + { + "function": "as_selected", + "args": [ + { + "variable": "https://app.crunch.io/api/datasets/ddc1b8a25c454689911d3d7a59c97aee/variables/0000dd/" + } + ] + }, + { + "variable": "https://app.crunch.io/api/datasets/ddc1b8a25c454689911d3d7a59c97aee/variables/000067/" + } + ], + "weight": null + }, + "query_environment": { + "filter": [ + "https://app.crunch.io/api/datasets/ddc1b8a25c454689911d3d7a59c97aee/filters/06b4deb17519405ebe4581dd8dbe9eba/" + ] + }, + "result": { + "dimensions": [ + { + "derived": true, + "references": { + "subreferences": [ + { + "alias": "eurasia", + "name": "Eurasia", + "description": "union_hhold_1" + }, + { + "alias": "oceania", + "name": "Oceania", + "description": "union_hhold_2" + }, + { + "alias": "eastasia", + "name": "Eastasia", + "description": "union_hhold_3" + }, + { + "alias": "disputed", + "name": "Disputed" + } + ], + "notes": "A multiple response variable, where one item has no responses", + "alias": "1984_countries", + "description": "Which of the following countries from 1984 would you live in? 
(select all that apply)", + "name": "Countries from 1984" + }, + "type": { + "subtype": { + "class": "variable" + }, + "elements": [ + { + "id": 1, + "value": { + "derived": false, + "references": { + "alias": "eurasia", + "name": "Eurasia", + "description": "union_hhold_1" + }, + "id": "00c5", + "type": { + "ordinal": false, + "class": "categorical", + "categories": [ + { + "numeric_value": 1, + "selected": true, + "id": 1, + "missing": false, + "name": "Selected" + }, + { + "numeric_value": 0, + "missing": false, + "id": 0, + "name": "Other" + }, + { + "numeric_value": null, + "missing": true, + "id": -1, + "name": "No Data" + } + ] + } + }, + "missing": false + }, + { + "id": 2, + "value": { + "derived": false, + "references": { + "alias": "oceania", + "name": "Oceania", + "description": "union_hhold_2" + }, + "id": "00c6", + "type": { + "ordinal": false, + "class": "categorical", + "categories": [ + { + "numeric_value": 1, + "selected": true, + "id": 1, + "missing": false, + "name": "Selected" + }, + { + "numeric_value": 0, + "missing": false, + "id": 0, + "name": "Other" + }, + { + "numeric_value": null, + "missing": true, + "id": -1, + "name": "No Data" + } + ] + } + }, + "missing": false + }, + { + "id": 3, + "value": { + "derived": false, + "references": { + "alias": "eastasia", + "name": "Eastasia", + "description": "union_hhold_3" + }, + "id": "00c7", + "type": { + "ordinal": false, + "class": "categorical", + "categories": [ + { + "numeric_value": 1, + "selected": true, + "id": 1, + "missing": false, + "name": "Selected" + }, + { + "numeric_value": 0, + "missing": false, + "id": 0, + "name": "Other" + }, + { + "numeric_value": null, + "missing": true, + "id": -1, + "name": "No Data" + } + ] + } + }, + "missing": false + }, + { + "id": 4, + "value": { + "derived": false, + "references": { + "alias": "disputed", + "name": "Disputed" + }, + "id": "de5314cea98b44eb9c243a86e06d1476", + "type": { + "ordinal": false, + "class": "categorical", + "categories": [ 
+ { + "numeric_value": 1, + "selected": true, + "id": 1, + "missing": false, + "name": "Selected" + }, + { + "numeric_value": 0, + "missing": false, + "id": 0, + "name": "Other" + }, + { + "numeric_value": null, + "missing": true, + "id": -1, + "name": "No Data" + } + ] + } + }, + "missing": false + } + ], + "class": "enum" + } + }, + { + "references": { + "subreferences": [ + { + "alias": "eurasia", + "name": "Eurasia", + "description": "union_hhold_1" + }, + { + "alias": "oceania", + "name": "Oceania", + "description": "union_hhold_2" + }, + { + "alias": "eastasia", + "name": "Eastasia", + "description": "union_hhold_3" + }, + { + "alias": "disputed", + "name": "Disputed" + } + ], + "notes": "A multiple response variable, where one item has no responses", + "description": "Which of the following countries from 1984 would you live in? (select all that apply)", + "name": "Countries from 1984", + "alias": "1984_countries" + }, + "derived": true, + "type": { + "ordinal": false, + "subvariables": [ + "00c5", + "00c6", + "00c7", + "de5314cea98b44eb9c243a86e06d1476" + ], + "class": "categorical", + "categories": [ + { + "numeric_value": 1, + "selected": true, + "id": 1, + "missing": false, + "name": "Selected" + }, + { + "numeric_value": 0, + "missing": false, + "id": 0, + "name": "Other" + }, + { + "numeric_value": null, + "missing": true, + "id": -1, + "name": "No Data" + } + ] + } + }, + { + "references": { + "alias": "food_groups", + "notes": "A categorical variable where the missing categories are interspersed throughout the non-missing categories", + "name": "Food groups", + "description": "Four of the five USDA food groups" + }, + "derived": false, + "type": { + "ordinal": false, + "class": "categorical", + "categories": [ + { + "numeric_value": 0, + "missing": false, + "id": 0, + "name": "Vegetables" + }, + { + "numeric_value": 32766, + "missing": true, + "id": 32766, + "name": "Don't know" + }, + { + "numeric_value": 2, + "missing": false, + "id": 2, + "name": 
"Fruit" + }, + { + "numeric_value": 5, + "missing": false, + "id": 5, + "name": "Grain" + }, + { + "numeric_value": null, + "missing": true, + "id": -1, + "name": "No Data" + }, + { + "numeric_value": 4, + "missing": false, + "id": 4, + "name": "Meat" + }, + { + "numeric_value": 32767, + "missing": true, + "id": 32767, + "name": "Not asked" + } + ] + } + } + ], + "missing": 6, + "measures": { + "count": { + "data": [ + 39, + 0, + 44, + 24, + 0, + 35, + 0, + 389, + 6, + 447, + 266, + 0, + 394, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 34, + 0, + 36, + 29, + 0, + 24, + 0, + 394, + 6, + 455, + 261, + 0, + 405, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 357, + 5, + 415, + 241, + 0, + 371, + 0, + 71, + 1, + 76, + 49, + 0, + 58, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 428, + 6, + 491, + 290, + 0, + 429, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0 + ], + "n_missing": 6, + "metadata": { + "references": {}, + "derived": true, + "type": { + "integer": true, + "missing_rules": {}, + "missing_reasons": { + "No Data": -1 + }, + "class": "numeric" + } + } + } + }, + "element": "crunch:cube", + "counts": [ + 39, + 0, + 44, + 24, + 0, + 35, + 0, + 389, + 6, + 447, + 266, + 0, + 394, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 34, + 0, + 36, + 29, + 0, + 24, + 0, + 394, + 6, + 455, + 261, + 0, + 405, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 357, + 5, + 415, + 241, + 0, + 371, + 0, + 71, + 1, + 76, + 49, + 0, + 58, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 428, + 6, + 491, + 290, + 0, + 429, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0 + ], + "n": 1644 + } + } +} \ No newline at end of file diff --git a/tests/integration/fixtures/cubes/scale_means/univariate-cat.json b/tests/integration/fixtures/cubes/scale_means/univariate-cat.json new file mode 100644 index 000000000..844088ae8 --- /dev/null +++ b/tests/integration/fixtures/cubes/scale_means/univariate-cat.json @@ -0,0 +1,140 @@ +{ + "element": "shoji:view", + "self": 
"https://app.crunch.io/api/datasets/ddc1b8a25c454689911d3d7a59c97aee/cube/?filter=%7B%22filter%22:%22https:%2F%2Fapp.crunch.io%2Fapi%2Fdatasets%2Fddc1b8a25c454689911d3d7a59c97aee%2Ffilters%2F06b4deb17519405ebe4581dd8dbe9eba%2F%22%7D&query=%7B%22dimensions%22:%5B%7B%22variable%22:%22https:%2F%2Fapp.crunch.io%2Fapi%2Fdatasets%2Fddc1b8a25c454689911d3d7a59c97aee%2Fvariables%2F000026%2F%22%7D%5D,%22measures%22:%7B%22count%22:%7B%22function%22:%22cube_count%22,%22args%22:%5B%5D%7D%7D,%22weight%22:null%7D", + "value": { + "query": { + "measures": { + "count": { + "function": "cube_count", + "args": [] + } + }, + "dimensions": [ + { + "variable": "https://app.crunch.io/api/datasets/ddc1b8a25c454689911d3d7a59c97aee/variables/000026/" + } + ], + "weight": null + }, + "query_environment": { + "filter": [ + "https://app.crunch.io/api/datasets/ddc1b8a25c454689911d3d7a59c97aee/filters/06b4deb17519405ebe4581dd8dbe9eba/" + ] + }, + "result": { + "dimensions": [ + { + "references": { + "alias": "pasta", + "notes": "A categorical variable", + "name": "Shapes of pasta", + "description": "The geometry of pasta" + }, + "derived": false, + "type": { + "ordinal": false, + "class": "categorical", + "categories": [ + { + "numeric_value": 1, + "missing": false, + "id": 1, + "name": "Bucatini" + }, + { + "numeric_value": 2, + "missing": false, + "id": 2, + "name": "Chitarra" + }, + { + "numeric_value": 0, + "missing": false, + "id": 0, + "name": "Boccoli" + }, + { + "numeric_value": 4, + "missing": false, + "id": 4, + "name": "Orecchiette" + }, + { + "numeric_value": 5, + "missing": false, + "id": 5, + "name": "Quadrefiore" + }, + { + "numeric_value": 6, + "missing": false, + "id": 6, + "name": "Fileja" + }, + { + "numeric_value": 32766, + "missing": true, + "id": 32766, + "name": "Skipped" + }, + { + "numeric_value": 32767, + "missing": true, + "id": 32767, + "name": "Not asked" + }, + { + "numeric_value": null, + "missing": true, + "id": -1, + "name": "No Data" + } + ] + } + } + ], + 
"missing": 4, + "measures": { + "count": { + "data": [ + 54, + 124, + 610, + 306, + 396, + 150, + 4, + 0, + 0 + ], + "n_missing": 4, + "metadata": { + "references": {}, + "derived": true, + "type": { + "integer": true, + "missing_rules": {}, + "missing_reasons": { + "No Data": -1 + }, + "class": "numeric" + } + } + } + }, + "element": "crunch:cube", + "counts": [ + 54, + 124, + 610, + 306, + 396, + 150, + 4, + 0, + 0 + ], + "n": 1644 + } + } +} \ No newline at end of file diff --git a/tests/integration/test_scale_means.py b/tests/integration/test_scale_means.py new file mode 100644 index 000000000..5bab73905 --- /dev/null +++ b/tests/integration/test_scale_means.py @@ -0,0 +1,81 @@ +from unittest import TestCase +import numpy as np + +from cr.cube.crunch_cube import CrunchCube + +from .fixtures.cubes.scale_means import CA_CAT_X_ITEMS +from .fixtures.cubes.scale_means import CA_ITEMS_X_CAT +from .fixtures.cubes.scale_means import CA_X_MR +from .fixtures.cubes.scale_means import CAT_X_CA_CAT_X_ITEMS +from .fixtures.cubes.scale_means import CAT_X_CAT +from .fixtures.cubes.scale_means import CAT_X_MR +from .fixtures.cubes.scale_means import MR_X_CAT +from .fixtures.cubes.scale_means import UNIVARIATE_CAT + + +def test_ca_cat_x_items(): + cube = CrunchCube(CA_CAT_X_ITEMS) + expected = np.array([1.50454821, 3.11233766, 3.35788192, 3.33271833]) + actual = cube.scale_means() + np.testing.assert_almost_equal(actual, expected) + + +def test_ca_items_x_cat(): + cube = CrunchCube(CA_ITEMS_X_CAT) + expected = np.array([1.50454821, 3.11233766, 3.35788192, 3.33271833]) + actual = cube.scale_means() + np.testing.assert_almost_equal(actual, expected) + + +def test_ca_x_mr(): + cube = CrunchCube(CA_X_MR) + expected = np.array([ + [1.29787234, 1.8 , 1.48730964, np.nan], # noqa + [3.31746032, 3.10743802, 3.09976976, np.nan], + [3.31205674, 3.23913043, 3.37745455, np.nan], + [3.53676471, 3.34814815, 3.3147877 , np.nan], # noqa + ]) + actual = cube.scale_means() + 
np.testing.assert_almost_equal(actual, expected) + + +def test_cat_x_ca_cat_x_items(): + cube = CrunchCube(CAT_X_CA_CAT_X_ITEMS) + expected = np.array([ + [1.34545455, 2.46938776, 2.7037037 , 2.65454545], # noqa + [1.41935484, 3.25663717, 3.48 , 3.58536585], # noqa + [1.49429038, 3.44905009, 3.59344262, 3.53630363], + [1.43365696, 3.02816901, 3.37987013, 3.32107023], + [1.22670025, 2.49473684, 2.79848866, 2.78987342], + [2.53061224, 3.68421053, 3.9862069 , 4.03472222], # noqa + ]) + actual = cube.scale_means() + np.testing.assert_almost_equal(actual, expected) + + +def test_cat_x_cat(): + cube = CrunchCube(CAT_X_CAT) + expected = np.array([2.61411765, 2.34285714, 2.33793103, 3.38461538]) + actual = cube.scale_means() + np.testing.assert_almost_equal(actual, expected) + + +def test_cat_x_mr(): + cube = CrunchCube(CAT_X_MR) + expected = np.array([2.45070423, 2.54471545, 2.54263006, np.nan]) + actual = cube.scale_means() + np.testing.assert_almost_equal(actual, expected) + + +def test_mr_x_cat(): + cube = CrunchCube(MR_X_CAT) + expected = np.array([2.45070423, 2.54471545, 2.54263006, np.nan]) + actual = cube.scale_means() + np.testing.assert_almost_equal(actual, expected) + + +def test_univariate_cat(): + cube = CrunchCube(UNIVARIATE_CAT) + expected = np.array([2.6865854]) + actual = cube.scale_means() + np.testing.assert_almost_equal(actual, expected) From 6b7b947c5f34a5dea82e617b515c4874238ad3f7 Mon Sep 17 00:00:00 2001 From: Slobodan Ilic Date: Thu, 2 Aug 2018 18:18:36 +0200 Subject: [PATCH 06/14] Refactor scale means - Take into account all dimensions - Return 'None' if there are no numerical values - Only do slices if cube is 3D --- src/cr/cube/crunch_cube.py | 2 +- src/cr/cube/measures/scale_means.py | 96 ++- tests/integration/__init__.py | 14 + .../fixtures/cubes/scale_means/cat-x-cat.json | 598 +++++++++--------- tests/integration/test_crunch_cube.py | 64 +- tests/integration/test_scale_means.py | 61 +- 6 files changed, 421 insertions(+), 414 deletions(-) diff 
--git a/src/cr/cube/crunch_cube.py b/src/cr/cube/crunch_cube.py index 67df582fb..34a1f7161 100644 --- a/src/cr/cube/crunch_cube.py +++ b/src/cr/cube/crunch_cube.py @@ -1128,7 +1128,7 @@ def pvals(self, weighted=True, prune=False, hs_dims=None): def scale_means(self): '''Get cube means.''' - return ScaleMeans(self).data + return [ScaleMeans(slice_).data for slice_ in self.slices] def get_slices(self, ca_as_0th=False): if self.ndim < 3 and not ca_as_0th: diff --git a/src/cr/cube/measures/scale_means.py b/src/cr/cube/measures/scale_means.py index f6132fb30..59ac11d26 100644 --- a/src/cr/cube/measures/scale_means.py +++ b/src/cr/cube/measures/scale_means.py @@ -9,69 +9,61 @@ class ScaleMeans(object): '''Implementation of the Means service.''' - def __init__(self, cube): - self._cube = cube + def __init__(self, slice_): + self._slice = slice_ @lazyproperty def data(self): '''Get the means calculation.''' - table = self._cube.as_array() - contents = self._inner_prod(table, self.values) + means = [] + table = self._slice.as_array() + products = self._inner_prods(table, self.values) - if self._cube.has_mr and not self._cube.is_double_mr: - # axis = 1 - self._cube.mr_dim_ind - # axis = self._cube.dim_types[-2:].index('categorical') - axis = self._cube.dim_types.index('categorical') - return np.sum(contents, axis) / np.sum(table, axis) + for axis, product in enumerate(products): + if product is None: + means.append(product) + continue - if self.valid_inds.all(): - return np.sum(contents, self.axis) / self._cube.margin(self.axis) + # Eliminate missings + nans = np.isnan(product) + if len(product.shape) == 1: + product = product[~nans] + table = table[~nans] + else: + ind_rows = ~nans.any(axis=1) + ind_cols = ~nans[ind_rows].any(axis=0) + product = product[ind_rows][:, ind_cols] + table = table[ind_rows][:, ind_cols] - num = np.sum(contents[self.contents_inds], self.axis) - den = np.sum(table[self.contents_inds], self.axis) - return num / den - - @lazyproperty - def 
axis(self): - '''Get axis for means calculation.''' - axis = 0 - if self._cube.ca_dim_ind == 0 or self._cube.ca_dim_ind == 2: - axis = 1 - elif len(self._cube.dimensions) > 2 and self._cube.ca_dim_ind == 1: - axis = 2 - return axis + # Calculate means + num = np.sum(product, axis) + den = np.sum(table, axis) + mean = num / den + if not isinstance(mean, np.ndarray): + mean = np.array([mean]) + means.append(mean) + return means @lazyproperty def values(self): '''Get num values for means calculation.''' - return np.array([ - dim.values for dim in self._cube.dimensions[-2:] - if dim.values and any(~np.isnan(dim.values)) - ][0]) - - @lazyproperty - def valid_inds(self): - '''Valid indices for numerical values.''' - return ~np.isnan(self.values) - - @lazyproperty - def contents_inds(self): - '''Create contents selection indices based on valid num vals.''' return [ - slice(None) if i != self.axis else self.valid_inds - for i in range(len(self._cube.as_array().shape)) + ( + np.array(dim.values) + if dim.values and any(~np.isnan(dim.values)) else + None + ) + for dim in self._slice.dimensions ] - def _inner_prod(self, contents, values): - inflate_values = ( - self._cube.ca_dim_ind == 0 and len(contents.shape) == 3 or - self._cube.mr_dim_ind == 1 or - self._cube.ca_dim_ind == 1 and len(contents.shape) < 3 or - self._cube.ca_dim_ind == 2 and len(contents.shape) == 3 - ) - if inflate_values: - values = values[:, np.newaxis] - try: - return contents * values - except ValueError: - return contents * values[:, np.newaxis] + def _inner_prods(self, contents, values): + products = [] + for i, numeric in enumerate(values): + if numeric is None: + products.append(numeric) + continue + inflate = self._slice.ndim > 1 and not i + numeric = numeric[:, None] if inflate else numeric + product = contents * numeric + products.append(product) + return products diff --git a/tests/integration/__init__.py b/tests/integration/__init__.py index e69de29bb..027ae8840 100644 --- 
a/tests/integration/__init__.py +++ b/tests/integration/__init__.py @@ -0,0 +1,14 @@ +# encoding: utf-8 + +import numpy as np + + +def assert_scale_means_equal(actual, expected): + for act, exp in zip(actual, expected): + if isinstance(exp, np.ndarray) and isinstance(act, np.ndarray): + np.testing.assert_almost_equal(act, exp) + elif isinstance(exp, list) and isinstance(act, list): + assert_scale_means_equal(act, exp) + else: + assert act == exp + assert True diff --git a/tests/integration/fixtures/cubes/scale_means/cat-x-cat.json b/tests/integration/fixtures/cubes/scale_means/cat-x-cat.json index 25a79a6be..a15918f61 100644 --- a/tests/integration/fixtures/cubes/scale_means/cat-x-cat.json +++ b/tests/integration/fixtures/cubes/scale_means/cat-x-cat.json @@ -1,308 +1,302 @@ { - "element": "shoji:view", - "self": "https://app.crunch.io/api/datasets/ddc1b8a25c454689911d3d7a59c97aee/cube/?filter=%7B%22filter%22:%22https:%2F%2Fapp.crunch.io%2Fapi%2Fdatasets%2Fddc1b8a25c454689911d3d7a59c97aee%2Ffilters%2F06b4deb17519405ebe4581dd8dbe9eba%2F%22%7D&query=%7B%22dimensions%22:%5B%7B%22variable%22:%22https:%2F%2Fapp.crunch.io%2Fapi%2Fdatasets%2Fddc1b8a25c454689911d3d7a59c97aee%2Fvariables%2F000026%2F%22%7D,%7B%22variable%22:%22https:%2F%2Fapp.crunch.io%2Fapi%2Fdatasets%2Fddc1b8a25c454689911d3d7a59c97aee%2Fvariables%2F000067%2F%22%7D%5D,%22measures%22:%7B%22count%22:%7B%22function%22:%22cube_count%22,%22args%22:%5B%5D%7D%7D,%22weight%22:null%7D", - "value": { - "query": { - "measures": { - "count": { - "function": "cube_count", - "args": [] - } - }, - "dimensions": [ - { - "variable": "https://app.crunch.io/api/datasets/ddc1b8a25c454689911d3d7a59c97aee/variables/000026/" - }, - { - "variable": "https://app.crunch.io/api/datasets/ddc1b8a25c454689911d3d7a59c97aee/variables/000067/" - } - ], - "weight": null + "query": { + "measures": { + "count": { + "function": "cube_count", + "args": [] + } + }, + "dimensions": [ + { + "variable": "000026" + }, + { + "variable": "000067" + } + 
], + "weight": null + }, + "query_environment": { + "filter": [] + }, + "result": { + "dimensions": [ + { + "references": { + "alias": "pasta", + "notes": "A categorical variable", + "name": "Shapes of pasta", + "description": "The geometry of pasta" }, - "query_environment": { - "filter": [ - "https://app.crunch.io/api/datasets/ddc1b8a25c454689911d3d7a59c97aee/filters/06b4deb17519405ebe4581dd8dbe9eba/" - ] + "derived": false, + "type": { + "ordinal": false, + "class": "categorical", + "categories": [ + { + "numeric_value": 1, + "missing": false, + "id": 1, + "name": "Bucatini" + }, + { + "numeric_value": 2, + "missing": false, + "id": 2, + "name": "Chitarra" + }, + { + "numeric_value": 0, + "missing": false, + "id": 0, + "name": "Boccoli" + }, + { + "numeric_value": 4, + "missing": false, + "id": 4, + "name": "Orecchiette" + }, + { + "numeric_value": 5, + "missing": false, + "id": 5, + "name": "Quadrefiore" + }, + { + "numeric_value": 6, + "missing": false, + "id": 6, + "name": "Fileja" + }, + { + "numeric_value": 32766, + "missing": true, + "id": 32766, + "name": "Skipped" + }, + { + "numeric_value": 32767, + "missing": true, + "id": 32767, + "name": "Not asked" + }, + { + "numeric_value": null, + "missing": true, + "id": -1, + "name": "No Data" + } + ] + } + }, + { + "references": { + "alias": "food_groups", + "notes": "A categorical variable where the missing categories are interspersed throughout the non-missing categories", + "name": "Food groups", + "description": "Four of the five USDA food groups" }, - "result": { - "dimensions": [ - { - "references": { - "alias": "pasta", - "notes": "A categorical variable", - "name": "Shapes of pasta", - "description": "The geometry of pasta" - }, - "derived": false, - "type": { - "ordinal": false, - "class": "categorical", - "categories": [ - { - "numeric_value": 1, - "missing": false, - "id": 1, - "name": "Bucatini" - }, - { - "numeric_value": 2, - "missing": false, - "id": 2, - "name": "Chitarra" - }, - { - 
"numeric_value": 0, - "missing": false, - "id": 0, - "name": "Boccoli" - }, - { - "numeric_value": 4, - "missing": false, - "id": 4, - "name": "Orecchiette" - }, - { - "numeric_value": 5, - "missing": false, - "id": 5, - "name": "Quadrefiore" - }, - { - "numeric_value": 6, - "missing": false, - "id": 6, - "name": "Fileja" - }, - { - "numeric_value": 32766, - "missing": true, - "id": 32766, - "name": "Skipped" - }, - { - "numeric_value": 32767, - "missing": true, - "id": 32767, - "name": "Not asked" - }, - { - "numeric_value": null, - "missing": true, - "id": -1, - "name": "No Data" - } - ] - } - }, - { - "references": { - "alias": "food_groups", - "notes": "A categorical variable where the missing categories are interspersed throughout the non-missing categories", - "name": "Food groups", - "description": "Four of the five USDA food groups" - }, - "derived": false, - "type": { - "ordinal": false, - "class": "categorical", - "categories": [ - { - "numeric_value": 0, - "missing": false, - "id": 0, - "name": "Vegetables" - }, - { - "numeric_value": 32766, - "missing": true, - "id": 32766, - "name": "Don't know" - }, - { - "numeric_value": 2, - "missing": false, - "id": 2, - "name": "Fruit" - }, - { - "numeric_value": 5, - "missing": false, - "id": 5, - "name": "Grain" - }, - { - "numeric_value": null, - "missing": true, - "id": -1, - "name": "No Data" - }, - { - "numeric_value": 4, - "missing": false, - "id": 4, - "name": "Meat" - }, - { - "numeric_value": 32767, - "missing": true, - "id": 32767, - "name": "Not asked" - } - ] - } - } - ], - "missing": 10, - "measures": { - "count": { - "data": [ - 31, - 0, - 8, - 3, - 0, - 12, - 0, - 31, - 0, - 45, - 27, - 0, - 21, - 0, - 156, - 3, - 217, - 129, - 0, - 105, - 0, - 66, - 0, - 101, - 59, - 0, - 80, - 0, - 92, - 1, - 68, - 47, - 0, - 188, - 0, - 49, - 2, - 51, - 25, - 0, - 23, - 0, - 3, - 0, - 1, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0 - ], - "n_missing": 10, - "metadata": 
{ - "references": {}, - "derived": true, - "type": { - "integer": true, - "missing_rules": {}, - "missing_reasons": { - "No Data": -1 - }, - "class": "numeric" - } - } - } + "derived": false, + "type": { + "ordinal": false, + "class": "categorical", + "categories": [ + { + "numeric_value": 0, + "missing": false, + "id": 0, + "name": "Vegetables" + }, + { + "numeric_value": 32766, + "missing": true, + "id": 32766, + "name": "Don't know" + }, + { + "numeric_value": 2, + "missing": false, + "id": 2, + "name": "Fruit" + }, + { + "numeric_value": 5, + "missing": false, + "id": 5, + "name": "Grain" + }, + { + "numeric_value": null, + "missing": true, + "id": -1, + "name": "No Data" + }, + { + "numeric_value": 4, + "missing": false, + "id": 4, + "name": "Meat" + }, + { + "numeric_value": 32767, + "missing": true, + "id": 32767, + "name": "Not asked" + } + ] + } + } + ], + "missing": 10, + "measures": { + "count": { + "data": [ + 32, + 0, + 8, + 3, + 0, + 12, + 0, + 33, + 0, + 45, + 27, + 0, + 21, + 0, + 158, + 3, + 218, + 132, + 0, + 105, + 0, + 66, + 0, + 102, + 60, + 0, + 82, + 0, + 93, + 1, + 70, + 47, + 0, + 190, + 0, + 49, + 2, + 51, + 25, + 0, + 23, + 0, + 3, + 0, + 1, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0 + ], + "n_missing": 10, + "metadata": { + "references": {}, + "derived": true, + "type": { + "integer": true, + "missing_rules": {}, + "missing_reasons": { + "No Data": -1 }, - "element": "crunch:cube", - "counts": [ - 31, - 0, - 8, - 3, - 0, - 12, - 0, - 31, - 0, - 45, - 27, - 0, - 21, - 0, - 156, - 3, - 217, - 129, - 0, - 105, - 0, - 66, - 0, - 101, - 59, - 0, - 80, - 0, - 92, - 1, - 68, - 47, - 0, - 188, - 0, - 49, - 2, - 51, - 25, - 0, - 23, - 0, - 3, - 0, - 1, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0 - ], - "n": 1644 + "class": "numeric" + } } - } + } + }, + "element": "crunch:cube", + "counts": [ + 32, + 0, + 8, + 3, + 0, + 12, + 0, + 33, + 0, + 45, + 27, + 0, 
+ 21, + 0, + 158, + 3, + 218, + 132, + 0, + 105, + 0, + 66, + 0, + 102, + 60, + 0, + 82, + 0, + 93, + 1, + 70, + 47, + 0, + 190, + 0, + 49, + 2, + 51, + 25, + 0, + 23, + 0, + 3, + 0, + 1, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0 + ], + "n": 1662 + } } \ No newline at end of file diff --git a/tests/integration/test_crunch_cube.py b/tests/integration/test_crunch_cube.py index 03996713e..72e2ec538 100644 --- a/tests/integration/test_crunch_cube.py +++ b/tests/integration/test_crunch_cube.py @@ -54,6 +54,8 @@ from .fixtures import HUFFPOST_ACTIONS_X_HOUSEHOLD from .fixtures import GENDER_X_WEIGHT +from . import assert_scale_means_equal + class TestCrunchCube(TestCase): def test_crunch_cube_loads_data(self): @@ -1540,80 +1542,80 @@ def test_ca_subvar_x_cat_hs_counts_prune(self): def test_means_univariate_cat(self): cube = CrunchCube(ECON_BLAME_WITH_HS) - expected = 2.1735205616850553 + expected = [[np.array(2.1735205616850553)]] actual = cube.scale_means() - self.assertEqual(actual, expected) + assert_scale_means_equal(actual, expected) def test_means_bivariate_cat(self): cube = CrunchCube(ECON_BLAME_X_IDEOLOGY_ROW_HS) - expected = np.array([ + expected = [[np.array([ 2.19444444, 2.19230769, 2.26666667, 1.88990826, 1.76363636, 3.85, - ]) + ])]] actual = cube.scale_means() - np.testing.assert_almost_equal(actual, expected) + assert_scale_means_equal(actual, expected) def test_means_cat_x_mr(self): cube = CrunchCube(FRUIT_X_PETS) - expected = np.array([1.7, 1.6470588, 1.6842105]) + expected = [[np.array([1.7, 1.6470588, 1.6842105]), None]] actual = cube.scale_means() - np.testing.assert_almost_equal(actual, expected) + assert_scale_means_equal(actual, expected) def test_means_mr_x_cat(self): cube = CrunchCube(PETS_X_FRUIT) - expected = np.array([1.7, 1.6470588, 1.6842105]) + expected = [[None, np.array([1.7, 1.6470588, 1.6842105])]] actual = cube.scale_means() - np.testing.assert_almost_equal(actual, expected) + 
assert_scale_means_equal(actual, expected) def test_means_cat_array_cat_dim_first(self): cube = CrunchCube(PETS_ARRAY_CAT_FIRST) - expected = np.array([1.44333002, 1.48049069, 1.57881177]) + expected = [[None, np.array([1.44333002, 1.48049069, 1.57881177])]] actual = cube.scale_means() - np.testing.assert_almost_equal(actual, expected) + assert_scale_means_equal(actual, expected) def test_means_cat_array_subvar_dim_first(self): cube = CrunchCube(PETS_ARRAY_SUBVAR_FIRST) - expected = np.array([1.44333002, 1.48049069, 1.57881177]) + expected = [[np.array([1.44333002, 1.48049069, 1.57881177]), None]] actual = cube.scale_means() - np.testing.assert_almost_equal(actual, expected) + assert_scale_means_equal(actual, expected) def test_means_cat_x_cat_arr_fruit_first(self): cube = CrunchCube(FRUIT_X_PETS_ARRAY) - expected = np.array([ - [1.48, 1.42857143, 1.52173913], - [1.40740741, 1.53846154, 1.55319149], - ]) + expected = [ + [None, np.array([1.48, 1.42857143, 1.52173913])], + [None, np.array([1.40740741, 1.53846154, 1.55319149])], + ] actual = cube.scale_means() - np.testing.assert_almost_equal(actual, expected) + assert_scale_means_equal(actual, expected) def test_means_cat_x_cat_arr_subvars_first(self): cube = CrunchCube(FRUIT_X_PETS_ARRAY_SUBVARS_FIRST) - expected = np.array([ - [1.71111111, 1.6, 1.65625], - [1.64705882, 1.7, 1.68421053], - ]) + expected = [ + [np.array([1.71111111, 1.6, 1.65625]), None], + [np.array([1.64705882, 1.7, 1.68421053]), None], + ] actual = cube.scale_means() - np.testing.assert_almost_equal(actual, expected) + assert_scale_means_equal(actual, expected) def test_means_cat_x_cat_arr_pets_first(self): cube = CrunchCube(FRUIT_X_PETS_ARRAY_PETS_FIRST) - expected = np.array([ - [1.48, 1.40740741], - [1.42857143, 1.53846154], - [1.52173913, 1.55319149], - ]) + expected = [ + [np.array([1.48, 1.40740741]), np.array([1.71111111, 1.64705882])], + [np.array([1.42857143, 1.53846154]), np.array([1.6, 1.7])], + [np.array([1.52173913, 1.55319149]), 
np.array([1.65625, 1.68421053])], + ] actual = cube.scale_means() - np.testing.assert_almost_equal(actual, expected) + assert_scale_means_equal(actual, expected) def test_means_with_null_values(self): cube = CrunchCube(SCALE_WITH_NULL_VALUES) - expected = np.array([1.2060688, 1.0669344, 1.023199]) + expected = [[np.array([1.2060688, 1.0669344, 1.023199]), None]] actual = cube.scale_means() - np.testing.assert_almost_equal(actual, expected) + assert_scale_means_equal(actual, expected) def test_values_services(self): cube = CrunchCube(VALUE_SERVICES) diff --git a/tests/integration/test_scale_means.py b/tests/integration/test_scale_means.py index 5bab73905..c815c4104 100644 --- a/tests/integration/test_scale_means.py +++ b/tests/integration/test_scale_means.py @@ -12,70 +12,75 @@ from .fixtures.cubes.scale_means import MR_X_CAT from .fixtures.cubes.scale_means import UNIVARIATE_CAT +from . import assert_scale_means_equal + def test_ca_cat_x_items(): cube = CrunchCube(CA_CAT_X_ITEMS) - expected = np.array([1.50454821, 3.11233766, 3.35788192, 3.33271833]) + expected = [[np.array([1.50454821, 3.11233766, 3.35788192, 3.33271833]), None]] actual = cube.scale_means() - np.testing.assert_almost_equal(actual, expected) + assert_scale_means_equal(actual, expected) def test_ca_items_x_cat(): cube = CrunchCube(CA_ITEMS_X_CAT) - expected = np.array([1.50454821, 3.11233766, 3.35788192, 3.33271833]) + expected = [[None, np.array([1.50454821, 3.11233766, 3.35788192, 3.33271833])]] actual = cube.scale_means() - np.testing.assert_almost_equal(actual, expected) + assert_scale_means_equal(actual, expected) def test_ca_x_mr(): cube = CrunchCube(CA_X_MR) - expected = np.array([ - [1.29787234, 1.8 , 1.48730964, np.nan], # noqa - [3.31746032, 3.10743802, 3.09976976, np.nan], - [3.31205674, 3.23913043, 3.37745455, np.nan], - [3.53676471, 3.34814815, 3.3147877 , np.nan], # noqa - ]) + expected = [ + [np.array([1.29787234, 1.8 , 1.48730964, np.nan]), None], # noqa + [np.array([3.31746032, 
3.10743802, 3.09976976, np.nan]), None], # noqa + [np.array([3.31205674, 3.23913043, 3.37745455, np.nan]), None], # noqa + [np.array([3.53676471, 3.34814815, 3.3147877 , np.nan]), None], # noqa + ] actual = cube.scale_means() - np.testing.assert_almost_equal(actual, expected) + assert_scale_means_equal(actual, expected) def test_cat_x_ca_cat_x_items(): cube = CrunchCube(CAT_X_CA_CAT_X_ITEMS) - expected = np.array([ - [1.34545455, 2.46938776, 2.7037037 , 2.65454545], # noqa - [1.41935484, 3.25663717, 3.48 , 3.58536585], # noqa - [1.49429038, 3.44905009, 3.59344262, 3.53630363], - [1.43365696, 3.02816901, 3.37987013, 3.32107023], - [1.22670025, 2.49473684, 2.79848866, 2.78987342], - [2.53061224, 3.68421053, 3.9862069 , 4.03472222], # noqa - ]) + expected = [ + [np.array([1.34545455, 2.46938776, 2.7037037 , 2.65454545]), None], # noqa + [np.array([1.41935484, 3.25663717, 3.48 , 3.58536585]), None], # noqa + [np.array([1.49429038, 3.44905009, 3.59344262, 3.53630363]), None], # noqa + [np.array([1.43365696, 3.02816901, 3.37987013, 3.32107023]), None], # noqa + [np.array([1.22670025, 2.49473684, 2.79848866, 2.78987342]), None], # noqa + [np.array([2.53061224, 3.68421053, 3.9862069 , 4.03472222]), None], # noqa + ] actual = cube.scale_means() - np.testing.assert_almost_equal(actual, expected) + assert_scale_means_equal(actual, expected) def test_cat_x_cat(): cube = CrunchCube(CAT_X_CAT) - expected = np.array([2.61411765, 2.34285714, 2.33793103, 3.38461538]) + expected = [[ + np.array([2.6009281, 2.3522267, 2.3197279, 3.3949192]), + np.array([1.43636364, 2.45238095, 2.4730832 , 2.68387097, 2.8375, 2.15540541]), + ]] actual = cube.scale_means() - np.testing.assert_almost_equal(actual, expected) + assert_scale_means_equal(actual, expected) def test_cat_x_mr(): cube = CrunchCube(CAT_X_MR) - expected = np.array([2.45070423, 2.54471545, 2.54263006, np.nan]) + expected = [[np.array([2.45070423, 2.54471545, 2.54263006, np.nan]), None]] actual = cube.scale_means() - 
np.testing.assert_almost_equal(actual, expected) + assert_scale_means_equal(actual, expected) def test_mr_x_cat(): cube = CrunchCube(MR_X_CAT) - expected = np.array([2.45070423, 2.54471545, 2.54263006, np.nan]) + expected = [[None, np.array([2.45070423, 2.54471545, 2.54263006, np.nan])]] actual = cube.scale_means() - np.testing.assert_almost_equal(actual, expected) + assert_scale_means_equal(actual, expected) def test_univariate_cat(): cube = CrunchCube(UNIVARIATE_CAT) - expected = np.array([2.6865854]) + expected = [[np.array([2.6865854])]] actual = cube.scale_means() - np.testing.assert_almost_equal(actual, expected) + assert_scale_means_equal(actual, expected) From e4a36029d1f18bf93a336c60727efe4e7960df0d Mon Sep 17 00:00:00 2001 From: Slobodan Ilic Date: Thu, 2 Aug 2018 18:53:32 +0200 Subject: [PATCH 07/14] scale_means for cube slices --- src/cr/cube/cube_slice.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/cr/cube/cube_slice.py b/src/cr/cube/cube_slice.py index c51cb0cd1..e8e8ba43c 100644 --- a/src/cr/cube/cube_slice.py +++ b/src/cr/cube/cube_slice.py @@ -245,3 +245,6 @@ def is_double_mr(self): properties of the slices. 
''' return self.dim_types == ['multiple_response'] * 2 + + def scale_means(self): + return self._cube.scale_means()[self._index] From eeaf33ab7843c6391480846dd695da276a8f70f6 Mon Sep 17 00:00:00 2001 From: Slobodan Ilic Date: Thu, 2 Aug 2018 22:24:44 +0200 Subject: [PATCH 08/14] Fix accounting for NaNs in scale means --- src/cr/cube/measures/scale_means.py | 25 ++++++++++++------------- 1 file changed, 12 insertions(+), 13 deletions(-) diff --git a/src/cr/cube/measures/scale_means.py b/src/cr/cube/measures/scale_means.py index 59ac11d26..4b2de050a 100644 --- a/src/cr/cube/measures/scale_means.py +++ b/src/cr/cube/measures/scale_means.py @@ -24,20 +24,9 @@ def data(self): means.append(product) continue - # Eliminate missings - nans = np.isnan(product) - if len(product.shape) == 1: - product = product[~nans] - table = table[~nans] - else: - ind_rows = ~nans.any(axis=1) - ind_cols = ~nans[ind_rows].any(axis=0) - product = product[ind_rows][:, ind_cols] - table = table[ind_rows][:, ind_cols] - # Calculate means - num = np.sum(product, axis) - den = np.sum(table, axis) + num = np.sum(product[self.valid_indices(axis)], axis) + den = np.sum(table[self.valid_indices(axis)], axis) mean = num / den if not isinstance(mean, np.ndarray): mean = np.array([mean]) @@ -56,6 +45,16 @@ def values(self): for dim in self._slice.dimensions ] + def valid_indices(self, axis): + return [ + ( + ~np.isnan(np.array(dim.values)) + if dim.values and any(~np.isnan(dim.values)) and axis == i else + slice(None) + ) + for i, dim in enumerate(self._slice.dimensions) + ] + def _inner_prods(self, contents, values): products = [] for i, numeric in enumerate(values): From d1d2a98ce6f81434be9241496d4ce670497d1e44 Mon Sep 17 00:00:00 2001 From: Slobodan Ilic Date: Fri, 3 Aug 2018 09:57:42 +0200 Subject: [PATCH 09/14] Support H&S in ScaleMeans --- src/cr/cube/crunch_cube.py | 16 ++++++++++++++-- src/cr/cube/cube_slice.py | 4 ++-- 2 files changed, 16 insertions(+), 4 deletions(-) diff --git 
a/src/cr/cube/crunch_cube.py b/src/cr/cube/crunch_cube.py index 34a1f7161..b6fcfc472 100644 --- a/src/cr/cube/crunch_cube.py +++ b/src/cr/cube/crunch_cube.py @@ -1126,9 +1126,21 @@ def pvals(self, weighted=True, prune=False, hs_dims=None): return res - def scale_means(self): + def scale_means(self, hs_dims=None): '''Get cube means.''' - return [ScaleMeans(slice_).data for slice_ in self.slices] + inserted_indices = self.inserted_hs_indices() + slices_means = [ScaleMeans(slice_).data for slice_ in self.slices] + for scale_means in slices_means: + if scale_means[0] is not None and 0 in hs_dims and inserted_indices[0]: + for i in inserted_indices[0]: + scale_means[0] = np.insert(scale_means[0], i, np.nan) + if len(scale_means) == 1 or len(inserted_indices) == 1: + continue + if scale_means[1] is not None and 1 in hs_dims and inserted_indices[1]: + for i in inserted_indices[1]: + scale_means[1] = np.insert(scale_means[1], i, np.nan) + + return slices_means def get_slices(self, ca_as_0th=False): if self.ndim < 3 and not ca_as_0th: diff --git a/src/cr/cube/cube_slice.py b/src/cr/cube/cube_slice.py index e8e8ba43c..ae08debb6 100644 --- a/src/cr/cube/cube_slice.py +++ b/src/cr/cube/cube_slice.py @@ -246,5 +246,5 @@ def is_double_mr(self): ''' return self.dim_types == ['multiple_response'] * 2 - def scale_means(self): - return self._cube.scale_means()[self._index] + def scale_means(self, hs_dims=None): + return self._cube.scale_means(hs_dims)[self._index] From 96afccd3f2d2a693e9a8fd2c54b3ba9abc968642 Mon Sep 17 00:00:00 2001 From: Slobodan Ilic Date: Fri, 3 Aug 2018 12:54:01 +0200 Subject: [PATCH 10/14] Fix bug when hs_dims not provided --- src/cr/cube/crunch_cube.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/cr/cube/crunch_cube.py b/src/cr/cube/crunch_cube.py index b6fcfc472..920a192ef 100644 --- a/src/cr/cube/crunch_cube.py +++ b/src/cr/cube/crunch_cube.py @@ -1128,8 +1128,12 @@ def pvals(self, weighted=True, prune=False, hs_dims=None): def 
scale_means(self, hs_dims=None): '''Get cube means.''' - inserted_indices = self.inserted_hs_indices() slices_means = [ScaleMeans(slice_).data for slice_ in self.slices] + + if not hs_dims: + return slices_means + + inserted_indices = self.inserted_hs_indices() for scale_means in slices_means: if scale_means[0] is not None and 0 in hs_dims and inserted_indices[0]: for i in inserted_indices[0]: From a33caa17ab513616bbba7df4c80313789a9e1731 Mon Sep 17 00:00:00 2001 From: Slobodan Ilic Date: Fri, 3 Aug 2018 17:26:58 +0200 Subject: [PATCH 11/14] Bring coverage to 100% --- src/cr/cube/crunch_cube.py | 7 +- tests/integration/test_scale_means.py | 102 ++++++++++++++++++++++++++ tests/unit/test_crunch_cube.py | 30 ++++++++ tests/unit/test_cube_slice.py | 26 +++++++ 4 files changed, 159 insertions(+), 6 deletions(-) diff --git a/src/cr/cube/crunch_cube.py b/src/cr/cube/crunch_cube.py index 920a192ef..4a7a58161 100644 --- a/src/cr/cube/crunch_cube.py +++ b/src/cr/cube/crunch_cube.py @@ -1082,9 +1082,6 @@ def zscore(self, weighted=True, prune=False, hs_dims=None): if hs_dims: res = self._intersperse_hs_in_std_res(hs_dims, res) - arr = self.as_array(include_transforms_for_dims=hs_dims) - if isinstance(arr, np.ma.core.MaskedArray): - res = np.ma.masked_array(res, mask=arr.mask) if prune: arr = self.as_array( @@ -1130,7 +1127,7 @@ def scale_means(self, hs_dims=None): '''Get cube means.''' slices_means = [ScaleMeans(slice_).data for slice_ in self.slices] - if not hs_dims: + if not hs_dims or self.ndim == 1: return slices_means inserted_indices = self.inserted_hs_indices() @@ -1138,8 +1135,6 @@ def scale_means(self, hs_dims=None): if scale_means[0] is not None and 0 in hs_dims and inserted_indices[0]: for i in inserted_indices[0]: scale_means[0] = np.insert(scale_means[0], i, np.nan) - if len(scale_means) == 1 or len(inserted_indices) == 1: - continue if scale_means[1] is not None and 1 in hs_dims and inserted_indices[1]: for i in inserted_indices[1]: scale_means[1] = 
np.insert(scale_means[1], i, np.nan) diff --git a/tests/integration/test_scale_means.py b/tests/integration/test_scale_means.py index c815c4104..28db9c135 100644 --- a/tests/integration/test_scale_means.py +++ b/tests/integration/test_scale_means.py @@ -11,6 +11,9 @@ from .fixtures.cubes.scale_means import CAT_X_MR from .fixtures.cubes.scale_means import MR_X_CAT from .fixtures.cubes.scale_means import UNIVARIATE_CAT +from .fixtures import ECON_BLAME_X_IDEOLOGY_ROW_HS +from .fixtures import ECON_BLAME_WITH_HS +from .fixtures import ECON_BLAME_X_IDEOLOGY_ROW_AND_COL_HS from . import assert_scale_means_equal @@ -20,6 +23,9 @@ def test_ca_cat_x_items(): expected = [[np.array([1.50454821, 3.11233766, 3.35788192, 3.33271833]), None]] actual = cube.scale_means() assert_scale_means_equal(actual, expected) + # Test for cube slices + actual = cube.slices[0].scale_means() + assert_scale_means_equal(actual, expected[0]) def test_ca_items_x_cat(): @@ -27,6 +33,9 @@ def test_ca_items_x_cat(): expected = [[None, np.array([1.50454821, 3.11233766, 3.35788192, 3.33271833])]] actual = cube.scale_means() assert_scale_means_equal(actual, expected) + # Test for cube slices + actual = cube.slices[0].scale_means() + assert_scale_means_equal(actual, expected[0]) def test_ca_x_mr(): @@ -39,6 +48,9 @@ def test_ca_x_mr(): ] actual = cube.scale_means() assert_scale_means_equal(actual, expected) + # Test for cube slices + actual = cube.slices[0].scale_means() + assert_scale_means_equal(actual, expected[0]) def test_cat_x_ca_cat_x_items(): @@ -53,6 +65,15 @@ def test_cat_x_ca_cat_x_items(): ] actual = cube.scale_means() assert_scale_means_equal(actual, expected) + # Test for cube slices + actual = cube.slices[0].scale_means() + assert_scale_means_equal(actual, expected[0]) + + actual = cube.scale_means(hs_dims=[0, 1]) + assert_scale_means_equal(actual, expected) + # Test for cube slices + actual = cube.slices[0].scale_means(hs_dims=[0, 1]) + assert_scale_means_equal(actual, expected[0]) def 
test_cat_x_cat(): @@ -63,6 +84,9 @@ def test_cat_x_cat(): ]] actual = cube.scale_means() assert_scale_means_equal(actual, expected) + # Test for cube slices + actual = cube.slices[0].scale_means() + assert_scale_means_equal(actual, expected[0]) def test_cat_x_mr(): @@ -70,6 +94,9 @@ def test_cat_x_mr(): expected = [[np.array([2.45070423, 2.54471545, 2.54263006, np.nan]), None]] actual = cube.scale_means() assert_scale_means_equal(actual, expected) + # Test for cube slices + actual = cube.slices[0].scale_means() + assert_scale_means_equal(actual, expected[0]) def test_mr_x_cat(): @@ -77,6 +104,9 @@ def test_mr_x_cat(): expected = [[None, np.array([2.45070423, 2.54471545, 2.54263006, np.nan])]] actual = cube.scale_means() assert_scale_means_equal(actual, expected) + # Test for cube slices + actual = cube.slices[0].scale_means() + assert_scale_means_equal(actual, expected[0]) def test_univariate_cat(): @@ -84,3 +114,75 @@ def test_univariate_cat(): expected = [[np.array([2.6865854])]] actual = cube.scale_means() assert_scale_means_equal(actual, expected) + # Test for cube slices + actual = cube.slices[0].scale_means() + assert_scale_means_equal(actual, expected[0]) + +def test_cat_x_cat_with_hs(): + cube = CrunchCube(ECON_BLAME_X_IDEOLOGY_ROW_HS) + + # Test without H&S + expected = [[ + np.array([2.19444444, 2.19230769, 2.26666667, 1.88990826, 1.76363636, 3.85]), + np.array([3.87368421, 2.51767677, 3.38429752, 3.66666667, 4.13235294]), + ]] + actual = cube.scale_means() + assert_scale_means_equal(actual, expected) + # Test for cube slices + actual = cube.slices[0].scale_means() + assert_scale_means_equal(actual, expected[0]) + + # Test with H&S + expected = [[ + np.array([2.19444444, 2.19230769, np.nan, 2.26666667, 1.88990826, 1.76363636, 3.85]), + np.array([3.87368421, 2.51767677, 3.38429752, 3.66666667, 4.13235294]), + ]] + actual = cube.scale_means(hs_dims=[0, 1]) + assert_scale_means_equal(actual, expected) + # Test for cube slices + actual = 
cube.slices[0].scale_means(hs_dims=[0, 1]) + assert_scale_means_equal(actual, expected[0]) + +def test_univariate_with_hs(): + cube = CrunchCube(ECON_BLAME_WITH_HS) + + # Test without H&S + expected = [[np.array([2.17352056])]] + actual = cube.scale_means() + assert_scale_means_equal(actual, expected) + # Test for cube slices + actual = cube.slices[0].scale_means() + assert_scale_means_equal(actual, expected[0]) + + # Test with H&S + expected = [[np.array([2.17352056])]] + actual = cube.scale_means(hs_dims=[0]) + assert_scale_means_equal(actual, expected) + # Test for cube slices + actual = cube.slices[0].scale_means(hs_dims=[0, 1]) + assert_scale_means_equal(actual, expected[0]) + +def test_cat_x_cat_with_hs_on_both_dims(): + cube = CrunchCube(ECON_BLAME_X_IDEOLOGY_ROW_AND_COL_HS) + + # Test without H&S + expected =[[ + np.array([2.19444444, 2.19230769, 2.26666667, 1.88990826, 1.76363636, 3.85]), + np.array([3.87368421, 2.51767677, 3.38429752, 3.66666667, 4.13235294]), + ]] + actual = cube.scale_means() + assert_scale_means_equal(actual, expected) + # Test for cube slices + actual = cube.slices[0].scale_means() + assert_scale_means_equal(actual, expected[0]) + + # Test with H&S + expected =[[ + np.array([2.19444444, 2.19230769, np.nan, 2.26666667, 1.88990826, 1.76363636, 3.85]), + np.array([3.87368421, 2.51767677, 3.38429752, np.nan, 3.66666667, 4.13235294]), + ]] + actual = cube.scale_means(hs_dims=[0, 1]) + assert_scale_means_equal(actual, expected) + # Test for cube slices + actual = cube.slices[0].scale_means(hs_dims=[0, 1]) + assert_scale_means_equal(actual, expected[0]) diff --git a/tests/unit/test_crunch_cube.py b/tests/unit/test_crunch_cube.py index 5874f80cb..0375409d8 100644 --- a/tests/unit/test_crunch_cube.py +++ b/tests/unit/test_crunch_cube.py @@ -901,3 +901,33 @@ def test_axis_allowed_mr_x_mr_x_cat(self): assert is_allowed(2) assert is_allowed(None) assert is_allowed((1, 2)) + + @patch('cr.cube.crunch_cube.CrunchCube.all_dimensions', [ + 
Mock(type='categorical_array', is_selections=False), + Mock(type='categorical', is_selections=True), + ]) + def test_ca_dim_ind_is_zero(self): + cc = CrunchCube({}) + actual = cc.ca_dim_ind + expected = 0 + assert actual == expected + + @patch('cr.cube.crunch_cube.CrunchCube.all_dimensions', [ + Mock(type='categorical', is_selections=False), + Mock(type='categorical_array', is_selections=True), + ]) + def test_ca_dim_ind_is_one(self): + cc = CrunchCube({}) + actual = cc.ca_dim_ind + expected = 1 + assert actual == expected + + @patch('cr.cube.crunch_cube.CrunchCube.all_dimensions', [ + Mock(type='categorical', is_selections=False), + Mock(type='categorical', is_selections=True), + ]) + def test_ca_dim_ind_is_none(self): + cc = CrunchCube({}) + actual = cc.ca_dim_ind + expected = None + assert actual == expected \ No newline at end of file diff --git a/tests/unit/test_cube_slice.py b/tests/unit/test_cube_slice.py index 59be09294..7003bb615 100644 --- a/tests/unit/test_cube_slice.py +++ b/tests/unit/test_cube_slice.py @@ -347,3 +347,29 @@ def test_is_double_mr(self): cs = CubeSlice(cube, 0) # Not double MR because the 0th dims is 'just' tabs assert not cs.is_double_mr + + def test_ca_dim_ind(self): + '''Test that the slice reports the correct CA dimension index.''' + cube = Mock() + + cube.ca_dim_ind = None + cs = CubeSlice(cube, 0) + assert cs.ca_dim_ind == None + + cube.ca_dim_ind = 0 + cs = CubeSlice(cube, 0) + assert cs.ca_dim_ind == 0 + + cube.ca_dim_ind = 1 + cs = CubeSlice(cube, 0) + assert cs.ca_dim_ind == 1 + + cube.ndim = 3 + cube.ca_dim_ind = 1 + cs = CubeSlice(cube, 0) + assert cs.ca_dim_ind == 0 + + cube.ndim = 3 + cube.ca_dim_ind = 0 + cs = CubeSlice(cube, 0) + assert cs.ca_dim_ind == None From 984054c824884dd4e962f1234eb6b23fbd7b869f Mon Sep 17 00:00:00 2001 From: Slobodan Ilic Date: Mon, 6 Aug 2018 13:36:11 +0200 Subject: [PATCH 12/14] Add failing test for ScaleMeans with pruning --- tests/integration/test_scale_means.py | 55 +++++++++++++++++++++++++++ 1 file changed, 55
insertions(+) diff --git a/tests/integration/test_scale_means.py b/tests/integration/test_scale_means.py index 28db9c135..6e0a81013 100644 --- a/tests/integration/test_scale_means.py +++ b/tests/integration/test_scale_means.py @@ -14,6 +14,8 @@ from .fixtures import ECON_BLAME_X_IDEOLOGY_ROW_HS from .fixtures import ECON_BLAME_WITH_HS from .fixtures import ECON_BLAME_X_IDEOLOGY_ROW_AND_COL_HS +from .fixtures import CA_X_MR_HS +from .fixtures import CAT_X_CAT_PRUNING_HS from . import assert_scale_means_equal @@ -186,3 +188,56 @@ def test_cat_x_cat_with_hs_on_both_dims(): # Test for cube slices actual = cube.slices[0].scale_means(hs_dims=[0, 1]) assert_scale_means_equal(actual, expected[0]) + +def test_ca_x_mr_with_hs_and_pruning(): + cube = CrunchCube(CA_X_MR_HS) + expected = [ + [np.array([2.50818336, 2.56844883, 2.90251939, np.nan]), None], + [np.array([2.78385708, 2.69292009, 3.11594714, np.nan]), None], + [np.array([np.nan, np.nan, np.nan, np.nan]), None], + ] + actual = cube.scale_means() + assert_scale_means_equal(actual, expected) + expected = [ + [np.array([2.50818336, 2.56844883, 2.90251939]), None], + [np.array([2.78385708, 2.69292009, 3.11594714]), None], + [np.array([]), None], + ] + actual = cube.scale_means(prune=True) + assert_scale_means_equal(actual, expected) + actual = cube.scale_means(prune=True, hs_dims=[0, 1]) + assert_scale_means_equal(actual, expected) + +def test_cat_x_cat_pruning_and_hs(): + cube = CrunchCube(CAT_X_CAT_PRUNING_HS) + expected = [[ + np.array([1.57933884, 2.10618401, 2.30460074, np.nan, 2.34680135]), + np.array([1.74213625, 1.97 , 2.45356177, 2.11838791, np.nan, 2.]), + ]] + actual = cube.scale_means() + + # Just H&S + assert_scale_means_equal(actual, expected) + expected = [[ + np.array([1.57933884, np.nan, 2.10618401, 2.30460074, np.nan, 2.34680135]), + np.array([1.74213625, np.nan, 1.97 , 2.45356177, 2.11838791, np.nan, 2.]), + ]] + actual = cube.scale_means(hs_dims=[0, 1]) + + # Just pruning + 
assert_scale_means_equal(actual, expected) + expected = [[ + np.array([1.57933884, 2.10618401, 2.30460074, 2.34680135]), + np.array([1.74213625, 1.97 , 2.45356177, 2.11838791, 2.]), + ]] + actual = cube.scale_means(prune=True) + assert_scale_means_equal(actual, expected) + + # Pruning and H&S + assert_scale_means_equal(actual, expected) + expected = [[ + np.array([1.57933884, np.nan, 2.10618401, 2.30460074, 2.34680135]), + np.array([1.74213625, np.nan, 1.97 , 2.45356177, 2.11838791, 2.]), + ]] + actual = cube.scale_means(hs_dims=[0, 1], prune=True) + assert_scale_means_equal(actual, expected) From 8b269f8be19b6d0f12c407cc9f3bf51140defd50 Mon Sep 17 00:00:00 2001 From: Slobodan Ilic Date: Mon, 6 Aug 2018 13:36:50 +0200 Subject: [PATCH 13/14] Implement pruning for scale means --- src/cr/cube/crunch_cube.py | 35 +++++++++++++++++++++++------------ src/cr/cube/cube_slice.py | 4 ++-- 2 files changed, 25 insertions(+), 14 deletions(-) diff --git a/src/cr/cube/crunch_cube.py b/src/cr/cube/crunch_cube.py index 4a7a58161..071bc9f7d 100644 --- a/src/cr/cube/crunch_cube.py +++ b/src/cr/cube/crunch_cube.py @@ -1123,22 +1123,33 @@ def pvals(self, weighted=True, prune=False, hs_dims=None): return res - def scale_means(self, hs_dims=None): + def scale_means(self, hs_dims=None, prune=False): '''Get cube means.''' slices_means = [ScaleMeans(slice_).data for slice_ in self.slices] - if not hs_dims or self.ndim == 1: - return slices_means - - inserted_indices = self.inserted_hs_indices() - for scale_means in slices_means: - if scale_means[0] is not None and 0 in hs_dims and inserted_indices[0]: - for i in inserted_indices[0]: - scale_means[0] = np.insert(scale_means[0], i, np.nan) - if scale_means[1] is not None and 1 in hs_dims and inserted_indices[1]: - for i in inserted_indices[1]: - scale_means[1] = np.insert(scale_means[1], i, np.nan) + if hs_dims and self.ndim > 1: + # Intersperse scale means with nans if H&S specified + inserted_indices = self.inserted_hs_indices() + for 
scale_means in slices_means: + if scale_means[0] is not None and 0 in hs_dims and inserted_indices[0]: + for i in inserted_indices[0]: + scale_means[0] = np.insert(scale_means[0], i, np.nan) + if scale_means[1] is not None and 1 in hs_dims and inserted_indices[1]: + for i in inserted_indices[1]: + scale_means[1] = np.insert(scale_means[1], i, np.nan) + if prune: + # Apply pruning + arr = self.as_array(include_transforms_for_dims=hs_dims, prune=True) + if isinstance(arr, np.ma.core.MaskedArray): + mask = arr.mask + for i, scale_means in enumerate(slices_means): + if scale_means[0] is not None: + row_mask = mask.all(axis=0) if self.ndim < 3 else mask.all(axis=1)[i] + scale_means[0] = scale_means[0][~row_mask] + if self.ndim > 1 and scale_means[1] is not None: + col_mask = mask.all(axis=1) if self.ndim < 3 else mask.all(axis=2)[i] + scale_means[1] = scale_means[1][~col_mask] return slices_means def get_slices(self, ca_as_0th=False): diff --git a/src/cr/cube/cube_slice.py b/src/cr/cube/cube_slice.py index ae08debb6..3391558bb 100644 --- a/src/cr/cube/cube_slice.py +++ b/src/cr/cube/cube_slice.py @@ -246,5 +246,5 @@ def is_double_mr(self): ''' return self.dim_types == ['multiple_response'] * 2 - def scale_means(self, hs_dims=None): - return self._cube.scale_means(hs_dims)[self._index] + def scale_means(self, hs_dims=None, prune=False): + return self._cube.scale_means(hs_dims, prune)[self._index] From 01520170bc27a2e2d612f6bf8e111ccf4478c25d Mon Sep 17 00:00:00 2001 From: Slobodan Ilic Date: Wed, 8 Aug 2018 08:42:49 +0200 Subject: [PATCH 14/14] Correct expectations and fix H&S in scale means --- src/cr/cube/crunch_cube.py | 18 ++++++++++++------ tests/integration/test_scale_means.py | 8 ++++---- 2 files changed, 16 insertions(+), 10 deletions(-) diff --git a/src/cr/cube/crunch_cube.py b/src/cr/cube/crunch_cube.py index 071bc9f7d..6a0685836 100644 --- a/src/cr/cube/crunch_cube.py +++ b/src/cr/cube/crunch_cube.py @@ -1128,14 +1128,20 @@ def scale_means(self, hs_dims=None, 
prune=False): slices_means = [ScaleMeans(slice_).data for slice_ in self.slices] if hs_dims and self.ndim > 1: - # Intersperse scale means with nans if H&S specified - inserted_indices = self.inserted_hs_indices() + # Intersperse scale means with nans if H&S specified, and 2D. No + # need to modify 1D, as only one mean will ever be inserted. + inserted_indices = self.inserted_hs_indices()[-2:] for scale_means in slices_means: - if scale_means[0] is not None and 0 in hs_dims and inserted_indices[0]: - for i in inserted_indices[0]: - scale_means[0] = np.insert(scale_means[0], i, np.nan) - if scale_means[1] is not None and 1 in hs_dims and inserted_indices[1]: + # Scale means 0 corresponds to the column dimension (is + # calculated by using its values). The result of it, however, + # is a row. That's why we need to check the insertions on the + # row dim (inserted columns). + if scale_means[0] is not None and 1 in hs_dims and inserted_indices[1]: for i in inserted_indices[1]: + scale_means[0] = np.insert(scale_means[0], i, np.nan) + # Scale means 1 is a column, so we need to check for row insertions. 
+ if scale_means[1] is not None and 0 in hs_dims and inserted_indices[0]: + for i in inserted_indices[0]: scale_means[1] = np.insert(scale_means[1], i, np.nan) if prune: diff --git a/tests/integration/test_scale_means.py b/tests/integration/test_scale_means.py index 6e0a81013..7ab2964d9 100644 --- a/tests/integration/test_scale_means.py +++ b/tests/integration/test_scale_means.py @@ -136,8 +136,8 @@ def test_cat_x_cat_with_hs(): # Test with H&S expected = [[ - np.array([2.19444444, 2.19230769, np.nan, 2.26666667, 1.88990826, 1.76363636, 3.85]), - np.array([3.87368421, 2.51767677, 3.38429752, 3.66666667, 4.13235294]), + np.array([2.19444444, 2.19230769, 2.26666667, 1.88990826, 1.76363636, 3.85]), + np.array([3.87368421, 2.51767677, np.nan, 3.38429752, 3.66666667, 4.13235294]), ]] actual = cube.scale_means(hs_dims=[0, 1]) assert_scale_means_equal(actual, expected) @@ -180,8 +180,8 @@ def test_cat_x_cat_with_hs_on_both_dims(): # Test with H&S expected =[[ - np.array([2.19444444, 2.19230769, np.nan, 2.26666667, 1.88990826, 1.76363636, 3.85]), - np.array([3.87368421, 2.51767677, 3.38429752, np.nan, 3.66666667, 4.13235294]), + np.array([2.19444444, 2.19230769, 2.26666667, np.nan, 1.88990826, 1.76363636, 3.85]), + np.array([3.87368421, 2.51767677, np.nan, 3.38429752, 3.66666667, 4.13235294]), ]] actual = cube.scale_means(hs_dims=[0, 1]) assert_scale_means_equal(actual, expected)