From ecb6dc6ada4bd7f3a0e77088cff92212bb1152c8 Mon Sep 17 00:00:00 2001 From: Steve Canny Date: Tue, 30 Oct 2018 12:05:34 -0700 Subject: [PATCH] test: add xfail for #161344294 * Add single-cube fixture that triggers "mask/data size mismatch" error. * Add integration test that uses the fixture to trigger that error. --- tests/integration/fixtures/__init__.py | 3 +- .../fixtures/cubes/cat-x-mr-sentry.json | 286 ++++++++++++++++++ tests/integration/test_crunch_cube.py | 22 +- 3 files changed, 306 insertions(+), 5 deletions(-) create mode 100644 tests/integration/fixtures/cubes/cat-x-mr-sentry.json diff --git a/tests/integration/fixtures/__init__.py b/tests/integration/fixtures/__init__.py index e22ebf952..4686b4cbc 100644 --- a/tests/integration/fixtures/__init__.py +++ b/tests/integration/fixtures/__init__.py @@ -36,10 +36,11 @@ def _load(cube_file): CA_SUBVAR_HS_X_MR_X_CA_CAT = _load('ca-subvar-hs-x-mr-x-ca-cat.json') CAT_X_NUM_X_DATETIME = _load('cat-x-num-x-datetime.json') SIMPLE_MR = _load('simple-mr.json') -CAT_X_MR_SIMPLE = _load('cat-x-mr.json') CAT_X_MR_PRUNED_ROW = _load('cat-x-mr-pruned-row.json') CAT_X_MR_PRUNED_COL = _load('cat-x-mr-pruned-col.json') CAT_X_MR_PRUNED_ROW_COL = _load('cat-x-mr-pruned-row-col.json') +CAT_X_MR_SIMPLE = _load('cat-x-mr.json') +CAT_X_MR_SENTRY = _load('cat-x-mr-sentry.json') MR_X_CAT_PRUNED_COL = _load('mr-x-cat-pruned-col.json') MR_X_CAT_PRUNED_ROW = _load('mr-x-cat-pruned-row.json') MR_X_CAT_PRUNED_ROW_COL = _load('mr-x-cat-pruned-row-col.json') diff --git a/tests/integration/fixtures/cubes/cat-x-mr-sentry.json b/tests/integration/fixtures/cubes/cat-x-mr-sentry.json new file mode 100644 index 000000000..ae1403f55 --- /dev/null +++ b/tests/integration/fixtures/cubes/cat-x-mr-sentry.json @@ -0,0 +1,286 @@ +{ + "result": { + "counts": [ + 0, + 108, + 1, + 14, + 94, + 1, + 94, + 14, + 1, + 192, + 791, + 41, + 406, + 577, + 41, + 385, + 598, + 41 + ], + "dimensions": [ + { + "derived": true, + "references": { + "alias": "wealth_rc1", + "description": "", + "name": "Emerging Affluent", + "notes": "" + }, + "type": { + "categories": [ + { + "id": 1, + "missing": false, + "name": "Emerging Affluent", + "numeric_value": null + }, + { + "id": -1, + "missing": true, + "name": "No Data", + "numeric_value": null + } + ], + "class": "categorical", + "ordinal": false + } + }, + { + "derived": true, + "references": { + "alias": "localincome_hk_rc", + "description": "Monthly household income", + "name": "Monthly household income (NET)", + "notes": "", + "subreferences": [ + { + "alias": "localincome_hk_rc_1", + "name": "Low (below HKD 25,000)" + }, + { + "alias": "localincome_hk_rc_2", + "name": "Mid (HKD 25,000 - 49,999)" + }, + { + "alias": "localincome_hk_rc_3", + "name": "High (HKD 50,000 or above)" + } + ] + }, + "type": { + "class": "enum", + "elements": [ + { + "id": 1, + "missing": false, + "value": { + "derived": false, + "id": "0001", + "references": { + "alias": "localincome_hk_rc_1", + "name": "Low (below HKD 25,000)" + }, + "type": { + "categories": [ + { + "id": 1, + "missing": false, + "name": "Selected", + "numeric_value": 1, + "selected": true + }, + { + "id": 0, + "missing": false, + "name": "Other", + "numeric_value": 0 + }, + { + "id": -1, + "missing": true, + "name": "No Data", + "numeric_value": null + } + ], + "class": "categorical", + "ordinal": false + } + } + }, + { + "id": 2, + "missing": false, + "value": { + "derived": false, + "id": "0002", + "references": { + "alias": "localincome_hk_rc_2", + "name": "Mid (HKD 25,000 - 49,999)" + }, + "type": { + "categories": [ + { + "id": 1, + "missing": false, + "name": "Selected", + "numeric_value": 1, + "selected": true + }, + { + "id": 0, + "missing": false, + "name": "Other", + "numeric_value": 0 + }, + { + "id": -1, + "missing": true, + "name": "No Data", + "numeric_value": null + } + ], + "class": "categorical", + "ordinal": false + } + } + }, + { + "id": 3, + "missing": false, + "value": { + "derived": false, + "id": "0003", + "references": { + "alias": "localincome_hk_rc_3", + "name": "High (HKD 50,000 or above)" + }, + "type": { + "categories": [ + { + "id": 1, + "missing": false, + "name": "Selected", + "numeric_value": 1, + "selected": true + }, + { + "id": 0, + "missing": false, + "name": "Other", + "numeric_value": 0 + }, + { + "id": -1, + "missing": true, + "name": "No Data", + "numeric_value": null + } + ], + "class": "categorical", + "ordinal": false + } + } + } + ], + "subtype": { + "class": "variable" + } + } + }, + { + "derived": true, + "references": { + "alias": "localincome_hk_rc", + "description": "Monthly household income", + "name": "Monthly household income (NET)", + "notes": "", + "subreferences": [ + { + "alias": "localincome_hk_rc_1", + "name": "Low (below HKD 25,000)" + }, + { + "alias": "localincome_hk_rc_2", + "name": "Mid (HKD 25,000 - 49,999)" + }, + { + "alias": "localincome_hk_rc_3", + "name": "High (HKD 50,000 or above)" + } + ] + }, + "type": { + "categories": [ + { + "id": 1, + "missing": false, + "name": "Selected", + "numeric_value": 1, + "selected": true + }, + { + "id": 0, + "missing": false, + "name": "Other", + "numeric_value": 0 + }, + { + "id": -1, + "missing": true, + "name": "No Data", + "numeric_value": null + } + ], + "class": "categorical", + "ordinal": false, + "subvariables": [ + "0001", + "0002", + "0003" + ] + } + } + ], + "measures": { + "count": { + "data": [ + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0 + ], + "metadata": { + "derived": true, + "references": {}, + "type": { + "class": "numeric", + "integer": false, + "missing_reasons": { + "No Data": -1 + }, + "missing_rules": {} + } + }, + "n_missing": 1133 + } + }, + "n": 1133 + } +} diff --git a/tests/integration/test_crunch_cube.py b/tests/integration/test_crunch_cube.py index 22477dabb..7e178eddc 100644 --- a/tests/integration/test_crunch_cube.py +++ b/tests/integration/test_crunch_cube.py @@ -9,10 +9,10 @@ from .fixtures import ( ADMIT_X_DEPT_UNWEIGHTED, ADMIT_X_GENDER_WEIGHTED, BINNED, CAT_X_CAT, CAT_X_CAT_FILTERED_POP, CAT_X_CAT_GERMAN_WEIGHTED, - CAT_X_CAT_WITH_EMPTY_COLS, CAT_X_DATETIME, CAT_X_LOGICAL, CAT_X_MR_X_CAT, - CAT_X_NUM_X_DATETIME, CA_SINGLE_CAT, CA_SUBVAR_HS_X_MR_X_CA_CAT, - CA_SUBVAR_X_CAT_HS, CA_X_MR_WEIGHTED_HS, CA_X_SINGLE_CAT, - ECON_BLAME_WITH_HS, ECON_BLAME_X_IDEOLOGY_ROW_HS, + CAT_X_CAT_WITH_EMPTY_COLS, CAT_X_DATETIME, CAT_X_LOGICAL, + CAT_X_MR_SENTRY, CAT_X_MR_X_CAT, CAT_X_NUM_X_DATETIME, CA_SINGLE_CAT, + CA_SUBVAR_HS_X_MR_X_CA_CAT, CA_SUBVAR_X_CAT_HS, CA_X_MR_WEIGHTED_HS, + CA_X_SINGLE_CAT, ECON_BLAME_WITH_HS, ECON_BLAME_X_IDEOLOGY_ROW_HS, ECON_GENDER_X_IDEOLOGY_WEIGHTED, ECON_MEAN_AGE_BLAME_X_GENDER, ECON_MEAN_NO_DIMS, FRUIT_X_PETS, FRUIT_X_PETS_ARRAY, FRUIT_X_PETS_ARRAY_PETS_FIRST, FRUIT_X_PETS_ARRAY_SUBVARS_FIRST, @@ -49,6 +49,20 @@ def it_knows_the_main_axis_of_a_univariate_ca_cube(self): univariate_ca_main_axis = cube.univariate_ca_main_axis assert univariate_ca_main_axis == 1 + @pytest.mark.xfail(reason='WIP', strict=True) + def it_provides_array_for_single_valid_cat_CAT_X_MR(self): + # --we're not sure yet what is distinctive about this cube, but it + # --broke .as_array() in _prune_body with mask size mismatch. The + # --cat dimension has only one valid cat, so that's one thing, the + # --other is that all weighted counts are 0 (zero) even though + # --unweighted counts are non-zero. + cube = CrunchCube(CAT_X_MR_SENTRY) + arr = cube.as_array(prune=True) + # ---!!! not sure what expected form is, maybe [] since all values + # ---are zero and prune=True. It raises mask exception so haven't + # ---ever seen the right answer and can't confidently predict it yet. + np.testing.assert_array_equal(arr, np.array([[0, 0, 0]])) + # fixtures ------------------------------------------------------- @pytest.fixture(params=[