From 41684259accabb625077e291f41900f0f1a9c137 Mon Sep 17 00:00:00 2001
From: Slobodan Ilic
Date: Tue, 28 Aug 2018 13:54:19 +0200
Subject: [PATCH 1/5] Add unit tests for unfiltered/filtered counts

---
 tests/unit/test_data_table.py | 20 ++++++++++++++++++++
 1 file changed, 20 insertions(+)
 create mode 100644 tests/unit/test_data_table.py

diff --git a/tests/unit/test_data_table.py b/tests/unit/test_data_table.py
new file mode 100644
index 000000000..58bccf316
--- /dev/null
+++ b/tests/unit/test_data_table.py
@@ -0,0 +1,20 @@
+# encoding: utf-8
+
+from mock import Mock
+
+from cr.cube.mixins.data_table import DataTable
+
+
+def test_cube_counts():
+    dt = DataTable({})
+    assert dt.counts == (None, None)
+
+    fake_count = Mock()
+    dt = DataTable({'unfiltered': fake_count})
+    assert dt.counts == (fake_count, None)
+
+    dt = DataTable({'filtered': fake_count})
+    assert dt.counts == (None, fake_count)
+
+    dt = DataTable({'unfiltered': fake_count, 'filtered': fake_count})
+    assert dt.counts == (fake_count, fake_count)

From 872ebc0843f5b13fc8d6926bc0e159b0e897d57f Mon Sep 17 00:00:00 2001
From: Slobodan Ilic
Date: Tue, 28 Aug 2018 15:19:30 +0200
Subject: [PATCH 2/5] Add unit tests for population fraction

---
 tests/unit/test_crunch_cube.py | 39 +++++++++++++++++++++++++++++++++-
 tests/unit/test_data_table.py  |  8 +++----
 2 files changed, 42 insertions(+), 5 deletions(-)

diff --git a/tests/unit/test_crunch_cube.py b/tests/unit/test_crunch_cube.py
index 0375409d8..3cd5788a9 100644
--- a/tests/unit/test_crunch_cube.py
+++ b/tests/unit/test_crunch_cube.py
@@ -1,4 +1,6 @@
 '''Unit tests for the CrunchCube class.'''
+
+import pytest
 from unittest import TestCase
 from mock import Mock
 from mock import patch
@@ -930,4 +932,39 @@ def test_ca_dim_ind_is_none(self):
         cc = CrunchCube({})
         actual = cc.ca_dim_ind
         expected = None
-        assert actual == expected
\ No newline at end of file
+        assert actual == expected
+
+    def test_population_fraction(self):
+
+        # Assert fraction is 1 when none of the counts are specified
+        cc = CrunchCube({})
+        actual = cc.population_fraction
+        assert actual == 1
+
+        # Assert fraction is 1 when only some counts are specified
+        cc = CrunchCube({'result': {'unfiltered': {'unweighted_n': 10}}})
+        assert cc.population_fraction == 1
+        cc = CrunchCube({'result': {'unfiltered': {'weighted_n': 10}}})
+        assert cc.population_fraction == 1
+        cc = CrunchCube({'result': {'unfiltered': {'weighted_n': 10, 'unweighted_n': 10}}})
+        assert cc.population_fraction == 1
+        cc = CrunchCube({'result': {'filtered': {'weighted_n': 10, 'unweighted_n': 10}}})
+        assert cc.population_fraction == 1
+
+        # Assert fraction is filtered/unfiltered weighted_n when both are given
+        cc = CrunchCube({
+            'result': {
+                'filtered': {'weighted_n': 5},
+                'unfiltered': {'weighted_n': 10},
+            }
+        })
+        assert cc.population_fraction == 0.5
+
+        # Assert fraction is NaN when the unfiltered weighted_n is zero
+        cc = CrunchCube({
+            'result': {
+                'filtered': {'weighted_n': 5},
+                'unfiltered': {'weighted_n': 0},
+            }
+        })
+        assert np.isnan(cc.population_fraction)
diff --git a/tests/unit/test_data_table.py b/tests/unit/test_data_table.py
index 58bccf316..884a63cdb 100644
--- a/tests/unit/test_data_table.py
+++ b/tests/unit/test_data_table.py
@@ -6,15 +6,15 @@
 
 
 def test_cube_counts():
-    dt = DataTable({})
+    dt = DataTable({'result': {}})
     assert dt.counts == (None, None)
 
     fake_count = Mock()
-    dt = DataTable({'unfiltered': fake_count})
+    dt = DataTable({'result': {'unfiltered': fake_count}})
     assert dt.counts == (fake_count, None)
 
-    dt = DataTable({'filtered': fake_count})
+    dt = DataTable({'result': {'filtered': fake_count}})
     assert dt.counts == (None, fake_count)
 
-    dt = DataTable({'unfiltered': fake_count, 'filtered': fake_count})
+    dt = DataTable({'result': {'unfiltered': fake_count, 'filtered': fake_count}})
     assert dt.counts == (fake_count, fake_count)

From ecaef1fa5131a3adf4f7612f4245e53a3fd84a79 Mon Sep 17 00:00:00 2001
From: Slobodan Ilic
Date: Tue, 28 Aug 2018 15:24:42 +0200
Subject: [PATCH 3/5] Add
integration tests for filtered pop counts --- tests/integration/fixtures/__init__.py | 1 + .../cubes/cat-x-cat-filtered-population.json | 371 ++++++++++++++++++ tests/integration/test_crunch_cube.py | 12 + 3 files changed, 384 insertions(+) create mode 100644 tests/integration/fixtures/cubes/cat-x-cat-filtered-population.json diff --git a/tests/integration/fixtures/__init__.py b/tests/integration/fixtures/__init__.py index aab53a16b..4b713cbb5 100644 --- a/tests/integration/fixtures/__init__.py +++ b/tests/integration/fixtures/__init__.py @@ -142,3 +142,4 @@ def _load(cube_file): CAT_X_CAT_PRUNING_HS = _load('cat-x-cat-pruning-hs.json') CA_ITEMS_X_CA_CAT_X_CAT = _load('ca-items-x-ca-cat-x-cat.json') CAT_X_MR_X_CAT = _load('cat-x-mr-x-cat.json') +CAT_X_CAT_FILTERED_POP = _load('cat-x-cat-filtered-population.json') diff --git a/tests/integration/fixtures/cubes/cat-x-cat-filtered-population.json b/tests/integration/fixtures/cubes/cat-x-cat-filtered-population.json new file mode 100644 index 000000000..5a67ae236 --- /dev/null +++ b/tests/integration/fixtures/cubes/cat-x-cat-filtered-population.json @@ -0,0 +1,371 @@ +{ + "query": { + "measures": { + "count": { + "function": "cube_count", + "args": [] + } + }, + "dimensions": [ + { + "variable": "https://alpha.crunch.io/api/datasets/41fb7b1179bb4c948a63afb1de66303c/variables/000000/" + }, + { + "variable": "https://alpha.crunch.io/api/datasets/41fb7b1179bb4c948a63afb1de66303c/variables/000001/" + } + ], + "weight": null + }, + "query_environment": { + "filter": [ + "https://alpha.crunch.io/api/datasets/41fb7b1179bb4c948a63afb1de66303c/filters/a1c21b17d9fc4664ab87bb7ace4dc139/" + ] + }, + "result": { + "dimensions": [ + { + "derived": false, + "references": { + "alias": "ShutdownBlame", + "description": "If President Obama and the Republicans in Congress do not reach a budget agreement in time to avoid a shutdown of the federal government, who do you think will more to blame--President Obama or the Republican Congress?", 
+ "name": "ShutdownBlame", + "view": { + "show_counts": false, + "column_width": null, + "transform": { + "insertions": [ + { + "function": "subtotal", + "args": [ + 3, + 4 + ], + "name": "HS Both + Neither", + "anchor": 3 + } + ] + }, + "include_missing": false, + "show_numeric_values": false + } + }, + "type": { + "ordinal": false, + "class": "categorical", + "categories": [ + { + "numeric_value": 1, + "missing": false, + "id": 1, + "name": "President Obama" + }, + { + "numeric_value": 2, + "missing": false, + "id": 2, + "name": "Republicans in Congress" + }, + { + "numeric_value": 3, + "missing": false, + "id": 3, + "name": "Both" + }, + { + "numeric_value": 4, + "missing": false, + "id": 4, + "name": "Neither" + }, + { + "numeric_value": 5, + "missing": false, + "id": 5, + "name": "Not sure" + }, + { + "numeric_value": 8, + "missing": true, + "id": 8, + "name": "Skipped" + }, + { + "numeric_value": 9, + "missing": true, + "id": 9, + "name": "Not Asked" + }, + { + "numeric_value": null, + "missing": true, + "id": -1, + "name": "No Data" + } + ] + } + }, + { + "derived": false, + "references": { + "alias": "RespondentIdeology", + "view": { + "show_counts": false, + "show_numeric_values": false, + "transform": { + "insertions": [ + { + "function": "subtotal", + "args": [ + 4 + ], + "name": "HS Conservative", + "anchor": 3 + } + ] + }, + "include_missing": false, + "column_width": null + }, + "description": "In general, how would you describe your own political viewpoint?", + "name": "RespondentIdeology" + }, + "type": { + "ordinal": false, + "class": "categorical", + "categories": [ + { + "numeric_value": 1, + "missing": false, + "id": 1, + "name": "Very liberal" + }, + { + "numeric_value": 2, + "missing": false, + "id": 2, + "name": "Liberal" + }, + { + "numeric_value": 3, + "missing": false, + "id": 3, + "name": "Moderate" + }, + { + "numeric_value": 4, + "missing": false, + "id": 4, + "name": "Conservative" + }, + { + "numeric_value": 5, + "missing": false, + 
"id": 5, + "name": "Very Conservative" + }, + { + "numeric_value": 6, + "missing": false, + "id": 6, + "name": "Not sure" + }, + { + "numeric_value": 8, + "missing": true, + "id": 8, + "name": "Skipped" + }, + { + "numeric_value": 9, + "missing": true, + "id": 9, + "name": "Not Asked" + }, + { + "numeric_value": null, + "missing": true, + "id": -1, + "name": "No Data" + } + ] + } + } + ], + "missing": 0, + "measures": { + "count": { + "data": [ + 3, + 14, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 59, + 132, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 6, + 29, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 1, + 1, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 3, + 6, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0 + ], + "n_missing": 0, + "metadata": { + "references": {}, + "derived": true, + "type": { + "integer": true, + "missing_rules": {}, + "missing_reasons": { + "No Data": -1 + }, + "class": "numeric" + } + } + } + }, + "n": 254, + "unfiltered": { + "unweighted_n": 1000, + "weighted_n": 1000 + }, + "filtered": { + "unweighted_n": 254, + "weighted_n": 254 + }, + "counts": [ + 3, + 14, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 59, + 132, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 6, + 29, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 1, + 1, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 3, + 6, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0 + ], + "element": "crunch:cube" + } +} \ No newline at end of file diff --git a/tests/integration/test_crunch_cube.py b/tests/integration/test_crunch_cube.py index 86222577e..320746d1e 100644 --- a/tests/integration/test_crunch_cube.py +++ b/tests/integration/test_crunch_cube.py @@ -55,6 +55,7 @@ from .fixtures import HUFFPOST_ACTIONS_X_HOUSEHOLD from .fixtures import GENDER_X_WEIGHT from .fixtures import CAT_X_MR_X_CAT +from .fixtures import 
CAT_X_CAT_FILTERED_POP
 
 from . import assert_scale_means_equal
 
@@ -394,6 +395,17 @@ def test_population_counts_cat_x_cat(self):
         actual = cube.population_counts(9001)
         np.testing.assert_almost_equal(actual, expected)
 
+    def test_filtered_population_counts(self):
+        cube = CrunchCube(CAT_X_CAT_FILTERED_POP)
+        expected = np.array([
+            [ 300000., 1400000., 0., 0., 0., 0.],
+            [5900000., 13200000., 0., 0., 0., 0.],
+            [ 600000., 2900000., 0., 0., 0., 0.],
+            [ 100000., 100000., 0., 0., 0., 0.],
+            [ 300000., 600000., 0., 0., 0., 0.]])
+        actual = cube.population_counts(100000000)
+        np.testing.assert_almost_equal(actual, expected)
+
     def test_labels_cat_x_cat_exclude_missing(self):
         cube = CrunchCube(CAT_X_CAT)
         expected = [

From 86bdf262e3b510f0a62c2353bd5ae2937bbcd368 Mon Sep 17 00:00:00 2001
From: Slobodan Ilic
Date: Tue, 28 Aug 2018 15:25:19 +0200
Subject: [PATCH 4/5] Implement filtered pop counts fraction

---
 src/cr/cube/crunch_cube.py       | 22 +++++++++++++++++++++-
 src/cr/cube/mixins/data_table.py | 10 ++++++++++
 2 files changed, 31 insertions(+), 1 deletion(-)

diff --git a/src/cr/cube/crunch_cube.py b/src/cr/cube/crunch_cube.py
index da85ede25..4e5aeb634 100644
--- a/src/cr/cube/crunch_cube.py
+++ b/src/cr/cube/crunch_cube.py
@@ -1016,6 +1016,26 @@ def percentages(self, axis=None):
         '''
         return self.proportions(axis) * 100
 
+    @lazyproperty
+    def population_fraction(self):
+        '''Return filtered-to-unfiltered ratio of the weighted counts.
+
+        The ratio is filtered ``weighted_n`` / unfiltered ``weighted_n``,
+        both read from ``self.counts``. It is ``np.nan`` when the
+        unfiltered count is zero, and 1 when either count is missing.
+        '''
+        try:
+            unfiltered, filtered = self.counts
+            num = filtered.get('weighted_n')
+            den = unfiltered.get('weighted_n')
+            return num / den
+        except ZeroDivisionError:
+            return np.nan
+        except (AttributeError, KeyError, TypeError):
+            # Counts are absent or incomplete; catching only these (not a
+            # bare ``except``) lets KeyboardInterrupt/SystemExit propagate.
+            return 1
+
     def population_counts(self, population_size, weighted=True,
                           include_missing=False,
                           include_transforms_for_dims=None, prune=False):
@@ -1048,7 +1068,7 @@ def population_counts(self, population_size, weighted=True,
             include_missing=include_missing,
             include_transforms_for_dims=include_transforms_for_dims,
             prune=prune
-        ) * population_size
+        ) * population_size * self.population_fraction
 
     def index(self, weighted=True, prune=False):
         '''Get cube index measurement.'''
diff --git a/src/cr/cube/mixins/data_table.py b/src/cr/cube/mixins/data_table.py
index b6b106c31..74a2ac089 100644
--- a/src/cr/cube/mixins/data_table.py
+++ b/src/cr/cube/mixins/data_table.py
@@ -132,6 +132,16 @@ def flat_values(self, weighted, margin=False):
     def _shape(self):
         return tuple([dim.shape for dim in self.all_dimensions])
 
+    @lazyproperty
+    def counts(self):
+        '''Return the (unfiltered, filtered) entries of the cube result.
+
+        Each entry is ``None`` when its key is absent from 'result'.
+        '''
+        unfiltered = self._cube['result'].get('unfiltered')
+        filtered = self._cube['result'].get('filtered')
+        return unfiltered, filtered
+
     def data(self, weighted, margin=False):
         '''Get the data in non-flattened shape.
 

From 600f70586e09fdcb9a917a7336638f43f17b4939 Mon Sep 17 00:00:00 2001
From: Slobodan Ilic
Date: Tue, 28 Aug 2018 21:21:05 +0200
Subject: [PATCH 5/5] Remove unused import

---
 tests/unit/test_crunch_cube.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/tests/unit/test_crunch_cube.py b/tests/unit/test_crunch_cube.py
index 3cd5788a9..146090ca5 100644
--- a/tests/unit/test_crunch_cube.py
+++ b/tests/unit/test_crunch_cube.py
@@ -1,6 +1,5 @@
 '''Unit tests for the CrunchCube class.'''
 
-import pytest
 from unittest import TestCase
 from mock import Mock
 from mock import patch