From 41684259accabb625077e291f41900f0f1a9c137 Mon Sep 17 00:00:00 2001
From: Slobodan Ilic <slobodan@crunch.io>
Date: Tue, 28 Aug 2018 13:54:19 +0200
Subject: [PATCH 1/5] Add unit tests for unfiltered/filtered counts

---
 tests/unit/test_data_table.py | 20 ++++++++++++++++++++
 1 file changed, 20 insertions(+)
 create mode 100644 tests/unit/test_data_table.py

diff --git a/tests/unit/test_data_table.py b/tests/unit/test_data_table.py
new file mode 100644
index 000000000..58bccf316
--- /dev/null
+++ b/tests/unit/test_data_table.py
@@ -0,0 +1,20 @@
+# encoding: utf-8
+
+from mock import Mock
+
+from cr.cube.mixins.data_table import DataTable
+
+
+def test_cube_counts():
+    dt = DataTable({})
+    assert dt.counts == (None, None)
+
+    fake_count = Mock()
+    dt = DataTable({'unfiltered': fake_count})
+    assert dt.counts == (fake_count, None)
+
+    dt = DataTable({'filtered': fake_count})
+    assert dt.counts == (None, fake_count)
+
+    dt = DataTable({'unfiltered': fake_count, 'filtered': fake_count})
+    assert dt.counts == (fake_count, fake_count)

From 872ebc0843f5b13fc8d6926bc0e159b0e897d57f Mon Sep 17 00:00:00 2001
From: Slobodan Ilic <slobodan@crunch.io>
Date: Tue, 28 Aug 2018 15:19:30 +0200
Subject: [PATCH 2/5] Add unit tests for population fraction

---
 tests/unit/test_crunch_cube.py | 39 +++++++++++++++++++++++++++++++++-
 tests/unit/test_data_table.py  |  8 +++----
 2 files changed, 42 insertions(+), 5 deletions(-)

diff --git a/tests/unit/test_crunch_cube.py b/tests/unit/test_crunch_cube.py
index 0375409d8..3cd5788a9 100644
--- a/tests/unit/test_crunch_cube.py
+++ b/tests/unit/test_crunch_cube.py
@@ -1,4 +1,6 @@
 '''Unit tests for the CrunchCube class.'''
+
+import pytest
 from unittest import TestCase
 from mock import Mock
 from mock import patch
@@ -930,4 +932,39 @@ def test_ca_dim_ind_is_none(self):
         cc = CrunchCube({})
         actual = cc.ca_dim_ind
         expected = None
-        assert actual == expected
\ No newline at end of file
+        assert actual == expected
+
+    def test_population_fraction(self):
+        # population_fraction is the filtered / unfiltered ratio of weighted_n
+
+        # It falls back to 1 when the cube has no result at all
+        cube = CrunchCube({})
+        assert cube.population_fraction == 1
+
+        # It also falls back to 1 when the counts are incomplete
+        incomplete_results = [
+            {'unfiltered': {'unweighted_n': 10}},
+            {'unfiltered': {'weighted_n': 10}},
+            {'unfiltered': {'weighted_n': 10, 'unweighted_n': 10}},
+            {'filtered': {'weighted_n': 10, 'unweighted_n': 10}},
+        ]
+        for result in incomplete_results:
+            cube = CrunchCube({'result': result})
+            assert cube.population_fraction == 1
+
+        # The ratio is computed when both weighted counts are present
+        both_counts = {
+            'filtered': {'weighted_n': 5},
+            'unfiltered': {'weighted_n': 10},
+        }
+        cube = CrunchCube({'result': both_counts})
+        assert cube.population_fraction == 0.5
+
+        # The ratio is NaN when the unfiltered weighted count (the
+        # denominator) is zero
+        zero_denominator = {
+            'filtered': {'weighted_n': 5},
+            'unfiltered': {'weighted_n': 0},
+        }
+        cube = CrunchCube({'result': zero_denominator})
+        assert np.isnan(cube.population_fraction)
diff --git a/tests/unit/test_data_table.py b/tests/unit/test_data_table.py
index 58bccf316..884a63cdb 100644
--- a/tests/unit/test_data_table.py
+++ b/tests/unit/test_data_table.py
@@ -6,15 +6,15 @@
 
 
 def test_cube_counts():
-    dt = DataTable({})
+    dt = DataTable({'result': {}})  # result carries neither count
     assert dt.counts == (None, None)
 
     fake_count = Mock()
-    dt = DataTable({'unfiltered': fake_count})
+    dt = DataTable({'result': {'unfiltered': fake_count}})  # unfiltered only
     assert dt.counts == (fake_count, None)
 
-    dt = DataTable({'filtered': fake_count})
+    dt = DataTable({'result': {'filtered': fake_count}})  # filtered only
     assert dt.counts == (None, fake_count)
 
-    dt = DataTable({'unfiltered': fake_count, 'filtered': fake_count})
+    dt = DataTable({'result': {'unfiltered': fake_count, 'filtered': fake_count}})
     assert dt.counts == (fake_count, fake_count)

From ecaef1fa5131a3adf4f7612f4245e53a3fd84a79 Mon Sep 17 00:00:00 2001
From: Slobodan Ilic <slobodan@crunch.io>
Date: Tue, 28 Aug 2018 15:24:42 +0200
Subject: [PATCH 3/5] Add integration tests for filtered pop counts

---
 tests/integration/fixtures/__init__.py        |   1 +
 .../cubes/cat-x-cat-filtered-population.json  | 371 ++++++++++++++++++
 tests/integration/test_crunch_cube.py         |  12 +
 3 files changed, 384 insertions(+)
 create mode 100644 tests/integration/fixtures/cubes/cat-x-cat-filtered-population.json

diff --git a/tests/integration/fixtures/__init__.py b/tests/integration/fixtures/__init__.py
index aab53a16b..4b713cbb5 100644
--- a/tests/integration/fixtures/__init__.py
+++ b/tests/integration/fixtures/__init__.py
@@ -142,3 +142,4 @@ def _load(cube_file):
 CAT_X_CAT_PRUNING_HS = _load('cat-x-cat-pruning-hs.json')
 CA_ITEMS_X_CA_CAT_X_CAT = _load('ca-items-x-ca-cat-x-cat.json')
 CAT_X_MR_X_CAT = _load('cat-x-mr-x-cat.json')
+CAT_X_CAT_FILTERED_POP = _load('cat-x-cat-filtered-population.json')
diff --git a/tests/integration/fixtures/cubes/cat-x-cat-filtered-population.json b/tests/integration/fixtures/cubes/cat-x-cat-filtered-population.json
new file mode 100644
index 000000000..5a67ae236
--- /dev/null
+++ b/tests/integration/fixtures/cubes/cat-x-cat-filtered-population.json
@@ -0,0 +1,371 @@
+{
+  "query": {
+    "measures": {
+      "count": {
+        "function": "cube_count",
+        "args": []
+      }
+    },
+    "dimensions": [
+      {
+        "variable": "https://alpha.crunch.io/api/datasets/41fb7b1179bb4c948a63afb1de66303c/variables/000000/"
+      },
+      {
+        "variable": "https://alpha.crunch.io/api/datasets/41fb7b1179bb4c948a63afb1de66303c/variables/000001/"
+      }
+    ],
+    "weight": null
+  },
+  "query_environment": {
+    "filter": [
+      "https://alpha.crunch.io/api/datasets/41fb7b1179bb4c948a63afb1de66303c/filters/a1c21b17d9fc4664ab87bb7ace4dc139/"
+    ]
+  },
+  "result": {
+    "dimensions": [
+      {
+        "derived": false,
+        "references": {
+          "alias": "ShutdownBlame",
+          "description": "If President Obama and the Republicans in Congress do not reach a budget agreement in time to avoid a shutdown of the federal government, who do you think will more to blame--President Obama or the Republican Congress?",
+          "name": "ShutdownBlame",
+          "view": {
+            "show_counts": false,
+            "column_width": null,
+            "transform": {
+              "insertions": [
+                {
+                  "function": "subtotal",
+                  "args": [
+                    3,
+                    4
+                  ],
+                  "name": "HS Both + Neither",
+                  "anchor": 3
+                }
+              ]
+            },
+            "include_missing": false,
+            "show_numeric_values": false
+          }
+        },
+        "type": {
+          "ordinal": false,
+          "class": "categorical",
+          "categories": [
+            {
+              "numeric_value": 1,
+              "missing": false,
+              "id": 1,
+              "name": "President Obama"
+            },
+            {
+              "numeric_value": 2,
+              "missing": false,
+              "id": 2,
+              "name": "Republicans in Congress"
+            },
+            {
+              "numeric_value": 3,
+              "missing": false,
+              "id": 3,
+              "name": "Both"
+            },
+            {
+              "numeric_value": 4,
+              "missing": false,
+              "id": 4,
+              "name": "Neither"
+            },
+            {
+              "numeric_value": 5,
+              "missing": false,
+              "id": 5,
+              "name": "Not sure"
+            },
+            {
+              "numeric_value": 8,
+              "missing": true,
+              "id": 8,
+              "name": "Skipped"
+            },
+            {
+              "numeric_value": 9,
+              "missing": true,
+              "id": 9,
+              "name": "Not Asked"
+            },
+            {
+              "numeric_value": null,
+              "missing": true,
+              "id": -1,
+              "name": "No Data"
+            }
+          ]
+        }
+      },
+      {
+        "derived": false,
+        "references": {
+          "alias": "RespondentIdeology",
+          "view": {
+            "show_counts": false,
+            "show_numeric_values": false,
+            "transform": {
+              "insertions": [
+                {
+                  "function": "subtotal",
+                  "args": [
+                    4
+                  ],
+                  "name": "HS Conservative",
+                  "anchor": 3
+                }
+              ]
+            },
+            "include_missing": false,
+            "column_width": null
+          },
+          "description": "In general, how would you describe your own political viewpoint?",
+          "name": "RespondentIdeology"
+        },
+        "type": {
+          "ordinal": false,
+          "class": "categorical",
+          "categories": [
+            {
+              "numeric_value": 1,
+              "missing": false,
+              "id": 1,
+              "name": "Very liberal"
+            },
+            {
+              "numeric_value": 2,
+              "missing": false,
+              "id": 2,
+              "name": "Liberal"
+            },
+            {
+              "numeric_value": 3,
+              "missing": false,
+              "id": 3,
+              "name": "Moderate"
+            },
+            {
+              "numeric_value": 4,
+              "missing": false,
+              "id": 4,
+              "name": "Conservative"
+            },
+            {
+              "numeric_value": 5,
+              "missing": false,
+              "id": 5,
+              "name": "Very Conservative"
+            },
+            {
+              "numeric_value": 6,
+              "missing": false,
+              "id": 6,
+              "name": "Not sure"
+            },
+            {
+              "numeric_value": 8,
+              "missing": true,
+              "id": 8,
+              "name": "Skipped"
+            },
+            {
+              "numeric_value": 9,
+              "missing": true,
+              "id": 9,
+              "name": "Not Asked"
+            },
+            {
+              "numeric_value": null,
+              "missing": true,
+              "id": -1,
+              "name": "No Data"
+            }
+          ]
+        }
+      }
+    ],
+    "missing": 0,
+    "measures": {
+      "count": {
+        "data": [
+          3,
+          14,
+          0,
+          0,
+          0,
+          0,
+          0,
+          0,
+          0,
+          59,
+          132,
+          0,
+          0,
+          0,
+          0,
+          0,
+          0,
+          0,
+          6,
+          29,
+          0,
+          0,
+          0,
+          0,
+          0,
+          0,
+          0,
+          1,
+          1,
+          0,
+          0,
+          0,
+          0,
+          0,
+          0,
+          0,
+          3,
+          6,
+          0,
+          0,
+          0,
+          0,
+          0,
+          0,
+          0,
+          0,
+          0,
+          0,
+          0,
+          0,
+          0,
+          0,
+          0,
+          0,
+          0,
+          0,
+          0,
+          0,
+          0,
+          0,
+          0,
+          0,
+          0,
+          0,
+          0,
+          0,
+          0,
+          0,
+          0,
+          0,
+          0,
+          0
+        ],
+        "n_missing": 0,
+        "metadata": {
+          "references": {},
+          "derived": true,
+          "type": {
+            "integer": true,
+            "missing_rules": {},
+            "missing_reasons": {
+              "No Data": -1
+            },
+            "class": "numeric"
+          }
+        }
+      }
+    },
+    "n": 254,
+    "unfiltered": {
+      "unweighted_n": 1000,
+      "weighted_n": 1000
+    },
+    "filtered": {
+      "unweighted_n": 254,
+      "weighted_n": 254
+    },
+    "counts": [
+      3,
+      14,
+      0,
+      0,
+      0,
+      0,
+      0,
+      0,
+      0,
+      59,
+      132,
+      0,
+      0,
+      0,
+      0,
+      0,
+      0,
+      0,
+      6,
+      29,
+      0,
+      0,
+      0,
+      0,
+      0,
+      0,
+      0,
+      1,
+      1,
+      0,
+      0,
+      0,
+      0,
+      0,
+      0,
+      0,
+      3,
+      6,
+      0,
+      0,
+      0,
+      0,
+      0,
+      0,
+      0,
+      0,
+      0,
+      0,
+      0,
+      0,
+      0,
+      0,
+      0,
+      0,
+      0,
+      0,
+      0,
+      0,
+      0,
+      0,
+      0,
+      0,
+      0,
+      0,
+      0,
+      0,
+      0,
+      0,
+      0,
+      0,
+      0,
+      0
+    ],
+    "element": "crunch:cube"
+  }
+}
\ No newline at end of file
diff --git a/tests/integration/test_crunch_cube.py b/tests/integration/test_crunch_cube.py
index 86222577e..320746d1e 100644
--- a/tests/integration/test_crunch_cube.py
+++ b/tests/integration/test_crunch_cube.py
@@ -55,6 +55,7 @@
 from .fixtures import HUFFPOST_ACTIONS_X_HOUSEHOLD
 from .fixtures import GENDER_X_WEIGHT
 from .fixtures import CAT_X_MR_X_CAT
+from .fixtures import CAT_X_CAT_FILTERED_POP
 
 from . import assert_scale_means_equal
 
@@ -394,6 +395,17 @@ def test_population_counts_cat_x_cat(self):
         actual = cube.population_counts(9001)
         np.testing.assert_almost_equal(actual, expected)
 
+    def test_filtered_population_counts(self):
+        cube = CrunchCube(CAT_X_CAT_FILTERED_POP)  # filtered n: 254 of 1000
+        pop_counts = cube.population_counts(100000000)
+        np.testing.assert_almost_equal(pop_counts, np.array([
+            [300000., 1400000., 0., 0., 0., 0.],
+            [5900000., 13200000., 0., 0., 0., 0.],
+            [600000., 2900000., 0., 0., 0., 0.],
+            [100000., 100000., 0., 0., 0., 0.],
+            [300000., 600000., 0., 0., 0., 0.],
+        ]))
+
     def test_labels_cat_x_cat_exclude_missing(self):
         cube = CrunchCube(CAT_X_CAT)
         expected = [

From 86bdf262e3b510f0a62c2353bd5ae2937bbcd368 Mon Sep 17 00:00:00 2001
From: Slobodan Ilic <slobodan@crunch.io>
Date: Tue, 28 Aug 2018 15:25:19 +0200
Subject: [PATCH 4/5] Implement filtered pop counts fraction

---
 src/cr/cube/crunch_cube.py       | 14 +++++++++++++-
 src/cr/cube/mixins/data_table.py |  6 ++++++
 2 files changed, 19 insertions(+), 1 deletion(-)

diff --git a/src/cr/cube/crunch_cube.py b/src/cr/cube/crunch_cube.py
index da85ede25..4e5aeb634 100644
--- a/src/cr/cube/crunch_cube.py
+++ b/src/cr/cube/crunch_cube.py
@@ -1016,6 +1016,18 @@ def percentages(self, axis=None):
         '''
         return self.proportions(axis) * 100
 
+    @lazyproperty
+    def population_fraction(self):
+        '''Return the filtered-to-unfiltered ratio of weighted N.'''
+        try:
+            unfiltered, filtered = self.counts
+            return filtered.get('weighted_n') / unfiltered.get('weighted_n')
+        except ZeroDivisionError:
+            return np.nan  # unfiltered weighted N is zero
+        except (AttributeError, KeyError, TypeError):
+            # Counts (or their 'weighted_n') are missing: apply no scaling.
+            return 1
+
     def population_counts(self, population_size, weighted=True,
                           include_missing=False,
                           include_transforms_for_dims=None, prune=False):
@@ -1048,7 +1060,7 @@ def population_counts(self, population_size, weighted=True,
             include_missing=include_missing,
             include_transforms_for_dims=include_transforms_for_dims,
             prune=prune
-        ) * population_size
+        ) * population_size * self.population_fraction
 
     def index(self, weighted=True, prune=False):
         '''Get cube index measurement.'''
diff --git a/src/cr/cube/mixins/data_table.py b/src/cr/cube/mixins/data_table.py
index b6b106c31..74a2ac089 100644
--- a/src/cr/cube/mixins/data_table.py
+++ b/src/cr/cube/mixins/data_table.py
@@ -132,6 +132,12 @@ def flat_values(self, weighted, margin=False):
     def _shape(self):
         return tuple([dim.shape for dim in self.all_dimensions])
 
+    @lazyproperty
+    def counts(self):
+        '''Return the result's (unfiltered, filtered) count entries.'''
+        result = self._cube['result']
+        return result.get('unfiltered'), result.get('filtered')
+
     def data(self, weighted, margin=False):
         '''Get the data in non-flattened shape.
 

From 600f70586e09fdcb9a917a7336638f43f17b4939 Mon Sep 17 00:00:00 2001
From: Slobodan Ilic <slobodan@crunch.io>
Date: Tue, 28 Aug 2018 21:21:05 +0200
Subject: [PATCH 5/5] Remove unused import

---
 tests/unit/test_crunch_cube.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/tests/unit/test_crunch_cube.py b/tests/unit/test_crunch_cube.py
index 3cd5788a9..146090ca5 100644
--- a/tests/unit/test_crunch_cube.py
+++ b/tests/unit/test_crunch_cube.py
@@ -1,6 +1,5 @@
 '''Unit tests for the CrunchCube class.'''
 
-import pytest
 from unittest import TestCase
 from mock import Mock
 from mock import patch