Skip to content

Commit

Permalink
[#177831399]: fix population counts
Browse files Browse the repository at this point in the history
* Drive by TDD, add tests for pop counts, with cat date
* Implement correct functionality in cube part, for pop counts
  • Loading branch information
slobodan-ilic committed Apr 20, 2021
1 parent 6b0f0e1 commit e0646b2
Show file tree
Hide file tree
Showing 2 changed files with 58 additions and 2 deletions.
9 changes: 7 additions & 2 deletions src/cr/cube/cubepart.py
Original file line number Diff line number Diff line change
Expand Up @@ -775,9 +775,14 @@ def pairwise_significance_tests(self):

@lazyproperty
def population_counts(self):
# NOTE(review): this is a rendered diff hunk whose +/- markers and indentation
# were lost, so the old and new bodies appear back to back. The hunk header
# reports 2 deletions, which presumably are the two lines directly below.
# --- pre-commit body: always scaled `table_proportions` by the population
# --- and the cube's population fraction.
return (
self.table_proportions * self._population * self._cube.population_fraction
# --- post-commit body: choose which proportions to scale. Row proportions
# --- when dimension 0 is a categorical date, column proportions when
# --- dimension 1 is, and table proportions otherwise.
proportions = (
self.row_proportions
if self._dimensions[0].is_cat_date
else self.column_proportions
if self._dimensions[1].is_cat_date
else self.table_proportions
)
# --- the scaling factors are the same as before; only the proportions differ
return proportions * self._population * self._cube.population_fraction

@lazyproperty
def population_counts_moe(self):
Expand Down
51 changes: 51 additions & 0 deletions tests/integration/test_cubepart.py
Original file line number Diff line number Diff line change
Expand Up @@ -929,6 +929,57 @@ def it_provides_share_of_sum_measure_for_mr_x_mr(self):
)
)

def it_uses_row_proportions_for_pop_counts_when_row_dim_is_cat_date(self):
    """Pop counts are built from row proportions when rows are a CAT DATE."""
    population = 100
    slice_ = Cube(CR.CAT_DATE_X_CAT, population=population).partitions[0]
    expected_counts = np.array(
        [
            [72.69, 12.77, 12.37, 0.58, 0.58, 0.78, 0.19, 0.0],
            [66.19, 13.64, 11.20, 2.64, 0.61, 2.44, 2.64, 0.61],
            [74.44, 7.57, 7.25, 4.10, 0.63, 4.73, 1.26, 0.0],
            [80.32, 8.19, 6.01, 1.63, 0.0, 1.63, 1.63, 0.54],
        ]
    )

    pop_counts = slice_.population_counts

    assert pop_counts == pytest.approx(expected_counts, abs=1e-2)
    # ---The CAT DATE dimension is on the rows, so the population must have been
    # ---multiplied by the row proportions (and nothing else)
    scaled_row_proportions = population * slice_.row_proportions
    assert pop_counts.tolist() == scaled_row_proportions.tolist()
    # ---Every row therefore adds up to the whole population
    per_row_totals = pop_counts.sum(axis=1)
    assert pytest.approx(per_row_totals) == [population] * 4

def it_uses_column_proportions_for_pop_counts_when_column_dim_is_cat_date(self):
    """Pop counts are built from column proportions when columns are a CAT DATE."""
    slice_ = Cube(CR.CAT_HS_X_CAT_DATE, population=100).partitions[0]
    assert slice_.population_counts == pytest.approx(
        np.array(
            [
                [73.28, 68.83, 78.54, 81.96],
                [72.69, 66.19, 74.44, 80.32],
                [12.77, 13.64, 7.57, 8.19],
                [12.37, 11.20, 7.25, 6.01],
                [25.14, 24.84, 14.82, 14.20],
                [0.58, 2.64, 4.10, 1.63],
                [0.58, 0.61, 0.63, 0.0],
                [0.78, 2.44, 4.73, 1.63],
                [0.19, 2.64, 1.26, 1.63],
                [0.0, 0.61, 0.0, 0.54],
                [0.19, 3.25, 1.26, 2.18],
            ]
        ),
        abs=1e-2,
    )
    # ---Since CAT DATE is on the column dimension, assert that column proportions
    # ---are used for the multiplication of the population
    assert (
        slice_.population_counts.tolist()
        == (100 * slice_.column_proportions).tolist()
    )
    # ---Assert that each column of population counts sums to total population.
    # ---Inserted (subtotal) rows are excluded, otherwise their values would be
    # ---counted twice in the column sums.
    base_rows_idx = ~np.isin(np.arange(11), slice_.inserted_row_idxs)
    base_column_totals = slice_.population_counts[base_rows_idx, :].sum(axis=0)
    # ---This comparison was previously a bare expression (no `assert`), so it
    # ---never checked anything; compare approximately because the totals are
    # ---sums of floats.
    assert base_column_totals.tolist() == pytest.approx([100] * 4)


class Describe_Strand(object):
"""Integration-test suite for `cr.cube.cubepart._Strand` object."""
Expand Down

0 comments on commit e0646b2

Please sign in to comment.