Skip to content

Commit

Permalink
Merge pull request #226 from Crunch-io/moe-for-1D-cubes
Browse files Browse the repository at this point in the history
MoE for 1D cubes
  • Loading branch information
slobodan-ilic committed Nov 4, 2020
2 parents a90651e + 68511a9 commit f02c180
Show file tree
Hide file tree
Showing 2 changed files with 25 additions and 8 deletions.
25 changes: 18 additions & 7 deletions src/cr/cube/cubepart.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,11 @@
from cr.cube.stripe import TransformedStripe
from cr.cube.util import lazyproperty

# ---Z_975 is the quantile of the standard normal distribution at probability 97.5%
# ---(p=.975), i.e. the inverse of its Cumulative Distribution Function (CDF) at
# ---.975. It is the critical value for a two-sided 95% confidence interval, which
# ---leaves 2.5% (.025) in each tail of the distribution.
Z_975 = 1.959964


class CubePartition(object):
"""A slice, a strand, or a nub drawn from a cube-response.
Expand Down Expand Up @@ -239,11 +244,6 @@ class _Slice(CubePartition):
dimensions which can be crosstabbed in a slice.
"""

# ---Z_975 is the quantile of the standard normal distribution at probability 97.5%
# ---(p=.975), i.e. the inverse of its Cumulative Distribution Function (CDF) at
# ---.975. It is the critical value for a two-sided 95% confidence interval, which
# ---leaves 2.5% (.025) in each tail of the distribution.
Z_975 = 1.959964

def __init__(self, cube, slice_idx, transforms, population, mask_size):
super(_Slice, self).__init__(cube, transforms)
self._slice_idx = slice_idx
Expand Down Expand Up @@ -305,7 +305,7 @@ def columns_percentages_moe(self):
The values can be np.nan when the corresponding percentage is also np.nan, which
happens when the respective columns margin is 0.
"""
return self.Z_975 * 100 * self.columns_std_err
return Z_975 * 100 * self.columns_std_err

@lazyproperty
def columns_std_dev(self):
Expand Down Expand Up @@ -809,7 +809,7 @@ def table_percentages_moe(self):
The values can be np.nan when the corresponding percentage is also np.nan, which
happens when the respective table margin is 0.
"""
return self.Z_975 * 100 * self.table_std_err
return Z_975 * 100 * self.table_std_err

@lazyproperty
def table_proportions(self):
Expand Down Expand Up @@ -1125,6 +1125,17 @@ def standard_error(self):
return np.sqrt(self._variance / self.bases)
return np.sqrt(self._variance / np.sum(self.rows_margin))

@lazyproperty
def table_percentages_moe(self):
"""1D np.float64 ndarray of margin-of-error (MoE) for table percentages.

Computed as `Z_975 * 100 * self.standard_error`, so the values are expressed as
percentages, analogous to the `table_percentages` property: an MoE of 3.5% is
represented as 3.5 (not 0.035).

The values can be np.nan when the corresponding percentage is also np.nan.
NOTE(review): this text previously attributed the np.nan case to a zero
"columns margin", wording that matches the `columns_percentages_moe` docstring;
confirm which margin/base applies to this 1D measure.
"""
return Z_975 * 100 * self.standard_error

@lazyproperty
def table_base(self):
"""1D, single-element ndarray (like [3770])."""
Expand Down
8 changes: 7 additions & 1 deletion tests/integration/test_cube.py
Original file line number Diff line number Diff line change
Expand Up @@ -476,12 +476,15 @@ def test_proportions_text(self):
(0.1666667, 0.1666667, 0.1666667, 0.1666667, 0.1666667, 0.1666667),
)

def test_std_dev_err_univariate_cat_axis_none(self):
def test_std_dev_err_moe_univariate_cat_axis_none(self):
strand = Cube(CR.UNIVARIATE_CATEGORICAL).partitions[0]
np.testing.assert_almost_equal(
strand.standard_deviation, [0.47140452, 0.47140452]
)
np.testing.assert_almost_equal(strand.standard_error, [0.1217161, 0.1217161])
np.testing.assert_almost_equal(
strand.table_percentages_moe, [23.8559221, 23.8559221]
)

def test_std_dev_err_numeric(self):
strand = Cube(CR.VOTER_REGISTRATION).partitions[0]
Expand All @@ -491,6 +494,9 @@ def test_std_dev_err_numeric(self):
np.testing.assert_almost_equal(
strand.standard_error, [0.0100884, 0.0096941, 0.0031464]
)
np.testing.assert_almost_equal(
strand.table_percentages_moe, [1.9772822, 1.9000029, 0.6166883]
)

def test_std_dev_err_datetime(self):
strand = Cube(CR.SIMPLE_DATETIME).partitions[0]
Expand Down

0 comments on commit f02c180

Please sign in to comment.