Skip to content

Commit

Permalink
residuals: added col std dev and std error and integration tests
Browse files Browse the repository at this point in the history
  • Loading branch information
ernestoarbitrio committed May 7, 2020
1 parent 3e04803 commit 0ffbbee
Show file tree
Hide file tree
Showing 6 changed files with 522 additions and 30 deletions.
20 changes: 18 additions & 2 deletions src/cr/cube/cubepart.py
Original file line number Diff line number Diff line change
Expand Up @@ -167,6 +167,20 @@ def columns_dimension_type(self):
def columns_margin(self):
return np.array([column.margin for column in self._matrix.columns]).T

@lazyproperty
def columns_std_dev(self):
    """Return the standard deviation derived from the column variance.

    Computed element-wise as `std_deviation = sqrt(variance)`, where the
    variance is read from `self._columns_variance`.
    """
    variance = self._columns_variance
    return np.sqrt(variance)

@lazyproperty
def columns_std_err(self):
    """Return the standard error derived from the column variance.

    Computed element-wise as `std_error = sqrt(variance / N)`, where the
    variance is read from `self._columns_variance` and N is
    `self.columns_margin`.
    """
    variance = self._columns_variance
    margin = self.columns_margin
    return np.sqrt(variance / margin)

@lazyproperty
def counts(self):
    """Return an ndarray built from the `values` of each matrix row."""
    values_by_row = [matrix_row.values for matrix_row in self._matrix.rows]
    return np.array(values_by_row)
Expand Down Expand Up @@ -591,11 +605,13 @@ def _transforms_dict(self):
return self._transforms_arg if self._transforms_arg is not None else {}

@lazyproperty
def _variance(self):
def _columns_variance(self):
"""Returns the variance for cell percentages
`variance = p * (1-p)`
"""
return self.counts / self.table_margin * (1 - self.counts / self.table_margin)
return (
self.counts / self.columns_margin * (1 - self.counts / self.columns_margin)
)


class _Strand(CubePartition):
Expand Down
6 changes: 6 additions & 0 deletions src/cr/cube/matrix.py
Original file line number Diff line number Diff line change
Expand Up @@ -1336,10 +1336,16 @@ def pvals(self):

@lazyproperty
def table_std_dev(self):
    """Return the standard deviation derived from the table variance.

    Computed element-wise as `std_deviation = sqrt(variance)`, where the
    variance is read from `self._variance`.
    """
    variance = self._variance
    return np.sqrt(variance)

@lazyproperty
def table_std_err(self):
    """Return the standard error derived from the table variance.

    Computed element-wise as `std_error = sqrt(variance / N)`, where the
    variance is read from `self._variance` and N is `self.table_margin`.
    """
    variance = self._variance
    margin = self.table_margin
    return np.sqrt(variance / margin)

@lazyproperty
Expand Down
52 changes: 38 additions & 14 deletions tests/integration/test_cube.py
Original file line number Diff line number Diff line change
Expand Up @@ -814,16 +814,26 @@ def test_calculate_various_measures_axis_0(self):
],
]
)
expected_standard_dev = [
expected_table_std_dev = [
[0.17860955, 0.28275439, 0.38106557, 0.32204575, 0.25876083, 0.1149889],
[0.19320144, 0.29207169, 0.39927, 0.30384472, 0.19320144, 0.15976996],
]
expected_standard_error = [
expected_table_std_err = [
[0.00564813, 0.00894148, 0.01205035, 0.01018398, 0.00818274, 0.00363627],
[0.00610957, 0.00923612, 0.01262603, 0.00960841, 0.00610957, 0.00505237],
]
np.testing.assert_almost_equal(slice_.table_std_dev, expected_standard_dev)
np.testing.assert_almost_equal(slice_.table_std_err, expected_standard_error)
expected_col_std_dev = [
[0.49834148, 0.4996758, 0.49908137, 0.49890016, 0.47692704, 0.47313192],
[0.49834148, 0.4996758, 0.49908137, 0.49890016, 0.47692704, 0.47313192],
]
expected_col_std_err = [
[0.05880176, 0.03705843, 0.02576154, 0.03360238, 0.04526793, 0.07517074],
[0.05880176, 0.03705843, 0.02576154, 0.03360238, 0.04526793, 0.07517074],
]
np.testing.assert_almost_equal(slice_.table_std_dev, expected_table_std_dev)
np.testing.assert_almost_equal(slice_.table_std_err, expected_table_std_err)
np.testing.assert_almost_equal(slice_.columns_std_dev, expected_col_std_dev)
np.testing.assert_almost_equal(slice_.columns_std_err, expected_col_std_err)
np.testing.assert_almost_equal(slice_.zscore, expected_zscore)

def test_pvals(self):
Expand Down Expand Up @@ -943,18 +953,28 @@ def test_various_measures_admit_by_dept_unweighted_rows(self):
],
]
)
expected_standard_error = [
expected_table_std_dev = [
[0.33934583, 0.27398329, 0.25706606, 0.2364359, 0.17726851, 0.10030056],
[0.26071661, 0.21271283, 0.33814647, 0.31969003, 0.29534847, 0.35469478],
]
expected_table_std_err = [
[0.00504412, 0.00407255, 0.00382109, 0.00351444, 0.00263496, 0.00149089],
[0.00387535, 0.00316181, 0.00502629, 0.00475195, 0.00439013, 0.00527227],
]
expected_standard_dev = [
[0.33934583, 0.27398329, 0.25706606, 0.2364359, 0.17726851, 0.10030056],
[0.26071661, 0.21271283, 0.33814647, 0.31969003, 0.29534847, 0.35469478],
expected_col_std_dev = [
[0.47876747, 0.48213008, 0.47720873, 0.47358921, 0.43399681, 0.24550986],
[0.47876747, 0.48213008, 0.47720873, 0.47358921, 0.43399681, 0.24550986],
]
expected_col_std_err = [
[0.01567414, 0.01993363, 0.01575024, 0.01682826, 0.01795892, 0.00918798],
[0.01567414, 0.01993363, 0.01575024, 0.01682826, 0.01795892, 0.00918798],
]

np.testing.assert_almost_equal(slice_.zscore, expected_zscore)
np.testing.assert_almost_equal(slice_.table_std_dev, expected_standard_dev)
np.testing.assert_almost_equal(slice_.table_std_err, expected_standard_error)
np.testing.assert_almost_equal(slice_.table_std_dev, expected_table_std_dev)
np.testing.assert_almost_equal(slice_.table_std_err, expected_table_std_err)
np.testing.assert_almost_equal(slice_.columns_std_dev, expected_col_std_dev)
np.testing.assert_almost_equal(slice_.columns_std_err, expected_col_std_err)

def test_various_measures_admit_by_gender_weighted_rows(self):
""" see
Expand All @@ -968,12 +988,16 @@ def test_various_measures_admit_by_gender_weighted_rows(self):
[-9.425619845206922, 9.42561984520692],
]
)
expected_standard_dev = [[0.44013199, 0.32828883], [0.47059018, 0.45061221]]
expected_standard_error = [[0.00659641, 0.00492018], [0.0070529, 0.00675348]]
expected_table_std_dev = [[0.44013199, 0.32828883], [0.47059018, 0.45061221]]
expected_table_std_err = [[0.00659641, 0.00492018], [0.0070529, 0.00675348]]
expected_col_std_dev = [[0.49668253, 0.45933735], [0.49668253, 0.45933735]]
expected_col_std_err = [[0.00966009, 0.01080163], [0.00966009, 0.01080163]]

np.testing.assert_almost_equal(slice_.zscore, expected_zscore)
np.testing.assert_almost_equal(slice_.table_std_dev, expected_standard_dev)
np.testing.assert_almost_equal(slice_.table_std_err, expected_standard_error)
np.testing.assert_almost_equal(slice_.table_std_dev, expected_table_std_dev)
np.testing.assert_almost_equal(slice_.table_std_err, expected_table_std_err)
np.testing.assert_almost_equal(slice_.columns_std_dev, expected_col_std_dev)
np.testing.assert_almost_equal(slice_.columns_std_err, expected_col_std_err)

def test_selected_crosstab_as_array(self):
slice_ = Cube(CR.SELECTED_CROSSTAB_4).partitions[0]
Expand Down
176 changes: 166 additions & 10 deletions tests/integration/test_cubepart.py
Original file line number Diff line number Diff line change
Expand Up @@ -440,7 +440,7 @@ def it_calculates_various_measures(self):
],
)

# Test standard deviation
# Test table standard deviation
np.testing.assert_almost_equal(
slice_.table_std_dev,
[
Expand All @@ -454,7 +454,7 @@ def it_calculates_various_measures(self):
],
)

# Test standard error
# Test table standard error
np.testing.assert_almost_equal(
slice_.table_std_err,
[
Expand All @@ -468,6 +468,34 @@ def it_calculates_various_measures(self):
],
)

# Test cols standard dev
np.testing.assert_almost_equal(
slice_.columns_std_dev,
[
[0.41561694, 0.45910103, 0.48762374, 0.49916867, np.nan, 0.4689693],
[0.39644438, 0.44601408, 0.48005275, 0.49353964, np.nan, 0.4689693],
[0.16604076, 0.12087539, 0.0, 0.22060003, np.nan, 0.0],
[0.27659294, 0.30162497, 0.32573599, 0.31156024, np.nan, 0.4689693],
[0.27659294, 0.36469915, 0.42678893, 0.4384431, np.nan, 0.0],
[0.0, 0.0, 0.0, 0.0, np.nan, 0.0],
[0.16126906, 0.1647831, 0.16853704, 0.22060003, np.nan, 0.0],
],
)

# Test cols standard err
np.testing.assert_almost_equal(
slice_.columns_std_err,
[
[0.06895161, 0.05506512, 0.08465401, 0.11473767, np.nan, 0.27200111],
[0.06577085, 0.05349546, 0.08333965, 0.1134438, np.nan, 0.27200111],
[0.02754647, 0.01449794, 0.0, 0.05070657, np.nan, 0.0],
[0.04588727, 0.03617726, 0.05654946, 0.07161446, np.nan, 0.27200111],
[0.04588727, 0.04374245, 0.07409277, 0.10077944, np.nan, 0.0],
[0.0, 0.0, 0.0, 0.0, np.nan, 0.0],
[0.02675483, 0.01976428, 0.0292589, 0.05070657, np.nan, 0.0],
],
)

def it_provides_base_counts(self):
slice_ = _Slice(Cube(CR.CAT_X_CAT_PRUNING_HS), 0, None, None, 0)
np.testing.assert_array_equal(
Expand Down Expand Up @@ -788,7 +816,7 @@ def it_calculates_mr_x_cat_various_measures(self):
np.nan,
],
]
expected_standard_dev = [
expected_table_std_dev = [
[
0.26982777,
0.18268971,
Expand Down Expand Up @@ -840,7 +868,7 @@ def it_calculates_mr_x_cat_various_measures(self):
0.49151833,
],
]
expected_standard_error = [
expected_table_std_err = [
[
0.02031794,
0.01375648,
Expand Down Expand Up @@ -892,8 +920,114 @@ def it_calculates_mr_x_cat_various_measures(self):
0.0226256,
],
]
np.testing.assert_almost_equal(slice_.table_std_dev, expected_standard_dev)
np.testing.assert_almost_equal(slice_.table_std_err, expected_standard_error)
expected_col_std_dev = [
[
0.48002447,
0.38894286,
0.4819874,
np.nan,
0.25671222,
0.32250231,
np.nan,
0.29348932,
],
[
0.4949243,
0.46165233,
0.48568705,
np.nan,
0.46761583,
0.46335822,
np.nan,
0.4655109,
],
[
0.42148452,
0.49947006,
0.49363665,
np.nan,
0.49695538,
0.4998946,
np.nan,
0.49900958,
],
[
0.46916094,
0.46423936,
0.46550355,
np.nan,
0.40251749,
0.35069396,
np.nan,
0.37577601,
],
[
0.41284167,
0.37480303,
0.38563611,
np.nan,
0.41249133,
0.4002662,
np.nan,
0.40614819,
],
]
expected_col_std_err = [
[
0.1028366,
0.06789606,
0.06522613,
np.nan,
0.03345903,
0.04066543,
np.nan,
0.02659733,
],
[
0.12475421,
0.07228711,
0.06460091,
np.nan,
0.0532943,
0.05249552,
np.nan,
0.03740326,
],
[
0.12056446,
0.07802173,
0.06767673,
np.nan,
0.05182406,
0.04935598,
np.nan,
0.03577725,
],
[
0.10249407,
0.05842565,
0.05076373,
np.nan,
0.03127232,
0.02435786,
np.nan,
0.01945794,
],
[
0.07421655,
0.03989992,
0.03532412,
np.nan,
0.03203276,
0.02927602,
np.nan,
0.02162477,
],
]
np.testing.assert_almost_equal(slice_.table_std_dev, expected_table_std_dev)
np.testing.assert_almost_equal(slice_.table_std_err, expected_table_std_err)
np.testing.assert_almost_equal(slice_.columns_std_dev, expected_col_std_dev)
np.testing.assert_almost_equal(slice_.columns_std_err, expected_col_std_err)
np.testing.assert_almost_equal(slice_.zscore, expected_zscore)
np.testing.assert_almost_equal(slice_.pvals, expected_pvals)

Expand Down Expand Up @@ -943,7 +1077,7 @@ def it_calculates_cat_x_mr_various_measures(self):
[np.nan, np.nan, np.nan, np.nan, np.nan],
[np.nan, np.nan, np.nan, np.nan, np.nan],
]
expected_standard_dev = [
expected_table_std_dev = [
[0.26982777, 0.20175242, 0.10614473, 0.17290444, 0.22056401],
[0.18268971, 0.2363915, 0.26958793, 0.29282782, 0.36225248],
[0.31735855, 0.30254544, 0.28661105, 0.33136132, 0.40489719],
Expand All @@ -953,7 +1087,7 @@ def it_calculates_cat_x_mr_various_measures(self):
[0.0, 0.0, 0.0, 0.0, 0.0],
[0.24779961, 0.42250711, 0.49311729, 0.46756128, 0.49151833],
]
expected_standard_error = [
expected_table_std_err = [
[0.02031794, 0.01387539, 0.00674372, 0.00808768, 0.01015302],
[0.01375648, 0.01625767, 0.01712781, 0.01369714, 0.01667523],
[0.023897, 0.02080736, 0.01820934, 0.01549956, 0.01863825],
Expand All @@ -963,9 +1097,31 @@ def it_calculates_cat_x_mr_various_measures(self):
[0.0, 0.0, 0.0, 0.0, 0.0],
[0.01865923, 0.02905764, 0.03132936, 0.02187038, 0.0226256],
]
expected_col_std_dev = [
[0.4964821, 0.33305134, 0.14814455, 0.19222783, 0.24516228],
[0.39446083, 0.38216178, 0.36232051, 0.32260503, 0.39589898],
[0.48183561, 0.46026052, 0.38241808, 0.36325841, 0.43799672],
[0.0, 0.0, 0.0, 0.0, 0.0],
[0.3384968, 0.47745422, 0.49106178, 0.47991786, 0.4751812],
[0.42365382, 0.47497424, 0.49293283, 0.49973929, 0.48921994],
[0.0, 0.0, 0.0, 0.0, 0.0],
[0.48183561, 0.46026052, 0.38241808, 0.36325841, 0.43799672],
]
expected_col_std_err = [
[0.08827619, 0.03960109, 0.0132104, 0.01003574, 0.01263043],
[0.07013646, 0.04544051, 0.03230898, 0.01684241, 0.02039618],
[0.08567199, 0.05472675, 0.03410112, 0.01896482, 0.02256499],
[0.0, 0.0, 0.0, 0.0, 0.0],
[0.06018587, 0.05677115, 0.04378914, 0.02505532, 0.02448069],
[0.07532707, 0.05647627, 0.04395598, 0.02609015, 0.02520394],
[0.0, 0.0, 0.0, 0.0, 0.0],
[0.08567199, 0.05472675, 0.03410112, 0.01896482, 0.02256499],
]

np.testing.assert_almost_equal(slice_.table_std_dev, expected_standard_dev)
np.testing.assert_almost_equal(slice_.table_std_err, expected_standard_error)
np.testing.assert_almost_equal(slice_.table_std_dev, expected_table_std_dev)
np.testing.assert_almost_equal(slice_.table_std_err, expected_table_std_err)
np.testing.assert_almost_equal(slice_.columns_std_dev, expected_col_std_dev)
np.testing.assert_almost_equal(slice_.columns_std_err, expected_col_std_err)
np.testing.assert_almost_equal(slice_.zscore, expected_zscore)
np.testing.assert_almost_equal(slice_.pvals, expected_pvals)

Expand Down
Loading

0 comments on commit 0ffbbee

Please sign in to comment.