Skip to content

Commit

Permalink
Merge b19c139 into 9745bb1
Browse files Browse the repository at this point in the history
  • Loading branch information
slobodan-ilic committed Nov 13, 2020
2 parents 9745bb1 + b19c139 commit 77deb7e
Show file tree
Hide file tree
Showing 22 changed files with 557 additions and 386 deletions.
111 changes: 85 additions & 26 deletions src/cr/cube/cubepart.py
Original file line number Diff line number Diff line change
Expand Up @@ -298,31 +298,31 @@ def columns_margin(self):
return np.array([column.margin for column in self._matrix.columns]).T

@lazyproperty
def columns_percentages_moe(self):
"""1D/2D np.float64 ndarray of margin-of-error (MoE) for columns percentages.
def column_proportions_moe(self):
"""1D/2D np.float64 ndarray of margin-of-error (MoE) for columns proportions.
The values are represented as percentages, analogue to the `table_percentages`
property. This means that the value of 3.5% will have the value 3.5 (not 0.035).
The values are represented as fractions, analogue to the `column_proportions`
property. This means that the value of 3.5% will have the value 0.035.
The values can be np.nan when the corresponding percentage is also np.nan, which
happens when the respective columns margin is 0.
"""
return Z_975 * 100 * self.columns_std_err
return Z_975 * self.column_std_err

@lazyproperty
def columns_std_dev(self):
"""standard deviation for column percentages
def column_std_err(self):
"""standard error for column percentages
`std_deviation = sqrt(variance)`
`std_error = sqrt(variance/N)`
"""
return np.sqrt(self._columns_variance)
return np.sqrt(self._column_variance / self.columns_margin)

@lazyproperty
def columns_std_err(self):
"""standard error for column percentages
def column_std_dev(self):
"""standard deviation for column percentages
`std_error = sqrt(variance/N)`
`std_deviation = sqrt(variance)`
"""
return np.sqrt(self._columns_variance / self.columns_margin)
return np.sqrt(self._column_variance)

@lazyproperty
def counts(self):
Expand Down Expand Up @@ -459,6 +459,19 @@ def population_counts(self):
self.table_proportions * self._population * self._cube.population_fraction
)

@lazyproperty
def population_moe(self):
    """2D np.float64 ndarray of population margin-of-error (MoE) estimates.

    Each value is `table_std_err` scaled by `Z_975` and by the filtered population
    size (`_population` multiplied by the cube's `population_fraction`). Values are
    therefore expressed as estimated population counts, like `population_counts`,
    rather than as proportions. A value is np.nan wherever `table_std_err` is
    np.nan.
    """
    # --- scale factor is built first so the final product keeps the grouping
    # --- `(Z_975 * filtered-population) * std-err`
    population_scale = Z_975 * (self._population * self._cube.population_fraction)
    return population_scale * self.table_std_err

@lazyproperty
def pvals(self):
    """ndarray assembled from the `pvals` of each matrix row, in row order."""
    per_row_pvals = [matrix_row.pvals for matrix_row in self._matrix.rows]
    return np.array(per_row_pvals)
Expand Down Expand Up @@ -526,6 +539,29 @@ def rows_dimension_type(self):
def rows_margin(self):
return np.array([row.margin for row in self._matrix.rows])

@lazyproperty
def row_proportions_moe(self):
    """2D np.float64 ndarray of margin-of-error (MoE) for row proportions.

    Values are fractions on the same scale as `row_proportions`, so a margin of
    error of 3.5% appears as 0.035. Each value is `row_std_err` scaled by `Z_975`
    and is therefore np.nan wherever `row_std_err` is np.nan.
    """
    std_err = self.row_std_err
    return Z_975 * std_err

@lazyproperty
def row_std_dev(self):
    """2D np.float64 ndarray of standard deviations for row percentages.

    Computed as the element-wise square root of `_row_variance`.
    """
    variance = self._row_variance
    return np.sqrt(variance)

@lazyproperty
def row_std_err(self):
    """2D np.float64 ndarray of standard errors for row percentages.

    Computed as `sqrt(variance / N)`, where `N` is the margin of the row the cell
    belongs to.
    """
    variance = self._row_variance
    # --- Indexing with `np.newaxis` turns the rows-margin vector into a column, so
    # --- NumPy broadcasting divides every cell by the margin of its own row.
    margin_column = self.rows_margin[:, np.newaxis]
    return np.sqrt(variance / margin_column)

@lazyproperty
def scale_mean_pairwise_indices(self):
"""Sequence of column-idx tuples indicating pairwise-t result of scale-means.
Expand Down Expand Up @@ -802,15 +838,15 @@ def table_percentages(self):
return self.table_proportions * 100

@lazyproperty
def table_percentages_moe(self):
"""1D/2D np.float64 ndarray of margin-of-error (MoE) for table percentages.
def table_proportions_moe(self):
"""1D/2D np.float64 ndarray of margin-of-error (MoE) for table proportions.
The values are represented as percentages, analogue to the `table_percentages`
property. This means that the value of 3.5% will have the value 3.5 (not 0.035).
The values can be np.nan when the corresponding percentage is also np.nan, which
The values are represented as fractions, analogue to the `table_proportions`
property. This means that the value of 3.5% will have the value 0.035. The
values can be np.nan when the corresponding percentage is also np.nan, which
happens when the respective table margin is 0.
"""
return Z_975 * 100 * self.table_std_err
return Z_975 * self.table_std_err

@lazyproperty
def table_proportions(self):
Expand Down Expand Up @@ -891,7 +927,7 @@ def _columns_dimension_numeric_values(self):
return np.array([column.numeric_value for column in self._matrix.columns])

@lazyproperty
def _columns_variance(self):
def _column_variance(self):
"""variance for column percentages
`variance = p * (1-p)`
Expand All @@ -900,6 +936,16 @@ def _columns_variance(self):
self.counts / self.columns_margin * (1 - self.counts / self.columns_margin)
)

@lazyproperty
def _row_variance(self):
    """ndarray of variances for row percentages, computed as `p * (1 - p)`.

    `p` is each count divided by the margin of the row it belongs to.
    """
    # --- The rows margin is a vector that conceptually sits as a column to the
    # --- right of the crosstab. Adding an axis makes it an actual column so that
    # --- NumPy broadcasting divides each cell by its own row's margin.
    row_totals = self.rows_margin[:, np.newaxis]
    proportions = self.counts / row_totals
    return proportions * (1 - proportions)

@lazyproperty
def _dimensions(self):
"""tuple of (rows_dimension, columns_dimension) Dimension objects."""
Expand Down Expand Up @@ -1006,6 +1052,19 @@ def population_counts(self):
* self._cube.population_fraction
)

@lazyproperty
def population_moe(self):
    """1D np.float64 ndarray of population margin-of-error (MoE) estimates.

    Each value is `standard_error` scaled by `Z_975` and by the filtered population
    size (`_population` multiplied by the cube's `population_fraction`). Values are
    therefore expressed as estimated population counts, like `population_counts`,
    rather than as proportions. A value is np.nan wherever `standard_error` is
    np.nan.
    """
    # --- scale factor is built first so the final product keeps the grouping
    # --- `(Z_975 * filtered-population) * std-err`
    population_scale = Z_975 * (self._population * self._cube.population_fraction)
    return population_scale * self.standard_error

@lazyproperty
def row_base(self):
    """ndarray assembled from the `base` of each stripe row, in row order."""
    per_row_bases = [stripe_row.base for stripe_row in self._stripe.rows]
    return np.array(per_row_bases)
Expand Down Expand Up @@ -1127,15 +1186,15 @@ def standard_error(self):
return np.sqrt(self._variance / np.sum(self.rows_margin))

@lazyproperty
def table_percentages_moe(self):
"""1D np.float64 ndarray of margin-of-error (MoE) for table percentages.
def table_proportions_moe(self):
"""1D np.float64 ndarray of margin-of-error (MoE) for table proportions.
The values are represented as percentages, analogue to the `table_percentages`
property. This means that the value of 3.5% will have the value 3.5 (not 0.035).
The values can be np.nan when the corresponding percentage is also np.nan, which
The values are represented as fractions, analogue to the `table_proportions`
property. This means that the value of 3.5% will have the value 0.035. The
values can be np.nan when the corresponding proportion is also np.nan, which
happens when the respective table margin is 0.
"""
return Z_975 * 100 * self.standard_error
return Z_975 * self.standard_error

@lazyproperty
def table_base(self):
Expand Down
18 changes: 18 additions & 0 deletions tests/expectations/admit-x-dept-unweighted-col-prop-moe.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
[
[
0.0307207565,
0.0390691882,
0.030869894,
0.0329827837,
0.0351988285,
0.0180081013,
],
[
0.0307207565,
0.0390691882,
0.030869894,
0.0329827837,
0.0351988285,
0.0180081013,
],
]
12 changes: 6 additions & 6 deletions tests/expectations/cat-hs-x-mr-col-moe.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
[
[17.30181459, 7.76167031, 2.58919072, 1.96696835, 2.47551804],
[13.7464937, 8.90617586, 6.33244289, 3.30105103, 3.99757694],
[16.79140176, 10.72624566, 6.68369737, 3.71703613, 4.42265749],
[0.17301815, 0.0776167, 0.02589191, 0.01966968, 0.02475518],
[0.13746494, 0.08906176, 0.06332443, 0.03301051, 0.03997577],
[0.16791402, 0.10726246, 0.06683697, 0.03717036, 0.04422657],
[0.0, 0.0, 0.0, 0.0, 0.0],
[11.79621344, 11.12694011, 8.58251349, 4.91075221, 4.79812657],
[14.76383504, 11.06914477, 8.61521467, 5.11357472, 4.93988229],
[0.11796213, 0.1112694, 0.08582513, 0.04910752, 0.04798127],
[0.14763835, 0.11069145, 0.08615215, 0.05113575, 0.04939882],
[0.0, 0.0, 0.0, 0.0, 0.0],
[16.79140176, 10.72624566, 6.68369737, 3.71703613, 4.42265749],
[0.16791402, 0.10726246, 0.06683697, 0.03717036, 0.04422657],
]
30 changes: 30 additions & 0 deletions tests/expectations/cat-x-cat-pruning-hs-col-prop-moe.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
[
[
0.1351426726,
0.1079256616,
0.1659188199,
0.2248817088,
float("NaN"),
0.5331123764,
],
[
0.1289084933,
0.104849174,
0.163342716,
0.2223457567,
float("NaN"),
0.5331123764,
],
[0.0539900809, 0.0284154374, 0.0, 0.0993830606, float("NaN"), 0.0],
[
0.0899374047,
0.0709061236,
0.1108349041,
0.1403617724,
float("NaN"),
0.5331123764,
],
[0.0899374047, 0.0857336265, 0.1452191695, 0.1975240786, float("NaN"), 0.0],
[0.0, 0.0, 0.0, 0.0, float("NaN"), 0.0],
[0.0524385075, 0.0387372796, 0.0573464003, 0.0993830606, float("NaN"), 0.0],
]
30 changes: 30 additions & 0 deletions tests/expectations/cat-x-cat-pruning-hs-table-prop-moe.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
[
[
0.0947425342,
0.1023031735,
0.0851416179,
0.064011466,
0.0,
0.0210138857,
],
[
0.0548548061,
0.083257033,
0.0691062699,
0.0577563348,
0.0,
0.030001713,
],
[0.0216468583, 0.0216468583, 0.0, 0.0210138857, 0.0, 0.0],
[
0.0367141987,
0.054641654,
0.0419511734,
0.0304381859,
0.0,
0.030001713,
],
[0.0367141987, 0.0666431728, 0.0577563348, 0.046236001, 0.0, 0.0],
[0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
[0.0210138857, 0.0295581821, 0.0210138857, 0.0210138857, 0.0, 0.0],
]
44 changes: 6 additions & 38 deletions tests/expectations/col-per-moe-cat-x-cat-hs-2rows-1col.py
Original file line number Diff line number Diff line change
@@ -1,40 +1,8 @@
[
[
13.03595844,
7.67698551,
3.46251469,
4.55693081,
4.13969905,
3.06644326,
7.58177966,
],
[
9.31746956,
8.36644659,
3.78951977,
5.23042895,
3.72360922,
3.15148999,
7.65643283,
],
[11.77008734, 8.47930382, 3.85500973, 5.5463129, 4.8153303, 3.66939254, 7.5418196],
[6.0015905, 7.16459682, 3.25399504, 4.39795907, 3.1556904, 2.63154691, 6.03640099],
[
10.57125967,
8.64082889,
3.91804373,
5.56024488,
4.45804303,
3.59253748,
8.05245981,
],
[
10.91512996,
6.50723624,
2.9825236,
4.90998204,
4.89378128,
3.57587294,
5.83679508,
],
[0.13035958, 0.07676986, 0.03462515, 0.04556931, 0.04139699, 0.03066443, 0.0758178],
[0.0931747, 0.08366447, 0.0378952, 0.05230429, 0.03723609, 0.0315149, 0.07656433],
[0.11770087, 0.08479304, 0.0385501, 0.05546313, 0.0481533, 0.03669393, 0.0754182],
[0.06001591, 0.07164597, 0.03253995, 0.04397959, 0.0315569, 0.02631547, 0.06036401],
[0.1057126, 0.08640829, 0.03918044, 0.05560245, 0.04458043, 0.03592537, 0.0805246],
[0.1091513, 0.06507236, 0.02982524, 0.04909982, 0.04893781, 0.03575873, 0.05836795],
]
18 changes: 18 additions & 0 deletions tests/expectations/econ-gender-x-ideology-weighted-col-prop-moe.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
[
[
0.115249326,
0.072633194,
0.050491687,
0.065859452,
0.088723517,
0.147331947,
],
[
0.115249326,
0.072633194,
0.050491687,
0.065859452,
0.088723517,
0.147331947,
],
]
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
[
[0.02175933, 0.03332428, 0.04187784, 0.03716728, 0.0308031, 0.01415677],
[0.02346025, 0.03427124, 0.04290557, 0.0354381, 0.02346025, 0.01953654],
]
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
[
[0.24824605, 0.3801874, 0.47777249, 0.42403103, 0.35142393, 0.16151057],
[0.26765143, 0.39099102, 0.48949758, 0.40430337, 0.26765143, 0.22288691],
]
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
[
[
0.0110701312,
0.0175249771,
0.0236182549,
0.0199602358,
0.016037868,
0.0071269548,
],
[
0.0119745296,
0.0181024584,
0.0247465565,
0.0188321449,
0.0119745296,
0.0099024628,
],
]
Loading

0 comments on commit 77deb7e

Please sign in to comment.