Skip to content

Commit

Permalink
cr: address steve's comments
Browse files Browse the repository at this point in the history
  • Loading branch information
slobodan-ilic committed Oct 30, 2020
1 parent 71fdbc3 commit 4d10e24
Show file tree
Hide file tree
Showing 8 changed files with 259 additions and 227 deletions.
71 changes: 41 additions & 30 deletions src/cr/cube/cubepart.py
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,7 @@ def dimension_types(self):
return tuple(d.dimension_type for d in self._dimensions)

def evaluate(self, measure_expr):
"""-> 1D/2D ndarray, values evaluated given the function specification
"""Returns 1D/2D ndarray, values evaluated given the function specification
The `measure_expr` contains the function to apply and its parameters, e.g.:
```
Expand Down Expand Up @@ -239,8 +239,9 @@ class _Slice(CubePartition):
dimensions which can be crosstabbed in a slice.
"""

# quantile of the normal cdf at .975 because the confidence
# interval is ± (.025 on each side)
# ---This is the quantile of the standard normal Cumulative Distribution Function
# ---(CDF) at probability 97.5% (p=.975). It is used because a two-sided 95%
# ---confidence interval leaves 2.5% (.025) in each tail of the distribution.
Z_975 = 1.959964

def __init__(self, cube, slice_idx, transforms, population, mask_size):
Expand Down Expand Up @@ -296,22 +297,26 @@ def columns_margin(self):
return np.array([column.margin for column in self._matrix.columns]).T

@lazyproperty
def columns_moe(self):
"""Returns the margin of error (MoE) for col percentages
`moe = Z_975 * 100 * std_error` (the * 100 part accounts for percentages)
def columns_percentages_moe(self):
"""1D/2D np.float64 ndarray of margin-of-error (MoE) for columns percentages.
The values are represented as percentages, analogous to the `table_percentages`
property. This means that the value of 3.5% will have the value 3.5 (not 0.035).
The values can be np.nan when the corresponding percentage is also np.nan, which
happens when the respective columns margin is 0.
"""
return self.Z_975 * 100 * self.columns_std_err

@lazyproperty
def columns_std_dev(self):
"""Returns the standard deviation for col percentages
"""Returns the standard deviation for column percentages
`std_deviation = sqrt(variance)`
"""
return np.sqrt(self._columns_variance)

@lazyproperty
def columns_std_err(self):
"""Returns the standard error for col percentages
"""Returns the standard error for column percentages
`std_error = sqrt(variance/N)`
"""
return np.sqrt(self._columns_variance / self.columns_margin)
Expand Down Expand Up @@ -347,7 +352,8 @@ def inserted_row_idxs(self):

@lazyproperty
def insertions(self):
"""Returns masked array with residuals for insertions
"""2D np.float64 np.ma.core.MaskedArray of residuals for insertions.
0 1 2 3 4 5 6
0 inf inf inf inf inf -2.9 inf
1 inf inf inf inf inf -4.3 inf
Expand All @@ -356,7 +362,8 @@ def insertions(self):
4 -1.16 2.20 5.84 1.78 -8.48 -5.92 0.93
5 inf inf inf inf inf 9.70 inf
Only the insertions residuals are showed in a inf masked array"""
Only the insertion residuals are shown in an inf-masked array.
"""
inserted_rows = self.inserted_row_idxs
inserted_cols = self.inserted_column_idxs
if not inserted_cols and not inserted_rows:
Expand Down Expand Up @@ -610,7 +617,7 @@ def scale_means_rows_margin(self):

@lazyproperty
def scale_median_column(self):
"""-> np.int64 ndarray of the columns scale median
"""np.int64 ndarray of the columns scale median
The median is calculated using the standard algebra applied to the numeric
values repeated for each related counts value
Expand All @@ -630,7 +637,7 @@ def scale_median_column(self):

@lazyproperty
def scale_median_row(self):
"""-> np.int64 ndarray of the rows scale median
"""np.int64 ndarray of the rows scale median
The median is calculated using the standard algebra applied to the numeric
values repeated for each related counts value
Expand All @@ -650,7 +657,7 @@ def scale_median_row(self):

@lazyproperty
def scale_median_column_margin(self):
""" -> np.int64 represents the column scale median margin"""
"""np.int64 represents the column scale median margin"""
if np.all(np.isnan(self._columns_dimension_numeric_values)):
return None
columns_margin = self.columns_margin
Expand All @@ -666,7 +673,7 @@ def scale_median_column_margin(self):

@lazyproperty
def scale_median_row_margin(self):
""" -> np.int64 represents the rows scale median margin"""
"""np.int64 represents the rows scale median margin"""
if np.all(np.isnan(self._rows_dimension_numeric_values)):
return None
rows_margin = self.rows_margin
Expand All @@ -682,28 +689,28 @@ def scale_median_row_margin(self):

@lazyproperty
def scale_std_dev_column(self):
""" -> 1D np.ndarray of the standard deviation column of scales"""
"""1D np.ndarray of the standard deviation column of scales"""
if np.all(np.isnan(self._columns_dimension_numeric_values)):
return None
return np.sqrt(self.var_scale_means_column)

@lazyproperty
def scale_std_dev_row(self):
""" -> 1D np.ndarray of the standard deviation row of scales"""
"""1D np.ndarray of the standard deviation row of scales"""
if np.all(np.isnan(self._rows_dimension_numeric_values)):
return None
return np.sqrt(self.var_scale_means_row)

@lazyproperty
def scale_std_err_column(self):
""" -> 1D np.ndarray of the standard error column of scales"""
"""1D np.ndarray of the standard error column of scales"""
if np.all(np.isnan(self._columns_dimension_numeric_values)):
return None
return self.scale_std_dev_column / np.sqrt(self.rows_margin)

@lazyproperty
def scale_std_err_row(self):
""" -> 1D np.ndarray of the standard error row of scales"""
"""1D np.ndarray of the standard error row of scales"""
if np.all(np.isnan(self._rows_dimension_numeric_values)):
return None
return self.scale_std_dev_row / np.sqrt(self.columns_margin)
Expand Down Expand Up @@ -774,9 +781,13 @@ def table_margin_unpruned(self):
return self._matrix.table_margin_unpruned

@lazyproperty
def table_moe(self):
"""Returns the margin of error (MoE) for table percentages
`moe = Z_975 * 100 * std_error` (the * 100 part accounts for percentages)
def table_percentages_moe(self):
"""1D/2D np.float64 ndarray of margin-of-error (MoE) for table percentages.
The values are represented as percentages, analogous to the `table_percentages`
property. This means that the value of 3.5% will have the value 3.5 (not 0.035).
The values can be np.nan when the corresponding percentage is also np.nan, which
happens when the respective table margin is 0.
"""
return self.Z_975 * 100 * self.table_std_err

Expand Down Expand Up @@ -817,7 +828,7 @@ def unweighted_counts(self):

@lazyproperty
def var_scale_means_column(self):
"""-> 1D np.ndarray of the column variance values for scales
"""1D np.ndarray of the column variance values for scales
Note: the variance for scale is defined as sum((Yi - Y~)^2) / N, where Y~ is the
mean of the data.
Expand All @@ -838,7 +849,7 @@ def var_scale_means_column(self):

@lazyproperty
def var_scale_means_row(self):
"""-> 1D np.ndarray of the row variance values for scales
"""1D np.ndarray of the row variance values for scales
Note: the variance for scale is defined as sum((Yi - Y~)^2) / N, where Y~ is the
mean of the data.
Expand Down Expand Up @@ -878,7 +889,7 @@ def _columns_dimension_numeric_values(self):

@lazyproperty
def _columns_variance(self):
"""Returns the variance for col percentages
"""Returns the variance for column percentages
`variance = p * (1-p)`
"""
return (
Expand Down Expand Up @@ -1055,7 +1066,7 @@ def scale_mean(self):

@lazyproperty
def scale_median(self):
"""-> np.int64 the median of scales
"""np.int64 the median of scales
The median is calculated using the standard algebra applied to the numeric
values repeated for each related counts value
Expand All @@ -1069,14 +1080,14 @@ def scale_median(self):

@lazyproperty
def scale_std_dev(self):
""" -> np.float64, the standard deviation of scales"""
"""np.float64, the standard deviation of scales"""
if np.all(np.isnan(self._numeric_values)):
return None
return np.sqrt(self.var_scale_mean)

@lazyproperty
def scale_std_err(self):
""" -> np.float64, the standard error of scales"""
"""np.float64, the standard error of scales"""
if np.all(np.isnan(self._numeric_values)):
return None
counts = self._counts_as_array[self._numeric_values_mask]
Expand All @@ -1101,12 +1112,12 @@ def smoothed_dimension_dict(self):

@lazyproperty
def standard_deviation(self):
""" -> np.ndarray percentages standard deviation"""
"""np.ndarray percentages standard deviation"""
return np.sqrt(self._variance)

@lazyproperty
def standard_error(self):
""" -> np.ndarray percentages standard error"""
"""np.ndarray percentages standard error"""
if self.dimension_types[0] == DT.MR:
return np.sqrt(self._variance / self.bases)
return np.sqrt(self._variance / np.sum(self.rows_margin))
Expand Down Expand Up @@ -1223,7 +1234,7 @@ def _numeric_values(self):

@lazyproperty
def _numeric_values_mask(self):
"""-> np.ndarray boolean elements for each element in rows dimension."
"""np.ndarray boolean elements for each element in rows dimension."
This array contains True or False according to the nan in the numeric_values
array
Expand Down
56 changes: 56 additions & 0 deletions tests/expectations/col-per-moe-cat-x-cat-hs-2rows-1col.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
[
[
13.03595844,
7.67698551,
3.46251469,
4.55693081,
4.13969905,
3.06644326,
7.58177966,
],
[
9.31746956,
8.36644659,
3.78951977,
5.23042895,
3.72360922,
3.15148999,
7.65643283,
],
[
11.77008734,
8.47930382,
3.85500973,
5.5463129,
4.8153303,
3.66939254,
7.5418196,
],
[
6.0015905,
7.16459682,
3.25399504,
4.39795907,
3.1556904,
2.63154691,
6.03640099,
],
[
10.57125967,
8.64082889,
3.91804373,
5.56024488,
4.45804303,
3.59253748,
8.05245981,
],
[
10.91512996,
6.50723624,
2.9825236,
4.90998204,
4.89378128,
3.57587294,
5.83679508,
],
]
56 changes: 56 additions & 0 deletions tests/expectations/col-std-dev-cat-x-cat-hs-2rows-1col.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
[
[
0.49326036,
0.43967108,
0.43739495,
0.4093598,
0.42242603,
0.41688475,
0.47060217,
],
[
0.35255854,
0.47915742,
0.47870319,
0.46986171,
0.3799671,
0.42844691,
0.4752359,
],
[
0.44536177,
0.48562091,
0.48697607,
0.49823831,
0.49136926,
0.49885606,
0.46812184,
],
[
0.22709084,
0.4103259,
0.41105414,
0.39507899,
0.32201514,
0.35776034,
0.37468029,
],
[
0.4,
0.49487166,
0.49493871,
0.49948985,
0.45491071,
0.48840757,
0.49981735,
],
[
0.41301152,
0.372678,
0.37676108,
0.44107522,
0.49937461,
0.48614202,
0.36229072,
],
]
Loading

0 comments on commit 4d10e24

Please sign in to comment.