Skip to content

Commit

Permalink
Merge 21d6d76 into 85cd116
Browse files Browse the repository at this point in the history
  • Loading branch information
slobodan-ilic committed Oct 30, 2020
2 parents 85cd116 + 21d6d76 commit 49bfdd6
Show file tree
Hide file tree
Showing 11 changed files with 508 additions and 147 deletions.
1 change: 0 additions & 1 deletion .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -11,4 +11,3 @@ repos:
rev: stable
hooks:
- id: black
language_version: python3.6
93 changes: 62 additions & 31 deletions src/cr/cube/cubepart.py
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,7 @@ def dimension_types(self):
return tuple(d.dimension_type for d in self._dimensions)

def evaluate(self, measure_expr):
""" -> 1D/2D ndarray, values evaluated given the function specification
"""Return 1D/2D ndarray, values evaluated given the function specification

The `function_spec` contains the function to apply and its parameters, e.g.:
```
Expand Down Expand Up @@ -113,7 +113,7 @@ def ndim(self):

@lazyproperty
def population_fraction(self):
"""Returns the population fraction of the cube"""
"""population fraction of the cube"""
return self._cube.population_fraction

@lazyproperty
Expand Down Expand Up @@ -239,6 +239,11 @@ class _Slice(CubePartition):
dimensions which can be crosstabbed in a slice.
"""

# ---This is the quantile of the standard normal Cumulative Distribution Function
# ---(CDF) at probability 97.5% (p=.975). It is used for a two-sided 95% confidence
# ---interval, which leaves 2.5% (.025) in each tail of the distribution.
Z_975 = 1.959964

def __init__(self, cube, slice_idx, transforms, population, mask_size):
super(_Slice, self).__init__(cube, transforms)
self._slice_idx = slice_idx
Expand Down Expand Up @@ -291,16 +296,29 @@ def columns_dimension_type(self):
def columns_margin(self):
return np.array([column.margin for column in self._matrix.columns]).T

@lazyproperty
def columns_percentages_moe(self):
"""1D/2D np.float64 ndarray of margin-of-error (MoE) for columns percentages.

The values are represented as percentages, analogous to the `table_percentages`
property. This means that the value of 3.5% will have the value 3.5 (not 0.035).
The values can be np.nan when the corresponding percentage is also np.nan, which
happens when the respective columns margin is 0.
"""
return self.Z_975 * 100 * self.columns_std_err

@lazyproperty
def columns_std_dev(self):
"""Returns the standard deviation for cell percentages
"""standard deviation for column percentages

`std_deviation = sqrt(variance)`
"""
return np.sqrt(self._columns_variance)

@lazyproperty
def columns_std_err(self):
"""Returns the standard error for cell percentages
"""standard error for column percentages

`std_error = sqrt(variance/N)`
"""
return np.sqrt(self._columns_variance / self.columns_margin)
Expand Down Expand Up @@ -336,17 +354,17 @@ def inserted_row_idxs(self):

@lazyproperty
def insertions(self):
"""Returns masked array with residuals for insertions
"""2D np.float64 np.ma.core.MaskedArray of residuals for insertions.

0 1 2 3 4 5 6
0 inf inf inf inf inf -2.9 inf
1 inf inf inf inf inf -4.3 inf
2 2.5 1.3 3.3 -0.70 -7.25 -6.52 2.25
3 inf inf inf inf inf -2.51 inf
4 -1.16 2.20 5.84 1.78 -8.48 -5.92 0.93
5 inf inf inf inf inf 9.70 inf
0 1 2 3 4 5 6
0 inf inf inf inf inf -2.9 inf
1 inf inf inf inf inf -4.3 inf
2 2.5 1.3 3.3 -0.70 -7.25 -6.52 2.25
3 inf inf inf inf inf -2.51 inf
4 -1.16 2.20 5.84 1.78 -8.48 -5.92 0.93
5 inf inf inf inf inf 9.70 inf

Only the insertions residuals are showed in a inf masked array
Only the insertion residuals are shown in an inf-masked array.
"""
inserted_rows = self.inserted_row_idxs
inserted_cols = self.inserted_column_idxs
Expand Down Expand Up @@ -601,7 +619,7 @@ def scale_means_rows_margin(self):

@lazyproperty
def scale_median_column(self):
""" -> np.int64 ndarray of the columns scale median
"""np.int64 ndarray of the columns scale median

The median is calculated using the standard algebra applied to the numeric
values repeated for each related counts value
Expand All @@ -621,7 +639,7 @@ def scale_median_column(self):

@lazyproperty
def scale_median_row(self):
""" -> np.int64 ndarray of the rows scale median
"""np.int64 ndarray of the rows scale median

The median is calculated using the standard algebra applied to the numeric
values repeated for each related counts value
Expand All @@ -641,7 +659,7 @@ def scale_median_row(self):

@lazyproperty
def scale_median_column_margin(self):
""" -> np.int64, represents the column scale median margin"""
"""np.int64 represents the column scale median margin"""
if np.all(np.isnan(self._columns_dimension_numeric_values)):
return None
columns_margin = self.columns_margin
Expand All @@ -657,7 +675,7 @@ def scale_median_column_margin(self):

@lazyproperty
def scale_median_row_margin(self):
""" -> np.int64, represents the rows scale median margin"""
"""np.int64 represents the rows scale median margin"""
if np.all(np.isnan(self._rows_dimension_numeric_values)):
return None
rows_margin = self.rows_margin
Expand All @@ -673,28 +691,28 @@ def scale_median_row_margin(self):

@lazyproperty
def scale_std_dev_column(self):
""" -> 1D np.ndarray of the standard deviation column of scales"""
"""1D np.ndarray of the standard deviation column of scales"""
if np.all(np.isnan(self._columns_dimension_numeric_values)):
return None
return np.sqrt(self.var_scale_means_column)

@lazyproperty
def scale_std_dev_row(self):
""" -> 1D np.ndarray of the standard deviation row of scales"""
"""1D np.ndarray of the standard deviation row of scales"""
if np.all(np.isnan(self._rows_dimension_numeric_values)):
return None
return np.sqrt(self.var_scale_means_row)

@lazyproperty
def scale_std_err_column(self):
""" -> 1D np.ndarray of the standard error column of scales"""
"""1D np.ndarray of the standard error column of scales"""
if np.all(np.isnan(self._columns_dimension_numeric_values)):
return None
return self.scale_std_dev_column / np.sqrt(self.rows_margin)

@lazyproperty
def scale_std_err_row(self):
""" -> 1D np.ndarray of the standard error row of scales"""
"""1D np.ndarray of the standard error row of scales"""
if np.all(np.isnan(self._rows_dimension_numeric_values)):
return None
return self.scale_std_dev_row / np.sqrt(self.columns_margin)
Expand Down Expand Up @@ -782,6 +800,17 @@ def table_name(self):
def table_percentages(self):
return self.table_proportions * 100

@lazyproperty
def table_percentages_moe(self):
"""1D/2D np.float64 ndarray of margin-of-error (MoE) for table percentages.

The values are represented as percentages, analogous to the `table_percentages`
property. This means that the value of 3.5% will have the value 3.5 (not 0.035).
The values can be np.nan when the corresponding percentage is also np.nan, which
happens when the respective table margin is 0.
"""
return self.Z_975 * 100 * self.table_std_err

@lazyproperty
def table_proportions(self):
return np.array([row.table_proportions for row in self._matrix.rows])
Expand All @@ -801,7 +830,7 @@ def unweighted_counts(self):

@lazyproperty
def var_scale_means_column(self):
""" -> 1D np.ndarray of the column variance values for scales
"""1D np.ndarray of the column variance values for scales

Note: the variance for scale is defined as sum((Yi - Y~)^2) / N, where Y~ is the
mean of the data.
Expand All @@ -822,7 +851,7 @@ def var_scale_means_column(self):

@lazyproperty
def var_scale_means_row(self):
""" -> 1D np.ndarray of the row variance values for scales
"""1D np.ndarray of the row variance values for scales

Note: the variance for scale is defined as sum((Yi - Y~)^2) / N, where Y~ is the
mean of the data.
Expand Down Expand Up @@ -862,7 +891,8 @@ def _columns_dimension_numeric_values(self):

@lazyproperty
def _columns_variance(self):
"""Returns the variance for cell percentages
"""variance for column percentages

`variance = p * (1-p)`
"""
return (
Expand Down Expand Up @@ -1039,7 +1069,7 @@ def scale_mean(self):

@lazyproperty
def scale_median(self):
""" -> np.int64, the median of scales
"""np.int64 the median of scales

The median is calculated using the standard algebra applied to the numeric
values repeated for each related counts value
Expand All @@ -1053,14 +1083,14 @@ def scale_median(self):

@lazyproperty
def scale_std_dev(self):
""" -> np.float64, the standard deviation of scales"""
"""np.float64, the standard deviation of scales"""
if np.all(np.isnan(self._numeric_values)):
return None
return np.sqrt(self.var_scale_mean)

@lazyproperty
def scale_std_err(self):
""" -> np.float64, the standard error of scales"""
"""np.float64, the standard error of scales"""
if np.all(np.isnan(self._numeric_values)):
return None
counts = self._counts_as_array[self._numeric_values_mask]
Expand All @@ -1085,12 +1115,12 @@ def smoothed_dimension_dict(self):

@lazyproperty
def standard_deviation(self):
""" -> np.ndarray, percentages standard deviation"""
"""np.ndarray percentages standard deviation"""
return np.sqrt(self._variance)

@lazyproperty
def standard_error(self):
""" -> np.ndarray, percentages standard error"""
"""np.ndarray percentages standard error"""
if self.dimension_types[0] == DT.MR:
return np.sqrt(self._variance / self.bases)
return np.sqrt(self._variance / np.sum(self.rows_margin))
Expand Down Expand Up @@ -1207,7 +1237,7 @@ def _numeric_values(self):

@lazyproperty
def _numeric_values_mask(self):
""" -> np.ndarray, boolean elements for each element in rows dimension."
"""np.ndarray boolean elements for each element in rows dimension."

This array contains True or False according to the nan in the numeric_values
array
Expand Down Expand Up @@ -1238,7 +1268,8 @@ def _table_proportions_as_array(self):

@lazyproperty
def _variance(self):
"""Returns the variance for cell percentages
"""variance for cell percentages

`variance = p * (1-p)`
"""
p = self._table_proportions_as_array
Expand Down
10 changes: 10 additions & 0 deletions tests/expectations/cat-hs-x-mr-col-moe.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
[
[17.30181459, 7.76167031, 2.58919072, 1.96696835, 2.47551804],
[13.7464937, 8.90617586, 6.33244289, 3.30105103, 3.99757694],
[16.79140176, 10.72624566, 6.68369737, 3.71703613, 4.42265749],
[0.0, 0.0, 0.0, 0.0, 0.0],
[11.79621344, 11.12694011, 8.58251349, 4.91075221, 4.79812657],
[14.76383504, 11.06914477, 8.61521467, 5.11357472, 4.93988229],
[0.0, 0.0, 0.0, 0.0, 0.0],
[16.79140176, 10.72624566, 6.68369737, 3.71703613, 4.42265749],
]
56 changes: 56 additions & 0 deletions tests/expectations/col-per-moe-cat-x-cat-hs-2rows-1col.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
[
[
13.03595844,
7.67698551,
3.46251469,
4.55693081,
4.13969905,
3.06644326,
7.58177966,
],
[
9.31746956,
8.36644659,
3.78951977,
5.23042895,
3.72360922,
3.15148999,
7.65643283,
],
[
11.77008734,
8.47930382,
3.85500973,
5.5463129,
4.8153303,
3.66939254,
7.5418196,
],
[
6.0015905,
7.16459682,
3.25399504,
4.39795907,
3.1556904,
2.63154691,
6.03640099,
],
[
10.57125967,
8.64082889,
3.91804373,
5.56024488,
4.45804303,
3.59253748,
8.05245981,
],
[
10.91512996,
6.50723624,
2.9825236,
4.90998204,
4.89378128,
3.57587294,
5.83679508,
],
]
56 changes: 56 additions & 0 deletions tests/expectations/col-std-dev-cat-x-cat-hs-2rows-1col.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
[
[
0.49326036,
0.43967108,
0.43739495,
0.4093598,
0.42242603,
0.41688475,
0.47060217,
],
[
0.35255854,
0.47915742,
0.47870319,
0.46986171,
0.3799671,
0.42844691,
0.4752359,
],
[
0.44536177,
0.48562091,
0.48697607,
0.49823831,
0.49136926,
0.49885606,
0.46812184,
],
[
0.22709084,
0.4103259,
0.41105414,
0.39507899,
0.32201514,
0.35776034,
0.37468029,
],
[
0.4,
0.49487166,
0.49493871,
0.49948985,
0.45491071,
0.48840757,
0.49981735,
],
[
0.41301152,
0.372678,
0.37676108,
0.44107522,
0.49937461,
0.48614202,
0.36229072,
],
]
Loading

0 comments on commit 49bfdd6

Please sign in to comment.