cr: address steve's comments

Crunch-io · Oct 30, 2020 · 4d10e24 · 4d10e24
1 parent 71fdbc3
commit 4d10e24
Show file tree

Hide file tree

Showing 8 changed files with 259 additions and 227 deletions.
diff --git a/src/cr/cube/cubepart.py b/src/cr/cube/cubepart.py
@@ -84,7 +84,7 @@ def dimension_types(self):
         return tuple(d.dimension_type for d in self._dimensions)
 
     def evaluate(self, measure_expr):
-        """-> 1D/2D ndarray, values evaluated given the function specification
+        """Returns 1D/2D ndarray, values evaluated given the function specification
 
         The `function_spec` contains the function to apply and its parameters, e.g.:
         ```
@@ -239,8 +239,9 @@ class _Slice(CubePartition):
     dimensions which can be crosstabbed in a slice.
     """
 
-    # quantile of the normal cdf at .975 because the confidence
-    # interval is ± (.025 on each side)
+    # ---This is the quantile of the normal Cumulative Distribution Function (CDF) at
+    # ---probability 97.5% (p=.975), since the computed confidence interval
+    # ---is ±2.5% (.025) on each side of the CDF.
     Z_975 = 1.959964
 
     def __init__(self, cube, slice_idx, transforms, population, mask_size):
@@ -296,22 +297,26 @@ def columns_margin(self):
         return np.array([column.margin for column in self._matrix.columns]).T
 
     @lazyproperty
-    def columns_moe(self):
-        """Returns the margin of error (MoE) for col percentages
-        `moe = Z_975 * 100 * std_error` (the * 100 part accounts for percentages)
+    def columns_percentages_moe(self):
+        """1D/2D np.float64 ndarray of margin-of-error (MoE) for columns percentages.
+
+        The values are represented as percentages, analogous to the `table_percentages`
+        property. This means that the value of 3.5% will have the value 3.5 (not 0.035).
+        The values can be np.nan when the corresponding percentage is also np.nan, which
+        happens when the respective columns margin is 0.
         """
         return self.Z_975 * 100 * self.columns_std_err
 
     @lazyproperty
     def columns_std_dev(self):
-        """Returns the standard deviation for col percentages
+        """Returns the standard deviation for column percentages
         `std_deviation = sqrt(variance)`
         """
         return np.sqrt(self._columns_variance)
 
     @lazyproperty
     def columns_std_err(self):
-        """Returns the standard error for col percentages
+        """Returns the standard error for column percentages
         `std_error = sqrt(variance/N)`
         """
         return np.sqrt(self._columns_variance / self.columns_margin)
@@ -347,7 +352,8 @@ def inserted_row_idxs(self):
 
     @lazyproperty
     def insertions(self):
-        """Returns masked array with residuals for insertions
+        """2D np.float64 np.ma.core.MaskedArray of residuals for insertions.
+
               0     1     2     3      4       5       6
         0   inf   inf   inf   inf    inf    -2.9     inf
         1   inf   inf   inf   inf    inf    -4.3     inf
@@ -356,7 +362,8 @@ def insertions(self):
         4  -1.16  2.20  5.84  1.78  -8.48   -5.92    0.93
         5   inf   inf   inf   inf    inf     9.70    inf
 
-        Only the insertions residuals are showed in a inf masked array"""
+        Only the insertion residuals are shown in an inf-masked array.
+        """
         inserted_rows = self.inserted_row_idxs
         inserted_cols = self.inserted_column_idxs
         if not inserted_cols and not inserted_rows:
@@ -610,7 +617,7 @@ def scale_means_rows_margin(self):
 
     @lazyproperty
     def scale_median_column(self):
-        """-> np.int64 ndarray of the columns scale median
+        """np.int64 ndarray of the columns scale median
 
         The median is calculated using the standard algebra applied to the numeric
         values repeated for each related counts value
@@ -630,7 +637,7 @@ def scale_median_column(self):
 
     @lazyproperty
     def scale_median_row(self):
-        """-> np.int64 ndarray of the rows scale median
+        """np.int64 ndarray of the rows scale median
 
         The median is calculated using the standard algebra applied to the numeric
         values repeated for each related counts value
@@ -650,7 +657,7 @@ def scale_median_row(self):
 
     @lazyproperty
     def scale_median_column_margin(self):
-        """ -> np.int64 represents the column scale median margin"""
+        """np.int64 represents the column scale median margin"""
         if np.all(np.isnan(self._columns_dimension_numeric_values)):
             return None
         columns_margin = self.columns_margin
@@ -666,7 +673,7 @@ def scale_median_column_margin(self):
 
     @lazyproperty
     def scale_median_row_margin(self):
-        """ -> np.int64 represents the rows scale median margin"""
+        """np.int64 represents the rows scale median margin"""
         if np.all(np.isnan(self._rows_dimension_numeric_values)):
             return None
         rows_margin = self.rows_margin
@@ -682,28 +689,28 @@ def scale_median_row_margin(self):
 
     @lazyproperty
     def scale_std_dev_column(self):
-        """ -> 1D np.ndarray of the standard deviation column of scales"""
+        """1D np.ndarray of the standard deviation column of scales"""
         if np.all(np.isnan(self._columns_dimension_numeric_values)):
             return None
         return np.sqrt(self.var_scale_means_column)
 
     @lazyproperty
     def scale_std_dev_row(self):
-        """ -> 1D np.ndarray of the standard deviation row of scales"""
+        """1D np.ndarray of the standard deviation row of scales"""
         if np.all(np.isnan(self._rows_dimension_numeric_values)):
             return None
         return np.sqrt(self.var_scale_means_row)
 
     @lazyproperty
     def scale_std_err_column(self):
-        """ -> 1D np.ndarray of the standard error column of scales"""
+        """1D np.ndarray of the standard error column of scales"""
         if np.all(np.isnan(self._columns_dimension_numeric_values)):
             return None
         return self.scale_std_dev_column / np.sqrt(self.rows_margin)
 
     @lazyproperty
     def scale_std_err_row(self):
-        """ -> 1D np.ndarray of the standard error row of scales"""
+        """1D np.ndarray of the standard error row of scales"""
         if np.all(np.isnan(self._rows_dimension_numeric_values)):
             return None
         return self.scale_std_dev_row / np.sqrt(self.columns_margin)
@@ -774,9 +781,13 @@ def table_margin_unpruned(self):
         return self._matrix.table_margin_unpruned
 
     @lazyproperty
-    def table_moe(self):
-        """Returns the margin of error (MoE) for table percentages
-        `moe = Z_975 * 100 * std_error` (the * 100 part accounts for percentages)
+    def table_percentages_moe(self):
+        """1D/2D np.float64 ndarray of margin-of-error (MoE) for table percentages.
+
+        The values are represented as percentages, analogous to the `table_percentages`
+        property. This means that the value of 3.5% will have the value 3.5 (not 0.035).
+        The values can be np.nan when the corresponding percentage is also np.nan, which
+        happens when the respective table margin is 0.
         """
         return self.Z_975 * 100 * self.table_std_err
 
@@ -817,7 +828,7 @@ def unweighted_counts(self):
 
     @lazyproperty
     def var_scale_means_column(self):
-        """-> 1D np.ndarray of the column variance values for scales
+        """1D np.ndarray of the column variance values for scales
 
         Note: the variance for scale is defined as sum((Yi−Y~)^2)/N, where Y~ is the
               mean of the data.
@@ -838,7 +849,7 @@ def var_scale_means_column(self):
 
     @lazyproperty
     def var_scale_means_row(self):
-        """-> 1D np.ndarray of the row variance values for scales
+        """1D np.ndarray of the row variance values for scales
 
         Note: the variance for scale is defined as sum((Yi−Y~)^2)/N, where Y~ is the
               mean of the data.
@@ -878,7 +889,7 @@ def _columns_dimension_numeric_values(self):
 
     @lazyproperty
     def _columns_variance(self):
-        """Returns the variance for col percentages
+        """Returns the variance for column percentages
         `variance = p * (1-p)`
         """
         return (
@@ -1055,7 +1066,7 @@ def scale_mean(self):
 
     @lazyproperty
     def scale_median(self):
-        """-> np.int64 the median of scales
+        """np.int64 the median of scales
 
         The median is calculated using the standard algebra applied to the numeric
         values repeated for each related counts value
@@ -1069,14 +1080,14 @@ def scale_median(self):
 
     @lazyproperty
     def scale_std_dev(self):
-        """ -> np.float64, the standard deviation of scales"""
+        """np.float64, the standard deviation of scales"""
         if np.all(np.isnan(self._numeric_values)):
             return None
         return np.sqrt(self.var_scale_mean)
 
     @lazyproperty
     def scale_std_err(self):
-        """ -> np.float64, the standard error of scales"""
+        """np.float64, the standard error of scales"""
         if np.all(np.isnan(self._numeric_values)):
             return None
         counts = self._counts_as_array[self._numeric_values_mask]
@@ -1101,12 +1112,12 @@ def smoothed_dimension_dict(self):
 
     @lazyproperty
     def standard_deviation(self):
-        """ -> np.ndarray percentages standard deviation"""
+        """np.ndarray percentages standard deviation"""
         return np.sqrt(self._variance)
 
     @lazyproperty
     def standard_error(self):
-        """ -> np.ndarray percentages standard error"""
+        """np.ndarray percentages standard error"""
         if self.dimension_types[0] == DT.MR:
             return np.sqrt(self._variance / self.bases)
         return np.sqrt(self._variance / np.sum(self.rows_margin))
@@ -1223,7 +1234,7 @@ def _numeric_values(self):
 
     @lazyproperty
     def _numeric_values_mask(self):
-        """-> np.ndarray boolean elements for each element in rows dimension."
+        """np.ndarray boolean elements for each element in rows dimension.
 
         This array contains True or False according to the nan in the numeric_values
         array

diff --git a/tests/expectations/col-per-moe-cat-x-cat-hs-2rows-1col.py b/tests/expectations/col-per-moe-cat-x-cat-hs-2rows-1col.py
@@ -0,0 +1,56 @@
+[
+    [
+        13.03595844,
+        7.67698551,
+        3.46251469,
+        4.55693081,
+        4.13969905,
+        3.06644326,
+        7.58177966,
+    ],
+    [
+        9.31746956,
+        8.36644659,
+        3.78951977,
+        5.23042895,
+        3.72360922,
+        3.15148999,
+        7.65643283,
+    ],
+    [
+        11.77008734,
+        8.47930382,
+        3.85500973,
+        5.5463129,
+        4.8153303,
+        3.66939254,
+        7.5418196,
+    ],
+    [
+        6.0015905,
+        7.16459682,
+        3.25399504,
+        4.39795907,
+        3.1556904,
+        2.63154691,
+        6.03640099,
+    ],
+    [
+        10.57125967,
+        8.64082889,
+        3.91804373,
+        5.56024488,
+        4.45804303,
+        3.59253748,
+        8.05245981,
+    ],
+    [
+        10.91512996,
+        6.50723624,
+        2.9825236,
+        4.90998204,
+        4.89378128,
+        3.57587294,
+        5.83679508,
+    ],
+]
diff --git a/tests/expectations/col-std-dev-cat-x-cat-hs-2rows-1col.py b/tests/expectations/col-std-dev-cat-x-cat-hs-2rows-1col.py
@@ -0,0 +1,56 @@
+[
+    [
+        0.49326036,
+        0.43967108,
+        0.43739495,
+        0.4093598,
+        0.42242603,
+        0.41688475,
+        0.47060217,
+    ],
+    [
+        0.35255854,
+        0.47915742,
+        0.47870319,
+        0.46986171,
+        0.3799671,
+        0.42844691,
+        0.4752359,
+    ],
+    [
+        0.44536177,
+        0.48562091,
+        0.48697607,
+        0.49823831,
+        0.49136926,
+        0.49885606,
+        0.46812184,
+    ],
+    [
+        0.22709084,
+        0.4103259,
+        0.41105414,
+        0.39507899,
+        0.32201514,
+        0.35776034,
+        0.37468029,
+    ],
+    [
+        0.4,
+        0.49487166,
+        0.49493871,
+        0.49948985,
+        0.45491071,
+        0.48840757,
+        0.49981735,
+    ],
+    [
+        0.41301152,
+        0.372678,
+        0.37676108,
+        0.44107522,
+        0.49937461,
+        0.48614202,
+        0.36229072,
+    ],
+]