Merge 0ffbbee into 5979508

Crunch-io · May 7, 2020 · 325c483 · 325c483
2 parents 5979508 + 0ffbbee
commit 325c483
Show file tree

Hide file tree

Showing 6 changed files with 2,420 additions and 70 deletions.
diff --git a/src/cr/cube/cubepart.py b/src/cr/cube/cubepart.py
@@ -167,6 +167,20 @@ def columns_dimension_type(self):
     def columns_margin(self):
         return np.array([column.margin for column in self._matrix.columns]).T
 
+    @lazyproperty
+    def columns_std_dev(self):
+        """Returns the standard deviation of the column proportions
+        `std_deviation = sqrt(variance)`, with `variance = p * (1-p)`
+        """
+        return np.sqrt(self._columns_variance)
+
+    @lazyproperty
+    def columns_std_err(self):
+        """Returns the standard error of the column proportions
+        `std_error = sqrt(variance/N)`, where `N` is the columns margin
+        """
+        return np.sqrt(self._columns_variance / self.columns_margin)
+
     @lazyproperty
     def counts(self):
         return np.array([row.values for row in self._matrix.rows])
@@ -253,16 +267,6 @@ def pairwise_indices(self):
             self, alpha=alpha, only_larger=only_larger
         ).pairwise_indices
 
-    @lazyproperty
-    def scale_mean_pairwise_indices(self):
-        alpha = self._transforms_dict.get("pairwise_indices", {}).get("alpha", 0.05)
-        only_larger = self._transforms_dict.get("pairwise_indices", {}).get(
-            "only_larger", True
-        )
-        return NewPairwiseSignificance(
-            self, alpha=alpha, only_larger=only_larger
-        ).scale_mean_pairwise_indices
-
     @lazyproperty
     def pairwise_significance_tests(self):
         """tuple of _ColumnPairwiseSignificance tests.
@@ -360,20 +364,14 @@ def scale_means_column(self):
         return inner / denominator
 
     @lazyproperty
-    def var_scale_means_column(self):
-        if np.all(np.isnan(self._columns_dimension_numeric)):
-            return None
-
-        not_a_nan_index = ~np.isnan(self._columns_dimension_numeric)
-        col_dim_numeric = self._columns_dimension_numeric[not_a_nan_index]
-
-        numerator = self.counts[:, not_a_nan_index] * pow(
-            np.broadcast_to(col_dim_numeric, self.counts[:, not_a_nan_index].shape)
-            - self.scale_means_column.reshape(-1, 1),
-            2,
+    def scale_mean_pairwise_indices(self):
+        alpha = self._transforms_dict.get("pairwise_indices", {}).get("alpha", 0.05)
+        only_larger = self._transforms_dict.get("pairwise_indices", {}).get(
+            "only_larger", True
         )
-        denominator = np.sum(self.counts[:, not_a_nan_index], axis=1)
-        return np.nansum(numerator, axis=1) / denominator
+        return NewPairwiseSignificance(
+            self, alpha=alpha, only_larger=only_larger
+        ).scale_mean_pairwise_indices
 
     @lazyproperty
     def scale_means_columns_margin(self):
@@ -400,26 +398,6 @@ def scale_means_row(self):
         denominator = np.sum(self.counts[not_a_nan_index, :], axis=0)
         return inner / denominator
 
-    @lazyproperty
-    def var_scale_means_row(self):
-        if np.all(np.isnan(self._rows_dimension_numeric)):
-            return None
-
-        not_a_nan_index = ~np.isnan(self._rows_dimension_numeric)
-        row_dim_numeric = self._rows_dimension_numeric[not_a_nan_index]
-        numerator = (
-            self.counts[not_a_nan_index, :]
-            * pow(
-                np.broadcast_to(
-                    row_dim_numeric, self.counts[not_a_nan_index, :].T.shape
-                )
-                - self.scale_means_row.reshape(-1, 1),
-                2,
-            ).T
-        )
-        denominator = np.sum(self.counts[not_a_nan_index, :], axis=0)
-        return np.nansum(numerator, axis=0) / denominator
-
     @lazyproperty
     def scale_means_rows_margin(self):
         if np.all(np.isnan(self._rows_dimension_numeric)):
@@ -521,6 +499,50 @@ def table_percentages(self):
     def table_proportions(self):
         return np.array([row.table_proportions for row in self._matrix.rows])
 
+    @lazyproperty
+    def table_std_dev(self):
+        return np.array([row.table_std_dev for row in self._matrix.rows])
+
+    @lazyproperty
+    def table_std_err(self):
+        return np.array([row.table_std_err for row in self._matrix.rows])
+
+    @lazyproperty
+    def var_scale_means_column(self):
+        if np.all(np.isnan(self._columns_dimension_numeric)):
+            return None
+
+        not_a_nan_index = ~np.isnan(self._columns_dimension_numeric)
+        col_dim_numeric = self._columns_dimension_numeric[not_a_nan_index]
+
+        numerator = self.counts[:, not_a_nan_index] * pow(
+            np.broadcast_to(col_dim_numeric, self.counts[:, not_a_nan_index].shape)
+            - self.scale_means_column.reshape(-1, 1),
+            2,
+        )
+        denominator = np.sum(self.counts[:, not_a_nan_index], axis=1)
+        return np.nansum(numerator, axis=1) / denominator
+
+    @lazyproperty
+    def var_scale_means_row(self):
+        if np.all(np.isnan(self._rows_dimension_numeric)):
+            return None
+
+        not_a_nan_index = ~np.isnan(self._rows_dimension_numeric)
+        row_dim_numeric = self._rows_dimension_numeric[not_a_nan_index]
+        numerator = (
+            self.counts[not_a_nan_index, :]
+            * pow(
+                np.broadcast_to(
+                    row_dim_numeric, self.counts[not_a_nan_index, :].T.shape
+                )
+                - self.scale_means_row.reshape(-1, 1),
+                2,
+            ).T
+        )
+        denominator = np.sum(self.counts[not_a_nan_index, :], axis=0)
+        return np.nansum(numerator, axis=0) / denominator
+
     @lazyproperty
     def zscore(self):
         return np.array([row.zscore for row in self._matrix.rows])
@@ -582,6 +604,15 @@ def _transforms_dict(self):
         """
         return self._transforms_arg if self._transforms_arg is not None else {}
 
+    @lazyproperty
+    def _columns_variance(self):
+        """Returns the variance of the column proportions
+        `variance = p * (1-p)`, where `p = counts / columns_margin`
+        """
+        return (
+            self.counts / self.columns_margin * (1 - self.counts / self.columns_margin)
+        )
+
 
 class _Strand(CubePartition):
     """1D cube-partition.