Crunch-io · scanny · Nov 9, 2020 · Nov 9, 2020 · Nov 9, 2020 · Nov 9, 2020
diff --git a/src/cr/cube/collator.py b/src/cr/cube/collator.py
diff --git a/src/cr/cube/cube.py b/src/cr/cube/cube.py
@@ -134,7 +134,26 @@ def population_fraction(self):
     @lazyproperty
     def _cubes(self):
         """Sequence of Cube objects containing data for this analysis."""
-        return tuple(self._iter_cubes())
+
+        def iter_cubes():
+            """Generate a Cube object for each of cube_responses.
+
+            0D cube-responses and 1D second-and-later cubes are "inflated" to add their
+            missing row dimension.
+            """
+            for idx, cube_response in enumerate(self._cube_responses):
+                cube = Cube(
+                    cube_response,
+                    cube_idx=idx if self._is_multi_cube else None,
+                    transforms=self._transforms_dicts[idx],
+                    population=self._population,
+                    mask_size=self._min_base,
+                )
+                # --- numeric-mean cubes require inflation to restore their
+                # --- rows-dimension, others don't
+                yield cube.inflate() if self._is_numeric_mean else cube
+
+        return tuple(iter_cubes())
 
     @lazyproperty
     def _is_multi_cube(self):
@@ -166,24 +185,6 @@ def _is_numeric_mean(self):
         # --- construction is low-overhead because all Cube properties are lazy.
         return Cube(self._cube_responses[0]).ndim == 0
 
-    def _iter_cubes(self):
-        """Generate a Cube object for each of cube_responses.
-
-        0D cube-responses and 1D second-and-later cubes are "inflated" to add their
-        missing row dimension.
-        """
-        for idx, cube_response in enumerate(self._cube_responses):
-            cube = Cube(
-                cube_response,
-                cube_idx=idx if self._is_multi_cube else None,
-                transforms=self._transforms_dicts[idx],
-                population=self._population,
-                mask_size=self._min_base,
-            )
-            # --- all numeric-mean cubes require inflation to restore their
-            # --- rows-dimension, others don't
-            yield cube.inflate() if self._is_numeric_mean else cube
-
 
 class Cube(object):
     """Provides access to individual slices on a cube-result.

diff --git a/src/cr/cube/cubepart.py b/src/cr/cube/cubepart.py
@@ -554,18 +554,18 @@ def scale_mean_pairwise_indices_alt(self):
 
     @lazyproperty
     def scale_means_column(self):
-        """1D float64 ndarray of column scale means
+        """1D float64 ndarray of scale mean for each row (making a summary "column").
 
-        The calculation is based on multiply of the numeric values by the
-        row_proportions and divide by the rows_margin.
+        Each scale mean is based on the numeric values of the *columns-dimension*
+        elements. The value is None when all of those numeric values are NaN.
         """
-        if np.all(np.isnan(self._columns_dimension_numeric_values)):
+        column_numeric_values = self._columns_dimension_numeric_values
+
+        if np.all(np.isnan(column_numeric_values)):
             return None
 
-        inner = np.nansum(
-            self._columns_dimension_numeric_values * self.row_proportions, axis=1
-        )
-        not_a_nan_index = ~np.isnan(self._columns_dimension_numeric_values)
+        inner = np.nansum(column_numeric_values * self.row_proportions, axis=1)
+        not_a_nan_index = ~np.isnan(column_numeric_values)
         denominator = np.sum(self.row_proportions[:, not_a_nan_index], axis=1)
         return inner / denominator
 

diff --git a/src/cr/cube/dimension.py b/src/cr/cube/dimension.py
@@ -11,7 +11,7 @@
 
 import numpy as np
 
-from cr.cube.enums import DIMENSION_TYPE as DT
+from cr.cube.enums import COLLATION_METHOD as CM, DIMENSION_TYPE as DT
 from cr.cube.util import lazyproperty
 
 
@@ -261,7 +261,7 @@ class Dimension(object):
     def __init__(self, dimension_dict, dimension_type, dimension_transforms=None):
         self._dimension_dict = dimension_dict
         self._dimension_type = dimension_type
-        self._dimension_transforms_arg = dimension_transforms
+        self._dimension_transforms_dict = dimension_transforms or {}
 
     @lazyproperty
     def alias(self):
@@ -293,6 +293,14 @@ def apply_transforms(self, dimension_transforms):
             self._dimension_dict, self._dimension_type, dimension_transforms
         )
 
+    @lazyproperty
+    def collation_method(self):
+        """Member of COLLATION_METHOD specifying ordering of dimension elements."""
+        method_keyword = self.order_dict.get("type")
+        if method_keyword is None:
+            return CM.PAYLOAD_ORDER
+        return CM(method_keyword)
+
     @lazyproperty
     def description(self):
         """str description of this dimension."""
@@ -314,29 +322,12 @@ def dimension_type(self):
         return self._dimension_type
 
     @lazyproperty
-    def display_order(self):
-        """Sequence of int element indices specifying display order of elements.
-
-        The sequence includes only valid elements; missing elements do not appear.
-        Further, each index represents the document-order position of the element in the
-        sequence of valid elements; missing elements are skipped in the assignment of
-        indexes. The returned sequence is exhaustive; all valid elements are
-        represented.
-
-        The sequence reflects the resolved cascade of any *explicit* ordering
-        transforms, but does *not* reflect any *sort* transforms, which cannot be
-        resolved by the dimension. Use the `.sort` property to access any sort transform
-        that may apply.
-
-        Example with explicit-order transform:
-
-            (3, 0, 2, 1, 4)
-
-        Example with no explicit-order transform:
+    def element_ids(self):
+        """tuple of int element-id for each valid element in this dimension.
 
-            (0, 1, 2, 3, 4)
+        Element-ids appear in the order defined in the cube-result.
         """
-        return self.valid_elements.display_order
+        return tuple(e.element_id for e in self.valid_elements)
 
     @lazyproperty
     def name(self):
@@ -373,6 +364,14 @@ def numeric_values(self):
         """
         return tuple(element.numeric_value for element in self.valid_elements)
 
+    @lazyproperty
+    def order_dict(self):
+        """dict value of "order" field in dimension-transforms from JSON payload.
+
+        Value is `{}` when no "order" field is present.
+        """
+        return self._dimension_transforms_dict.get("order", {})
+
     @lazyproperty
     def prune(self):
         """True if empty elements should be automatically hidden on this dimension."""
@@ -385,35 +384,24 @@ def prune(self):
     def shape(self):
         return len(self.all_elements)
 
-    @lazyproperty
-    def sort(self):
-        """A _BaseSort-subclass object or None, describing the applied sort method.
-
-        This value is None if no sort transform was specified for this dimension.
-        Currently that is its only possible value. The returned sort object describes
-        the sort method which can include sorting on the value of an opposing element or
-        on the margin and specify ascending or descending order.
-        """
-        return None  # pragma: no cover
-
     @lazyproperty
     def subtotals(self):
         """_Subtotals sequence object for this dimension.
 
         Each item in the sequence is a _Subtotal object specifying a subtotal, including
         its addends and anchor.
         """
-        # ---insertions in dimension-transforms override those on dimension itself---
-        insertion_dicts = self._dimension_transforms_dict.get("insertions")
-        if insertion_dicts is not None:
-            return _Subtotals(insertion_dicts, self.valid_elements, self.prune)
+        # --- insertions in dimension-transforms override those on dimension itself ---
+        if "insertions" in self._dimension_transforms_dict:
+            return _Subtotals(
+                self._dimension_transforms_dict["insertions"],
+                self.valid_elements,
+                self.prune,
+            )
 
-        # ---otherwise, insertions defined as default transforms apply---
-        view = self._dimension_dict.get("references", {}).get("view", {})
-        # ---view can be both None and {}, thus the edge case.---
-        insertion_dicts = (
-            [] if view is None else view.get("transform", {}).get("insertions", [])
-        )
+        # --- otherwise, insertions defined in cube as default transforms apply ---
+        view = self._dimension_dict.get("references", {}).get("view") or {}
+        insertion_dicts = view.get("transform", {}).get("insertions", [])
         return _Subtotals(insertion_dicts, self.valid_elements, self.prune)
 
     @lazyproperty
@@ -426,19 +414,6 @@ def valid_elements(self):
         """
         return self.all_elements.valid_elements
 
-    @lazyproperty
-    def _dimension_transforms_dict(self):
-        """dict complying with dimension-transforms schema for this dimension.
-
-        This value derives from the `dimension_transforms` argument passed on
-        construction. When that argument is not specified, this value is an empty dict.
-        """
-        return (
-            self._dimension_transforms_arg
-            if self._dimension_transforms_arg is not None
-            else {}
-        )
-
 
 class _BaseElements(Sequence):
     """Base class for element sequence containers."""
@@ -509,7 +484,7 @@ def __init__(self, type_dict, dimension_transforms_dict):
     @lazyproperty
     def valid_elements(self):
         """_ValidElements object containing only non-missing elements."""
-        return _ValidElements(self._elements, self._dimension_transforms_dict)
+        return _ValidElements(self._elements)
 
     @lazyproperty
     def _element_dicts(self):
@@ -570,72 +545,14 @@ class _ValidElements(_BaseElements):
     directly.
     """
 
-    def __init__(self, all_elements, dimension_transforms_dict):
+    def __init__(self, all_elements):
         self._all_elements = all_elements
-        self._dimension_transforms_dict = dimension_transforms_dict
-
-    @lazyproperty
-    def display_order(self):
-        """Sequence of int element-idx reflecting order in which to display elements.
-
-        This order reflects the application of any explicit element-order transforms,
-        including resolution of any cascade. It does *not* reflect the results of
-        a *sort* transform, which can only be resolved at a higher level, where vector
-        values are known.
-        """
-        return (
-            self._explicit_order
-            if self._explicit_order
-            else tuple(range(len(self._elements)))
-        )
 
     @lazyproperty
     def _elements(self):
-        """tuple containing actual sequence of element objects."""
+        """tuple containing valid (non-missing) element objects in payload order."""
         return tuple(element for element in self._all_elements if not element.missing)
 
-    @lazyproperty
-    def _explicit_order(self):
-        """Sequence of int element-idx or None, reflecting explicit-order transform.
-
-        This value is None if no explicit-order transform is specified. Otherwise, it is
-        an exhaustive collection of (valid) element offsets, in the order specified (and
-        in some cases implied) by the order transform.
-        """
-        # ---get order transform if any, aborting if no explicit order transform---
-        order_dict = self._dimension_transforms_dict.get("order", {})
-        order_type = order_dict.get("type")
-        ordered_element_ids = order_dict.get("element_ids")
-        if order_type != "explicit" or not isinstance(ordered_element_ids, list):
-            return None
-
-        # ---list like [0, 1, 2, -1], perhaps ["0001", "0002", etc.], reflecting element
-        # ---ids in the order they appear in the cube result. We'll use this to map
-        # ---element-id to its index in the valid-elements sequence.
-        cube_result_order = tuple(element.element_id for element in self)
-        # ---this is a copy of the same, but we're going to mutate this one. This is
-        # ---required to implement the "no-duplicates" behavior.
-        remaining_element_ids = list(cube_result_order)
-
-        # ---we'll collect the results in this---
-        ordered_idxs = []
-        # ---append idx of each element mentioned by id in transform, in order. Remove
-        # ---each element-id from remaining as we go to keep track of dups and leftovers
-        for element_id in ordered_element_ids:
-            # ---An element-id appearing in transform but not in dimension is ignored.
-            # ---Also, a duplicated element-id is only used on first encounter.
-            if element_id not in remaining_element_ids:
-                continue
-            ordered_idxs.append(cube_result_order.index(element_id))
-            remaining_element_ids.remove(element_id)
-
-        # ---any remaining elements are tacked onto the end of the list in the order
-        # ---they originally appeared in the cube-result.
-        for element_id in remaining_element_ids:
-            ordered_idxs.append(cube_result_order.index(element_id))
-
-        return tuple(ordered_idxs)
-
 
 class _Element(object):
     """A category or subvariable of a dimension.
@@ -854,11 +771,8 @@ def iter_for_anchor(self, anchor):
 
     @lazyproperty
     def _anchors(self):
-        """List of int or str indicating element under which to insert this subtotal."""
-        return list(
-            _Subtotal(subtotal_dict, self._valid_elements, self._prune).anchor
-            for subtotal_dict in self._iter_valid_subtotal_dicts()
-        )
+        """Sequence of int or str anchor for each subtotal."""
+        return tuple(s.anchor for s in self._subtotals)
 
     @lazyproperty
     def _element_ids(self):
@@ -896,18 +810,24 @@ def _iter_valid_subtotal_dicts(self):
     def _subtotals(self):
         """Composed tuple storing actual sequence of _Subtotal objects."""
         return tuple(
-            _Subtotal(subtotal_dict, self._valid_elements, self._prune)
-            for subtotal_dict in self._iter_valid_subtotal_dicts()
+            _Subtotal(subtotal_dict, self._valid_elements, self._prune, idx + 1)
+            for idx, subtotal_dict in enumerate(self._iter_valid_subtotal_dicts())
         )
 
 
 class _Subtotal(object):
-    """A subtotal insertion on a cube dimension."""
+    """A subtotal insertion on a cube dimension.
+
+    `fallback_insertion_id` is a fallback unique identifier for this insertion, until
+    real insertion-ids can be added. Its value is the 1-based position of this
+    subtotal among the valid subtotal-dicts of the insertions transform.
+    """
 
-    def __init__(self, subtotal_dict, valid_elements, prune):
+    def __init__(self, subtotal_dict, valid_elements, prune, fallback_insertion_id):
         self._subtotal_dict = subtotal_dict
         self._valid_elements = valid_elements
         self._prune = prune
+        self._fallback_insertion_id = fallback_insertion_id
 
     @lazyproperty
     def anchor(self):
@@ -966,6 +886,11 @@ def addend_idxs(self):
             dtype=int,
         )
 
+    @lazyproperty
+    def insertion_id(self):
+        """int unique identifier of this subtotal within this dimension's insertions."""
+        return self._subtotal_dict.get("insertion_id", self._fallback_insertion_id)
+
     @lazyproperty
     def label(self):
         """str display name for this subtotal, suitable for use as label."""

diff --git a/src/cr/cube/enums.py b/src/cr/cube/enums.py
@@ -2,6 +2,8 @@
 
 """Enumerated sets related to cubes."""
 
+from enum import Enum
+
 from cr.cube.util import lazyproperty
 
 
@@ -53,3 +55,13 @@ class DIMENSION_TYPE(object):
     ALLOWED_PAIRWISE_TYPES = frozenset(
         (BINNED_NUMERIC, CA, CAT, CA_CAT, DATETIME, MR, TEXT)
     )
+
+
+class COLLATION_METHOD(Enum):
+    """Enumerated values representing the methods of sorting dimension elements."""
+
+    EXPLICIT_ORDER = "explicit"
+    MARGINAL = "marginal"
+    OPPOSING_ELEMENT = "opposing_element"
+    OPPOSING_SUBTOTAL = "opposing_subtotal"
+    PAYLOAD_ORDER = "payload_order"