Skip to content

Commit

Permalink
spike: sort-by-value
Browse files Browse the repository at this point in the history
OPPOSING_ELEMENT doesn't handle HS yet, need to add some tests for that.
  • Loading branch information
scanny committed Nov 10, 2020
1 parent 410bdc6 commit ebe0f2c
Show file tree
Hide file tree
Showing 20 changed files with 718 additions and 579 deletions.
41 changes: 34 additions & 7 deletions src/cr/cube/collator.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ class _BaseAnchoredCollator(_BaseCollator):

@classmethod
def display_order(cls, dimension):
""" -> sequence of int element-idx specifying ordering of dimension elements.
"""Return sequence of int element-idx specifying ordering of dimension elements.
The returned indices are "signed", with positive indices applying to base
vectors and negative indices applying to inserted vectors. Both work for
Expand Down Expand Up @@ -289,8 +289,7 @@ class _BaseSortByValueCollator(_BaseCollator):
In general, the anchors used to position inserted subtotals lose their meaning when
the dimension is sorted by-value. In sort-by-value cases, subtotals are grouped at
the top (when sort direction is descending (default)) or the bottom (when direction
is ascending), while also being sorted within the group of subtotal by the specified
value.
is ascending), while also being sorted by the specified value.
"""

@property
Expand Down Expand Up @@ -468,7 +467,7 @@ def __init__(self, dimension, vectors, inserted_vectors):

@classmethod
def display_order(cls, dimension, vectors, inserted_vectors):
""" -> sequence of int element-idx, reflecting sort-by-marginal transform.
"""Return sequence of int element-idx, reflecting sort-by-marginal transform.
This value is an exhaustive collection of (valid) element offsets, sorted by the
value of their margin value.
Expand Down Expand Up @@ -530,9 +529,23 @@ def __init__(self, dimension, opposing_vectors):

@classmethod
def display_order(cls, dimension, opposing_vectors):
""" -> sequence of int element-idx specifying ordering of dimension elements."""
"""Return sequence of int element-idx in opposing element order."""
return cls(dimension, opposing_vectors)._display_order

@property
def _display_order(self):
    """tuple of int element-idx specifying ordering of dimension elements.

    Payload order is used when the sort-key element cannot be resolved.
    """
    # --- The sort is based on an opposing vector identified by its id. The
    # --- ordering-transform is stored with a saved analysis, so the category or
    # --- subvar it names can be deleted between authoring time and export time
    # --- (possibly weeks or more later). When the sort-key element is not found
    # --- in the opposing dimension, payload order is the fallback.
    try:
        order = super(OpposingElementCollator, self)._display_order
    except ValueError:
        order = PayloadOrderCollator.display_order(self._dimension)
    return order

@lazyproperty
def _element_values(self):
"""tuple of measure values in the specified opposing vector, in payload order.
Expand Down Expand Up @@ -579,19 +592,33 @@ def __init__(self, dimension, opposing_inserted_vectors):

@classmethod
def display_order(cls, dimension, opposing_inserted_vectors):
""" -> sequence of int element-idx specifying ordering of dimension elements.
"""Return sequence of int element-idx specifying ordering of dimension elements.
The returned indices are "signed", with positive indices applying to base
vectors and negative indices applying to inserted vectors. Both work for
indexing in their respective unordered collections.
"""
return cls(dimension, opposing_inserted_vectors)._display_order

@property
def _display_order(self):
    """tuple of int element-idx specifying ordering of dimension elements.

    Payload order is used when the sort-key subtotal cannot be resolved.
    """
    # --- The sort is based on an opposing subtotal identified by its id. The
    # --- ordering-transform is stored with a saved analysis, so the subtotal it
    # --- names can be deleted between authoring time and export time (possibly
    # --- weeks or more later). When the sort-key element is not found in the
    # --- opposing dimension, payload order is the fallback.
    try:
        order = super(OpposingSubtotalCollator, self)._display_order
    except ValueError:
        order = PayloadOrderCollator.display_order(self._dimension)
    return order

@lazyproperty
def _element_values(self):
"""tuple of measure values in the specified opposing subtotal vector.
Values appear in payload-order without interleaved subtotals. The payload-order
Values appear in payload order without interleaved subtotals. The payload-order
index of each value can be inferred from its position in this sequence. Can
possibly include NaN values.
"""
Expand Down
108 changes: 19 additions & 89 deletions src/cr/cube/dimension.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@

import numpy as np

from cr.cube.enums import DIMENSION_TYPE as DT
from cr.cube.enums import COLLATION_METHOD as CM, DIMENSION_TYPE as DT
from cr.cube.util import lazyproperty


Expand Down Expand Up @@ -293,6 +293,14 @@ def apply_transforms(self, dimension_transforms):
self._dimension_dict, self._dimension_type, dimension_transforms
)

@lazyproperty
def collation_method(self):
    """Member of COLLATION_METHOD specifying ordering of dimension elements.

    The member is looked up by the "type" value of `.order_dict`. When that value
    is absent (or None), `COLLATION_METHOD.PAYLOAD_ORDER` is returned.
    """
    keyword = self.order_dict.get("type")
    # --- no method keyword means elements stay in payload order ---
    return CM.PAYLOAD_ORDER if keyword is None else CM(keyword)

@lazyproperty
def description(self):
"""str description of this dimension."""
Expand All @@ -313,38 +321,13 @@ def dimension_type(self):
"""Member of DIMENSION_TYPE appropriate to this cube dimension."""
return self._dimension_type

@lazyproperty
def display_order(self):
    """Sequence of int element indices giving the display order of elements.

    Only valid elements are represented; missing elements do not appear. Each
    index is the document-order position of its element within the sequence of
    valid elements, so missing elements are skipped when indexes are assigned.
    The sequence is exhaustive; every valid element appears in it.

    Any *explicit* ordering transform (including its resolved cascade) is
    reflected here. *Sort* transforms are not, because they cannot be resolved
    by the dimension; use the `.sort` property to access any sort transform that
    may apply.

    Example with explicit-order transform:

        (3, 0, 2, 1, 4)

    Example with no explicit-order transform:

        (0, 1, 2, 3, 4)
    """
    valid_elements = self.valid_elements
    return valid_elements.display_order

@lazyproperty
def element_ids(self):
"""tuple of int element-id for each valid element in this dimension.
Element-ids appear in the order defined in the cube-result.
"""
raise NotImplementedError
return tuple(e.element_id for e in self.valid_elements)

@lazyproperty
def name(self):
Expand Down Expand Up @@ -501,7 +484,7 @@ def __init__(self, type_dict, dimension_transforms_dict):
@lazyproperty
def valid_elements(self):
"""_ValidElements object containing only non-missing elements."""
return _ValidElements(self._elements, self._dimension_transforms_dict)
return _ValidElements(self._elements)

@lazyproperty
def _element_dicts(self):
Expand Down Expand Up @@ -562,72 +545,14 @@ class _ValidElements(_BaseElements):
directly.
"""

def __init__(self, all_elements, dimension_transforms_dict):
def __init__(self, all_elements):
self._all_elements = all_elements
self._dimension_transforms_dict = dimension_transforms_dict

@lazyproperty
def display_order(self):
    """Sequence of int element-idx reflecting order in which to display elements.

    Any explicit element-order transform, including resolution of its cascade, is
    applied. The result of a *sort* transform is *not* reflected; that can only be
    resolved at a higher level, where vector values are known.
    """
    explicit_order = self._explicit_order
    if explicit_order:
        return explicit_order
    # --- no (non-empty) explicit order, so elements appear in payload order ---
    return tuple(range(len(self._elements)))

@lazyproperty
def _elements(self):
"""tuple containing actual sequence of element objects."""
"""tuple containing valid (non-missing) element objects in payload order."""
return tuple(element for element in self._all_elements if not element.missing)

@lazyproperty
def _explicit_order(self):
    """Sequence of int element-idx or None, reflecting explicit-order transform.

    None is returned when no explicit-order transform is specified. Otherwise the
    value is an exhaustive collection of (valid) element offsets, in the order
    specified (and in some cases implied) by the order transform.
    """
    # ---bail out unless there is an explicit-order transform with a list of ids---
    order_dict = self._dimension_transforms_dict.get("order", {})
    requested_ids = order_dict.get("element_ids")
    if order_dict.get("type") != "explicit" or not isinstance(requested_ids, list):
        return None

    # ---element-ids in the order they appear in the cube result, something like
    # ---[0, 1, 2, -1] or perhaps ["0001", "0002", etc.]. The position of an id in
    # ---this tuple is its index in the valid-elements sequence.
    payload_ids = tuple(element.element_id for element in self)
    # ---mutable copy tracking ids not yet placed; consuming ids from it is what
    # ---implements the "no-duplicates" behavior.
    unplaced_ids = list(payload_ids)

    idxs = []
    for element_id in requested_ids:
        # ---An id named in the transform but absent from the dimension is
        # ---ignored, and a repeated id only counts on its first appearance.
        if element_id in unplaced_ids:
            idxs.append(payload_ids.index(element_id))
            unplaced_ids.remove(element_id)

    # ---elements not mentioned in the transform follow, in the order they
    # ---originally appeared in the cube-result.
    idxs.extend(payload_ids.index(element_id) for element_id in unplaced_ids)

    return tuple(idxs)


class _Element(object):
"""A category or subvariable of a dimension.
Expand Down Expand Up @@ -891,7 +816,12 @@ def _subtotals(self):


class _Subtotal(object):
"""A subtotal insertion on a cube dimension."""
"""A subtotal insertion on a cube dimension.
`fallback_insertion_id` is a fallback unique identifier for this insertion, until
real insertion-ids can be added. Its value is just the index+1 of this subtotal
within the insertions transform collection.
"""

def __init__(self, subtotal_dict, valid_elements, prune, fallback_insertion_id):
self._subtotal_dict = subtotal_dict
Expand Down
12 changes: 12 additions & 0 deletions src/cr/cube/enums.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@

"""Enumerated sets related to cubes."""

from enum import Enum

from cr.cube.util import lazyproperty


Expand Down Expand Up @@ -53,3 +55,13 @@ class DIMENSION_TYPE(object):
ALLOWED_PAIRWISE_TYPES = frozenset(
(BINNED_NUMERIC, CA, CAT, CA_CAT, DATETIME, MR, TEXT)
)


class COLLATION_METHOD(Enum):
    """Enumerated values representing the methods of sorting dimension elements.

    Each member's value is the keyword that identifies the method; a member can be
    looked up from its keyword by calling the enum, e.g.
    `COLLATION_METHOD("explicit")`.
    """

    # --- elements ordered by an explicit-order transform ---
    EXPLICIT_ORDER = "explicit"
    # --- elements sorted by their margin value ---
    MARGINAL = "marginal"
    # --- elements sorted by their value in a specified opposing element ---
    OPPOSING_ELEMENT = "opposing_element"
    # --- elements sorted by their value in a specified opposing subtotal ---
    OPPOSING_SUBTOTAL = "opposing_subtotal"
    # --- elements left in the order they appear in the payload ---
    PAYLOAD_ORDER = "payload_order"
Loading

0 comments on commit ebe0f2c

Please sign in to comment.