Skip to content

Commit

Permalink
Merge ebe0f2c into 9745bb1
Browse files Browse the repository at this point in the history
  • Loading branch information
scanny committed Nov 10, 2020
2 parents 9745bb1 + ebe0f2c commit 4fe80e8
Show file tree
Hide file tree
Showing 27 changed files with 2,912 additions and 699 deletions.
652 changes: 652 additions & 0 deletions src/cr/cube/collator.py

Large diffs are not rendered by default.

39 changes: 20 additions & 19 deletions src/cr/cube/cube.py
Original file line number Diff line number Diff line change
Expand Up @@ -134,7 +134,26 @@ def population_fraction(self):
@lazyproperty
def _cubes(self):
"""Sequence of Cube objects containing data for this analysis."""
return tuple(self._iter_cubes())

def iter_cubes():
"""Generate a Cube object for each of cube_responses.
0D cube-responses and 1D second-and-later cubes are "inflated" to add their
missing row dimension.
"""
for idx, cube_response in enumerate(self._cube_responses):
cube = Cube(
cube_response,
cube_idx=idx if self._is_multi_cube else None,
transforms=self._transforms_dicts[idx],
population=self._population,
mask_size=self._min_base,
)
# --- numeric-mean cubes require inflation to restore their
# --- rows-dimension, others don't
yield cube.inflate() if self._is_numeric_mean else cube

return tuple(iter_cubes())

@lazyproperty
def _is_multi_cube(self):
Expand Down Expand Up @@ -166,24 +185,6 @@ def _is_numeric_mean(self):
# --- construction is low-overhead because all Cube properties are lazy.
return Cube(self._cube_responses[0]).ndim == 0

def _iter_cubes(self):
    """Generate one Cube object per item of `._cube_responses`, in order.

    Each cube receives the transforms-dict found at the same position in
    `._transforms_dicts`. Its `cube_idx` is that position when `._is_multi_cube` is
    truthy and None otherwise. When `._is_numeric_mean` is truthy, the value
    yielded is the result of `cube.inflate()` rather than the cube itself.
    """
    for position, response in enumerate(self._cube_responses):
        built_cube = Cube(
            response,
            cube_idx=position if self._is_multi_cube else None,
            transforms=self._transforms_dicts[position],
            population=self._population,
            mask_size=self._min_base,
        )
        # --- a numeric-mean analysis gets the inflated form of each cube, which
        # --- restores the rows-dimension; any other analysis gets the cube as-is
        if self._is_numeric_mean:
            yield built_cube.inflate()
        else:
            yield built_cube


class Cube(object):
"""Provides access to individual slices on a cube-result.
Expand Down
16 changes: 8 additions & 8 deletions src/cr/cube/cubepart.py
Original file line number Diff line number Diff line change
Expand Up @@ -554,18 +554,18 @@ def scale_mean_pairwise_indices_alt(self):

@lazyproperty
def scale_means_column(self):
"""1D float64 ndarray of column scale means
"""1D float64 ndarray of scale mean for each row (making a summary "column").
The calculation is based on multiply of the numeric values by the
row_proportions and divide by the rows_margin.
Each scale mean is based on the numeric values of the *columns-dimension*
elements.
"""
if np.all(np.isnan(self._columns_dimension_numeric_values)):
column_numeric_values = self._columns_dimension_numeric_values

if np.all(np.isnan(column_numeric_values)):
return None

inner = np.nansum(
self._columns_dimension_numeric_values * self.row_proportions, axis=1
)
not_a_nan_index = ~np.isnan(self._columns_dimension_numeric_values)
inner = np.nansum(column_numeric_values * self.row_proportions, axis=1)
not_a_nan_index = ~np.isnan(column_numeric_values)
denominator = np.sum(self.row_proportions[:, not_a_nan_index], axis=1)
return inner / denominator

Expand Down
179 changes: 52 additions & 127 deletions src/cr/cube/dimension.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@

import numpy as np

from cr.cube.enums import DIMENSION_TYPE as DT
from cr.cube.enums import COLLATION_METHOD as CM, DIMENSION_TYPE as DT
from cr.cube.util import lazyproperty


Expand Down Expand Up @@ -261,7 +261,7 @@ class Dimension(object):
def __init__(self, dimension_dict, dimension_type, dimension_transforms=None):
self._dimension_dict = dimension_dict
self._dimension_type = dimension_type
self._dimension_transforms_arg = dimension_transforms
self._dimension_transforms_dict = dimension_transforms or {}

@lazyproperty
def alias(self):
Expand Down Expand Up @@ -293,6 +293,14 @@ def apply_transforms(self, dimension_transforms):
self._dimension_dict, self._dimension_type, dimension_transforms
)

@lazyproperty
def collation_method(self):
    """COLLATION_METHOD member describing how this dimension's elements are ordered.

    The member is looked up from the "type" value of `.order_dict`. When that key
    is absent (or its value is None), `CM.PAYLOAD_ORDER` is returned.
    """
    keyword = self.order_dict.get("type")
    return CM.PAYLOAD_ORDER if keyword is None else CM(keyword)

@lazyproperty
def description(self):
"""str description of this dimension."""
Expand All @@ -314,29 +322,12 @@ def dimension_type(self):
return self._dimension_type

@lazyproperty
def display_order(self):
"""Sequence of int element indices specifying display order of elements.
The sequence includes only valid elements; missing elements do not appear.
Further, each index represents the document-order position of the element in the
sequence of valid elements; missing elements are skipped in the assignment of
indexes. The returned sequence is exhaustive; all valid elements are
represented.
The sequence reflects the resolved cascade of any *explicit* ordering
transforms, but does *not* reflect any *sort* transforms, which cannot be
resolved by the dimension. Use the `.sort` property to access any sort transform
that may apply.
Example with explicit-order transform:
(3, 0, 2, 1, 4)
Example with no explicit-order transform:
def element_ids(self):
"""tuple of int element-id for each valid element in this dimension.
(0, 1, 2, 3, 4)
Element-ids appear in the order defined in the cube-result.
"""
return self.valid_elements.display_order
return tuple(e.element_id for e in self.valid_elements)

@lazyproperty
def name(self):
Expand Down Expand Up @@ -373,6 +364,14 @@ def numeric_values(self):
"""
return tuple(element.numeric_value for element in self.valid_elements)

@lazyproperty
def order_dict(self):
    """dict found under the "order" key of this dimension's transforms dict.

    An empty dict is returned when the transforms dict has no "order" key.
    """
    transforms = self._dimension_transforms_dict
    return transforms.get("order", {})

@lazyproperty
def prune(self):
"""True if empty elements should be automatically hidden on this dimension."""
Expand All @@ -385,35 +384,24 @@ def prune(self):
def shape(self):
return len(self.all_elements)

@lazyproperty
def sort(self):
    """Sort-method object for this dimension, or None when no sort applies.

    This implementation unconditionally returns None; no sort object is ever
    produced here.
    """
    return None  # pragma: no cover

@lazyproperty
def subtotals(self):
"""_Subtotals sequence object for this dimension.
Each item in the sequence is a _Subtotal object specifying a subtotal, including
its addends and anchor.
"""
# ---insertions in dimension-transforms override those on dimension itself---
insertion_dicts = self._dimension_transforms_dict.get("insertions")
if insertion_dicts is not None:
return _Subtotals(insertion_dicts, self.valid_elements, self.prune)
# --- insertions in dimension-transforms override those on dimension itself ---
if "insertions" in self._dimension_transforms_dict:
return _Subtotals(
self._dimension_transforms_dict["insertions"],
self.valid_elements,
self.prune,
)

# ---otherwise, insertions defined as default transforms apply---
view = self._dimension_dict.get("references", {}).get("view", {})
# ---view can be both None and {}, thus the edge case.---
insertion_dicts = (
[] if view is None else view.get("transform", {}).get("insertions", [])
)
# --- otherwise, insertions defined in cube as default transforms apply ---
view = self._dimension_dict.get("references", {}).get("view") or {}
insertion_dicts = view.get("transform", {}).get("insertions", [])
return _Subtotals(insertion_dicts, self.valid_elements, self.prune)

@lazyproperty
Expand All @@ -426,19 +414,6 @@ def valid_elements(self):
"""
return self.all_elements.valid_elements

@lazyproperty
def _dimension_transforms_dict(self):
    """dict of transforms for this dimension; never None.

    This is the object stored in `._dimension_transforms_arg`, except that a None
    value is replaced by a new empty dict.
    """
    transforms_arg = self._dimension_transforms_arg
    if transforms_arg is None:
        return {}
    return transforms_arg


class _BaseElements(Sequence):
"""Base class for element sequence containers."""
Expand Down Expand Up @@ -509,7 +484,7 @@ def __init__(self, type_dict, dimension_transforms_dict):
@lazyproperty
def valid_elements(self):
"""_ValidElements object containing only non-missing elements."""
return _ValidElements(self._elements, self._dimension_transforms_dict)
return _ValidElements(self._elements)

@lazyproperty
def _element_dicts(self):
Expand Down Expand Up @@ -570,72 +545,14 @@ class _ValidElements(_BaseElements):
directly.
"""

def __init__(self, all_elements, dimension_transforms_dict):
def __init__(self, all_elements):
self._all_elements = all_elements
self._dimension_transforms_dict = dimension_transforms_dict

@lazyproperty
def display_order(self):
    """Sequence of int element-idx giving the order in which to display elements.

    The value of `._explicit_order` is used whenever it is truthy. Otherwise the
    result is simply `(0, 1, ..., n-1)` where `n` is the length of `._elements`.
    """
    explicit_order = self._explicit_order
    if explicit_order:
        return explicit_order
    return tuple(range(len(self._elements)))

@lazyproperty
def _elements(self):
"""tuple containing actual sequence of element objects."""
"""tuple containing valid (non-missing) element objects in payload order."""
return tuple(element for element in self._all_elements if not element.missing)

@lazyproperty
def _explicit_order(self):
    """Sequence of int element-idx or None, reflecting explicit-order transform.

    None is returned unless the "order" item of `._dimension_transforms_dict` has
    "type" equal to "explicit" and an "element_ids" value that is a list.

    Otherwise the result is a tuple containing the index of every element of this
    sequence exactly once: first the elements named in "element_ids", in the order
    named, followed by all elements not named, in their original order.

    An id in "element_ids" that matches no element is ignored, as is any repeat of
    an id already used. Element-ids are expected to be unique and hashable; if an
    element-id were repeated in this sequence, only its first occurrence can be
    addressed by the transform and later occurrences are treated as leftovers.
    """
    # --- get order transform if any, aborting if no explicit order transform ---
    order_dict = self._dimension_transforms_dict.get("order", {})
    ordered_element_ids = order_dict.get("element_ids")
    if order_dict.get("type") != "explicit" or not isinstance(
        ordered_element_ids, list
    ):
        return None

    # --- element-ids in the order they appear in this sequence, like
    # --- (0, 1, 2, -1) or perhaps ("0001", "0002", ...)
    cube_result_order = tuple(element.element_id for element in self)

    # --- single-pass lookup from element-id to its index; this replaces the
    # --- repeated linear scans (`in`, `.index()`, `.remove()`) that made this
    # --- computation quadratic. `setdefault` keeps the first index for an id.
    idx_by_element_id = {}
    for idx, element_id in enumerate(cube_result_order):
        idx_by_element_id.setdefault(element_id, idx)

    placed_idxs = set()
    ordered_idxs = []
    for element_id in ordered_element_ids:
        try:
            idx = idx_by_element_id.get(element_id)
        except TypeError:
            # --- an unhashable item (e.g. a list or dict parsed from JSON) cannot
            # --- equal any element-id, so it is ignored like any unknown id
            continue
        # --- unknown ids are skipped; a duplicated id is used on first encounter
        if idx is None or idx in placed_idxs:
            continue
        ordered_idxs.append(idx)
        placed_idxs.add(idx)

    # --- elements not mentioned in the transform follow, in original order ---
    ordered_idxs.extend(
        idx for idx in range(len(cube_result_order)) if idx not in placed_idxs
    )
    return tuple(ordered_idxs)


class _Element(object):
"""A category or subvariable of a dimension.
Expand Down Expand Up @@ -854,11 +771,8 @@ def iter_for_anchor(self, anchor):

@lazyproperty
def _anchors(self):
"""List of int or str indicating element under which to insert this subtotal."""
return list(
_Subtotal(subtotal_dict, self._valid_elements, self._prune).anchor
for subtotal_dict in self._iter_valid_subtotal_dicts()
)
"""Sequence of int or str anchor for each subtotal."""
return tuple(s.anchor for s in self._subtotals)

@lazyproperty
def _element_ids(self):
Expand Down Expand Up @@ -896,18 +810,24 @@ def _iter_valid_subtotal_dicts(self):
def _subtotals(self):
"""Composed tuple storing actual sequence of _Subtotal objects."""
return tuple(
_Subtotal(subtotal_dict, self._valid_elements, self._prune)
for subtotal_dict in self._iter_valid_subtotal_dicts()
_Subtotal(subtotal_dict, self._valid_elements, self._prune, idx + 1)
for idx, subtotal_dict in enumerate(self._iter_valid_subtotal_dicts())
)


class _Subtotal(object):
"""A subtotal insertion on a cube dimension."""
"""A subtotal insertion on a cube dimension.
`fallback_insertion_id` is a fallback unique identifier for this insertion, until
real insertion-ids can be added. Its value is just the index+1 of this subtotal
within the insertions transform collection.
"""

def __init__(self, subtotal_dict, valid_elements, prune):
def __init__(self, subtotal_dict, valid_elements, prune, fallback_insertion_id):
self._subtotal_dict = subtotal_dict
self._valid_elements = valid_elements
self._prune = prune
self._fallback_insertion_id = fallback_insertion_id

@lazyproperty
def anchor(self):
Expand Down Expand Up @@ -966,6 +886,11 @@ def addend_idxs(self):
dtype=int,
)

@lazyproperty
def insertion_id(self):
    """Identifier of this subtotal, taken from its subtotal-dict when available.

    The "insertion_id" item of the subtotal-dict is used when that key is present;
    otherwise the fallback insertion-id provided on construction is returned.
    """
    subtotal_dict = self._subtotal_dict
    if "insertion_id" in subtotal_dict:
        return subtotal_dict["insertion_id"]
    return self._fallback_insertion_id

@lazyproperty
def label(self):
"""str display name for this subtotal, suitable for use as label."""
Expand Down
12 changes: 12 additions & 0 deletions src/cr/cube/enums.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@

"""Enumerated sets related to cubes."""

from enum import Enum

from cr.cube.util import lazyproperty


Expand Down Expand Up @@ -53,3 +55,13 @@ class DIMENSION_TYPE(object):
ALLOWED_PAIRWISE_TYPES = frozenset(
(BINNED_NUMERIC, CA, CAT, CA_CAT, DATETIME, MR, TEXT)
)


class COLLATION_METHOD(Enum):
    """Enumerated values representing the methods of sorting dimension elements.

    Each member's value is a str keyword, so a member can be obtained from its
    keyword by calling the enum, e.g. `COLLATION_METHOD("explicit")`. An unknown
    keyword raises `ValueError`, as for any `Enum`.
    """

    # NOTE(review): these keywords presumably match the "type" field of an "order"
    # transform in the JSON payload -- confirm against the code that does the lookup.
    EXPLICIT_ORDER = "explicit"
    MARGINAL = "marginal"
    OPPOSING_ELEMENT = "opposing_element"
    OPPOSING_SUBTOTAL = "opposing_subtotal"
    # NOTE(review): presumably the default, meaning elements keep the order in
    # which they arrive in the cube-result -- confirm against callers.
    PAYLOAD_ORDER = "payload_order"
Loading

0 comments on commit 4fe80e8

Please sign in to comment.