Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Decouple pruning and missingness #147

Merged
merged 2 commits into from
Apr 2, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
48 changes: 23 additions & 25 deletions src/cr/cube/crunch_cube.py
Original file line number Diff line number Diff line change
Expand Up @@ -325,8 +325,8 @@ def margin(
# dividing. Those across dims which are summed across MUST NOT be
# included, because they would change the result.
hs_dims = self._hs_dims_for_den(include_transforms_for_dims, axis)
den = self._apply_missings_and_insertions(
table, hs_dims, include_missing=include_missing
den = self._apply_subtotals(
self._apply_missings(table, include_missing=include_missing), hs_dims
)

# Apply correct mask (based on the as_array shape)
Expand Down Expand Up @@ -593,7 +593,9 @@ def proportions(

# Calculate numerator from table (include all H&S dimensions).
table = self._measure(weighted).raw_cube_array
num = self._apply_missings_and_insertions(table, include_transforms_for_dims)
num = self._apply_subtotals(
self._apply_missings(table), include_transforms_for_dims
)

proportions = num / self._denominator(
weighted, include_transforms_for_dims, axis
Expand Down Expand Up @@ -624,7 +626,7 @@ def _denominator(self, weighted, include_transforms_for_dims, axis):
None if i in new_axis else slice(None) for i, _ in enumerate(table.shape)
)
hs_dims = self._hs_dims_for_den(include_transforms_for_dims, axis)
den = self._apply_missings_and_insertions(table, hs_dims)
den = self._apply_subtotals(self._apply_missings(table), hs_dims)
return np.sum(den, axis=new_axis)[index]

def pvals(self, weighted=True, prune=False, hs_dims=None):
Expand Down Expand Up @@ -820,9 +822,7 @@ def _all_dimensions(self):
"""
return AllDimensions(dimension_dicts=self._cube_dict["result"]["dimensions"])

def _apply_missings_and_insertions(
self, res, include_transforms_for_dims, include_missing=False
):
def _apply_missings(self, res, include_missing=False):
"""Return ndarray with missing elements handled as specified.

The return value is the result of the following operations on *res*,
Expand All @@ -832,9 +832,6 @@ def _apply_missings_and_insertions(
* Remove vectors (rows/cols) for missing elements if *include_missing*
is False.

* Insert subtotals (and perhaps other insertions later) for
dimensions having their apparent dimension-idx in
*include_transforms_for_dims*.

Note that it does *not* include pruning.
"""
Expand All @@ -848,12 +845,17 @@ def _apply_missings_and_insertions(
)
for d in self._all_dimensions
)
return res[np.ix_(*element_idxs)] if element_idxs else res

def _apply_subtotals(self, res, include_transforms_for_dims):
"""Return *res* with subtotals (and perhaps other insertions later)
inserted for dimensions having their apparent dimension-idx in
*include_transforms_for_dims*.
"""
if not include_transforms_for_dims:
return res[np.ix_(*element_idxs)] if element_idxs else res
return res

# ---insert subtotals---
suppressed_dim_count = 0
new_valids = [i for i in element_idxs]
for (dim_idx, dim) in enumerate(self._all_dimensions):
if dim.dimension_type == DT.MR_CAT:
suppressed_dim_count += 1
Expand All @@ -868,9 +870,9 @@ def _apply_missings_and_insertions(
continue
# ---insert subtotals into result array---
insertions = self._insertions(res, dim, dim_idx)
res, new_valids = self._update_result(res, insertions, dim_idx, new_valids)
res = self._update_result(res, insertions, dim_idx)

return res[np.ix_(*new_valids)] if new_valids else res
return res

def _as_array(
self,
Expand All @@ -890,10 +892,11 @@ def _as_array(
Returns
res (ndarray): Tabular representation of crunch cube
"""
return self._apply_missings_and_insertions(
self._measure(weighted).raw_cube_array,
return self._apply_subtotals(
self._apply_missings(
self._measure(weighted).raw_cube_array, include_missing=include_missing
),
include_transforms_for_dims,
include_missing=include_missing,
)

@classmethod
Expand Down Expand Up @@ -1337,18 +1340,13 @@ def _pruning_base(self, axis=None, hs_dims=None):
axis=axis, weighted=False, include_transforms_for_dims=hs_dims
)

def _update_result(self, result, insertions, dimension_index, valid_indices):
def _update_result(self, result, insertions, dimension_index):
"""Insert subtotals into resulting ndarray."""
# TODO: valid_indices should be a tuple as a parameter and as a return
# value
for j, (ind_insertion, value) in enumerate(insertions):
result = np.insert(
result, ind_insertion + j + 1, value, axis=dimension_index
)
valid_indices = valid_indices and self._fix_valid_indices(
valid_indices, ind_insertion + j, dimension_index
)
return result, valid_indices
return result


class _Measures(object):
Expand Down
18 changes: 12 additions & 6 deletions src/cr/cube/dimension.py
Original file line number Diff line number Diff line change
Expand Up @@ -525,7 +525,7 @@ def _elements(self):
"""Composed tuple storing actual sequence of element objects."""
ElementCls, element_dicts = self._element_makings
return tuple(
ElementCls(element_dict, idx)
ElementCls(element_dict, idx, element_dicts)
for idx, element_dict in enumerate(element_dicts)
)

Expand All @@ -551,9 +551,10 @@ def _elements(self):
class _BaseElement(object):
"""Base class for element objects."""

def __init__(self, element_dict, index):
def __init__(self, element_dict, index, element_dicts):
self._element_dict = element_dict
self._index = index
self._element_dicts = element_dicts

@lazyproperty
def element_id(self):
Expand All @@ -569,6 +570,11 @@ def index(self):
"""
return self._index

@lazyproperty
def index_in_valids(self):
valid_ids = [el["id"] for el in self._element_dicts if not el.get("missing")]
return valid_ids.index(self.element_id)

@property
def is_insertion(self):
"""True if this item represents an insertion (e.g. subtotal).
Expand Down Expand Up @@ -596,8 +602,8 @@ def numeric_value(self):
class _Category(_BaseElement):
"""A category on a categorical dimension."""

def __init__(self, category_dict, index):
super(_Category, self).__init__(category_dict, index)
def __init__(self, category_dict, index, element_dicts):
super(_Category, self).__init__(category_dict, index, element_dicts)
self._category_dict = category_dict

@lazyproperty
Expand Down Expand Up @@ -741,7 +747,7 @@ def anchor_idx(self):
anchor = self.anchor
if anchor in ["top", "bottom"]:
return anchor
return self.valid_elements.get_by_id(anchor).index
return self.valid_elements.get_by_id(anchor).index_in_valids

@lazyproperty
def addend_ids(self):
Expand All @@ -765,7 +771,7 @@ def addend_idxs(self):
rather than its element id.
"""
return tuple(
self.valid_elements.get_by_id(addend_id).index
self.valid_elements.get_by_id(addend_id).index_in_valids
for addend_id in self.addend_ids
)

Expand Down
23 changes: 12 additions & 11 deletions tests/unit/test_dimension.py
Original file line number Diff line number Diff line change
Expand Up @@ -760,9 +760,9 @@ def it_creates_its_Element_objects_in_its_local_factory(
elements = all_elements._elements

assert _BaseElement_.call_args_list == [
call({"element": "dict-A"}, 0),
call({"element": "dict-B"}, 1),
call({"element": "dict-C"}, 2),
call({"element": "dict-A"}, 0, element_dicts_),
call({"element": "dict-B"}, 1, element_dicts_),
call({"element": "dict-C"}, 2, element_dicts_),
]
assert elements == (elements_[0], elements_[1], elements_[2])

Expand Down Expand Up @@ -816,20 +816,20 @@ def all_elements_(self, request):
class Describe_BaseElement(object):
def it_knows_its_element_id(self):
element_dict = {"id": 42}
element = _BaseElement(element_dict, None)
element = _BaseElement(element_dict, None, element_dict)

element_id = element.element_id

assert element_id == 42

def it_knows_its_position_among_all_the_dimension_elements(self):
element = _BaseElement(None, 17)
element = _BaseElement(None, 17, {})
index = element.index
assert index == 17

def it_knows_whether_its_missing_or_valid(self, missing_fixture):
element_dict, expected_value = missing_fixture
element = _BaseElement(element_dict, None)
element = _BaseElement(element_dict, None, None)

missing = element.missing

Expand All @@ -838,7 +838,7 @@ def it_knows_whether_its_missing_or_valid(self, missing_fixture):

def it_knows_its_numeric_value(self, numeric_value_fixture):
element_dict, expected_value = numeric_value_fixture
element = _BaseElement(element_dict, None)
element = _BaseElement(element_dict, None, None)

numeric_value = element.numeric_value

Expand Down Expand Up @@ -884,7 +884,7 @@ def numeric_value_fixture(self, request):
class Describe_Category(object):
def it_knows_its_label(self, label_fixture):
category_dict, expected_value = label_fixture
category = _Category(category_dict, None)
category = _Category(category_dict, None, None)

label = category.label

Expand All @@ -909,7 +909,7 @@ def label_fixture(self, request):
class Describe_Element(object):
def it_knows_its_label(self, label_fixture):
element_dict, expected_value = label_fixture
element = _Element(element_dict, None)
element = _Element(element_dict, None, None)

label = element.label

Expand Down Expand Up @@ -1071,7 +1071,7 @@ def it_knows_the_index_of_the_anchor_element(
anchor, index, calls, expected_value = anchor_idx_fixture
anchor_prop_.return_value = anchor
valid_elements_.get_by_id.return_value = element_
element_.index = index
element_.index_in_valids = index
subtotal = _Subtotal(None, valid_elements_)

anchor_idx = subtotal.anchor_idx
Expand All @@ -1095,7 +1095,8 @@ def it_provides_access_to_the_addend_element_indices(
):
addend_ids_prop_.return_value = (3, 6, 9)
valid_elements_.get_by_id.side_effect = iter(
instance_mock(request, _BaseElement, index=index) for index in (2, 4, 6)
instance_mock(request, _BaseElement, index_in_valids=index)
for index in (2, 4, 6)
)
subtotal = _Subtotal(None, valid_elements_)

Expand Down