diff --git a/src/cr/cube/crunch_cube.py b/src/cr/cube/crunch_cube.py index d4c61041e..00ebd2aed 100644 --- a/src/cr/cube/crunch_cube.py +++ b/src/cr/cube/crunch_cube.py @@ -325,8 +325,8 @@ def margin( # dividing. Those across dims which are summed across MUST NOT be # included, because they would change the result. hs_dims = self._hs_dims_for_den(include_transforms_for_dims, axis) - den = self._apply_missings_and_insertions( - table, hs_dims, include_missing=include_missing + den = self._apply_subtotals( + self._apply_missings(table, include_missing=include_missing), hs_dims ) # Apply correct mask (based on the as_array shape) @@ -593,7 +593,9 @@ def proportions( # Calculate numerator from table (include all H&S dimensions). table = self._measure(weighted).raw_cube_array - num = self._apply_missings_and_insertions(table, include_transforms_for_dims) + num = self._apply_subtotals( + self._apply_missings(table), include_transforms_for_dims + ) proportions = num / self._denominator( weighted, include_transforms_for_dims, axis @@ -624,7 +626,7 @@ def _denominator(self, weighted, include_transforms_for_dims, axis): None if i in new_axis else slice(None) for i, _ in enumerate(table.shape) ) hs_dims = self._hs_dims_for_den(include_transforms_for_dims, axis) - den = self._apply_missings_and_insertions(table, hs_dims) + den = self._apply_subtotals(self._apply_missings(table), hs_dims) return np.sum(den, axis=new_axis)[index] def pvals(self, weighted=True, prune=False, hs_dims=None): @@ -820,9 +822,7 @@ def _all_dimensions(self): """ return AllDimensions(dimension_dicts=self._cube_dict["result"]["dimensions"]) - def _apply_missings_and_insertions( - self, res, include_transforms_for_dims, include_missing=False - ): + def _apply_missings(self, res, include_missing=False): """Return ndarray with missing and insertions as specified. 
The return value is the result of the following operations on *res*, @@ -832,9 +832,6 @@ def _apply_missings_and_insertions( * Remove vectors (rows/cols) for missing elements if *include_missin* is False. - * Insert subtotals (and perhaps other insertions later) for - dimensions having their apparent dimension-idx in - *include_transforms_for_dims*. Note that it does *not* include pruning. """ @@ -848,12 +845,17 @@ def _apply_missings_and_insertions( ) for d in self._all_dimensions ) + return res[np.ix_(*element_idxs)] if element_idxs else res + + def _apply_subtotals(self, res, include_transforms_for_dims): + """Return *res* with subtotals (and perhaps other insertions later) + inserted for dimensions having their apparent dimension-idx in + *include_transforms_for_dims*; *res* is returned unchanged when it is empty. + """ if not include_transforms_for_dims: - return res[np.ix_(*element_idxs)] if element_idxs else res + return res - # ---insert subtotals--- suppressed_dim_count = 0 - new_valids = [i for i in element_idxs] for (dim_idx, dim) in enumerate(self._all_dimensions): if dim.dimension_type == DT.MR_CAT: suppressed_dim_count += 1 @@ -868,9 +870,9 @@ def _apply_missings_and_insertions( continue # ---insert subtotals into result array--- insertions = self._insertions(res, dim, dim_idx) - res, new_valids = self._update_result(res, insertions, dim_idx, new_valids) + res = self._update_result(res, insertions, dim_idx) - return res[np.ix_(*new_valids)] if new_valids else res + return res def _as_array( self, @@ -890,10 +892,11 @@ def _as_array( Returns res (ndarray): Tabular representation of crunch cube """ - return self._apply_missings_and_insertions( - self._measure(weighted).raw_cube_array, + return self._apply_subtotals( + self._apply_missings( + self._measure(weighted).raw_cube_array, include_missing=include_missing + ), include_transforms_for_dims, - include_missing=include_missing, ) @classmethod @@ -1337,18 +1340,13 @@ def _pruning_base(self, axis=None, hs_dims=None): axis=axis, weighted=False,
include_transforms_for_dims=hs_dims ) - def _update_result(self, result, insertions, dimension_index, valid_indices): + def _update_result(self, result, insertions, dimension_index): """Insert subtotals into resulting ndarray.""" - # TODO: valid_indices should be a tuple as a parameter and as a return - # value for j, (ind_insertion, value) in enumerate(insertions): result = np.insert( result, ind_insertion + j + 1, value, axis=dimension_index ) - valid_indices = valid_indices and self._fix_valid_indices( - valid_indices, ind_insertion + j, dimension_index - ) - return result, valid_indices + return result class _Measures(object): diff --git a/src/cr/cube/dimension.py b/src/cr/cube/dimension.py index 1a9d40ea1..fd9676899 100644 --- a/src/cr/cube/dimension.py +++ b/src/cr/cube/dimension.py @@ -525,7 +525,7 @@ def _elements(self): """Composed tuple storing actual sequence of element objects.""" ElementCls, element_dicts = self._element_makings return tuple( - ElementCls(element_dict, idx) + ElementCls(element_dict, idx, element_dicts) for idx, element_dict in enumerate(element_dicts) ) @@ -551,9 +551,10 @@ def _elements(self): class _BaseElement(object): """Base class for element objects.""" - def __init__(self, element_dict, index): + def __init__(self, element_dict, index, element_dicts): self._element_dict = element_dict self._index = index + self._element_dicts = element_dicts @lazyproperty def element_id(self): @@ -569,6 +570,11 @@ def index(self): """ return self._index + @lazyproperty + def index_in_valids(self): + valid_ids = [el["id"] for el in self._element_dicts if not el.get("missing")] + return valid_ids.index(self.element_id) + @property def is_insertion(self): """True if this item represents an insertion (e.g. subtotal). 
@@ -596,8 +602,8 @@ def numeric_value(self): class _Category(_BaseElement): """A category on a categorical dimension.""" - def __init__(self, category_dict, index): - super(_Category, self).__init__(category_dict, index) + def __init__(self, category_dict, index, element_dicts): + super(_Category, self).__init__(category_dict, index, element_dicts) self._category_dict = category_dict @lazyproperty @@ -741,7 +747,7 @@ def anchor_idx(self): anchor = self.anchor if anchor in ["top", "bottom"]: return anchor - return self.valid_elements.get_by_id(anchor).index + return self.valid_elements.get_by_id(anchor).index_in_valids @lazyproperty def addend_ids(self): @@ -765,7 +771,7 @@ def addend_idxs(self): rather than its element id. """ return tuple( - self.valid_elements.get_by_id(addend_id).index + self.valid_elements.get_by_id(addend_id).index_in_valids for addend_id in self.addend_ids ) diff --git a/tests/unit/test_dimension.py b/tests/unit/test_dimension.py index a17b9dee6..b970b22a0 100644 --- a/tests/unit/test_dimension.py +++ b/tests/unit/test_dimension.py @@ -760,9 +760,9 @@ def it_creates_its_Element_objects_in_its_local_factory( elements = all_elements._elements assert _BaseElement_.call_args_list == [ - call({"element": "dict-A"}, 0), - call({"element": "dict-B"}, 1), - call({"element": "dict-C"}, 2), + call({"element": "dict-A"}, 0, element_dicts_), + call({"element": "dict-B"}, 1, element_dicts_), + call({"element": "dict-C"}, 2, element_dicts_), ] assert elements == (elements_[0], elements_[1], elements_[2]) @@ -816,20 +816,20 @@ def all_elements_(self, request): class Describe_BaseElement(object): def it_knows_its_element_id(self): element_dict = {"id": 42} - element = _BaseElement(element_dict, None) + element = _BaseElement(element_dict, None, element_dict) element_id = element.element_id assert element_id == 42 def it_knows_its_position_among_all_the_dimension_elements(self): - element = _BaseElement(None, 17) + element = _BaseElement(None, 17, {}) index = 
element.index assert index == 17 def it_knows_whether_its_missing_or_valid(self, missing_fixture): element_dict, expected_value = missing_fixture - element = _BaseElement(element_dict, None) + element = _BaseElement(element_dict, None, None) missing = element.missing @@ -838,7 +838,7 @@ def it_knows_whether_its_missing_or_valid(self, missing_fixture): def it_knows_its_numeric_value(self, numeric_value_fixture): element_dict, expected_value = numeric_value_fixture - element = _BaseElement(element_dict, None) + element = _BaseElement(element_dict, None, None) numeric_value = element.numeric_value @@ -884,7 +884,7 @@ def numeric_value_fixture(self, request): class Describe_Category(object): def it_knows_its_label(self, label_fixture): category_dict, expected_value = label_fixture - category = _Category(category_dict, None) + category = _Category(category_dict, None, None) label = category.label @@ -909,7 +909,7 @@ def label_fixture(self, request): class Describe_Element(object): def it_knows_its_label(self, label_fixture): element_dict, expected_value = label_fixture - element = _Element(element_dict, None) + element = _Element(element_dict, None, None) label = element.label @@ -1071,7 +1071,7 @@ def it_knows_the_index_of_the_anchor_element( anchor, index, calls, expected_value = anchor_idx_fixture anchor_prop_.return_value = anchor valid_elements_.get_by_id.return_value = element_ - element_.index = index + element_.index_in_valids = index subtotal = _Subtotal(None, valid_elements_) anchor_idx = subtotal.anchor_idx @@ -1095,7 +1095,8 @@ def it_provides_access_to_the_addend_element_indices( ): addend_ids_prop_.return_value = (3, 6, 9) valid_elements_.get_by_id.side_effect = iter( - instance_mock(request, _BaseElement, index=index) for index in (2, 4, 6) + instance_mock(request, _BaseElement, index_in_valids=index) + for index in (2, 4, 6) ) subtotal = _Subtotal(None, valid_elements_)