Skip to content

Commit

Permalink
Merge fd0a35a into c1c26cb
Browse files Browse the repository at this point in the history
  • Loading branch information
slobodan-ilic committed Nov 29, 2018
2 parents c1c26cb + fd0a35a commit 2ef6f73
Show file tree
Hide file tree
Showing 17 changed files with 7,222 additions and 204 deletions.
146 changes: 63 additions & 83 deletions src/cr/cube/crunch_cube.py
Original file line number Diff line number Diff line change
Expand Up @@ -314,69 +314,18 @@ def margin(self, axis=None, weighted=True, include_missing=False,
])
"""

def hs_dims_for_den(hs_dims, axis):
if axis is None or hs_dims is None:
return None
if isinstance(axis, int):
axis = [axis]
return [dim for dim in hs_dims if dim not in axis]

table = self._counts(weighted).raw_cube_array
new_axis = self._adjust_axis(axis)
index = tuple(
None if i in new_axis else slice(None)
for i, _ in enumerate(table.shape)
den = self._denominator(
axis, weighted, include_missing,
include_transforms_for_dims, prune,
)

# Calculate denominator. Only include those H&S dimensions, across
# which we DON'T sum. These H&S are needed because of the shape, when
# dividing. Those across dims which are summed across MUST NOT be
# included, because they would change the result.
hs_dims = hs_dims_for_den(include_transforms_for_dims, axis)
den = self._apply_missings_and_insertions(
table, hs_dims, include_missing=include_missing
)
# Calculate "margin" from denominator
margin = self._drop_mr_cat_dims(den, fix_valids=include_missing)

# Apply correct mask (based on the as_array shape)
arr = self._as_array(
include_transforms_for_dims=hs_dims,
include_missing=include_missing
)
if margin.shape[0] == 1 and len(margin.shape) > 1 and self.ndim < 3:
margin = margin.reshape(margin.shape[1:])

# ---prune array if pruning was requested---
if prune:
arr = self._prune_body(arr, transforms=hs_dims)

arr = self._drop_mr_cat_dims(arr, fix_valids=include_missing)

if isinstance(arr, np.ma.core.MaskedArray):
inflate_ind = tuple(
(
None
if (
d.dimension_type == DT.MR_CAT or
n <= 1 or
len(d.elements()) <= 1
) else
slice(None)
)
for d, n in zip(self._all_dimensions, table.shape)
)
mask = np.logical_or(
np.zeros(den.shape, dtype=bool),
arr.mask[inflate_ind],
)
den = np.ma.masked_array(den, mask)

if (self.ndim != 1 or axis is None or
axis == 0 and len(self._all_dimensions) == 1):
# Special case for 1D cube with MR, for "Table" direction
den = np.sum(den, axis=new_axis)[index]

den = self._drop_mr_cat_dims(den, fix_valids=include_missing)
if den.shape[0] == 1 and len(den.shape) > 1 and self.ndim < 3:
den = den.reshape(den.shape[1:])
return den
return margin

@lazyproperty
def missing(self):
Expand Down Expand Up @@ -572,36 +521,20 @@ def proportions(self, axis=None, weighted=True,
])
"""

def hs_dims_for_den(hs_dims, axis):
if axis is None or hs_dims is None:
return None
if isinstance(axis, int):
axis = [axis]
return [dim for dim in hs_dims if dim not in axis]

table = self._measure(weighted).raw_cube_array
new_axis = self._adjust_axis(axis)
index = tuple(
None if i in new_axis else slice(None)
for i, _ in enumerate(table.shape)
)

# Calculate denominator. Only include those H&S dimensions, across
# which we DON'T sum. These H&S are needed because of the shape, when
# dividing. Those across dims which are summed across MUST NOT be
# included, because they would change the result.
hs_dims = hs_dims_for_den(include_transforms_for_dims, axis)
den = self._apply_missings_and_insertions(table, hs_dims)
den = np.sum(den, axis=new_axis)[index]

# Calculate numerator from table (include all H&S dimensions).
num = self._apply_missings_and_insertions(
table, include_transforms_for_dims
self._measure(weighted).raw_cube_array,
include_transforms_for_dims,
)
# Always use unpruned denominator (bases), because pruning is based on
# unweighted bases explicitly
den = self._denominator(
axis, weighted, False, include_transforms_for_dims, False
)

res = self._drop_mr_cat_dims(num / den)

# Apply correct mask (based on the as_array shape)
# Apply correct pruning mask (based on the as_array shape)
arr = self.as_array(
prune=prune,
include_transforms_for_dims=include_transforms_for_dims,
Expand Down Expand Up @@ -928,6 +861,32 @@ def _cube_dict(self):
'(str) or dict.' % type(self._cube_response_arg).__name__
)

def _denominator(self, axis, weighted, include_missing,
                 include_transforms_for_dims, prune=False):
    """Return the denominator (base) array for the requested *axis*.

    The raw counts table has missings and insertions applied and is then
    summed across the adjusted axis. Each summed axis is re-inserted as a
    length-1 dimension, so the result keeps the rank of the raw table.

    Parameters
    ----------
    axis
        Requested direction; translated by `self._adjust_axis()` into the
        collection of raw-table axes to sum across.
    weighted
        Forwarded to `self._counts()` to select the counts table.
    include_missing
        Forwarded to `self._apply_missings_and_insertions()`.
    include_transforms_for_dims
        Dimensions for which H&S insertions were requested; those summed
        across are filtered out by `self._hs_dims_for_den()`.
    prune
        When true, cells whose *unweighted* count is zero are masked
        before insertions are applied and the sum is taken.

    Returns
    -------
    The summed denominator, or the un-summed denominator when the adjusted
    axis is out of bounds for it.
    """
    table = self._counts(weighted).raw_cube_array
    new_axis = self._adjust_axis(axis)
    # `None` re-creates (as length 1) every axis removed by the sum below.
    index = tuple(
        None if i in new_axis else slice(None)
        for i in range(len(table.shape))
    )

    if prune:
        # Always prune only based on _unweighted_ counts
        mask = self._counts(False).raw_cube_array == 0
        table = np.ma.masked_array(table, mask)

    # Calculate denominator. Only include those H&S dimensions, across
    # which we DON'T sum. These H&S are needed because of the shape, when
    # dividing. Those across dims which are summed across MUST NOT be
    # included, because they would change the result.
    hs_dims = self._hs_dims_for_den(include_transforms_for_dims, axis)
    den = self._apply_missings_and_insertions(
        table, hs_dims, include_missing=include_missing
    )

    # NumPy 2.0 dropped `np.AxisError` from the main namespace; it lives in
    # `np.exceptions` since 1.25. Resolve it in a way that works on both,
    # otherwise the fallback below would itself raise AttributeError.
    axis_error = getattr(np, "exceptions", np).AxisError
    try:
        return np.sum(den, axis=new_axis)[index]
    except axis_error:
        # The adjusted axis does not exist in `den`; nothing to sum across.
        return den

def _drop_mr_cat_dims(self, array, fix_valids=False):
"""Return ndarray reflecting *array* with MR_CAT dims dropped.
Expand Down Expand Up @@ -991,6 +950,14 @@ def _fix_valid_indices(cls, valid_indices, insertion_index, dim):
valid_indices[dim] = indices.tolist()
return valid_indices

@staticmethod
def _hs_dims_for_den(hs_dims, axis):
    """Return the H&S dimensions that are not being summed across.

    Parameters
    ----------
    hs_dims
        Iterable of dimension indices for which H&S transforms were
        requested, or None.
    axis
        A single axis (any integral scalar, including numpy integers), an
        iterable of axes, or None.

    Returns
    -------
    None when either argument is None; otherwise a list of the entries of
    *hs_dims* that do not appear in *axis*, in their original order.
    """
    if axis is None or hs_dims is None:
        return None
    # Normalize a scalar axis to a 1-tuple. Checking iterability rather
    # than `isinstance(axis, int)` also covers numpy integer scalars, which
    # are not `int` instances and would fail the membership test below.
    try:
        summed_axes = tuple(axis)
    except TypeError:
        summed_axes = (axis,)
    return [dim for dim in hs_dims if dim not in summed_axes]

def _inserted_dim_inds(self, transform_dims, axis):
dim_ind = axis if self.ndim < 3 else axis + 1
if not transform_dims or dim_ind not in transform_dims:
Expand Down Expand Up @@ -1030,6 +997,10 @@ def _is_axis_allowed(self, axis):
In case the calculation is requested over CA items dimension, it is not
valid. It's valid in all other cases.
"""
if not self.dimensions:
# With no dimensions, no direction is allowed
return False

if axis is None:
# If table direction was requested, we must ensure that each slice
# doesn't have the CA items dimension (thus the [-2:] part). It's
Expand Down Expand Up @@ -1300,16 +1271,25 @@ def _update_result(self, result, insertions, dimension_index,
"""Insert subtotals into resulting ndarray."""
# TODO: valid_indices should be a tuple as a parameter and as a return
# value
masked = type(result) == np.ma.core.MaskedArray
if masked:
mask = result.mask
for j, (ind_insertion, value) in enumerate(insertions):
result = np.insert(
result, ind_insertion + j + 1, value, axis=dimension_index
)
if masked:
mask = np.insert(
mask, ind_insertion + j + 1, False, axis=dimension_index
)
valid_indices = (
valid_indices and
self._fix_valid_indices(
valid_indices, ind_insertion + j, dimension_index
)
)
if masked:
result = np.ma.masked_array(result, mask)
return result, valid_indices


Expand Down
Loading

0 comments on commit 2ef6f73

Please sign in to comment.