Skip to content

Commit

Permalink
Implement correct proportions for MRs
Browse files Browse the repository at this point in the history
  • Loading branch information
slobodan-ilic committed Apr 9, 2018
1 parent 0c0dce7 commit 6af2d71
Showing 1 changed file with 57 additions and 14 deletions.
71 changes: 57 additions & 14 deletions src/cr/cube/crunch_cube.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,8 @@ def __init__(self, response):
).format(type(response)))


def _get_valid_indices(self, dimensions, include_missing, get_non_selected):
def _get_valid_indices(self, dimensions, include_missing,
get_non_selected=False, get_all_mr=False):
'''Gets valid indices for each dimension.
Main criterion for a valid index is most often the information about
Expand All @@ -89,7 +90,12 @@ def _get_valid_indices(self, dimensions, include_missing, get_non_selected):
for dim in dimensions]

mr_selections_indices = self.table.mr_selections_indices
mr_slice = [1] if get_non_selected else [0]
mr_slice = [0]
if get_all_mr:
mr_slice = [0, 1, 2]
elif get_non_selected:
mr_slice = [1]
# mr_slice = [1] if get_non_selected else [0]
if mr_selections_indices:
# In the case of MR variables, we only need to select the
# 'selected' slice of the 'selections' dimension.
Expand Down Expand Up @@ -348,14 +354,18 @@ def _calculate_along_non_mr(self, axis):
self.mr_dim_ind == 1 and axis == 1 and len(self.dimensions) > 2
)

def _mr_margin_along_non_mr_dim(self, axis, weighted):
def _mr_margin_along_non_mr_dim(self, axis, weighted,
include_transforms_for_dims=None):
'''Calculate MR margin along non-MR dimension.
For cases when margin is calculated along the axis which is not MR,
we need to perform summation along that axis, on the tabular
representation of the cube (which is obtained with 'as_array').
'''
array = self.as_array(weighted=weighted)
array = self.as_array(
weighted=weighted,
include_transforms_for_dims=include_transforms_for_dims
)

if axis == 1 and len(array.shape) == 1:
# If array representation of the cube has less dimensions than
Expand All @@ -376,22 +386,45 @@ def _get_mr_slice(self, selected=True):
indices.append(slice(None))
return tuple(indices)

def _mr_margin(self, axis, weighted, adjusted):
def _transform_table(self, table, include_transforms_for_dims):
    '''Apply transforms to the full table, MR selections included.

    Valid indices are requested for all of the table's dimensions, with
    ``include_missing=False`` and ``get_all_mr=True``; the table is then
    passed through ``_transform`` with ``inflate=True`` and the result
    is returned.
    '''
    all_valid_indices = self._get_valid_indices(
        self.table.all_dimensions,
        include_missing=False,
        get_all_mr=True
    )
    return self._transform(
        table, include_transforms_for_dims, all_valid_indices, inflate=True
    )

def _mr_margin(self, axis, weighted, adjusted,
include_transforms_for_dims=None):
'''Margin for cube that contains MR.'''
if self.is_double_mr:
return self._double_mr_margin(axis, weighted)
elif len(self.dimensions) == 1:
return self._1d_mr_margin(axis, weighted)
elif self._calculate_along_non_mr(axis):
return self._mr_margin_along_non_mr_dim(axis, weighted)
return self._mr_margin_along_non_mr_dim(axis, weighted,
include_transforms_for_dims)

table = self.table.data(weighted)
if include_transforms_for_dims:
# In case of H&S the entire table needs to be
# transformed (with selections).
table = self._transform_table(table, include_transforms_for_dims)

# For cases when the margin is calculated for the MR dimension, we need
# the sum of selected and non-selected slices (if axis is None), or the
# collapsed version (another sum along the axis), if axis is defined.
margin = table[self.ind_selected] + table[self.ind_non_selected]
margin = margin[np.ix_(*self.valid_indices)]
if not include_transforms_for_dims:
# If entire table was transformed, we already have it with all the
# valid indices. If not, we need to apply valid indices.
margin = margin[np.ix_(*self.valid_indices)]

if axis is None:
axis = tuple([
Expand Down Expand Up @@ -422,8 +455,8 @@ def _margin(self, axis=None, weighted=True, adjusted=False,
# MR margins are calculated differently, so they need a separate method
# for them. A good example of this is the rcrunch functionality.
if self.has_mr:
return self._mr_margin(axis, weighted, adjusted)

return self._mr_margin(axis, weighted, adjusted,
include_transforms_for_dims)
# If there are no MR variables, the margins are mostly sums across
# appropriate dimensions.
transform_dims = include_transforms_for_dims and (
Expand Down Expand Up @@ -481,7 +514,14 @@ def _mr_proportions(self, axis, weighted, prune,

# The following are normal MR x something (not CA)
elif axis == 0:
res = table[:, 0, :] / (table[:, 0, :] + table[:, 1, :])
num = self.as_array(
include_transforms_for_dims=include_transforms_for_dims
)
den = self.margin(
axis=0,
include_transforms_for_dims=include_transforms_for_dims
)
return num / den
else:
num = self.as_array(
include_transforms_for_dims=include_transforms_for_dims
Expand Down Expand Up @@ -512,7 +552,6 @@ def _mr_proportions(self, axis, weighted, prune,
return self.as_array() / margin

return self._transform(res, include_transforms_for_dims, valid_indices)
# return res[np.ix_(*valid_indices)]

@property
def is_univariate_ca(self):
Expand Down Expand Up @@ -737,8 +776,7 @@ def _adjust_inserted_indices(inserted_indices_list, prune_indices_list):
inserted_inds[i] = ind
return inserted_indices_list

@staticmethod
def _insertions(result, dimension, dimension_index):
def _insertions(self, result, dimension, dimension_index):
insertions = []

for indices in dimension.hs_indices:
Expand All @@ -752,7 +790,12 @@ def _insertions(result, dimension, dimension_index):
if dimension_index == 0:
value = sum(result[ind_subtotal_elements])
else:
value = np.sum(result[:, ind_subtotal_elements], axis=1)
ind = [slice(None), ind_subtotal_elements]
axis = 1
if self.has_mr:
ind = [slice(None)] + ind
axis = 2
value = np.sum(result[ind], axis=axis)
insertions.append((ind_insertion, value))

return insertions
Expand Down

0 comments on commit 6af2d71

Please sign in to comment.