Skip to content

Commit

Permalink
Merge 1e7412f into f46f645
Browse files Browse the repository at this point in the history
  • Loading branch information
slobodan-ilic committed Oct 17, 2018
2 parents f46f645 + 1e7412f commit 7dfa003
Show file tree
Hide file tree
Showing 12 changed files with 7,745 additions and 22 deletions.
5 changes: 5 additions & 0 deletions HISTORY.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,10 @@
# History of Changes

#### 1.6.11 Deprecate `shape`
- Deprecate the `CubeSlice` `shape` property
- Use `get_shape(prune=False)` instead
- Will be removed in future versions

#### 1.6.10 Fix README on pypi

#### 1.6.9 Bugfix
Expand Down
9 changes: 4 additions & 5 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,10 @@ The detailed description can be found

## Changes

#### 1.7.2
- Implement correct index table functionality
- Deprecate old index functionality

#### 1.7.1 Fix index error
- Fix peculiar case of CA x CAT (single elem) index error
- Support with unit tests
Expand All @@ -106,9 +110,4 @@ The detailed description can be found
- Fixed pesky numpy warnings
- Replaced vulnerable lazyproperty implementation

#### 1.6.11 Deprecate `shape`
- Deprecate the `CubeSlice` `shape` property
- Use `get_shape(prune=False)` instead
- Will be removed in future versions

For a complete list of changes see [history](https://github.com/Crunch-io/crunch-cube/blob/master/HISTORY.md).
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@

from setuptools import setup, find_packages

version = '1.7.1'
version = '1.7.2'


def get_long_desc():
Expand Down
53 changes: 37 additions & 16 deletions src/cr/cube/crunch_cube.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@

import json
import numpy as np
import warnings

from scipy.stats import norm
from scipy.stats.contingency import expected_freq
Expand Down Expand Up @@ -255,7 +256,12 @@ def has_mr(self):
return self.mr_dim_ind is not None

def index(self, weighted=True, prune=False):
    """Return cube index measurement.

    This function is deprecated. Use index_table from CubeSlice.

    The arguments ``weighted`` and ``prune`` are forwarded unchanged to
    ``Index.data`` together with this cube; its result is returned as is.
    A ``DeprecationWarning`` is emitted on every call.
    """
    msg = "Deprecated. Use index_table from CubeSlice"
    # stacklevel=2 attributes the warning to the code calling ``index``
    # rather than to this line, so users can locate the deprecated call.
    warnings.warn(msg, DeprecationWarning, stacklevel=2)
    return Index.data(self, weighted, prune)

def inserted_hs_indices(self, prune=False):
Expand Down Expand Up @@ -324,7 +330,7 @@ def labels(self, include_missing=False, include_transforms_for_dims=False):
for dim in self.dimensions
]

def margin(self, axis=None, weighted=True,
def margin(self, axis=None, weighted=True, include_missing=False,
include_transforms_for_dims=None, prune=False):
"""Get margin for the selected axis.
Expand Down Expand Up @@ -402,14 +408,18 @@ def hs_dims_for_den(hs_dims, axis):
# dividing. Those across dims which are summed across MUST NOT be
# included, because they would change the result.
hs_dims = hs_dims_for_den(include_transforms_for_dims, axis)
den = self._transform(table, hs_dims, inflate=True, fix=True)
den = self._transform(
table, hs_dims, inflate=True, fix=True,
include_missing=include_missing,
)

# Apply correct mask (based on the as_array shape)
arr = self.as_array(
arr = self._as_array(
prune=prune,
# include_transforms_for_dims=include_transforms_for_dims,
include_transforms_for_dims=hs_dims,
include_missing=include_missing,
)
arr = self._fix_shape(arr, fix_valids=include_missing)
if isinstance(arr, np.ma.core.MaskedArray):

inflate_ind = tuple(
Expand All @@ -435,7 +445,7 @@ def hs_dims_for_den(hs_dims, axis):
# Special case for 1D cube with MR, for "Table" direction
den = np.sum(den, axis=new_axis)[index]

den = self._fix_shape(den)
den = self._fix_shape(den, fix_valids=include_missing)
if den.shape[0] == 1 and len(den.shape) > 1 and self.ndim < 3:
den = den.reshape(den.shape[1:])
return den
Expand Down Expand Up @@ -788,10 +798,12 @@ def univariate_ca_main_axis(self):
dim_types = [dim.type for dim in self.dimensions]
return dim_types.index('categorical')

@lazyproperty
def valid_indices_with_selections(self):
def valid_indices_with_selections(self, include_missing=False):
"""Get all valid indices (including MR selections)."""
return [dim.valid_indices(False) for dim in self.all_dimensions]
return [
dim.valid_indices(include_missing)
for dim in self.all_dimensions
]

def zscore(self, weighted=True, prune=False, hs_dims=None):
"""Get cube zscore measurement."""
Expand Down Expand Up @@ -917,7 +929,10 @@ def _as_array(self, include_missing=False, get_non_selected=False,
dimensions = self.all_dimensions
shape = [len(dim.elements(include_missing=True)) for dim in dimensions]
res = np.array(values).reshape(shape)
res = self._transform(res, include_transforms_for_dims, inflate=True)
res = self._transform(
res, include_transforms_for_dims, inflate=True,
include_missing=include_missing,
)
res = res + adjusted

if prune:
Expand Down Expand Up @@ -987,11 +1002,10 @@ def _create_mask(res, row_prune_inds, col_prune_inds):
)
return np.logical_or(mask_rows, mask_cols)

def _fix_shape(self, array):
def _fix_shape(self, array, fix_valids=False):
"""Fixes shape of MR variables.
For MR variables, where 'selections' dims are dropped, the ndarray
needs to be reshaped, in order to seem as if those dims never existed.
Also, if any (except 1st) dimension has a single element, it is
flattened in the resulting array (which is more convenient for the
users of the CrunchCube). If the original shape of the cube is
Expand All @@ -1000,6 +1014,7 @@ def _fix_shape(self, array):
general, use private methods, if operating inside CrunchCube. API
methods should only be used from outside CrunchCube.
"""

if not array.shape or len(array.shape) != len(self.all_dimensions):
# This condition covers two cases:
# 1. In case of no dimensions, the shape of the array is empty
Expand All @@ -1016,8 +1031,11 @@ def _fix_shape(self, array):

display_ind = tuple(
0 if dim.is_mr_selections(self.all_dimensions) else slice(None)
for dim in self.all_dimensions
)
for dim, n in zip(self.all_dimensions, array.shape)
) if not fix_valids else np.ix_(*[
dim.valid_indices(False) if n > 1 else [0]
for dim, n in zip(self.all_dimensions, array.shape)
])
array = array[display_ind]

# If a first dimension only has one element, we don't want to
Expand Down Expand Up @@ -1291,9 +1309,12 @@ def _shape(self):
return tuple([dim.shape for dim in self.all_dimensions])

def _transform(self, res, include_transforms_for_dims,
inflate=False, fix=True):
valid_indices = self.valid_indices_with_selections if fix else None
inflate=False, fix=True, include_missing=False):
"""Transform the shape of the resulting ndarray."""
valid_indices = (
self.valid_indices_with_selections(include_missing)
if fix else None
)
if not include_transforms_for_dims:
return res[np.ix_(*valid_indices)] if valid_indices else res

Expand Down
51 changes: 51 additions & 0 deletions src/cr/cube/cube_slice.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@

"""CubeSlice class."""

from __future__ import division

from functools import partial
import warnings
import numpy as np
Expand Down Expand Up @@ -56,6 +58,35 @@ def __getattr__(self, attr):
# ---otherwise, the property value is the same for cube or slice---
return cube_attr

def _prepare_index_baseline(self, axis):
# Compute the default baseline that index_table divides the proportions
# by when the caller does not supply one. `axis` is used arithmetically
# (1 - axis, axis + 1), so it must be an integer, not None.
#
# First get the margin of the opposite direction of the index axis.
# We need this in order to end up with the right shape of the
# numerator vs denominator. Missing values are included in this margin
# (include_missing=True).
baseline = self.margin(axis=(1 - axis), include_missing=True)

# Now check if the shape of the marginal needs to be fixed, because
# different versions of the MR containing cubes, combined with
# different margin directions, provide marginals of different shapes.
# We also need to calculate the percentage marginals correctly,
# so we need to perform the addition (to get the denominator)
# across the correct axis.
if axis == self.mr_dim_ind:
# Index axis is the (single) MR dimension: normalize every row by its
# sum along axis 1, then keep only column 0.
# NOTE(review): column 0 is presumably the "selected" slot of the MR
# selections dimension -- confirm against the margin layout.
baseline = baseline / np.sum(baseline, axis=1)[:, None]
return baseline[:, 0]
elif isinstance(self.mr_dim_ind, tuple) and axis in self.mr_dim_ind:
# mr_dim_ind is a tuple of indices and the index axis is one of them:
# the denominator is the sum along the axis following the index axis.
total = np.sum(baseline, axis=(axis + 1))
if axis == 0:
return baseline[:, 0, 0] / total[:, 0]
return baseline[0, :, 0] / total[0]

if axis == 0 and self.mr_dim_ind is not None:
# Remaining MR case for axis 0: keep column 0 and normalize by the
# grand total of that column.
baseline = baseline[:, 0]
return baseline / np.sum(baseline)

# All remaining cases: reduce a multi-dimensional margin to its first
# entry along axis 0, then normalize by the grand total.
baseline = baseline if len(baseline.shape) <= 1 else baseline[0]
baseline = baseline / np.sum(baseline)
# NOTE(review): after the division above the total is ~1, so for a 1-D
# baseline this second division looks like a no-op -- confirm whether it
# is intentional (it only changes the result for 2-D or higher input).
return baseline / np.sum(baseline, axis=0)

@lazyproperty
def ca_dim_ind(self):
"""Return items dimension index if there is one.
Expand Down Expand Up @@ -146,6 +177,26 @@ def has_mr(self):
"""
return 'multiple_response' in self.dim_types

def index_table(self, axis=None, baseline=None):
    """Return index percentages for a given axis and baseline.

    Each index value is the ratio of a proportion to its corresponding
    baseline value, expressed as a percentage (a value of 100 means the
    proportion equals the baseline).

    :param axis: Direction passed to ``proportions`` and, when no
        baseline is supplied, to ``_prepare_index_baseline``.
    :param baseline: Optional array-like of baseline proportions. When
        omitted (``None``), it is computed by
        ``_prepare_index_baseline(axis)``, which requires an integer
        ``axis``.
    :returns: ``proportions / baseline * 100``.
    :raises TypeError: If both ``axis`` and ``baseline`` are ``None``
        (no default baseline can be derived without a direction).
    """
    proportions = self.proportions(axis=axis)

    if baseline is None:
        if axis is None:
            # Computing the default baseline evaluates ``1 - axis``; fail
            # with the same exception type but an actionable message.
            raise TypeError(
                "index_table requires an integer `axis` when no "
                "`baseline` is provided"
            )
        baseline = self._prepare_index_baseline(axis)
    else:
        # Accept any array-like (list, tuple, scalar); asanyarray keeps
        # ndarray subclasses such as masked arrays intact.
        baseline = np.asanyarray(baseline)

    # Fix the shape to enable correct broadcasting: a 1-D baseline for
    # axis 0 must be applied as a column. 0-d baselines broadcast as is.
    if axis == 0 and baseline.ndim == 1:
        baseline = baseline[:, None]

    return proportions / baseline * 100

@lazyproperty
def is_double_mr(self):
"""This has to be overridden from cr.cube.
Expand Down
3 changes: 3 additions & 0 deletions tests/integration/fixtures/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -144,3 +144,6 @@ def _load(cube_file):
CAT_X_MR_X_CAT = _load('cat-x-mr-x-cat.json')
CAT_X_CAT_FILTERED_POP = _load('cat-x-cat-filtered-population.json')
UNIV_MR_WITH_HS = _load('univ-mr-with-hs.json')
FULL_CUBE = _load('full-cube.json')
NATREP = _load('natrep-cube.json')
MR_X_MR_INDEX_TABLE = _load('mr-x-mr-index-table.json')
Loading

0 comments on commit 7dfa003

Please sign in to comment.