Skip to content

Commit

Permalink
Merge branch 'scale-means-#154065274' into rel-5.2.225
Browse files Browse the repository at this point in the history
  • Loading branch information
Crunch.io Jenkins Account committed Aug 6, 2018
2 parents e207398 + be2b43d commit 0ebd8cc
Show file tree
Hide file tree
Showing 18 changed files with 5,424 additions and 80 deletions.
32 changes: 27 additions & 5 deletions src/cr/cube/crunch_cube.py
Original file line number Diff line number Diff line change
Expand Up @@ -1082,9 +1082,6 @@ def zscore(self, weighted=True, prune=False, hs_dims=None):

if hs_dims:
res = self._intersperse_hs_in_std_res(hs_dims, res)
arr = self.as_array(include_transforms_for_dims=hs_dims)
if isinstance(arr, np.ma.core.MaskedArray):
res = np.ma.masked_array(res, mask=arr.mask)

if prune:
arr = self.as_array(
Expand Down Expand Up @@ -1126,9 +1123,34 @@ def pvals(self, weighted=True, prune=False, hs_dims=None):

return res

def scale_means(self):
def scale_means(self, hs_dims=None, prune=False):
'''Get cube means.'''
return ScaleMeans(self).data
slices_means = [ScaleMeans(slice_).data for slice_ in self.slices]

if hs_dims and self.ndim > 1:
# Intersperse scale means with nans if H&S specified
inserted_indices = self.inserted_hs_indices()
for scale_means in slices_means:
if scale_means[0] is not None and 0 in hs_dims and inserted_indices[0]:
for i in inserted_indices[0]:
scale_means[0] = np.insert(scale_means[0], i, np.nan)
if scale_means[1] is not None and 1 in hs_dims and inserted_indices[1]:
for i in inserted_indices[1]:
scale_means[1] = np.insert(scale_means[1], i, np.nan)

if prune:
# Apply pruning
arr = self.as_array(include_transforms_for_dims=hs_dims, prune=True)
if isinstance(arr, np.ma.core.MaskedArray):
mask = arr.mask
for i, scale_means in enumerate(slices_means):
if scale_means[0] is not None:
row_mask = mask.all(axis=0) if self.ndim < 3 else mask.all(axis=1)[i]
scale_means[0] = scale_means[0][~row_mask]
if self.ndim > 1 and scale_means[1] is not None:
col_mask = mask.all(axis=1) if self.ndim < 3 else mask.all(axis=2)[i]
scale_means[1] = scale_means[1][~col_mask]
return slices_means

def get_slices(self, ca_as_0th=False):
if self.ndim < 3 and not ca_as_0th:
Expand Down
18 changes: 18 additions & 0 deletions src/cr/cube/cube_slice.py
Original file line number Diff line number Diff line change
Expand Up @@ -153,6 +153,21 @@ def has_ca(self):
'''
return 'categorical_array' in self.dim_types

@property
def ca_dim_ind(self):
    '''Get the index of the CA dimension as seen from this slice.

    The value is derived from the underlying cube's ``ca_dim_ind``:

    - ``None`` on the cube stays ``None``.
    - When ``self.ndim`` is not 3, the cube's index is returned as is.
    - When ``self.ndim`` is 3, a cube index of 0 yields ``None`` and any
      other index is shifted down by one.
    '''
    cube_index = self._cube.ca_dim_ind

    # No CA dimension at all, or not the 3D case: the cube's own answer
    # is passed through unchanged.
    if cube_index is None or self.ndim != 3:
        return cube_index

    # 3D case. If the tab dimension (index 0) is the items dimension, the
    # slices themselves are not CA; otherwise account for the dropped
    # leading dimension.
    return None if cube_index == 0 else cube_index - 1

@property
def mr_dim_ind(self):
'''Get the correct index of the MR dimension in the cube slice.'''
Expand Down Expand Up @@ -230,3 +245,6 @@ def is_double_mr(self):
properties of the slices.
'''
return self.dim_types == ['multiple_response'] * 2

def scale_means(self, hs_dims=None, prune=False):
    '''Get the scale means that belong to this slice.

    Delegates to the underlying cube's ``scale_means`` (forwarding
    ``hs_dims`` and ``prune`` positionally) and picks the entry at this
    slice's ``_index`` from the returned sequence.
    '''
    per_slice_means = self._cube.scale_means(hs_dims, prune)
    return per_slice_means[self._index]
87 changes: 43 additions & 44 deletions src/cr/cube/measures/scale_means.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,61 +9,60 @@

class ScaleMeans(object):
'''Implementation of the Means service.'''
def __init__(self, cube):
self._cube = cube
def __init__(self, slice_):
self._slice = slice_

@lazyproperty
def data(self):
'''Get the means calculation.'''
table = self._cube.as_array()
contents = self._inner_prod(table, self.values)
means = []
table = self._slice.as_array()
products = self._inner_prods(table, self.values)

if self._cube.has_mr and not self._cube.is_double_mr:
axis = 1 - self._cube.mr_dim_ind
return np.sum(contents, axis) / np.sum(table, axis)
for axis, product in enumerate(products):
if product is None:
means.append(product)
continue

if self.valid_inds.all():
return np.sum(contents, self.axis) / self._cube.margin(self.axis)
else:
num = np.sum(contents[self.contents_inds], self.axis)
den = np.sum(table[self.contents_inds], self.axis)
return num / den

@lazyproperty
def axis(self):
'''Get axis for means calculation.'''
axis = 0
if self._cube.ca_dim_ind == 0 or self._cube.ca_dim_ind == 2:
axis = 1
elif len(self._cube.dimensions) > 2 and self._cube.ca_dim_ind == 1:
axis = 2
return axis
# Calculate means
num = np.sum(product[self.valid_indices(axis)], axis)
den = np.sum(table[self.valid_indices(axis)], axis)
mean = num / den
if not isinstance(mean, np.ndarray):
mean = np.array([mean])
means.append(mean)
return means

@lazyproperty
def values(self):
'''Get num values for means calculation.'''
return np.array([
dim.values for dim in self._cube.dimensions
if dim.values and any(~np.isnan(dim.values))
][int(len(self._cube.dimensions) > 2)])

@lazyproperty
def valid_inds(self):
'''Valid indices for numerical values.'''
return ~np.isnan(self.values)
return [
(
np.array(dim.values)
if dim.values and any(~np.isnan(dim.values)) else
None
)
for dim in self._slice.dimensions
]

@lazyproperty
def contents_inds(self):
'''Create contents selection indices based on valid num vals.'''
def valid_indices(self, axis):
return [
slice(None) if i != self.axis else self.valid_inds
for i in range(len(self._cube.as_array().shape))
(
~np.isnan(np.array(dim.values))
if dim.values and any(~np.isnan(dim.values)) and axis == i else
slice(None)
)
for i, dim in enumerate(self._slice.dimensions)
]

def _inner_prod(self, contents, values):
if len(contents.shape) == 3 and self._cube.ca_dim_ind == 0:
values = values[:, np.newaxis]
try:
return contents * values
except:
return contents * values[:, np.newaxis]
def _inner_prods(self, contents, values):
products = []
for i, numeric in enumerate(values):
if numeric is None:
products.append(numeric)
continue
inflate = self._slice.ndim > 1 and not i
numeric = numeric[:, None] if inflate else numeric
product = contents * numeric
products.append(product)
return products
14 changes: 14 additions & 0 deletions tests/integration/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
# encoding: utf-8

import numpy as np


def assert_scale_means_equal(actual, expected):
    '''Assert that two (possibly nested) scale-means structures match.

    Both arguments are sequences whose items are compared pairwise:

    - two numpy arrays are compared with
      ``np.testing.assert_almost_equal``;
    - two lists are compared recursively;
    - anything else (e.g. ``None`` placeholders) is compared with ``==``.

    Raises AssertionError on the first mismatch, including when the two
    sequences have different lengths.
    '''
    # zip() stops at the shorter input, so without this check a missing or
    # surplus entry (e.g. an empty `actual`) would go unnoticed.
    assert len(actual) == len(expected), (
        'scale means length mismatch: %d != %d'
        % (len(actual), len(expected))
    )
    for act, exp in zip(actual, expected):
        if isinstance(exp, np.ndarray) and isinstance(act, np.ndarray):
            np.testing.assert_almost_equal(act, exp)
        elif isinstance(exp, list) and isinstance(act, list):
            assert_scale_means_equal(act, exp)
        else:
            assert act == exp
Empty file.
21 changes: 21 additions & 0 deletions tests/integration/fixtures/cubes/scale_means/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
import os
from functools import partial

from cr.cube.utils import load_fixture

# Absolute path of the directory that contains this module; passed to
# load_fixture as its first argument when loading the fixtures below.
CUBES_DIR = os.path.dirname(os.path.abspath(__file__))


def _load(cube_file):
    '''Return the result of load_fixture for `cube_file` in CUBES_DIR.

    `cube_file` is the fixture file name; it is passed to `load_fixture`
    together with this package's CUBES_DIR as the first argument.
    '''
    return load_fixture(CUBES_DIR, cube_file)


# Scale-means cube fixtures, loaded once at import time via _load().
# Each constant is named after the JSON file it is loaded from.
CA_CAT_X_ITEMS = _load('ca-cat-x-items.json')
CA_ITEMS_X_CAT = _load('ca-items-x-cat.json')
CA_X_MR = _load('ca-x-mr.json')
CAT_X_CA_CAT_X_ITEMS = _load('cat-x-ca-cat-x-items.json')
CAT_X_CAT = _load('cat-x-cat.json')
CAT_X_MR = _load('cat-x-mr.json')
MR_X_CAT = _load('mr-x-cat.json')
UNIVARIATE_CAT = _load('univariate-cat.json')
Loading

0 comments on commit 0ebd8cc

Please sign in to comment.