Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Refactor dimension #112

Merged
merged 8 commits into from
Oct 29, 2018
Merged
Show file tree
Hide file tree
Changes from 7 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,9 @@ clean:
cleandocs:
$(MAKE) -C docs clean

coverage:
py.test --cov-report term-missing --cov=src --cov=tests -p no:warnings

docs:
$(MAKE) -C docs html

Expand Down
2 changes: 2 additions & 0 deletions setup.cfg
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
[tool:pytest]
python_classes = Test Describe
python_files = test_*.py
python_functions = test_ it_ they_ but_ and_it_
testpaths =
tests

Expand Down
76 changes: 38 additions & 38 deletions src/cr/cube/crunch_cube.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,8 +73,6 @@ def __init__(self, response):
'A `cube` must be JSON or `dict`.'
).format(type(response)))

self.slices = self.get_slices()

def __repr__(self):
text = '\n' + str(type(self))
text += '\nName: {}'.format(self.name)
Expand Down Expand Up @@ -158,7 +156,7 @@ def as_array(self, include_missing=False, weighted=True, adjusted=False,
@lazyproperty
def ca_dim_ind(self):
for (i, dim) in enumerate(self.dimensions):
if dim.type == 'categorical_array':
if dim.dimension_type == 'categorical_array':
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I renamed Dimension.type to .dimension_type. There are some comments in the commit going into the rationale. The short version is that a property or method named .type or .id is best avoided. Among other benefits, the more specific name tells the reader what kind of type it is when context can't make that clear.

return i
else:
return None
Expand Down Expand Up @@ -201,7 +199,7 @@ def description(self):

@lazyproperty
def dim_types(self):
return [dim.type for dim in self.dimensions]
return [d.dimension_type for d in self.dimensions]

@lazyproperty
def dimensions(self):
Expand Down Expand Up @@ -253,7 +251,7 @@ def get_slices(self, ca_as_0th=False):

@lazyproperty
def has_means(self):
"""Check if cube has means."""
"""True if cube contains means data."""
measures = self._cube.get('result', {}).get('measures')
if not measures:
return False
Expand Down Expand Up @@ -309,7 +307,7 @@ def is_double_mr(self):
@lazyproperty
def is_univariate_ca(self):
"""Check if cube is a just the CA ("ca x cat" or "cat x ca" dims)"""
types = {d.type for d in self.dimensions}
types = {d.dimension_type for d in self.dimensions}
ca_types = {'categorical_array', 'categorical'}
return self.ndim == 2 and types == ca_types

Expand Down Expand Up @@ -429,8 +427,8 @@ def hs_dims_for_den(hs_dims, axis):
include_missing=include_missing,
)
arr = self._fix_shape(arr, fix_valids=include_missing)
if isinstance(arr, np.ma.core.MaskedArray):

if isinstance(arr, np.ma.core.MaskedArray):
inflate_ind = tuple(
(
None
Expand Down Expand Up @@ -470,7 +468,7 @@ def missing(self):
def mr_dim_ind(self):
indices = [
i for i, dim in enumerate(self.dimensions)
if dim.type == 'multiple_response'
if dim.dimension_type == 'multiple_response'
]
if indices:
return indices[0] if len(indices) == 1 else tuple(indices)
Expand All @@ -488,7 +486,7 @@ def mr_selections_indices(self):
mr_dimensions_indices = [
i for (i, dim) in enumerate(self.all_dimensions)
if (i + 1 < len(self.all_dimensions) and
dim.type == 'multiple_response')
dim.dimension_type == 'multiple_response')
]

# For each MR and CA dimension, the 'selections' dimension
Expand Down Expand Up @@ -801,16 +799,20 @@ def scale_means(self, hs_dims=None, prune=False):
scale_means[1] = scale_means[1][~col_mask]
return slices_means

@lazyproperty
def slices(self):
return self.get_slices()

@lazyproperty
def univariate_ca_main_axis(self):
"""For univariate CA, the main axis is the categorical axis"""
dim_types = [dim.type for dim in self.dimensions]
dim_types = [d.dimension_type for d in self.dimensions]
return dim_types.index('categorical')

def valid_indices_with_selections(self, include_missing=False):
"""Get all valid indices (including MR selections)."""
return [
dim.valid_indices(include_missing)
dim.element_indices(include_missing)
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I renamed Dimension.valid_indices() to .element_indices() since it can return either all element indices or only the valid ones, depending on the include_missing argument. This eliminates a jarring contradiction in the naming.

for dim in self.all_dimensions
]

Expand Down Expand Up @@ -889,7 +891,7 @@ def _adjust_axis(self, axis):
# axis (that were provided by the user). But we don't need to update
# the axis that are "behind" the current MR.
for i, dim in enumerate(self.dimensions):
if dim.type == 'multiple_response':
if dim.dimension_type == 'multiple_response':
# This formula updates only the axis that come "after" the
# current MR (items) dimension.
new_axis[axis >= i] += 1
Expand Down Expand Up @@ -1042,7 +1044,7 @@ def _fix_shape(self, array, fix_valids=False):
0 if dim.is_mr_selections(self.all_dimensions) else slice(None)
for dim, n in zip(self.all_dimensions, array.shape)
) if not fix_valids else np.ix_(*[
dim.valid_indices(False) if n > 1 else [0]
dim.element_indices(include_missing=False) if n > 1 else [0]
for dim, n in zip(self.all_dimensions, array.shape)
])
array = array[display_ind]
Expand Down Expand Up @@ -1074,32 +1076,29 @@ def _inserted_dim_inds(self, transform_dims, axis):
return np.array(inserted_inds[dim_ind] if len(inserted_inds) else [])

def _insertions(self, result, dimension, dimension_index):
insertions = []

for indices in dimension.hs_indices:
ind_subtotal_elements = np.array(indices['inds'])

if indices['anchor_ind'] == 'top':
ind_insertion = -1
elif indices['anchor_ind'] == 'bottom':
ind_insertion = result.shape[dimension_index] - 1
else:
ind_insertion = indices['anchor_ind']
"""Return list of (idx, sum) pairs representing subtotals.

ind = tuple(
[slice(None) for _ in range(dimension_index)] +
[ind_subtotal_elements]
)
axis = dimension_index
*idx* is the int offset at which to insert the ndarray subtotal
in *sum*.
"""

# no indices are provided (should never get here)
if len(indices['inds']) == 0:
value = 0
else:
value = np.sum(result[ind], axis=axis)
insertions.append((ind_insertion, value))
def iter_insertions():
for anchor_idx, addend_idxs in dimension.hs_indices:
insertion_idx = (
-1 if anchor_idx == 'top' else
result.shape[dimension_index] - 1 if anchor_idx == 'bottom'
else anchor_idx
)
addend_fancy_idx = tuple(
[slice(None) for _ in range(dimension_index)] +
[np.array(addend_idxs)]
)
yield (
insertion_idx,
np.sum(result[addend_fancy_idx], axis=dimension_index)
)

return insertions
return [insertion for insertion in iter_insertions()]
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I extracted the body of this method into the local generator function iter_insertions() to make this simpler to understand.


def _intersperse_hs_in_std_res(self, hs_dims, res):
for dim, inds in enumerate(self.inserted_hs_indices()):
Expand Down Expand Up @@ -1334,10 +1333,11 @@ def _transform(self, res, include_transforms_for_dims,
# Check if transformations can/need to be performed
transform = (dim.has_transforms and
i - dim_offset in include_transforms_for_dims)
if dim.type == 'multiple_response':
if dim.dimension_type == 'multiple_response':
dim_offset += 1
if (not transform or
dim.type in ITEM_DIMENSION_TYPES or dim.is_selections):
dim.dimension_type in ITEM_DIMENSION_TYPES or
dim.is_selections):
continue
# Perform transformations
insertions = self._insertions(res, dim, i)
Expand Down
Loading