Skip to content

Commit

Permalink
Merge pull request #173 from Crunch-io/means-with-insertions
Browse files Browse the repository at this point in the history
fix: means with insertions
  • Loading branch information
scanny committed Jul 1, 2019
2 parents d080f36 + a824ff6 commit f1ad558
Show file tree
Hide file tree
Showing 7 changed files with 590 additions and 503 deletions.
3 changes: 3 additions & 0 deletions HISTORY.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
# History of Changes

#### 1.11.4
- Fix means on `_Slice` having subtotals.

#### 1.11.3
- Refactor hidden and pruned slices

Expand Down
2 changes: 1 addition & 1 deletion src/cr/cube/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,6 @@

"""Initialization module for crunch-cube package."""

__version__ = "1.11.3"
__version__ = "1.11.4"

# NOTE: We'll be switching to 2.0.0 once we throw out the old cube and slice
55 changes: 33 additions & 22 deletions src/cr/cube/matrix.py
Original file line number Diff line number Diff line change
Expand Up @@ -1061,6 +1061,14 @@ def column_index(self):
+ tuple([np.nan] * len(self._bottom_values))
)

@lazyproperty
def means(self):
    """1D ndarray of means for this vector, NaN-padded at both ends.

    One NaN is placed ahead of the interleaved means for each item in
    `._top_values` and one after them for each item in `._bottom_values`.
    """
    # NOTE(review): top/bottom values are presumably top- and bottom-anchored
    # insertions, for which no mean is computed here -- confirm.
    leading_nans = (np.nan,) * len(self._top_values)
    interleaved = self._interleaved_means
    trailing_nans = (np.nan,) * len(self._bottom_values)
    return np.array(leading_nans + interleaved + trailing_nans)

@lazyproperty
def proportions(self):
    """Quotient of this vector's `.values` divided by its `.margin`."""
    numerator = self.values
    denominator = self.margin
    return numerator / denominator
Expand Down Expand Up @@ -1134,6 +1142,16 @@ def _interleaved_column_index(self):
column_index.append(np.nan)
return tuple(column_index)

@lazyproperty
def _interleaved_means(self):
    """tuple of base-vector means with NaN following each anchored position.

    Each mean of the base vector appears in its original order. Directly after
    the mean at position `idx`, one NaN is added for every vector in
    `._opposite_inserted_vectors` whose `.anchor` equals `idx`.
    """
    interleaved = []
    for idx, mean in enumerate(self._base_vector.means):
        interleaved.append(mean)
        # An inserted vector contributes a NaN placeholder, not a computed mean.
        interleaved.extend(
            [
                np.nan
                for vector in self._opposite_inserted_vectors
                if idx == vector.anchor
            ]
        )
    return tuple(interleaved)

@lazyproperty
def _interleaved_pvals(self):
pvals = []
Expand Down Expand Up @@ -1184,11 +1202,12 @@ def _top_values(self):
)


class _BaseVectorAfterHiding(_BaseTransformationVector):
class _VectorAfterHiding(_BaseTransformationVector):
"""Reflects a row or column with hidden elements removed."""

def __init__(self, base_vector):
def __init__(self, base_vector, opposite_vectors):
self._base_vector = base_vector
self._opposite_vectors = opposite_vectors

@lazyproperty
def base(self):
Expand All @@ -1200,6 +1219,10 @@ def base(self):
def base_values(self):
return self._base_vector.base_values[self._visible_element_idxs]

@lazyproperty
def column_index(self):
return self._base_vector.column_index[self._visible_element_idxs]

@lazyproperty
def margin(self):
if not isinstance(self._base_vector.margin, np.ndarray):
Expand All @@ -1210,26 +1233,6 @@ def margin(self):
def means(self):
return self._base_vector.means[self._visible_element_idxs]

@lazyproperty
def table_proportions(self):
return self._base_vector.table_proportions[self._visible_element_idxs]

@lazyproperty
def values(self):
return self._base_vector.values[self._visible_element_idxs]


class _VectorAfterHiding(_BaseVectorAfterHiding):
"""Reflects a row or column with hidden elements removed."""

def __init__(self, base_vector, opposite_vectors):
super(_VectorAfterHiding, self).__init__(base_vector)
self._opposite_vectors = opposite_vectors

@lazyproperty
def column_index(self):
return self._base_vector.column_index[self._visible_element_idxs]

@lazyproperty
def proportions(self):
return self._base_vector.proportions[self._visible_element_idxs]
Expand All @@ -1238,6 +1241,14 @@ def proportions(self):
def pvals(self):
return self._base_vector.pvals[self._visible_element_idxs]

@lazyproperty
def table_proportions(self):
return self._base_vector.table_proportions[self._visible_element_idxs]

@lazyproperty
def values(self):
return self._base_vector.values[self._visible_element_idxs]

@lazyproperty
def zscore(self):
return self._base_vector.zscore[self._visible_element_idxs]
Expand Down
120 changes: 120 additions & 0 deletions tests/fixtures/cat-x-cat-mean-subtot.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,120 @@
{
"result": {
"counts": [
3,
3,
2,
2,
0
],
"dimensions": [
{
"references": {"alias": "mean", "name": "mean"},
"type": {
"categories": [{"id": 1, "missing": false, "name": "Mean"}],
"class": "categorical"
}
},
{
"derived": false,
"references": {
"alias": "Education",
"description": "Education",
"name": "Education",
"view": {
"column_width": null,
"include_missing": false,
"show_counts": false,
"show_numeric_values": false,
"transform": {
"insertions": [
{
"anchor": 1,
"args": [
2,
3,
4
],
"function": "subtotal",
"name": "Any College"
}
]
}
}
},
"type": {
"categories": [
{
"id": 1,
"missing": false,
"name": "HS or Less",
"numeric_value": 1
},
{
"id": 2,
"missing": false,
"name": "Some College",
"numeric_value": 2
},
{
"id": 3,
"missing": false,
"name": "College Grad",
"numeric_value": 3
},
{
"id": 4,
"missing": false,
"name": "Grad School",
"numeric_value": 4
},
{
"id": -1,
"missing": true,
"name": "No Data",
"numeric_value": null
}
],
"class": "categorical",
"ordinal": false
}
}
],
"filtered": {
"unweighted_n": 10,
"weighted_n": 10
},
"measures": {
"mean": {
"data": [
38.3333333333,
65.0,
55.0,
34.0,
{
"?": -8
}
],
"metadata": {
"derived": true,
"references": {},
"type": {
"class": "numeric",
"integer": true,
"missing_reasons": {
"NaN": -8,
"No Data": -1
},
"missing_rules": {}
}
},
"n_missing": 0
}
},
"n": 10,
"unfiltered": {
"unweighted_n": 10,
"weighted_n": 10
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -329,15 +329,17 @@ def it_calculates_mr_x_mr_row_proportions(self):
]
np.testing.assert_almost_equal(slice_.row_proportions, expected)

def it_calculates_mr_x_mr_column_proportions(self):
def it_knows_mr_x_mr_column_proportions(self):
slice_ = Cube(CR.MR_X_MR).partitions[0]
expected = [
[1.0, 0.13302403, 0.12391245, 0.22804396],
[0.28566937, 1.0, 0.23498805, 0.47751837],
[0.43456698, 0.34959546, 1.0, 0.72838875],
[1.0, 1.0, 1.0, 1.0],
]
np.testing.assert_almost_equal(slice_.column_proportions, expected)
np.testing.assert_almost_equal(
slice_.column_proportions,
[
[1.0, 0.13302403, 0.12391245, 0.22804396],
[0.28566937, 1.0, 0.23498805, 0.47751837],
[0.43456698, 0.34959546, 1.0, 0.72838875],
[1.0, 1.0, 1.0, 1.0],
],
)

def it_reorders_cat_x_cat(self):
slice_ = Cube(CR.CAT_X_CAT_PRUNING_HS).partitions[0]
Expand Down Expand Up @@ -441,15 +443,28 @@ def it_prunes_cat_x_cat_with_hs(self):
)
np.testing.assert_equal(slice_.base_counts, expected)

def it_accommodates_an_all_missing_element_rows_dimension(self):
    """Row proportions are an empty array for the all-missing-rows fixture."""
    cube = Cube(CR.CAT_X_CAT_ALL_MISSING_ROW_ELEMENTS)
    slice_ = _Slice(cube, 0, None, None, 0)

    np.testing.assert_almost_equal(slice_.row_proportions, np.array([]))

def it_knows_means_with_subtotals_on_cat_x_cat(self):
    """Slice means carry NaN in the second column for the subtotal fixture."""
    cube = Cube(CR.CAT_X_CAT_MEAN_SUBTOT)
    slice_ = _Slice(cube, 0, None, None, 0)

    actual = slice_.means

    # The second position holds NaN rather than a computed mean.
    expected = np.array([[38.3333333, np.nan, 65.0, 55.0, 34.0]])
    np.testing.assert_almost_equal(actual, expected)


class Describe_Strand(object):
"""Integration-test suite for `cr.cube.cubepart._Strand` object."""

def it_provides_nans_for_means_insertions(self):
strand = CubePartition.factory(
Cube(CR.CAT_WITH_MEANS_AND_INSERTIONS), 0, None, None, None, 0
)
np.testing.assert_almost_equal(
strand.means, [19.85555556, 13.85416667, 52.78947368, np.nan, np.nan]
)

def it_accommodates_an_all_missing_element_rows_dimension(self):
slice_ = _Slice(Cube(CR.CAT_X_CAT_ALL_MISSING_ROW_ELEMENTS), 0, None, None, 0)
row_proportions = slice_.row_proportions
np.testing.assert_almost_equal(row_proportions, np.array([]))
Loading

0 comments on commit f1ad558

Please sign in to comment.