Skip to content

Commit

Permalink
Merge 77e1d3e into b3fbd03
Browse files Browse the repository at this point in the history
  • Loading branch information
ernestoarbitrio committed Sep 24, 2020
2 parents b3fbd03 + 77e1d3e commit 8d65088
Show file tree
Hide file tree
Showing 8 changed files with 132 additions and 32 deletions.
32 changes: 28 additions & 4 deletions src/cr/cube/cubepart.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,12 @@ def has_means(self):
"""True if cube-result includes means values."""
return self._cube.has_means

@lazyproperty
def is_smoothed(self):
    """Value of `show_smoothing` on the last dimension of this partition."""
    return self._dimensions[-1].show_smoothing

@lazyproperty
def ndim(self):
"""int count of dimensions for this partition."""
Expand Down Expand Up @@ -516,12 +522,19 @@ def scale_mean_pairwise_indices_alt(self):

@lazyproperty
def scale_means_column(self):
"""1D float64 ndarray of column scale means, or None.

Each value is the sum over columns of (column numeric value times row
proportion), divided by the sum of that row's proportions over the columns
whose numeric value is not NaN. None is returned when every column numeric
value is NaN.
"""
if np.all(np.isnan(self._columns_dimension_numeric_values)):
return None

inner = np.nansum(self._columns_dimension_numeric_values * self.counts, axis=1)
inner = np.nansum(
self._columns_dimension_numeric_values * self.row_proportions, axis=1
)
not_a_nan_index = ~np.isnan(self._columns_dimension_numeric_values)
denominator = np.sum(self.counts[:, not_a_nan_index], axis=1)
denominator = np.sum(self.row_proportions[:, not_a_nan_index], axis=1)
return inner / denominator

@lazyproperty
Expand All @@ -542,13 +555,19 @@ def scale_means_columns_margin(self):

@lazyproperty
def scale_means_row(self):
"""1D float64 ndarray of row scale means, or None.

Each value is the sum over rows of (row numeric value times column
proportion), divided by the sum of that column's proportions over the rows
whose numeric value is not NaN. None is returned when every row numeric
value is NaN.
"""
if np.all(np.isnan(self._rows_dimension_numeric_values)):
return None
inner = np.nansum(
self._rows_dimension_numeric_values[:, None] * self.counts, axis=0
self._rows_dimension_numeric_values[:, None] * self.column_proportions,
axis=0,
)
not_a_nan_index = ~np.isnan(self._rows_dimension_numeric_values)
denominator = np.sum(self.counts[not_a_nan_index, :], axis=0)
denominator = np.sum(self.column_proportions[not_a_nan_index, :], axis=0)
return inner / denominator

@lazyproperty
Expand Down Expand Up @@ -1214,6 +1233,11 @@ class _Nub(CubePartition):
def is_empty(self):
return False if self.unweighted_count else True

@lazyproperty
def is_smoothed(self):
    """Always False: a `_Nub` is never reported as smoothed."""
    return False

@lazyproperty
def means(self):
    """The `means` value taken from this object's `_scalar`."""
    return self._scalar.means
Expand Down
32 changes: 16 additions & 16 deletions src/cr/cube/dimension.py
Original file line number Diff line number Diff line change
Expand Up @@ -386,6 +386,21 @@ def prune(self):
def shape(self):
return len(self.all_elements)

@lazyproperty
def show_smoothing(self):
    """True only when smoothing should be applied to this dimension.

    All three conditions must hold: a non-empty "smoothing" transform is
    present, the dimension is a categorical date, and the transform's "show"
    flag is not switched off ("show" is treated as True when omitted).
    """
    transform = self._dimension_transforms_dict.get("smoothing")
    # --- short-circuits in the same order as the individual checks: ---
    # --- transform present, then categorical date, then "show" flag ---
    return bool(
        transform
        and self._is_cat_date
        and transform.get("show", True)
    )

@lazyproperty
def smooth(self):
"""Function performing smoothing for this dimension, based on transform."""
Expand All @@ -395,7 +410,7 @@ def null_smooth(values):

return (
_SingleSideMovingAvgSmoother.smoothing_function(self._smoothing_window)
if self._show_smoothing
if self.show_smoothing
else null_smooth
)

Expand Down Expand Up @@ -469,21 +484,6 @@ def _is_cat_date(self):
if not category.get("missing", False)
)

@lazyproperty
def _show_smoothing(self):
    """True when a smoothing transform is present, applicable and shown.

    False when there is no "smoothing" transform, when the dimension is not
    a categorical date, or when the transform's "show" flag is falsy ("show"
    defaults to True when absent).
    """
    smoothing = self._dimension_transforms_dict.get("smoothing")
    if smoothing and self._is_cat_date:
        # --- only an explicit falsy "show" deactivates the transform ---
        return True if smoothing.get("show", True) else False
    return False

@lazyproperty
def _smoothing_window(self):
"""size of the moving window.
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
[float("NaN"), float("NaN"), 2.71218211, 2.7578529]
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
[float("NaN"), float("NaN"), 2.173158, 1.967011]
6 changes: 3 additions & 3 deletions tests/integration/test_scale_means.py
Original file line number Diff line number Diff line change
Expand Up @@ -443,14 +443,14 @@ def test_var_scale_means_for_ca_itmes_x_cat():

# Testing that the scale means (row and col) are equal on the 2 diverse
# datasets
np.testing.assert_array_equal(
np.testing.assert_array_almost_equal(
slice_.var_scale_means_column, slice2_.var_scale_means_row
)

np.testing.assert_almost_equal(
np.testing.assert_array_almost_equal(
slice2_.var_scale_means_row, [2.56410909, 5.17893869, 4.75445248, 4.81611278]
)
np.testing.assert_almost_equal(
np.testing.assert_array_almost_equal(
slice_.var_scale_means_column, [2.56410909, 5.17893869, 4.75445248, 4.81611278]
)

Expand Down
35 changes: 31 additions & 4 deletions tests/integration/test_smoothing.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ def it_provides_smoothed_col_pct_for_compatible_cubes(
):
transforms = {
"smoothing": {
"method": "one_side_moving_avg",
"method": "one_sided_moving_avg",
"window": window,
"show": True,
}
Expand All @@ -49,6 +49,33 @@ def it_provides_smoothed_col_pct_for_compatible_cubes(
slice_.column_percentages, load_python_expression(expectation)
)

@pytest.mark.parametrize(
    "fixture, window, expectation",
    (
        (CR.CAT_X_CAT_DATE, 3, "cat-x-cat-date-smoothed-scale-means-w3"),
        # NOTE(review): the weighted cube reuses the unweighted expectation
        # file -- confirm this is intended.
        (CR.CAT_X_CAT_DATE_WGTD, 3, "cat-x-cat-date-smoothed-scale-means-w3"),
        (
            CR.CA_SUBVAR_X_CA_CAT_X_CAT_DATE,
            3,
            "ca-subvar-ca-cat-x-cat-date-scale-means-w3",
        ),
    ),
)
def it_provides_smoothed_scale_means_for_compatible_cubes(
    self, fixture, window, expectation
):
    """`scale_means_row` of the first partition matches the stored expectation.

    The cube is built from `fixture` with a smoothing transform of the given
    `window`; `expectation` names the expression file loaded for comparison.
    """
    transforms = {
        "smoothing": {
            # --- same method name as every other smoothing test here ---
            "method": "one_sided_moving_avg",
            "window": window,
            "show": True,
        }
    }
    slice_ = Cube(fixture, transforms=transforms).partitions[0]
    np.testing.assert_array_almost_equal(
        slice_.scale_means_row, load_python_expression(expectation)
    )

@pytest.mark.parametrize(
"fixture, expectation",
(
Expand All @@ -61,7 +88,7 @@ def it_provides_smoothed_col_pct_for_compatible_cubes(
)
def it_does_not_smooth_col_pct_for_incompatible_cubes(self, fixture, expectation):
transforms = {
"smoothing": {"method": "one_side_moving_avg", "window": 3, "show": True}
"smoothing": {"method": "one_sided_moving_avg", "window": 3, "show": True}
}
cube = Cube(fixture, transforms=transforms)
slice_ = cube.partitions[0]
Expand All @@ -79,7 +106,7 @@ def it_doesnt_smooth_counts_when_window_is_not_valid(self):
class DescribeStrandMeansSmoothing(object):
def it_provides_smoothed_means_cat_date(self):
transforms = {
"smoothing": {"method": "one_side_moving_avg", "window": 3, "show": True}
"smoothing": {"method": "one_sided_moving_avg", "window": 3, "show": True}
}
strand_ = Cube(CR.CAT_DATE_MEAN, transforms=transforms).partitions[0]
np.testing.assert_array_almost_equal(
Expand All @@ -88,7 +115,7 @@ def it_provides_smoothed_means_cat_date(self):

def it_doesnt_smoot_means_mr_mean_filt_wgtd(self):
transforms = {
"smoothing": {"method": "one_side_moving_avg", "window": 3, "show": True}
"smoothing": {"method": "one_sided_moving_avg", "window": 3, "show": True}
}
strand_ = Cube(CR.MR_MEAN_FILT_WGTD, transforms=transforms).partitions[0]
np.testing.assert_array_almost_equal(
Expand Down
47 changes: 47 additions & 0 deletions tests/unit/test_cubepart.py
Original file line number Diff line number Diff line change
Expand Up @@ -311,6 +311,20 @@ def but_scale_mean_pairwise_indices_alt_is_None_when_no_secondary_alpha_specifie

assert slice_.scale_mean_pairwise_indices_alt is None

@pytest.mark.parametrize(
    "show_smoothing, expected_value", ((True, True), (False, False))
)
def it_knows_whether_it_is_smoothed(
    self, show_smoothing, expected_value, dimension_, _dimensions_prop_
):
    """`_Slice.is_smoothed` mirrors `show_smoothing` of its last dimension."""
    _dimensions_prop_.return_value = (dimension_,)
    dimension_.show_smoothing = show_smoothing

    assert _Slice(None, None, None, None, None).is_smoothed is expected_value

# fixture components ---------------------------------------------

@pytest.fixture
Expand All @@ -321,6 +335,14 @@ def _alpha_alt_prop_(self, request):
def cube_(self, request):
return instance_mock(request, Cube)

@pytest.fixture
def dimension_(self, request):
    """Stand-in `Dimension` produced by `instance_mock`."""
    return instance_mock(request, Dimension)

@pytest.fixture
def _dimensions_prop_(self, request):
    """`property_mock` for `_Slice._dimensions`; tests set its `return_value`."""
    return property_mock(request, _Slice, "_dimensions")

@pytest.fixture
def matrix_(self, request):
    """Stand-in `TransformedMatrix` produced by `instance_mock`."""
    return instance_mock(request, TransformedMatrix)
Expand Down Expand Up @@ -395,6 +417,20 @@ def it_knows_the_population_fraction(self, cube_):

assert population_fraction == 0.5

@pytest.mark.parametrize(
    "show_smoothing, expected_value", ((True, True), (False, False))
)
def it_knows_whether_it_is_smoothed(
    self, show_smoothing, expected_value, dimension_, _dimensions_prop_
):
    """`_Strand.is_smoothed` mirrors `show_smoothing` of its last dimension."""
    _dimensions_prop_.return_value = (dimension_,)
    dimension_.show_smoothing = show_smoothing

    assert _Strand(None, None, None, None, None, None).is_smoothed is expected_value

# fixture components ---------------------------------------------

@pytest.fixture
Expand All @@ -405,6 +441,10 @@ def cube_(self, request):
def dimension_(self, request):
return instance_mock(request, Dimension)

@pytest.fixture
def _dimensions_prop_(self, request):
    """`property_mock` for `_Strand._dimensions`; tests set its `return_value`."""
    return property_mock(request, _Strand, "_dimensions")

@pytest.fixture
def _rows_dimension_prop_(self, request):
    """`property_mock` for `_Strand._rows_dimension`."""
    return property_mock(request, _Strand, "_rows_dimension")
Expand Down Expand Up @@ -438,3 +478,10 @@ def it_knows_when_it_is_empty(self, request, unweighted_count, expected_value):

def it_knows_its_cube_is_never_mr_aug(self):
assert _Nub(None).cube_is_mr_aug is False

def it_knows_whether_it_is_smoothed(self):
    """`_Nub.is_smoothed` is False."""
    assert _Nub(None).is_smoothed is False
10 changes: 5 additions & 5 deletions tests/unit/test_dimension.py
Original file line number Diff line number Diff line change
Expand Up @@ -591,7 +591,7 @@ def it_knows_its_show_smoothing_property(
_is_cat_date_prop_.return_value = is_cat_date
dimension = Dimension(None, None, dimension_transform)

show_smoothing = dimension._show_smoothing
show_smoothing = dimension.show_smoothing

assert show_smoothing is expected_value

Expand All @@ -600,12 +600,12 @@ def it_knows_its_show_smoothing_property(
)
def it_knows_its_smooth_function(
self,
_show_smoothing_prop_,
show_smoothing_prop_,
_smoothing_window_prop_,
show_smoothing,
expected_value,
):
_show_smoothing_prop_.return_value = show_smoothing
show_smoothing_prop_.return_value = show_smoothing
_smoothing_window_prop_.return_value = 3
dimension = Dimension(None, None)

Expand Down Expand Up @@ -652,8 +652,8 @@ def valid_elements_prop_(self, request):
return property_mock(request, Dimension, "valid_elements")

@pytest.fixture
def _show_smoothing_prop_(self, request):
return property_mock(request, Dimension, "_show_smoothing")
def show_smoothing_prop_(self, request):
return property_mock(request, Dimension, "show_smoothing")

@pytest.fixture
def _is_cat_date_prop_(self, request):
Expand Down

0 comments on commit 8d65088

Please sign in to comment.