Skip to content

Commit

Permalink
smoothing: smoothed values for scale means
Browse files Browse the repository at this point in the history
  • Loading branch information
ernestoarbitrio committed Sep 23, 2020
1 parent b3fbd03 commit 4a41387
Show file tree
Hide file tree
Showing 5 changed files with 49 additions and 7 deletions.
21 changes: 17 additions & 4 deletions src/cr/cube/cubepart.py
Original file line number Diff line number Diff line change
Expand Up @@ -516,12 +516,19 @@ def scale_mean_pairwise_indices_alt(self):

@lazyproperty
def scale_means_column(self):
"""1D float64 ndarray of column scale means
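Each mean is the sum of the column numeric values weighted by the
row_proportions, divided by the sum of the row_proportions over the columns
whose numeric value is not NaN. Returns None when every numeric value is NaN.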
"""
if np.all(np.isnan(self._columns_dimension_numeric_values)):
return None

inner = np.nansum(self._columns_dimension_numeric_values * self.counts, axis=1)
inner = np.nansum(
self._columns_dimension_numeric_values * self.row_proportions, axis=1
)
not_a_nan_index = ~np.isnan(self._columns_dimension_numeric_values)
denominator = np.sum(self.counts[:, not_a_nan_index], axis=1)
denominator = np.sum(self.row_proportions[:, not_a_nan_index], axis=1)
return inner / denominator

@lazyproperty
Expand All @@ -542,13 +549,19 @@ def scale_means_columns_margin(self):

@lazyproperty
def scale_means_row(self):
"""1D float64 ndarray of row scale means
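Each mean is the sum of the row numeric values weighted by the
column_proportions, divided by the sum of the column_proportions over the
rows whose numeric value is not NaN. Returns None when every numeric value is
NaN.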
"""
if np.all(np.isnan(self._rows_dimension_numeric_values)):
return None
inner = np.nansum(
self._rows_dimension_numeric_values[:, None] * self.counts, axis=0
self._rows_dimension_numeric_values[:, None] * self.column_proportions,
axis=0,
)
not_a_nan_index = ~np.isnan(self._rows_dimension_numeric_values)
denominator = np.sum(self.counts[not_a_nan_index, :], axis=0)
denominator = np.sum(self.column_proportions[not_a_nan_index, :], axis=0)
return inner / denominator

@lazyproperty
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
[float("NaN"), float("NaN"), 2.71218211, 2.7578529]
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
[float("NaN"), float("NaN"), 2.173158, 1.967011]
6 changes: 3 additions & 3 deletions tests/integration/test_scale_means.py
Original file line number Diff line number Diff line change
Expand Up @@ -443,14 +443,14 @@ def test_var_scale_means_for_ca_itmes_x_cat():

# Check that the scale means (row and column) are equal across the two
# different datasets
np.testing.assert_array_equal(
np.testing.assert_array_almost_equal(
slice_.var_scale_means_column, slice2_.var_scale_means_row
)

np.testing.assert_almost_equal(
np.testing.assert_array_almost_equal(
slice2_.var_scale_means_row, [2.56410909, 5.17893869, 4.75445248, 4.81611278]
)
np.testing.assert_almost_equal(
np.testing.assert_array_almost_equal(
slice_.var_scale_means_column, [2.56410909, 5.17893869, 4.75445248, 4.81611278]
)

Expand Down
27 changes: 27 additions & 0 deletions tests/integration/test_smoothing.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,33 @@ def it_provides_smoothed_col_pct_for_compatible_cubes(
slice_.column_percentages, load_python_expression(expectation)
)

@pytest.mark.parametrize(
"fixture, window, expectation",
(
(CR.CAT_X_CAT_DATE, 3, "cat-x-cat-date-smoothed-scale-means-w3"),
(CR.CAT_X_CAT_DATE_WGTD, 3, "cat-x-cat-date-smoothed-scale-means-w3"),
(
CR.CA_SUBVAR_X_CA_CAT_X_CAT_DATE,
3,
"ca-subvar-ca-cat-x-cat-date-scale-means-w3",
),
),
)
def it_provides_smoothed_scale_means_for_compatible_cubes(
self, fixture, window, expectation
):
transforms = {
"smoothing": {
"method": "one_side_moving_avg",
"window": window,
"show": True,
}
}
slice_ = Cube(fixture, transforms=transforms).partitions[0]
np.testing.assert_array_almost_equal(
slice_.scale_means_row, load_python_expression(expectation)
)

@pytest.mark.parametrize(
"fixture, expectation",
(
Expand Down

0 comments on commit 4a41387

Please sign in to comment.