From f8d36fc377368a425f7a9e46261c550eb8e0571c Mon Sep 17 00:00:00 2001 From: Arbitrio Date: Tue, 30 Mar 2021 14:47:14 +0200 Subject: [PATCH] add TotalShareSum measure --- src/cr/cube/cubepart.py | 16 +++++++ src/cr/cube/matrix/assembler.py | 8 ++++ src/cr/cube/matrix/measure.py | 42 +++++++++++++++++++ tests/integration/test_cubepart.py | 16 +++++++ .../integration/test_headers_and_subtotals.py | 20 +++++++++ tests/integration/test_numeric_array.py | 26 +++++++++++- tests/unit/matrix/test_measure.py | 33 +++++++++++++++ 7 files changed, 160 insertions(+), 1 deletion(-) diff --git a/src/cr/cube/cubepart.py b/src/cr/cube/cubepart.py index 7729bb866..cfc9c961c 100644 --- a/src/cr/cube/cubepart.py +++ b/src/cr/cube/cubepart.py @@ -1162,6 +1162,22 @@ def table_weighted_bases(self): """2D np.float64 ndarray of table-proportion denominator for each cell.""" return self._assembler.table_weighted_bases + @lazyproperty + def total_share_sum(self): + """2D optional np.float64 ndarray of total share sum value for each table cell. + + Raises `ValueError` if the cube-result does not include a sum cube-measure. + + Total share of sum is the sum of each subvar item divided by the TOTAL of items. 
+ """ + try: + return self._assembler.total_share_sum + except ValueError: + raise ValueError( + "`.total_share_sum` is undefined for a cube-result without a sum " + "measure" + ) + @lazyproperty def unweighted_counts(self): """2D np.float64 ndarray of unweighted count for each slice matrix cell.""" diff --git a/src/cr/cube/matrix/assembler.py b/src/cr/cube/matrix/assembler.py index a6db7c0b5..fb0f9b374 100644 --- a/src/cr/cube/matrix/assembler.py +++ b/src/cr/cube/matrix/assembler.py @@ -475,6 +475,14 @@ def table_weighted_bases(self): """2D np.float64 ndarray of weighted table-proportion denominator per cell.""" return self._assemble_matrix(self._measures.table_weighted_bases.blocks) + @lazyproperty + def total_share_sum(self): + """2D optional np.float64 ndarray of total share of sum for each cell. + + Raises `ValueError` if the cube-result does not include a sum cube-measure. + """ + return self._assemble_matrix(self._measures.total_share_sum.blocks) + @lazyproperty def unweighted_counts(self): """2D np.float64 ndarray of unweighted-count for each cell.""" diff --git a/src/cr/cube/matrix/measure.py b/src/cr/cube/matrix/measure.py index 01636d00a..c8cc6f784 100644 --- a/src/cr/cube/matrix/measure.py +++ b/src/cr/cube/matrix/measure.py @@ -113,6 +113,11 @@ def table_weighted_bases(self): """_TableWeightedBases measure object for this cube-result.""" return _TableWeightedBases(self._dimensions, self, self._cube_measures) + @lazyproperty + def total_share_sum(self): + """_TotalShareSum measure object for this cube-result.""" + return _TotalShareSum(self._dimensions, self, self._cube_measures) + @lazyproperty def unweighted_counts(self): """_UnweightedCounts measure object for this cube-result.""" @@ -1010,6 +1015,43 @@ def _subtotal_rows(self): ) +class _TotalShareSum(_BaseSecondOrderMeasure): + """Provides the total share of sum measure for a matrix. + + Total share sum is the sum of each subvar item divided by the TOTAL of items. 
+ """ + + @lazyproperty + def blocks(self): + """2D array of the four 2D "blocks" making up this measure. + + These are the base-values, the column-subtotals, the row-subtotals, and the + subtotal intersection-cell values. + """ + sums_blocks = SumSubtotals.blocks( + self._cube_measures.cube_sum.sums, + self._dimensions, + diff_cols_nan=True, + diff_rows_nan=True, + ) + # --- do not propagate divide-by-zero warnings to stderr --- + with np.errstate(divide="ignore", invalid="ignore"): + return [ + [ + # --- base values --- + sums_blocks[0][0] / np.sum(sums_blocks[0][0]), + # --- inserted columns --- + sums_blocks[0][1] / np.sum(sums_blocks[0][1]), + ], + [ + # --- inserted rows --- + sums_blocks[1][0] / np.sum(sums_blocks[1][0]), + # --- intersections --- + sums_blocks[1][1] / np.sum(sums_blocks[1][1]), + ], + ] + + class _UnweightedCounts(_BaseSecondOrderMeasure): """Provides the unweighted-counts measure for a matrix.""" diff --git a/tests/integration/test_cubepart.py b/tests/integration/test_cubepart.py index 723ad0276..411601830 100644 --- a/tests/integration/test_cubepart.py +++ b/tests/integration/test_cubepart.py @@ -104,6 +104,13 @@ def it_provides_values_for_cat_x_cat(self): [0.3333333, 0.1333333], [0.3333333, 0.2000000], ] + with pytest.raises(ValueError) as e: + slice_.total_share_sum + assert ( + str(e.value) + == "`.total_share_sum` is undefined for a cube-result without a sum " + "measure" + ) assert slice_.unweighted_counts.tolist() == [[5, 2], [5, 3]] assert slice_.variable_name == "v7" @@ -891,6 +898,15 @@ def it_provides_share_of_sum_measure_for_mr_x_mr(self): ] ) ) + assert slice_.total_share_sum == pytest.approx( + np.array( + [ + [0.11111111, 0.05555556, 0.11111111], + [0.05555556, 0.11111111, 0.16666667], + [0.05555556, 0.11111111, 0.22222222], + ] + ) + ) class Describe_Strand(object): diff --git a/tests/integration/test_headers_and_subtotals.py b/tests/integration/test_headers_and_subtotals.py index 3d241f63f..73d86e3ce 100644 --- 
a/tests/integration/test_headers_and_subtotals.py +++ b/tests/integration/test_headers_and_subtotals.py @@ -4068,6 +4068,16 @@ def it_computes_share_of_sum_for_numarray_with_subdiffs_and_subtot_on_columns(se ), nan_ok=True, ) + assert slice_.total_share_sum == pytest.approx( + np.array( + [ + [np.nan, 0.26666667, 0.2, np.nan], + [np.nan, 0.2, 0.0, np.nan], + [np.nan, 0.13333333, 0.2, np.nan], + ] + ), + nan_ok=True, + ) # pruning slice_ = Cube(NA.NUM_ARR_SUM_GROUPED_BY_CAT).partitions[0] @@ -4090,3 +4100,13 @@ def it_computes_share_of_sum_for_numarray_with_subdiffs_and_subtot_on_columns(se ), nan_ok=True, ) + assert slice_.total_share_sum == pytest.approx( + np.array( + [ + [0.26666667, 0.2], + [0.2, 0.0], + [0.13333333, 0.2], + ] + ), + nan_ok=True, + ) diff --git a/tests/integration/test_numeric_array.py b/tests/integration/test_numeric_array.py index d21acaf54..6c0e878e0 100644 --- a/tests/integration/test_numeric_array.py +++ b/tests/integration/test_numeric_array.py @@ -224,12 +224,24 @@ def it_provides_share_of_sum_for_num_array_grouped_by_cat(self): # --------------Gender---------------- # M F [0.5714285, 0.4285714], # S1 (Ticket Sold) - [1.0000, 0.0000], # S2 (Ticket Sold) + [1.00, 0.0], # S2 (Ticket Sold) [0.4000, 0.6000], # S3 (Ticket Sold) ], ), nan_ok=True, ) + assert slice_.total_share_sum == pytest.approx( + np.array( + [ + # --------------Gender---------------- + # M F + [0.26666667, 0.2], # S1 (Ticket Sold) + [0.2, 0.0], # S2 (Ticket Sold) + [0.13333333, 0.2], # S3 (Ticket Sold) + ], + ), + nan_ok=True, + ) assert slice_.columns_base == pytest.approx(np.array([[3, 2], [3, 2], [3, 2]])) def it_provides_share_of_sum_for_num_array_x_mr(self): @@ -271,3 +283,15 @@ def it_provides_share_of_sum_for_num_array_x_mr(self): ), nan_ok=True, ) + assert slice_.total_share_sum == pytest.approx( + np.array( + [ + # --------------MR---------------- + # S1 S2 S3 + [0.27906977, 0.11627907, 0.11627907], # S1 (num arr) + [0.20930233, 0.0, 0.0], # S2 (num arr) + 
[0.09302326, 0.09302326, 0.09302326], # S3 (num arr) + ], + ), + nan_ok=True, + ) diff --git a/tests/unit/matrix/test_measure.py b/tests/unit/matrix/test_measure.py index 1adb8ba69..38afbd120 100644 --- a/tests/unit/matrix/test_measure.py +++ b/tests/unit/matrix/test_measure.py @@ -26,6 +26,7 @@ _Sums, _TableUnweightedBases, _TableWeightedBases, + _TotalShareSum, _UnweightedCounts, _WeightedCounts, ) @@ -778,6 +779,38 @@ def it_computes_its_blocks(self, request): SumSubtotals_.blocks.assert_called_once_with(ANY, None, True, True) +class Describe_TotalShareSum(object): + """Unit test suite for `cr.cube.matrix.measure._TotalShareSum` object.""" + + def it_computes_its_blocks(self, request): + SumSubtotals_ = class_mock(request, "cr.cube.matrix.measure.SumSubtotals") + SumSubtotals_.blocks.return_value = [ + np.array([[[5.0, 12.0]], [[21.0, 32.0]]]), + np.array([[[]], [[]]]), + np.array([[[]], [[]]]), + np.array([[[]], [[]]]), + ] + sums_blocks_ = instance_mock( + request, _Sums, blocks=np.array([[[5.0, 6.0]], [[7.0, 8.0]]]) + ) + second_order_measures_ = instance_mock( + request, + SecondOrderMeasures, + sums=sums_blocks_, + ) + cube_measures_ = class_mock(request, "cr.cube.matrix.cubemeasure.CubeMeasures") + + total_share_sum = _TotalShareSum(None, second_order_measures_, cube_measures_) + + assert total_share_sum.blocks[0][0] == pytest.approx( + np.array([[0.29411765, 0.70588235]]) + ) + assert total_share_sum.blocks[0][1] == pytest.approx( + np.array([[0.3962264, 0.6037735]]) + ) + SumSubtotals_.blocks.assert_called_once_with(ANY, None, True, True) + + class Describe_TableUnweightedBases(object): """Unit test suite for `cr.cube.matrix.measure._TableUnweightedBases` object."""