Skip to content

Commit

Permalink
add TotalShareSum measure
Browse files Browse the repository at this point in the history
  • Loading branch information
ernestoarbitrio committed Mar 30, 2021
1 parent 2a86555 commit f8d36fc
Show file tree
Hide file tree
Showing 7 changed files with 160 additions and 1 deletion.
16 changes: 16 additions & 0 deletions src/cr/cube/cubepart.py
Original file line number Diff line number Diff line change
Expand Up @@ -1162,6 +1162,22 @@ def table_weighted_bases(self):
"""2D np.float64 ndarray of table-proportion denominator for each cell."""
return self._assembler.table_weighted_bases

@lazyproperty
def total_share_sum(self):
    """2D optional np.float64 ndarray of total share of sum for each table cell.

    Total share of sum is the sum of each subvar item divided by the TOTAL of items.
    The value itself is obtained from the assembler.

    Raises `ValueError` if the cube-result does not include a sum cube-measure.
    """
    try:
        total_shares = self._assembler.total_share_sum
    except ValueError:
        # --- a `ValueError` from the assembler is replaced by one whose message ---
        # --- names this property ---
        raise ValueError(
            "`.total_share_sum` is undefined for a cube-result without a sum "
            "measure"
        )
    return total_shares

@lazyproperty
def unweighted_counts(self):
"""2D np.float64 ndarray of unweighted count for each slice matrix cell."""
Expand Down
8 changes: 8 additions & 0 deletions src/cr/cube/matrix/assembler.py
Original file line number Diff line number Diff line change
Expand Up @@ -475,6 +475,14 @@ def table_weighted_bases(self):
"""2D np.float64 ndarray of weighted table-proportion denominator per cell."""
return self._assemble_matrix(self._measures.table_weighted_bases.blocks)

@lazyproperty
def total_share_sum(self):
    """2D optional np.float64 ndarray of total share of sum for each cell.

    Produced by assembling the blocks of the `total_share_sum` measure into a single
    matrix.

    Raises `ValueError` if the cube-result does not include a sum cube-measure.
    """
    total_share_sum_blocks = self._measures.total_share_sum.blocks
    return self._assemble_matrix(total_share_sum_blocks)

@lazyproperty
def unweighted_counts(self):
"""2D np.float64 ndarray of unweighted-count for each cell."""
Expand Down
42 changes: 42 additions & 0 deletions src/cr/cube/matrix/measure.py
Original file line number Diff line number Diff line change
Expand Up @@ -113,6 +113,11 @@ def table_weighted_bases(self):
"""_TableWeightedBases measure object for this cube-result."""
return _TableWeightedBases(self._dimensions, self, self._cube_measures)

@lazyproperty
def total_share_sum(self):
    """_TotalShareSum measure object for this cube-result."""
    # --- the measure receives the dimensions, this object, and the cube-measures ---
    dimensions, cube_measures = self._dimensions, self._cube_measures
    return _TotalShareSum(dimensions, self, cube_measures)

@lazyproperty
def unweighted_counts(self):
"""_UnweightedCounts measure object for this cube-result."""
Expand Down Expand Up @@ -1010,6 +1015,43 @@ def _subtotal_rows(self):
)


class _TotalShareSum(_BaseSecondOrderMeasure):
    """Provides the total share of sum measure for a matrix.

    Total share of sum is each sum value divided by the total of the sums in the block
    that value belongs to.
    """

    @lazyproperty
    def blocks(self):
        """2D array of the four 2D "blocks" making up this measure.

        These are the base-values, the column-subtotals, the row-subtotals, and the
        subtotal intersection-cell values. Each block of sums is divided by the sum of
        that same block.
        """
        sums_blocks = SumSubtotals.blocks(
            self._cube_measures.cube_sum.sums,
            self._dimensions,
            diff_cols_nan=True,
            diff_rows_nan=True,
        )
        # --- row 0 is [base-values, inserted-columns], row 1 is [inserted-rows, ---
        # --- intersections]; do not propagate divide-by-zero warnings to stderr ---
        with np.errstate(divide="ignore", invalid="ignore"):
            return [
                [sums_blocks[i][j] / np.sum(sums_blocks[i][j]) for j in (0, 1)]
                for i in (0, 1)
            ]


class _UnweightedCounts(_BaseSecondOrderMeasure):
"""Provides the unweighted-counts measure for a matrix."""

Expand Down
16 changes: 16 additions & 0 deletions tests/integration/test_cubepart.py
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,13 @@ def it_provides_values_for_cat_x_cat(self):
[0.3333333, 0.1333333],
[0.3333333, 0.2000000],
]
with pytest.raises(ValueError) as e:
slice_.total_share_sum
assert (
str(e.value)
== "`.total_share_sum` is undefined for a cube-result without a sum "
"measure"
)
assert slice_.unweighted_counts.tolist() == [[5, 2], [5, 3]]
assert slice_.variable_name == "v7"

Expand Down Expand Up @@ -891,6 +898,15 @@ def it_provides_share_of_sum_measure_for_mr_x_mr(self):
]
)
)
assert slice_.total_share_sum == pytest.approx(
np.array(
[
[0.11111111, 0.05555556, 0.11111111],
[0.05555556, 0.11111111, 0.16666667],
[0.05555556, 0.11111111, 0.22222222],
]
)
)


class Describe_Strand(object):
Expand Down
20 changes: 20 additions & 0 deletions tests/integration/test_headers_and_subtotals.py
Original file line number Diff line number Diff line change
Expand Up @@ -4068,6 +4068,16 @@ def it_computes_share_of_sum_for_numarray_with_subdiffs_and_subtot_on_columns(se
),
nan_ok=True,
)
assert slice_.total_share_sum == pytest.approx(
np.array(
[
[np.nan, 0.26666667, 0.2, np.nan],
[np.nan, 0.2, 0.0, np.nan],
[np.nan, 0.13333333, 0.2, np.nan],
]
),
nan_ok=True,
)

# pruning
slice_ = Cube(NA.NUM_ARR_SUM_GROUPED_BY_CAT).partitions[0]
Expand All @@ -4090,3 +4100,13 @@ def it_computes_share_of_sum_for_numarray_with_subdiffs_and_subtot_on_columns(se
),
nan_ok=True,
)
assert slice_.total_share_sum == pytest.approx(
np.array(
[
[0.26666667, 0.2],
[0.2, 0.0],
[0.13333333, 0.2],
]
),
nan_ok=True,
)
26 changes: 25 additions & 1 deletion tests/integration/test_numeric_array.py
Original file line number Diff line number Diff line change
Expand Up @@ -224,12 +224,24 @@ def it_provides_share_of_sum_for_num_array_grouped_by_cat(self):
# --------------Gender----------------
# M F
[0.5714285, 0.4285714], # S1 (Ticket Sold)
[1.0000, 0.0000], # S2 (Ticket Sold)
[1.00, 0.0], # S2 (Ticket Sold)
[0.4000, 0.6000], # S3 (Ticket Sold)
],
),
nan_ok=True,
)
assert slice_.total_share_sum == pytest.approx(
np.array(
[
# --------------Gender----------------
# M F
[0.26666667, 0.2], # S1 (Ticket Sold)
[0.2, 0.0], # S2 (Ticket Sold)
[0.13333333, 0.2], # S3 (Ticket Sold)
],
),
nan_ok=True,
)
assert slice_.columns_base == pytest.approx(np.array([[3, 2], [3, 2], [3, 2]]))

def it_provides_share_of_sum_for_num_array_x_mr(self):
Expand Down Expand Up @@ -271,3 +283,15 @@ def it_provides_share_of_sum_for_num_array_x_mr(self):
),
nan_ok=True,
)
assert slice_.total_share_sum == pytest.approx(
np.array(
[
# --------------MR----------------
# S1 S2 S3
[0.27906977, 0.11627907, 0.11627907], # S1 (num arr)
[0.20930233, 0.0, 0.0], # S2 (num arr)
[0.09302326, 0.09302326, 0.09302326], # S3 (num arr)
],
),
nan_ok=True,
)
33 changes: 33 additions & 0 deletions tests/unit/matrix/test_measure.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
_Sums,
_TableUnweightedBases,
_TableWeightedBases,
_TotalShareSum,
_UnweightedCounts,
_WeightedCounts,
)
Expand Down Expand Up @@ -778,6 +779,38 @@ def it_computes_its_blocks(self, request):
SumSubtotals_.blocks.assert_called_once_with(ANY, None, True, True)


class Describe_TotalShareSum(object):
    """Unit test suite for `cr.cube.matrix.measure._TotalShareSum` object."""

    def it_computes_its_blocks(self, request):
        # --- `SumSubtotals.blocks()` is stubbed; only its first item carries values ---
        SumSubtotals_ = class_mock(request, "cr.cube.matrix.measure.SumSubtotals")
        SumSubtotals_.blocks.return_value = [
            np.array([[[5.0, 12.0]], [[21.0, 32.0]]]),
            np.array([[[]], [[]]]),
            np.array([[[]], [[]]]),
            np.array([[[]], [[]]]),
        ]
        sums_ = instance_mock(
            request, _Sums, blocks=np.array([[[5.0, 6.0]], [[7.0, 8.0]]])
        )
        second_order_measures_ = instance_mock(
            request, SecondOrderMeasures, sums=sums_
        )
        cube_measures_ = class_mock(request, "cr.cube.matrix.cubemeasure.CubeMeasures")
        measure = _TotalShareSum(None, second_order_measures_, cube_measures_)

        blocks = measure.blocks

        # --- each block is divided by its own total: 5/17, 12/17 and 21/53, 32/53 ---
        assert blocks[0][0] == pytest.approx(np.array([[0.29411765, 0.70588235]]))
        assert blocks[0][1] == pytest.approx(np.array([[0.3962264, 0.6037735]]))
        SumSubtotals_.blocks.assert_called_once_with(ANY, None, True, True)


class Describe_TableUnweightedBases(object):
"""Unit test suite for `cr.cube.matrix.measure._TableUnweightedBases` object."""

Expand Down

0 comments on commit f8d36fc

Please sign in to comment.