Skip to content

Commit

Permalink
Merge branch 'pvals-with-hs-154599334' into rel-4.2.33
Browse files Browse the repository at this point in the history
  • Loading branch information
Crunch.io Jenkins Account committed Jan 26, 2018
2 parents 5d60817 + 6e05707 commit a165588
Show file tree
Hide file tree
Showing 2 changed files with 41 additions and 10 deletions.
40 changes: 30 additions & 10 deletions src/cr/cube/crunch_cube.py
Original file line number Diff line number Diff line change
Expand Up @@ -293,13 +293,27 @@ def _calculate_constraints_sum(cls, prop_table, prop_margin, axis):
# (because of the inner matrix dimensions).
return np.dot(prop_margin, V)

def _calculate_standard_error(self, axis):
total = self._margin(weighted=False, adjusted=True)
def _calculate_standard_error(self, axis, include_transforms_for_dims=None):
total = self._margin(
weighted=False,
adjusted=True,
include_transforms_for_dims=include_transforms_for_dims,
)
# Calculate margin across axis, as percentages of the total count
margin = self._margin(axis=axis, weighted=False, adjusted=True) / total
margin = self._margin(
axis=axis,
weighted=False,
adjusted=True,
include_transforms_for_dims=include_transforms_for_dims
) / total
# Adjusted proportions table, necessary for the standard error,
# because of the division by it.
props = self._proportions(axis=axis, weighted=False, adjusted=True)
props = self._proportions(
axis=axis,
weighted=False,
adjusted=True,
include_transforms_for_dims=include_transforms_for_dims
)

constraints = self._calculate_constraints_sum(props, margin, axis)
if axis == 0:
Expand All @@ -319,27 +333,33 @@ def _calculate_standard_error(self, axis):

return np.sqrt((magic_d * props * (1 - props) + constraints) / total)

def _calculate_statistics(self, axis):
def _calculate_statistics(self, axis, include_transforms_for_dims=None):
if axis not in [0, 1]:
raise ValueError('Unexpected value for `axis`: {}'.format(axis))

props = self.proportions(axis=axis)
props = self.proportions(axis=axis, include_transforms_for_dims=include_transforms_for_dims)

# Statistics are calculated by operating on both axes' margins. In this
# function, we need to determine the cross-axis (other than the one
# we're doing the calculation for), in order to be able to calculate
# *that* margin, which will serve as the basis for the
# statistics calculation.
cross_axis = 0 if axis == 1 else 1
cross_margin = self.margin(axis=cross_axis) / self.margin()
cross_margin = self.margin(
axis=cross_axis,
include_transforms_for_dims=include_transforms_for_dims
) / self.margin(include_transforms_for_dims=include_transforms_for_dims)

if cross_axis == 1:
# If the row proportional margins are required, they also need to
# be broadcast into the vector column shape, in order to be able to
# perform the subtraction from the matrix.
cross_margin = cross_margin[:, np.newaxis]

return (props - cross_margin) / self._calculate_standard_error(axis)
return (
(props - cross_margin) /
self._calculate_standard_error(axis, include_transforms_for_dims)
)

def _double_mr_proportions(self, axis, weighted):
all_dimensions = self._get_dimensions(self._cube)
Expand Down Expand Up @@ -833,7 +853,7 @@ def percentages(self, axis=None):
'''
return self.proportions(axis) * 100

def pvals(self, axis):
def pvals(self, axis, include_transforms_for_dims=None):
'''Calculate p-vals.
This function calculates statistically significant results for
Expand All @@ -849,7 +869,7 @@ def pvals(self, axis):
cell of the table-like representation of the
crunch cube.
'''
stats = self._calculate_statistics(axis)
stats = self._calculate_statistics(axis, include_transforms_for_dims)
sign = np.sign(stats)

p_values = 2 * (1 - norm.cdf(np.abs(stats)))
Expand Down
11 changes: 11 additions & 0 deletions tests/integration/test_crunch_cube.py
Original file line number Diff line number Diff line change
Expand Up @@ -1497,6 +1497,17 @@ def test_subtotals_as_array_2d_cube_with_hs_on_row(self):
actual = cube.as_array(include_transforms_for_dims=[0, 1])
np.testing.assert_array_equal(actual, expected)

def test_subtotals_pvals_2d_cube_with_hs_on_row(self):
'''Ensure that pvals shape is the same as table shape with H&S'''
cube = CrunchCube(FIXT_ECON_BLAME_X_IDEOLOGY_ROW_HS)
expected = 6, 6
actual = cube.pvals(axis=0, include_transforms_for_dims=[0, 1]).shape

# Only assert the shape of the table, as the pvals are going to be
# rewritten soon.
# TODO: Change assertion after merging new Z-scores.
np.testing.assert_array_equal(actual, expected)

def test_subtotals_as_array_2d_cube_with_hs_on_col(self):
cube = CrunchCube(FIXT_ECON_BLAME_X_IDEOLOGY_COL_HS)
expected = np.array([
Expand Down

0 comments on commit a165588

Please sign in to comment.