From b813ab9d35eb5f9d7a64f6e1925971ab0a171496 Mon Sep 17 00:00:00 2001 From: Slobodan Ilic Date: Wed, 20 Jun 2018 12:10:01 +0200 Subject: [PATCH 1/8] [#158458376]: Implement pruning of labels in CubeSlice --- src/cr/cube/cube_slice.py | 21 +++++++++++++++++++++ tests/unit/test_cube_slice.py | 32 ++++++++++++++++++++++++++++++++ 2 files changed, 53 insertions(+) diff --git a/src/cr/cube/cube_slice.py b/src/cr/cube/cube_slice.py index c31d703ea..6f3599a07 100644 --- a/src/cr/cube/cube_slice.py +++ b/src/cr/cube/cube_slice.py @@ -150,3 +150,24 @@ def ca_main_axis(self): return 1 - ca_ind else: return None + + def labels(self, hs_dims=None, prune=False): + '''Get labels for the cube slice, and perform pruning by slice.''' + labels = self._cube.labels(include_transforms_for_dims=hs_dims)[-2:] + if not prune: + return labels + + def prune_dimension_labels(labels, prune_indices): + '''Get pruned labels for single dimension, besed on prune inds.''' + labels = [ + label for label, prune in zip(labels, prune_indices) + if not prune + ] + return labels + + prune_indices = self.prune_indices(transforms=hs_dims) + labels = [ + prune_dimension_labels(dim_labels, dim_prune_inds) + for dim_labels, dim_prune_inds in zip(labels, prune_indices) + ] + return labels diff --git a/tests/unit/test_cube_slice.py b/tests/unit/test_cube_slice.py index c17cb1d9d..08160d7e6 100644 --- a/tests/unit/test_cube_slice.py +++ b/tests/unit/test_cube_slice.py @@ -152,3 +152,35 @@ def test_dim_types(self): cube.ndim = 3 actual = CubeSlice(cube, 0).dim_types assert actual == expected + + def test_pruning_2d_labels(self): + '''Test that 2D labels are fetched from cr.cube, and pruned.''' + cube = Mock() + cube.ndim = 2 + cube.prune_indices.return_value = [ + np.array([True, False]), np.array([False, False, True]), + ] + cube.labels.return_value = [ + [Mock(), 'fake_lbl_1'], ['fake_lbl_2', 'fake_lbl_3', Mock()], + ] + actual = CubeSlice(cube, 0).labels(prune=True) + expected = [['fake_lbl_1'], ['fake_lbl_2', 'fake_lbl_3']] + assert actual == expected + + def test_pruning_3d_labels(self): + '''Test that 2D labels are fetched from cr.cube, and pruned.''' + cube = Mock() + cube.ndim = 3 + cube.prune_indices.return_value = [ + Mock(), + (np.array([True, False]), np.array([False, False, True])), + Mock(), + ] + cube.labels.return_value = [ + Mock(), + [Mock(), 'fake_lbl_1'], + ['fake_lbl_2', 'fake_lbl_3', Mock()], + ] + actual = CubeSlice(cube, 1).labels(prune=True) + expected = [['fake_lbl_1'], ['fake_lbl_2', 'fake_lbl_3']] + assert actual == expected From be01b9cdaba280f4fa2ea500ecd47fd081413ed8 Mon Sep 17 00:00:00 2001 From: Slobodan Ilic Date: Thu, 21 Jun 2018 09:19:01 +0200 Subject: [PATCH 2/8] [#158458376]: Subtotal invalid if all H&S IDs are not in dim element IDs --- src/cr/cube/subtotal.py | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/src/cr/cube/subtotal.py b/src/cr/cube/subtotal.py index d47edad66..361c10419 100644 --- a/src/cr/cube/subtotal.py +++ b/src/cr/cube/subtotal.py @@ -21,7 +21,10 @@ def is_valid(self): required_keys = {'anchor', 'args', 'function', 'name'} has_keys = set(self._data.keys()) == required_keys if has_keys: - return self._data['function'] == 'subtotal' + is_subtotal = self._data['function'] == 'subtotal' + hs_ids = self._data['args'] + are_hs_ids_valid = self._validate_hs_ids(hs_ids) + return is_subtotal and are_hs_ids_valid return False @lazyproperty @@ -43,10 +46,15 @@ def anchor(self): def _all_dim_ids(self): return [el.get('id') for el in self._dim.elements(include_missing=True)] + def _validate_hs_ids(self, hs_ids): + element_ids = [el['id'] for el in self._dim.elements()] + return any(arg in element_ids for arg in hs_ids) + @lazyproperty def args(self): '''Get H&S args.''' - if self.is_valid: + hs_ids = self._data['args'] + if self.is_valid and self._validate_hs_ids(hs_ids): return self._data['args'] return [] From 9769a911aed53447a6c813014b09aa287dd28dcb Mon Sep 17 00:00:00 2001 From: Slobodan Ilic Date: Thu, 21 Jun 2018 12:36:45 +0200 Subject: [PATCH 3/8] [#158458376]: Update 'has_mr' property, specific to slices --- src/cr/cube/cube_slice.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/src/cr/cube/cube_slice.py b/src/cr/cube/cube_slice.py index 6f3599a07..ca3ef7695 100644 --- a/src/cr/cube/cube_slice.py +++ b/src/cr/cube/cube_slice.py @@ -171,3 +171,12 @@ def prune_dimension_labels(labels, prune_indices): for dim_labels, dim_prune_inds in zip(labels, prune_indices) ] return labels + + @property + def has_mr(self): + '''True if the slice has MR dimension. + + This property needs to be overridden, because we don't care about the + 0th dimension (and if it's an MR) in the case of a 3D cube. + ''' + return 'multiple_response' in self.dim_types From ee461c316a7e4891593b61a17c2e426e7cbd3444 Mon Sep 17 00:00:00 2001 From: Slobodan Ilic Date: Thu, 21 Jun 2018 14:02:45 +0200 Subject: [PATCH 4/8] [#158458376]: Fix tests after updating subtotals validation logic --- src/cr/cube/subtotal.py | 4 ++-- tests/unit/test_dimension.py | 21 +++++++++++-------- tests/unit/test_subtotal.py | 40 ++++++++++++++++++++++++++++-------- 3 files changed, 47 insertions(+), 18 deletions(-) diff --git a/src/cr/cube/subtotal.py b/src/cr/cube/subtotal.py index 361c10419..0135e85a0 100644 --- a/src/cr/cube/subtotal.py +++ b/src/cr/cube/subtotal.py @@ -53,8 +53,8 @@ def _validate_hs_ids(self, hs_ids): @lazyproperty def args(self): '''Get H&S args.''' - hs_ids = self._data['args'] - if self.is_valid and self._validate_hs_ids(hs_ids): + hs_ids = self._data.get('args', None) + if hs_ids and self.is_valid and self._validate_hs_ids(hs_ids): return self._data['args'] return [] diff --git a/tests/unit/test_dimension.py b/tests/unit/test_dimension.py index 086d51cec..ee817b5c3 100644 --- a/tests/unit/test_dimension.py +++ b/tests/unit/test_dimension.py @@ -180,6 +180,9 @@ def test_dimension_description(self): actual = dim.description self.assertEqual(actual, expected) + @patch('cr.cube.dimension.Dimension._elements', [ + {'id': 1}, {'id': 2}, {'id': 5}, {'id': 4} + ]) @patch('cr.cube.subtotal.Subtotal._all_dim_ids', [1, 2, 4, 5]) @patch('cr.cube.dimension.Dimension._get_type') def test_hs_names_with_bad_data(self, mock_type): @@ -277,9 +280,11 @@ def test_hs_indices_with_empty_indices(self, mock_type): actual = dim.hs_indices self.assertEqual(actual, expected) + @patch('cr.cube.dimension.Dimension.elements') @patch('cr.cube.dimension.Dimension._get_type') - def test_subtotals(self, mock_type): + def test_subtotals(self, mock_type, mock_elements): mock_type.return_value = None + mock_elements.return_value = [{'id': 1}, {'id': 5}] dim_data = { 'references': { 'view': { @@ -308,43 +313,43 @@ def test_inserted_hs_indices_and_order(self, mock_type): 'insertions': [ { u'anchor': u'bottom', - u'args': [], + u'args': [111], u'function': u'subtotal', u'name': u'bottoms up one', }, { u'anchor': u'bottom', - u'args': [], + u'args': [222], u'function': u'subtotal', u'name': u'bottoms up two', }, { u'anchor': u'bottom', - u'args': [], + u'args': [333], u'function': u'subtotal', u'name': u'bottoms up three', }, { u'anchor': u'top', - u'args': [], + u'args': [444], u'function': u'subtotal', u'name': u'on top one', }, { u'anchor': u'top', - u'args': [], + u'args': [555], u'function': u'subtotal', u'name': u'on top two', }, { u'anchor': 333, - u'args': [], + u'args': [555], u'function': u'subtotal', u'name': u'in the middle one', }, { u'anchor': 333, - u'args': [], + u'args': [555], u'function': u'subtotal', u'name': u'in the middle two', } diff --git a/tests/unit/test_subtotal.py b/tests/unit/test_subtotal.py index b287bd367..0b523df3b 100644 --- a/tests/unit/test_subtotal.py +++ b/tests/unit/test_subtotal.py @@ -43,17 +43,29 @@ def test_is_invalid_when_missing_keys(self): self.assertEqual(actual, expected) def test_is_invalid_when_not_subtotal(self): - subtotal = Subtotal(self.invalid_subtotal_2, Mock()) + dim = Mock() + dim.elements.return_value = [{'id': 5}, {'id': 4}] + subtotal = Subtotal(self.invalid_subtotal_2, dim) expected = False actual = subtotal.is_valid self.assertEqual(actual, expected) def test_is_valid(self): - subtotal = Subtotal(self.valid_subtotal, Mock()) + dim = Mock() + dim.elements.return_value = [{'id': 5}, {'id': 4}] + subtotal = Subtotal(self.valid_subtotal, dim) expected = True actual = subtotal.is_valid self.assertEqual(actual, expected) + def test_is_invalid_when_hs_ids_not_in_dim_elements(self): + dim = Mock() + dim.elements.return_value = [{'id': 101}, {'id': 102}] + subtotal = Subtotal(self.valid_subtotal, dim) + expected = False + actual = subtotal.is_valid + self.assertEqual(actual, expected) + def test_anchor_on_invalid_missing_keys(self): subtotal = Subtotal(self.invalid_subtotal_1, Mock()) expected = None @@ -61,38 +73,50 @@ def test_anchor_on_invalid_missing_keys(self): self.assertEqual(actual, expected) def test_anchor_on_invalid_not_subtotal(self): - subtotal = Subtotal(self.invalid_subtotal_2, Mock()) + dim = Mock() + dim.elements.return_value = [{'id': 5}, {'id': 4}] + subtotal = Subtotal(self.invalid_subtotal_2, dim) expected = None actual = subtotal.anchor self.assertEqual(actual, expected) @patch('cr.cube.subtotal.Subtotal._all_dim_ids', [1, 3, 5]) def test_anchor_on_valid(self): - subtotal = Subtotal(self.valid_subtotal, Mock()) + dim = Mock() + dim.elements.return_value = [{'id': 5}, {'id': 4}] + subtotal = Subtotal(self.valid_subtotal, dim) expected = 5 actual = subtotal.anchor self.assertEqual(actual, expected) def test_args_on_invalid_1(self): - subtotal = Subtotal(self.invalid_subtotal_1, Mock()) + dim = Mock() + dim.elements.return_value = [{'id': 5}, {'id': 4}] + subtotal = Subtotal(self.invalid_subtotal_1, dim) expected = [] actual = subtotal.args self.assertEqual(actual, expected) def test_args_on_invalid_2(self): - subtotal = Subtotal(self.invalid_subtotal_2, Mock()) + dim = Mock() + dim.elements.return_value = [{'id': 5}, {'id': 4}] + subtotal = Subtotal(self.invalid_subtotal_2, dim) expected = [] actual = subtotal.args self.assertEqual(actual, expected) def test_args_on_valid(self): - subtotal = Subtotal(self.valid_subtotal, Mock()) + dim = Mock() + dim.elements.return_value = [{'id': 5}, {'id': 4}] + subtotal = Subtotal(self.valid_subtotal, dim) expected = [5, 4] actual = subtotal.args self.assertEqual(actual, expected) def test_anchor_on_uppercased_bottom(self): - subtotal = Subtotal(self.valid_subtotal_anchor_bottom, Mock()) + dim = Mock() + dim.elements.return_value = [{'id': 5}, {'id': 4}] + subtotal = Subtotal(self.valid_subtotal_anchor_bottom, dim) anchor = subtotal.anchor assert anchor == 'bottom' From 91061d11088abc649570daae48e8c002736d9e0f Mon Sep 17 00:00:00 2001 From: Slobodan Ilic Date: Fri, 22 Jun 2018 13:26:00 +0200 Subject: [PATCH 5/8] [#158458376]: Remove y_offset and tests --- src/cr/cube/crunch_cube.py | 28 ------------------ tests/unit/test_crunch_cube.py | 53 ---------------------------------- 2 files changed, 81 deletions(-) diff --git a/src/cr/cube/crunch_cube.py b/src/cr/cube/crunch_cube.py index 265080232..4e12a9c3c 100644 --- a/src/cr/cube/crunch_cube.py +++ b/src/cr/cube/crunch_cube.py @@ -1083,34 +1083,6 @@ def population_counts(self, population_size, weighted=True, prune=prune ) * population_size - def y_offset(self, expand=False, include_transforms_for_dims=None): - '''Gets y offset for sheet manipulation. - - Args: - - expand (bool): If a cube is a categorical array, and it's also a - 0-index cube of multiple cube sequence, it needs to be - displayed in a specific manner (sliced across categories - dimension). This argument enables such behavior. If a CA cube - is the only one (not a part of the multitable), it is output - normally, without expansion. - ''' - if not self.dimensions: - return 4 - - first_dim_length = self.as_array( - include_transforms_for_dims=include_transforms_for_dims - ).shape[0] - - # Special case of CA as a 0-ind cube - if expand and self.dimensions[0].type == 'categorical_array': - return first_dim_length * (len(self.dimensions[1].elements()) + 4) - - if self.ndim <= 2 and self.dimensions[0].type: - return first_dim_length + 4 - return first_dim_length * (self.as_array( - include_transforms_for_dims=include_transforms_for_dims - ).shape[1] + 4) - def index(self, weighted=True, prune=False): '''Get cube index measurement.''' return Index(self, weighted, prune).data diff --git a/tests/unit/test_crunch_cube.py b/tests/unit/test_crunch_cube.py index b9776c4d0..6d2a664b7 100644 --- a/tests/unit/test_crunch_cube.py +++ b/tests/unit/test_crunch_cube.py @@ -194,59 +194,6 @@ def test_test_filter_annotation(self): actual = CrunchCube(mock_cube).filter_annotation self.assertEqual(actual, expected) - @patch('cr.cube.crunch_cube.CrunchCube.dimensions') - def test_y_offset_no_dimensions(self, mock_dims): - dims = [] - mock_dims.__get__ = Mock(return_value=dims) - expected = 4 - actual = CrunchCube({}).y_offset() - self.assertEqual(actual, expected) - - @patch('cr.cube.crunch_cube.CrunchCube.as_array') - @patch('cr.cube.crunch_cube.CrunchCube.dimensions') - def test_y_offset_two_dimensions(self, mock_dims, mock_as_array): - dims = [Mock(), Mock()] - mock_dims.__get__ = Mock(return_value=dims) - mock_as_array.return_value = np.arange(12).reshape(3, 4) - - # Expected is the first dim length + offset of 4 - expected = 3 + 4 - actual = CrunchCube({}).y_offset() - self.assertEqual(actual, expected) - - @patch('cr.cube.crunch_cube.CrunchCube.as_array') - @patch('cr.cube.crunch_cube.CrunchCube.dimensions') - def test_y_offset_three_dimensions(self, mock_dims, mock_as_array): - dims = [Mock(), Mock(), Mock()] - mock_dims.__get__ = Mock(return_value=dims) - mock_as_array.return_value = np.arange(24).reshape(3, 4, 2) - - # Expected is the first dim len * (sedond dim len + offset of 4) - # The first dim is used as a slice, while the second is the number - # of rows of each subtable. Offset is space between tables - expected = 3 * (4 + 4) - actual = CrunchCube({}).y_offset() - self.assertEqual(actual, expected) - - @patch('cr.cube.crunch_cube.CrunchCube.as_array') - @patch('cr.cube.crunch_cube.CrunchCube.dimensions') - def test_y_offset_ca_as_zero_index_cube(self, mock_dims, mock_as_array): - dims = [Mock(), Mock()] - dims[0].type = 'categorical_array' - - # Second dim has total length of 3 - dims[1].elements.return_value = [Mock(), Mock(), Mock()] - mock_dims.__get__ = Mock(return_value=dims) - mock_as_array.return_value = np.arange(12).reshape(3, 4) - - # Expected is the first dim len * (sedond dim len + offset of 4) - # The first dim is used as a slice, while the second is the number - # of rows of each subtable. Offset is space between tables - expected = 3 * (3 + 4) - # 'expand' argument expands the CA dim as slices - actual = CrunchCube({}).y_offset(expand=True) - self.assertEqual(actual, expected) - @patch('cr.cube.crunch_cube.CrunchCube.is_weighted', False) def test_n_unweighted_and_has_no_weight(self): unweighted_count = Mock() From bddc6269f5dcf29e5644801fd7e3ecb2e06b262c Mon Sep 17 00:00:00 2001 From: Slobodan Ilic Date: Mon, 25 Jun 2018 09:12:50 +0200 Subject: [PATCH 6/8] [#158458376]: Inline function call --- src/cr/cube/cube_slice.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/cr/cube/cube_slice.py b/src/cr/cube/cube_slice.py index ca3ef7695..d550681ed 100644 --- a/src/cr/cube/cube_slice.py +++ b/src/cr/cube/cube_slice.py @@ -165,10 +165,10 @@ def prune_dimension_labels(labels, prune_indices): ] return labels - prune_indices = self.prune_indices(transforms=hs_dims) labels = [ prune_dimension_labels(dim_labels, dim_prune_inds) - for dim_labels, dim_prune_inds in zip(labels, prune_indices) + for dim_labels, dim_prune_inds in + zip(labels, self.prune_indices(transforms=hs_dims)) ] return labels From deccae38780953113bbdd6ca3d74a772f14ecac0 Mon Sep 17 00:00:00 2001 From: Slobodan Ilic Date: Mon, 25 Jun 2018 09:18:54 +0200 Subject: [PATCH 7/8] [#158458376]: Refactor validity check --- src/cr/cube/subtotal.py | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/src/cr/cube/subtotal.py b/src/cr/cube/subtotal.py index 0135e85a0..cd8cdfeb1 100644 --- a/src/cr/cube/subtotal.py +++ b/src/cr/cube/subtotal.py @@ -20,11 +20,8 @@ def is_valid(self): if isinstance(self._data, dict): required_keys = {'anchor', 'args', 'function', 'name'} has_keys = set(self._data.keys()) == required_keys - if has_keys: - is_subtotal = self._data['function'] == 'subtotal' - hs_ids = self._data['args'] - are_hs_ids_valid = self._validate_hs_ids(hs_ids) - return is_subtotal and are_hs_ids_valid + if has_keys and self._data['function'] == 'subtotal': + return self._validate_hs_ids(self._data['args']) return False @lazyproperty From 168acc9b6ee66c095129a5a06867fe1c74a0b812 Mon Sep 17 00:00:00 2001 From: Slobodan Ilic Date: Mon, 25 Jun 2018 09:27:31 +0200 Subject: [PATCH 8/8] [#158458376]: More refactoring (purge obsolete validation method) --- src/cr/cube/subtotal.py | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/src/cr/cube/subtotal.py b/src/cr/cube/subtotal.py index cd8cdfeb1..0a9191f54 100644 --- a/src/cr/cube/subtotal.py +++ b/src/cr/cube/subtotal.py @@ -21,7 +21,10 @@ def is_valid(self): required_keys = {'anchor', 'args', 'function', 'name'} has_keys = set(self._data.keys()) == required_keys if has_keys and self._data['function'] == 'subtotal': - return self._validate_hs_ids(self._data['args']) + return any( + element for element in self._dim.elements() + if element['id'] in self._data['args'] + ) return False @lazyproperty @@ -43,16 +46,12 @@ def anchor(self): def _all_dim_ids(self): return [el.get('id') for el in self._dim.elements(include_missing=True)] - def _validate_hs_ids(self, hs_ids): - element_ids = [el['id'] for el in self._dim.elements()] - return any(arg in element_ids for arg in hs_ids) - @lazyproperty def args(self): '''Get H&S args.''' hs_ids = self._data.get('args', None) - if hs_ids and self.is_valid and self._validate_hs_ids(hs_ids): - return self._data['args'] + if hs_ids and self.is_valid: + return hs_ids return [] @lazyproperty