From c5f87c8d217e34b4b42cce5d8796b96131dce4e2 Mon Sep 17 00:00:00 2001 From: Adrian Lara Date: Thu, 28 May 2020 12:50:18 -0600 Subject: [PATCH 1/4] during import, ignore units_pint during get_or_create for raw Column --- seed/models/columns.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/seed/models/columns.py b/seed/models/columns.py index aa41fe9be1..2f30d61f99 100644 --- a/seed/models/columns.py +++ b/seed/models/columns.py @@ -982,7 +982,6 @@ def select_col_obj(column_name, table_name, organization_column): organization=organization, table_name__in=[None, ''], column_name=field['from_field'], - units_pint=field.get('from_units'), # might be None is_extra_data=False # data from header rows in the files are NEVER extra data ) except Column.MultipleObjectsReturned: @@ -994,8 +993,6 @@ def select_col_obj(column_name, table_name, organization_column): from_org_col = Column.objects.filter(organization=organization, table_name__in=[None, ''], column_name=field['from_field'], - units_pint=field.get('from_units'), - # might be None is_extra_data=is_extra_data).first() _log.debug("Grabbing the first from_column") From 1b1f907c2439e19c2c646a1997c8aa00d9a4019c Mon Sep 17 00:00:00 2001 From: Adrian Lara Date: Thu, 28 May 2020 13:31:19 -0600 Subject: [PATCH 2/4] during import, raw Columns with the same name are deduped --- seed/models/columns.py | 23 +++++++++++++++++------ 1 file changed, 17 insertions(+), 6 deletions(-) diff --git a/seed/models/columns.py b/seed/models/columns.py index 2f30d61f99..505cfb6189 100644 --- a/seed/models/columns.py +++ b/seed/models/columns.py @@ -989,12 +989,23 @@ def select_col_obj(column_name, table_name, organization_column): "More than one from_column found for {}.{}".format(field['to_table_name'], field['to_field'])) - # TODO: write something to remove the duplicate columns - from_org_col = Column.objects.filter(organization=organization, - table_name__in=[None, ''], - column_name=field['from_field'], - is_extra_data=is_extra_data).first() - _log.debug("Grabbing the first from_column") + all_from_cols = Column.objects.filter( + organization=organization, + table_name__in=[None, ''], + column_name=field['from_field'], + is_extra_data=False + ) + + ColumnMapping.objects.filter(column_raw__id__in=models.Subquery(all_from_cols.values('id'))).delete() + all_from_cols.delete() + + from_org_col, _ = Column.objects.get_or_create( + organization=organization, + table_name__in=[None, ''], + column_name=field['from_field'], + is_extra_data=False # data from header rows in the files are NEVER extra data + ) + _log.debug("Creating a new from_column") new_field['to_column_object'] = select_col_obj(field['to_field'], field['to_table_name'], to_org_col) From 9d7850613c53a393bf2212749f0910c7984fd413 Mon Sep 17 00:00:00 2001 From: Adrian Lara Date: Thu, 4 Jun 2020 16:27:22 -0600 Subject: [PATCH 3/4] Clarify Column's _column_fields_to_columns dedup raw col objects logic --- seed/models/columns.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/seed/models/columns.py b/seed/models/columns.py index 505cfb6189..611c60c004 100644 --- a/seed/models/columns.py +++ b/seed/models/columns.py @@ -982,9 +982,11 @@ def select_col_obj(column_name, table_name, organization_column): organization=organization, table_name__in=[None, ''], column_name=field['from_field'], - is_extra_data=False # data from header rows in the files are NEVER extra data + is_extra_data=False # Column objects representing raw/header rows are NEVER extra data ) except Column.MultipleObjectsReturned: + # We want to avoid the ambiguity of having multiple Column objects for a specific raw column. + # To do that, delete all multiples along with any associated ColumnMapping objects. _log.debug( "More than one from_column found for {}.{}".format(field['to_table_name'], field['to_field'])) @@ -999,11 +1001,11 @@ def select_col_obj(column_name, table_name, organization_column): ColumnMapping.objects.filter(column_raw__id__in=models.Subquery(all_from_cols.values('id'))).delete() all_from_cols.delete() - from_org_col, _ = Column.objects.get_or_create( + from_org_col = Column.objects.create( organization=organization, table_name__in=[None, ''], column_name=field['from_field'], - is_extra_data=False # data from header rows in the files are NEVER extra data + is_extra_data=False # Column objects representing raw/header rows are NEVER extra data ) _log.debug("Creating a new from_column") From 9cef37f28a34964aa2a0caf71257595a941f4953 Mon Sep 17 00:00:00 2001 From: Adrian Lara Date: Fri, 5 Jun 2020 15:03:51 -0600 Subject: [PATCH 4/4] Test for data persistence when remapping w/ and w/o unit-aware fields --- seed/data_importer/tests/test_mapping.py | 99 ++++++++++++++++++++++++ 1 file changed, 99 insertions(+) diff --git a/seed/data_importer/tests/test_mapping.py b/seed/data_importer/tests/test_mapping.py index 070818401c..3f4f1f7754 100644 --- a/seed/data_importer/tests/test_mapping.py +++ b/seed/data_importer/tests/test_mapping.py @@ -94,6 +94,105 @@ def test_mapping(self): # for p in props: # pp(p) + def test_remapping_with_and_without_unit_aware_columns_doesnt_lose_data(self): + """ + During import, when the initial -State objects are created from the extra_data values, + ColumnMapping objects are used to take the extra_data dictionary values and create the + -State objects, setting the DB-level values as necessary - e.g. taking a raw + "Site EUI (kBtu/ft2)" value and inserting it into the DB field "site_eui". + + Previously, remapping could cause extra Column objects to be created, and subsequently, + this created extra ColumnMapping objects. These extra ColumnMapping objects could cause + raw values to be inserted into the wrong DB field on -State creation. + """ + # Just as in the previous test, build extra_data PropertyState + state = self.property_state_factory.get_property_state_as_extra_data( + import_file_id=self.import_file.id, + source_type=ASSESSED_RAW, + data_state=DATA_STATE_IMPORT, + random_extra=42, + ) + + # Replace the site_eui key-value that gets autogenerated by get_property_state_as_extra_data + del state.extra_data['site_eui'] + state.extra_data['Site EUI (kBtu/ft2)'] = 123 + state.save() + + self.import_file.raw_save_done = True + self.import_file.save() + + # Build 2 sets of mappings - with and without a unit-aware destination site_eui data + suggested_mappings = mapper.build_column_mapping( + list(state.extra_data.keys()), + Column.retrieve_all_by_tuple(self.org), + previous_mapping=get_column_mapping, + map_args=[self.org], + thresh=80 + ) + + ed_site_eui_mappings = [] + unit_aware_site_eui_mappings = [] + for raw_column, suggestion in suggested_mappings.items(): + if raw_column == 'Site EUI (kBtu/ft2)': + # Make this an extra_data field (without from_units) + ed_site_eui_mappings.append({ + "from_field": raw_column, + "from_units": None, + "to_table_name": 'PropertyState', + "to_field": raw_column, + "to_field_display_name": raw_column, + }) + + unit_aware_site_eui_mappings.append({ + "from_field": raw_column, + "from_units": 'kBtu/ft**2/year', + "to_table_name": 'PropertyState', + "to_field": 'site_eui', + "to_field_display_name": 'Site EUI', + }) + else: + other_mapping = { + "from_field": raw_column, + "from_units": None, + "to_table_name": suggestion[0], + "to_field": suggestion[1], + "to_field_display_name": suggestion[1], + } + ed_site_eui_mappings.append(other_mapping) + unit_aware_site_eui_mappings.append(other_mapping) + + # Map and remap the file multiple times with different mappings each time. + # Round 1 - Map site_eui data into Extra Data + Column.create_mappings(ed_site_eui_mappings, self.org, self.user, self.import_file.id) + tasks.map_data(self.import_file.id) + + # There should only be one raw 'Site EUI (kBtu/ft2)' Column object + self.assertEqual(1, self.org.column_set.filter(column_name='Site EUI (kBtu/ft2)', table_name='').count()) + # The one propertystate should have site eui info in extra_data + prop = self.import_file.find_unmatched_property_states().get() + self.assertIsNone(prop.site_eui) + self.assertIsNotNone(prop.extra_data.get('Site EUI (kBtu/ft2)')) + + # Round 2 - Map site_eui data into the PropertyState attribute "site_eui" + Column.create_mappings(unit_aware_site_eui_mappings, self.org, self.user, self.import_file.id) + tasks.map_data(self.import_file.id, remap=True) + + self.assertEqual(1, self.org.column_set.filter(column_name='Site EUI (kBtu/ft2)', table_name='').count()) + # The one propertystate should have site eui info in site_eui + prop = self.import_file.find_unmatched_property_states().get() + self.assertIsNotNone(prop.site_eui) + self.assertIsNone(prop.extra_data.get('Site EUI (kBtu/ft2)')) + + # Round 3 - Map site_eui data into Extra Data + Column.create_mappings(ed_site_eui_mappings, self.org, self.user, self.import_file.id) + tasks.map_data(self.import_file.id, remap=True) + + self.assertEqual(1, self.org.column_set.filter(column_name='Site EUI (kBtu/ft2)', table_name='').count()) + # The one propertystate should have site eui info in extra_data + prop = self.import_file.find_unmatched_property_states().get() + self.assertIsNone(prop.site_eui) + self.assertIsNotNone(prop.extra_data.get('Site EUI (kBtu/ft2)')) + class TestDuplicateFileHeaders(DataMappingBaseTestCase): def setUp(self):