Skip to content

Commit

Permalink
Merge 39d8d96 into d68b456
Browse files Browse the repository at this point in the history
  • Loading branch information
adrian-lara committed Jun 5, 2020
2 parents d68b456 + 39d8d96 commit f79f612
Show file tree
Hide file tree
Showing 2 changed files with 119 additions and 10 deletions.
99 changes: 99 additions & 0 deletions seed/data_importer/tests/test_mapping.py
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,105 @@ def test_mapping(self):
# for p in props:
# pp(p)

def test_remapping_with_and_without_unit_aware_columns_doesnt_lose_data(self):
"""
During import, when the initial -State objects are created from the extra_data values,
ColumnMapping objects are used to take the extra_data dictionary values and create the
-State objects, setting the DB-level values as necessary - e.g. taking a raw
"Site EUI (kBtu/ft2)" value and inserting it into the DB field "site_eui".
Previously, remapping could cause extra Column objects to be created, and subsequently,
this created extra ColumnMapping objects. These extra ColumnMapping objects could cause
raw values to be inserted into the wrong DB field on -State creation.
"""
# Just as in the previous test, build extra_data PropertyState
state = self.property_state_factory.get_property_state_as_extra_data(
import_file_id=self.import_file.id,
source_type=ASSESSED_RAW,
data_state=DATA_STATE_IMPORT,
random_extra=42,
)

# Replace the site_eui key-value that gets autogenerated by get_property_state_as_extra_data
del state.extra_data['site_eui']
state.extra_data['Site EUI (kBtu/ft2)'] = 123
state.save()

self.import_file.raw_save_done = True
self.import_file.save()

# Build 2 sets of mappings - with and without a unit-aware destination site_eui data
suggested_mappings = mapper.build_column_mapping(
list(state.extra_data.keys()),
Column.retrieve_all_by_tuple(self.org),
previous_mapping=get_column_mapping,
map_args=[self.org],
thresh=80
)

ed_site_eui_mappings = []
unit_aware_site_eui_mappings = []
for raw_column, suggestion in suggested_mappings.items():
if raw_column == 'Site EUI (kBtu/ft2)':
# Make this an extra_data field (without from_units)
ed_site_eui_mappings.append({
"from_field": raw_column,
"from_units": None,
"to_table_name": 'PropertyState',
"to_field": raw_column,
"to_field_display_name": raw_column,
})

unit_aware_site_eui_mappings.append({
"from_field": raw_column,
"from_units": 'kBtu/ft**2/year',
"to_table_name": 'PropertyState',
"to_field": 'site_eui',
"to_field_display_name": 'Site EUI',
})
else:
other_mapping = {
"from_field": raw_column,
"from_units": None,
"to_table_name": suggestion[0],
"to_field": suggestion[1],
"to_field_display_name": suggestion[1],
}
ed_site_eui_mappings.append(other_mapping)
unit_aware_site_eui_mappings.append(other_mapping)

# Map and remap the file multiple times with different mappings each time.
# Round 1 - Map site_eui data into Extra Data
Column.create_mappings(ed_site_eui_mappings, self.org, self.user, self.import_file.id)
tasks.map_data(self.import_file.id)

# There should only be one raw 'Site EUI (kBtu/ft2)' Column object
self.assertEqual(1, self.org.column_set.filter(column_name='Site EUI (kBtu/ft2)', table_name='').count())
# The one propertystate should have site eui info in extra_data
prop = self.import_file.find_unmatched_property_states().get()
self.assertIsNone(prop.site_eui)
self.assertIsNotNone(prop.extra_data.get('Site EUI (kBtu/ft2)'))

# Round 2 - Map site_eui data into the PropertyState attribute "site_eui"
Column.create_mappings(unit_aware_site_eui_mappings, self.org, self.user, self.import_file.id)
tasks.map_data(self.import_file.id, remap=True)

self.assertEqual(1, self.org.column_set.filter(column_name='Site EUI (kBtu/ft2)', table_name='').count())
# The one propertystate should have site eui info in site_eui
prop = self.import_file.find_unmatched_property_states().get()
self.assertIsNotNone(prop.site_eui)
self.assertIsNone(prop.extra_data.get('Site EUI (kBtu/ft2)'))

# Round 3 - Map site_eui data into Extra Data
Column.create_mappings(ed_site_eui_mappings, self.org, self.user, self.import_file.id)
tasks.map_data(self.import_file.id, remap=True)

self.assertEqual(1, self.org.column_set.filter(column_name='Site EUI (kBtu/ft2)', table_name='').count())
# The one propertystate should have site eui info in extra_data
prop = self.import_file.find_unmatched_property_states().get()
self.assertIsNone(prop.site_eui)
self.assertIsNotNone(prop.extra_data.get('Site EUI (kBtu/ft2)'))


class TestDuplicateFileHeaders(DataMappingBaseTestCase):
def setUp(self):
Expand Down
30 changes: 20 additions & 10 deletions seed/models/columns.py
Original file line number Diff line number Diff line change
Expand Up @@ -982,22 +982,32 @@ def select_col_obj(column_name, table_name, organization_column):
organization=organization,
table_name__in=[None, ''],
column_name=field['from_field'],
units_pint=field.get('from_units'), # might be None
is_extra_data=False # data from header rows in the files are NEVER extra data
is_extra_data=False # Column objects representing raw/header rows are NEVER extra data
)
except Column.MultipleObjectsReturned:
# We want to avoid the ambiguity of having multiple Column objects for a specific raw column.
# To do that, delete all multiples along with any associated ColumnMapping objects.
_log.debug(
"More than one from_column found for {}.{}".format(field['to_table_name'],
field['to_field']))

# TODO: write something to remove the duplicate columns
from_org_col = Column.objects.filter(organization=organization,
table_name__in=[None, ''],
column_name=field['from_field'],
units_pint=field.get('from_units'),
# might be None
is_extra_data=is_extra_data).first()
_log.debug("Grabbing the first from_column")
all_from_cols = Column.objects.filter(
organization=organization,
table_name__in=[None, ''],
column_name=field['from_field'],
is_extra_data=False
)

ColumnMapping.objects.filter(column_raw__id__in=models.Subquery(all_from_cols.values('id'))).delete()
all_from_cols.delete()

from_org_col = Column.objects.create(
organization=organization,
table_name__in=[None, ''],
column_name=field['from_field'],
is_extra_data=False # Column objects representing raw/header rows are NEVER extra data
)
_log.debug("Creating a new from_column")

new_field['to_column_object'] = select_col_obj(field['to_field'],
field['to_table_name'], to_org_col)
Expand Down

0 comments on commit f79f612

Please sign in to comment.