
Merge pull request #566 from pipermerriam/piper/issue-565-revert-tax-id-splitting

Piper/issue 565 revert tax id splitting
mmclark committed Dec 17, 2015
2 parents 59d3e86 + 216a5e3 commit 9fd1e74
Showing 4 changed files with 59 additions and 138 deletions.
4 changes: 4 additions & 0 deletions .travis.yml
@@ -1,5 +1,9 @@
sudo: false

cache:
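# Cache these directories between Travis builds to speed up dependency installs.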
directories:
- node_modules
- seed/static/vendors/bower_components
- $HOME/.pip-cache/

language: python
92 changes: 31 additions & 61 deletions seed/tasks.py
@@ -9,7 +9,6 @@
import string
import operator
import traceback
import itertools
from _csv import Error
from dateutil import parser
from django.core.mail import send_mail
@@ -476,6 +475,18 @@ def result_fn(model, key, value):
return result_fn


def _normalize_tax_lot_id(value):
return value.strip().lstrip('0').upper().replace(
'-', ''
).replace(
' ', ''
).replace(
'/', ''
).replace(
'\\', ''
)
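
Note: a quick sketch of what the normalization does, on a made-up ID (not part of this diff). Whitespace and leading zeros are stripped, the value is uppercased, and '-', ' ', '/', and '\' characters are removed:

    >>> _normalize_tax_lot_id(' 0042-17/00b ')
    '421700B'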


@shared_task
def map_row_chunk(chunk, file_pk, source_type, prog_key, increment, *args, **kwargs):
"""Does the work of matching a mapping to a source type and saving
@@ -519,9 +530,9 @@ def map_row_chunk(chunk, file_pk, source_type, prog_key, increment, *args, **kwargs):

for row in chunk:
model = mapper.map_row(
row=row,
mapping=mapping,
model_class=BuildingSnapshot,
row,
mapping,
BuildingSnapshot,
cleaner=map_cleaner,
concat=concats,
apply_columns=apply_columns,
@@ -530,70 +541,21 @@ def map_row_chunk(chunk, file_pk, source_type, prog_key, increment, *args, **kwargs):
**kwargs
)

if model.tax_lot_id:
model.tax_lot_id = _normalize_tax_lot_id(model.tax_lot_id)

model.import_file = import_file
model.source_type = save_type
model.clean()
model.super_organization = import_file.import_record.super_organization

# Tax Lot ID can potentially contain multiple delimited values. These
# need to be split and normalized.
tax_lot_ids = _extract_tax_lot_ids(model.tax_lot_id)
model.tax_lot_id, additional_ids = tax_lot_ids[0], tax_lot_ids[1:]
model.save()

# If there is more than a single ID, duplicate the building.
for tax_lot_id in additional_ids:
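# Clearing the primary key makes the following save() INSERT a new copy of the row.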
model.id = None
model.tax_lot_id = tax_lot_id
model.save()

try:
if model:
# Make sure that we've saved all of the extra_data column names
save_column_names(model, mapping=mapping)
except NameError:
# There were no rows in the chunk.
pass

increment_cache(prog_key, increment)


def _normalize_tax_lot_id(value):
return value.strip().lstrip('0').upper().replace(
'-', ''
).replace(
' ', ''
).replace(
'/', ''
).replace(
'\\', ''
)


def split(value, delimiters):
"""
Given a string and an iterable of delimiters, return an iterable
containing all of the substrings that were delimited by the delimiters.
"""
_value = [str(value)]
for delimiter in delimiters:
_value = tuple(itertools.chain.from_iterable((
v.split(delimiter) for v in _value
)))
return _value
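
Note: a sketch of split with made-up input; each delimiter is applied in turn (iterating the string ',;' yields ',' and then ';'):

    >>> split('12,34;56', ',;')
    ('12', '34', '56')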


def _extract_tax_lot_ids(value):
if not value:
return [value]

tax_lot_ids = [
_normalize_tax_lot_id(tax_lot_id)
for tax_lot_id in split(value, ",;")
if tax_lot_id
]
return tax_lot_ids
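
Note: an end-to-end sketch on a made-up delimited value; empty pieces are dropped and each surviving ID is normalized:

    >>> _extract_tax_lot_ids('001-2, 03/4;; 05b')
    ['12', '34', '5B']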


@shared_task
@lock_and_track
def _map_data(file_pk, *args, **kwargs):
@@ -1036,17 +998,25 @@ def get_canonical_id_matches(org_id, pm_id, tax_id, custom_id):
params = []
can_snapshots = get_canonical_snapshots(org_id)
if pm_id:
params.append(Q(pm_property_id=pm_id) | Q(pm_property_id__isnull=True))
params.append(Q(pm_property_id=pm_id))
params.append(Q(tax_lot_id=pm_id))
params.append(Q(custom_id_1=pm_id))
if tax_id:
params.append(Q(tax_lot_id=tax_id) | Q(tax_lot_id__isnull=True))
params.append(Q(pm_property_id=tax_id))
params.append(Q(tax_lot_id=tax_id))
params.append(Q(custom_id_1=tax_id))
if custom_id:
params.append(Q(custom_id_1=custom_id) | Q(custom_id_1__isnull=True))
params.append(Q(pm_property_id=custom_id))
params.append(Q(tax_lot_id=custom_id))
params.append(Q(custom_id_1=custom_id))

if not params:
# Return an empty QuerySet if we don't have any params.
return can_snapshots.none()

canonical_matches = can_snapshots.filter(*params)
canonical_matches = can_snapshots.filter(
reduce(operator.or_, params)
)

return canonical_matches
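
Note: reduce(operator.or_, params) folds the collected Q objects into a single OR expression, so a snapshot matches when any one ID field matches. A minimal sketch with a hypothetical ID ('123' is made up, not from this diff):

    # reduce is a builtin on Python 2; on Python 3 it lives in functools.
    params = [Q(pm_property_id='123'), Q(tax_lot_id='123'), Q(custom_id_1='123')]
    combined = reduce(operator.or_, params)
    # combined == Q(pm_property_id='123') | Q(tax_lot_id='123') | Q(custom_id_1='123')
    matches = can_snapshots.filter(combined)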

48 changes: 24 additions & 24 deletions seed/tests/test_tasks.py
@@ -275,10 +275,10 @@ def test_mapping_w_concat(self):
self.assertEqual(
mapped_bs.address_line_1, u'1600 Pennsylvania Ave. Someplace Nice'
)

def test_is_same_snapshot(self):
"""Test to check if two snapshots are duplicates"""

bs_data = {
'pm_property_id': 1243,
'tax_lot_id': '435/422',
@@ -289,14 +289,14 @@ def test_is_same_snapshot(self):
'city': 'Gotham City',
'postal_code': 8999,
}

s1 = util.make_fake_snapshot(
self.import_file, bs_data, ASSESSED_BS, is_canon=True,
org=self.fake_org
)

self.assertTrue(tasks.is_same_snapshot(s1, s1), "Matching a snapshot to itself should return True")

# Making a different snapshot, now Garfield complex rather than Greenfield complex
bs_data_2 = {
'pm_property_id': 1243,
@@ -308,15 +308,15 @@ def test_is_same_snapshot(self):
'city': 'Gotham City',
'postal_code': 8999,
}

s2 = util.make_fake_snapshot(
self.import_file, bs_data_2, ASSESSED_BS, is_canon=True,
org=self.fake_org
)

self.assertFalse(tasks.is_same_snapshot(s1, s2), "Matching a snapshot to a different snapshot should return False")



def test_match_buildings(self):
"""Good case for testing our matching system."""
@@ -330,7 +330,7 @@ def test_match_buildings(self):
'city': 'Gotham City',
'postal_code': 8999,
}

# Since the change to not match duplicates, there needs to be a second record that isn't exactly the same
# to run this test. In this case address_line_2 now has a value of 'A' rather than ''.
bs_data_2 = {
@@ -378,7 +378,7 @@ def test_match_buildings(self):
AuditLog.objects.first().action_note,
'System matched building ID.'
)


def test_match_duplicate_buildings(self):
"""
@@ -394,12 +394,12 @@ def test_match_duplicate_buildings(self):
'city': 'Gotham City',
'postal_code': "8999",
}

import_file = ImportFile.objects.create(
import_record=self.import_record,
mapping_done=True
)

# Setup mapped PM snapshot.
snapshot = util.make_fake_snapshot(
import_file, bs_data, PORTFOLIO_BS, is_canon=True,
@@ -419,10 +419,10 @@ def test_match_duplicate_buildings(self):

tasks.match_buildings(import_file.pk, self.fake_user.pk)
tasks.match_buildings(new_import_file.pk, self.fake_user.pk)

self.assertEqual(len(BuildingSnapshot.objects.all()), 2)


def test_handle_id_matches_duplicate_data(self):
"""
Test for handle_id_matches behavior when matching duplicate data
@@ -437,7 +437,7 @@ def test_handle_id_matches_duplicate_data(self):
'city': 'Cartoon City',
'postal_code': "54321",
}

# Setup mapped AS snapshot.
snapshot = util.make_fake_snapshot(
self.import_file, bs_data, ASSESSED_BS, is_canon=True,
@@ -450,9 +450,9 @@ def test_handle_id_matches_duplicate_data(self):
import_record=self.import_record,
mapping_done=True
)

tasks.match_buildings(new_import_file.pk, self.fake_user.pk)

duplicate_import_file = ImportFile.objects.create(
import_record=self.import_record,
mapping_done=True
@@ -461,10 +461,10 @@ def test_handle_id_matches_duplicate_data(self):
new_snapshot = util.make_fake_snapshot(
duplicate_import_file, bs_data, PORTFOLIO_BS, org=self.fake_org
)

self.assertRaises(tasks.DuplicateDataError, tasks.handle_id_matches, new_snapshot, duplicate_import_file, self.fake_user.pk)



def test_match_no_matches(self):
"""When a canonical exists, but doesn't match, we create a new one."""
@@ -577,7 +577,7 @@ def test_no_unmatched_buildings(self):
def test_separates_system_and_possible_match_types(self):
"""We save possible matches separately."""
bs1_data = {
'pm_property_id': 1243,
'pm_property_id': 123,
'tax_lot_id': '435/422',
'property_name': 'Greenfield Complex',
'custom_id_1': 1243,
@@ -633,7 +633,7 @@ def test_get_ancestors(self):
'city': 'Gotham City',
'postal_code': 8999,
}

# Since we changed to not match duplicate data, make a second record that matches with something slightly changed.
# In this case an 'A' is appended to the end of address_line_1.
bs_data_2 = {
53 changes: 0 additions & 53 deletions seed/tests/test_tax_lot_id_splitting_and_normalization.py

This file was deleted.
