
Merge pull request #566 from pipermerriam/piper/issue-565-revert-tax-id-splitting

Piper/issue 565 revert tax id splitting
mmclark committed Dec 17, 2015
2 parents 59d3e86 + 216a5e3 commit 9fd1e74
Showing 4 changed files with 59 additions and 138 deletions.
4 changes: 4 additions & 0 deletions .travis.yml
@@ -1,5 +1,9 @@
sudo: false

cache:
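# Cache these directories between Travis builds to speed up dependency installs.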
directories:
- node_modules
- seed/static/vendors/bower_components
- $HOME/.pip-cache/

language: python
92 changes: 31 additions & 61 deletions seed/tasks.py
@@ -9,7 +9,6 @@
import string
import operator
import traceback
import itertools
from _csv import Error
from dateutil import parser
from django.core.mail import send_mail
@@ -476,6 +475,18 @@ def result_fn(model, key, value):
return result_fn


def _normalize_tax_lot_id(value):
return value.strip().lstrip('0').upper().replace(
'-', ''
).replace(
' ', ''
).replace(
'/', ''
).replace(
'\\', ''
)
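
Note: a quick sketch of what the normalization does, on a made-up ID (not part of this diff). Whitespace and leading zeros are stripped, the value is uppercased, and '-', ' ', '/', and '\' characters are removed:

    >>> _normalize_tax_lot_id(' 0042-17/00b ')
    '421700B'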


@shared_task
def map_row_chunk(chunk, file_pk, source_type, prog_key, increment, *args, **kwargs):
"""Does the work of matching a mapping to a source type and saving
@@ -519,9 +530,9 @@ def map_row_chunk(chunk, file_pk, source_type, prog_key, increment, *args, **kwargs):

for row in chunk:
model = mapper.map_row(
row=row,
mapping=mapping,
model_class=BuildingSnapshot,
row,
mapping,
BuildingSnapshot,
cleaner=map_cleaner,
concat=concats,
apply_columns=apply_columns,
@@ -530,70 +541,21 @@ def map_row_chunk(chunk, file_pk, source_type, prog_key, increment, *args, **kwargs):
**kwargs
)

if model.tax_lot_id:
model.tax_lot_id = _normalize_tax_lot_id(model.tax_lot_id)

model.import_file = import_file
model.source_type = save_type
model.clean()
model.super_organization = import_file.import_record.super_organization

# Tax Lot ID can potentially contain multiple delimited values. These
# need to be split and normalized.
tax_lot_ids = _extract_tax_lot_ids(model.tax_lot_id)
model.tax_lot_id, additional_ids = tax_lot_ids[0], tax_lot_ids[1:]
model.save()

# If there is more than a single ID, duplicate the building.
for tax_lot_id in additional_ids:
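# Clearing the primary key makes the following save() INSERT a new copy of the row.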
model.id = None
model.tax_lot_id = tax_lot_id
model.save()

try:
if model:
# Make sure that we've saved all of the extra_data column names
save_column_names(model, mapping=mapping)
except NameError:
# There were no rows in the chunk.
pass

increment_cache(prog_key, increment)


def _normalize_tax_lot_id(value):
return value.strip().lstrip('0').upper().replace(
'-', ''
).replace(
' ', ''
).replace(
'/', ''
).replace(
'\\', ''
)


def split(value, delimiters):
"""
Given a string and an iterable of delimiters, return an iterable
containing all of the substrings that were delimited by the delimiters.
"""
_value = [str(value)]
for delimiter in delimiters:
_value = tuple(itertools.chain.from_iterable((
v.split(delimiter) for v in _value
)))
return _value
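
Note: a sketch of split with made-up input; each delimiter is applied in turn (iterating the string ',;' yields ',' and then ';'):

    >>> split('12,34;56', ',;')
    ('12', '34', '56')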


def _extract_tax_lot_ids(value):
if not value:
return [value]

tax_lot_ids = [
_normalize_tax_lot_id(tax_lot_id)
for tax_lot_id in split(value, ",;")
if tax_lot_id
]
return tax_lot_ids
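
Note: an end-to-end sketch on a made-up delimited value; empty pieces are dropped and each surviving ID is normalized:

    >>> _extract_tax_lot_ids('001-2, 03/4;; 05b')
    ['12', '34', '5B']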


@shared_task
@lock_and_track
def _map_data(file_pk, *args, **kwargs):
@@ -1036,17 +998,25 @@ def get_canonical_id_matches(org_id, pm_id, tax_id, custom_id):
params = []
can_snapshots = get_canonical_snapshots(org_id)
if pm_id:
params.append(Q(pm_property_id=pm_id) | Q(pm_property_id__isnull=True))
params.append(Q(pm_property_id=pm_id))
params.append(Q(tax_lot_id=pm_id))
params.append(Q(custom_id_1=pm_id))
if tax_id:
params.append(Q(tax_lot_id=tax_id) | Q(tax_lot_id__isnull=True))
params.append(Q(pm_property_id=tax_id))
params.append(Q(tax_lot_id=tax_id))
params.append(Q(custom_id_1=tax_id))
if custom_id:
params.append(Q(custom_id_1=custom_id) | Q(custom_id_1__isnull=True))
params.append(Q(pm_property_id=custom_id))
params.append(Q(tax_lot_id=custom_id))
params.append(Q(custom_id_1=custom_id))

if not params:
# Return an empty QuerySet if we don't have any params.
return can_snapshots.none()

canonical_matches = can_snapshots.filter(*params)
canonical_matches = can_snapshots.filter(
reduce(operator.or_, params)
)

return canonical_matches
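
Note: reduce(operator.or_, params) folds the collected Q objects into a single OR expression, so a snapshot matches when any one ID field matches. A minimal sketch with a hypothetical ID ('123' is made up, not from this diff):

    # reduce is a builtin on Python 2; on Python 3 it lives in functools.
    params = [Q(pm_property_id='123'), Q(tax_lot_id='123'), Q(custom_id_1='123')]
    combined = reduce(operator.or_, params)
    # combined == Q(pm_property_id='123') | Q(tax_lot_id='123') | Q(custom_id_1='123')
    matches = can_snapshots.filter(combined)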

48 changes: 24 additions & 24 deletions seed/tests/test_tasks.py
@@ -275,10 +275,10 @@ def test_mapping_w_concat(self):
self.assertEqual(
mapped_bs.address_line_1, u'1600 Pennsylvania Ave. Someplace Nice'
)

def test_is_same_snapshot(self):
"""Test to check if two snapshots are duplicates"""

bs_data = {
'pm_property_id': 1243,
'tax_lot_id': '435/422',
@@ -289,14 +289,14 @@ def test_is_same_snapshot(self):
'city': 'Gotham City',
'postal_code': 8999,
}

s1 = util.make_fake_snapshot(
self.import_file, bs_data, ASSESSED_BS, is_canon=True,
org=self.fake_org
)

self.assertTrue(tasks.is_same_snapshot(s1, s1), "Matching a snapshot to itself should return True")

# Making a different snapshot, now Garfield complex rather than Greenfield complex
bs_data_2 = {
'pm_property_id': 1243,
@@ -308,15 +308,15 @@ def test_is_same_snapshot(self):
'city': 'Gotham City',
'postal_code': 8999,
}

s2 = util.make_fake_snapshot(
self.import_file, bs_data_2, ASSESSED_BS, is_canon=True,
org=self.fake_org
)

self.assertFalse(tasks.is_same_snapshot(s1, s2), "Matching a snapshot to a different snapshot should return False")



def test_match_buildings(self):
"""Good case for testing our matching system."""
@@ -330,7 +330,7 @@ def test_match_buildings(self):
'city': 'Gotham City',
'postal_code': 8999,
}

# Since the change to not match duplicates, there needs to be a second record that isn't exactly the same
# to run this test. In this case address_line_2 now has a value of 'A' rather than ''.
bs_data_2 = {
@@ -378,7 +378,7 @@ def test_match_buildings(self):
AuditLog.objects.first().action_note,
'System matched building ID.'
)


def test_match_duplicate_buildings(self):
"""
@@ -394,12 +394,12 @@ def test_match_duplicate_buildings(self):
'city': 'Gotham City',
'postal_code': "8999",
}

import_file = ImportFile.objects.create(
import_record=self.import_record,
mapping_done=True
)

# Setup mapped PM snapshot.
snapshot = util.make_fake_snapshot(
import_file, bs_data, PORTFOLIO_BS, is_canon=True,
@@ -419,10 +419,10 @@ def test_match_duplicate_buildings(self):

tasks.match_buildings(import_file.pk, self.fake_user.pk)
tasks.match_buildings(new_import_file.pk, self.fake_user.pk)

self.assertEqual(len(BuildingSnapshot.objects.all()), 2)


def test_handle_id_matches_duplicate_data(self):
"""
Test for handle_id_matches behavior when matching duplicate data
@@ -437,7 +437,7 @@ def test_handle_id_matches_duplicate_data(self):
'city': 'Cartoon City',
'postal_code': "54321",
}

# Setup mapped AS snapshot.
snapshot = util.make_fake_snapshot(
self.import_file, bs_data, ASSESSED_BS, is_canon=True,
@@ -450,9 +450,9 @@ def test_handle_id_matches_duplicate_data(self):
import_record=self.import_record,
mapping_done=True
)

tasks.match_buildings(new_import_file.pk, self.fake_user.pk)

duplicate_import_file = ImportFile.objects.create(
import_record=self.import_record,
mapping_done=True
@@ -461,10 +461,10 @@ def test_handle_id_matches_duplicate_data(self):
new_snapshot = util.make_fake_snapshot(
duplicate_import_file, bs_data, PORTFOLIO_BS, org=self.fake_org
)

self.assertRaises(tasks.DuplicateDataError, tasks.handle_id_matches, new_snapshot, duplicate_import_file, self.fake_user.pk)



def test_match_no_matches(self):
"""When a canonical exists, but doesn't match, we create a new one."""
@@ -577,7 +577,7 @@ def test_no_unmatched_buildings(self):
def test_separates_system_and_possible_match_types(self):
"""We save possible matches separately."""
bs1_data = {
'pm_property_id': 1243,
'pm_property_id': 123,
'tax_lot_id': '435/422',
'property_name': 'Greenfield Complex',
'custom_id_1': 1243,
@@ -633,7 +633,7 @@ def test_get_ancestors(self):
'city': 'Gotham City',
'postal_code': 8999,
}

# Since we changed to not match duplicate data, make a second record that matches with something slightly changed.
# In this case an 'A' is appended to the end of address_line_1.
bs_data_2 = {
53 changes: 0 additions & 53 deletions seed/tests/test_tax_lot_id_splitting_and_normalization.py

This file was deleted.
