In [None]:
#From https://github.com/gabrielo/Allegheny-County-Property-Assessments

In [562]:
# assessments/cd/AC Property Assessments_10012017.xls (which is actually TDF) from Amy Gottsegen and Randy Sargent buying a CD from the county assessors office
# assessments/Allegheny_County_Parcel_Boundaries.geojson from https://data.wprdc.org/dataset/allegheny-county-parcel-boundaries
import array, csv, datetime, json, math, numpy, os, random, re 
from dateutil.parser import parse
from shapely.geometry import mapping, shape
from PIL import Image
import matplotlib.colors as colors
import matplotlib.pyplot as plt
import pandas,numbers
from operator import itemgetter, attrgetter
import string
import calendar,time
from IPython.core.display import HTML

# Use geopandas.  Boilerplate from https://docs.google.com/document/d/1utZuLHcKQEZNXTQLOysTNCxTHrqxczAUymmtplpn27Q/edit#heading=h.f50xoxwmcir
import pandas as pd
import geopandas as gpd
from geopandas import GeoSeries, GeoDataFrame
%matplotlib inline

pd.options.display.max_colwidth = 300
pd.options.display.max_rows = 100
pd.set_option('display.max_columns', 500)

# Built-in time handling fails for times before 1900.  Use arrow instead.  
# See https://arrow.readthedocs.io/en/latest/ for info
import arrow

def LonLatToPixelXY(lonlat):
    (lon, lat) = lonlat
    x = (lon + 180.0) * 256.0 / 360.0
    y = 128.0 - math.log(math.tan((lat + 90.0) * math.pi / 360.0)) * 128.0 / math.pi
    return [x, y]

# This does the same as above, but takes a GeoJSON point (which is what centroid returns)
def PointToPixelXY(point):
    lon=point.x
    lat=point.y
    x = (lon + 180.0) * 256.0 / 360.0
    y = 128.0 - math.log(math.tan((lat + 90.0) * math.pi / 360.0)) * 128.0 / math.pi
    return [x, y]

def GetCentroid(geometry):
    s = shape(geometry)
    return (s.centroid.x, s.centroid.y)   

def GetEpoch(date):
    return (date - datetime.datetime(1970, 1, 1)).total_seconds()

def HexToRgb(hex_string):
    rgb = colors.hex2color(hex_string)
    r,g,b = tuple([int(255*x) for x in rgb])
    return (r,g,b)

In [525]:
# Uses Google geocoding API to geocode an address
# Assumes key is in google_api_key.txt

import sqlite3
import urllib

def geocode_address(address):
    try:
        geocode_address.conn
    except:
        geocode_address.conn = sqlite3.connect('geocoding_cache')
        geocode_address.cur = geocode_address.conn.cursor()
        geocode_address.cur.execute(('CREATE TABLE IF NOT EXISTS kvs'
                                     ' (key PRIMARY KEY, value)'
                                     ' WITHOUT ROWID;'))
        geocode_address.conn.commit()

    geocode_address.cur.execute('SELECT value FROM kvs WHERE key=?', (address,))
    rows = geocode_address.cur.fetchall()
    if rows:
        return json.loads(rows[0][0])
        
    api_key = open('google-api-key-do-not-commit.txt').read()
    payload = {'address':address, 'key':api_key}
    result = json.load(urllib2.urlopen('https://maps.googleapis.com/maps/api/geocode/json?%s' % urllib.urlencode(payload)))
    
    geocode_address.cur.execute(('INSERT OR REPLACE INTO kvs (key, value)'
                                 ' VALUES (?, ?);'),
                                 (address, json.dumps(result)))
    geocode_address.conn.commit()
    
    return result
    

# Create structures to populate with sale and ownership information

In [2]:
# Read in 2017 data to harvest list of residential parcel IDs
# We need to do this before loading the older ADB files because they're missing some of the columns we need.

In [3]:
# For each residential PARID, we want to generate a sorted list of who bought it when.
# For each owner name, we want to generate a sorted list of PARIDs they bought and when they bought it.
# For each change address, we want to generate a sorted list of PARIDs they bought and when they bought it.
# Each of the following is a map indexed on PARID, owner name, or change address.  

# The map indexed on PARID builds up a history of a given property.
#   Each record contains a a list of maps containing a list of date, event_type, owner name, and change address
#   event_type is either PURCHASE or FORECLOSURE

# Each map indexed on owner name or change address builds a history of a given owner/change address.
#   Each record contains a list of maps containing date, event_type, and PARID
#   event_type is either PURCHASE or SALE


In [490]:
# To start with, clear the maps
property_map = {}
owner_map = {}
changeaddr_map = {}

# Also create a map to cache lookups for owner and changeaddress PARID sets at various times
# to speed up runtime
owned_parids_cache={}

# TODO: Create a map of owner and changeaddress to inferred owner type
# 'GOVERNMENT'
# 'INVESTOR' 

In [602]:
# Create functions to update the above maps with info from a given row of an assessment spreadsheet/database.  
# The first argument should be a pandas table, the second an index into the table.
# We expect the index to be PARID
# We expect the following columns to exist and be valid:
#   PROPERTYOWNER
#   CHANGENOTICEADDRESS1 - CHANGENOTICEADDRESS4
#   SALEDATE


# In 2017 the date format is MM-DD-YYYY
# In 2009, an example is 09/16/96 00:00:00

# Setup regular expression for parsing various sorts of dates
re_MDY = re.compile('(\d\d)-(\d\d)-(\d\d\d\d)')
re_YMD = re.compile('(\d\d\d\d)-(\d\d)-(\d\d)')
re_MDY_HMS = re.compile('(\d+)/(\d+)/(\d+) (\d+):(\d+):(\d+)')

# Setup this year to use for dealing with 2-digit dates
this_year = datetime.datetime.now().year
this_date = arrow.now().format('YYYY-MM-DD')

# Utility function to normalize SALEDATE format
# SALEDATE field is in '%m-%d-%Y' format (ex 10-26-2012) when present
def SaledateToEpoch(datestr):
    if(re_YMD.match(datestr)!=None):
        return calendar.timegm(time.strptime(datestr, '%Y-%m-%d'))
    elif(re_MDY.match(datestr)!=None):
        return calendar.timegm(time.strptime(datestr, '%m-%d-%Y'))

# Built-in time handling fails for times before 1900.  Use arrow instead.  
# See https://arrow.readthedocs.io/en/latest/ for info
def SaledateToYMD(datestr):
    # Check which format and parse accordingly
    if(re_YMD.match(datestr)!=None):
        # Already like we want it
        return datestr
    if(re_MDY.match(datestr)!=None):
        return arrow.get(datestr, 'MM-DD-YYYY').format('YYYY-MM-DD')
    elif(re_MDY_HMS.match(datestr)!=None):
        # Some of the 2-digit years are farther back than python's default pivot year of 1969
        # If the year we parse is > this_year, set it back by 100 years
        ad=arrow.get(datestr, 'MM/DD/YY HH:mm:ss')
        if(ad.year>this_year):
            ad = ad.replace(year=(ad.year-100))
        return ad.format('YYYY-MM-DD')
    else:
        raise Exception('Unrecognized saledate format %r' % (datestr))

# find_event_by_date takes a list of maps containng 'date' fields and looks for one the the last one that's before
# search_date or the first which is simultaneous (in case of multiple simultaneous records).  
# Returns -1 if the search_date is earlier than any existing item.  It assumes that each map in the event list 
# contains a 'date' field and that they're in sorted order from earliest to latest.
def find_event_index_by_date(event_list, search_date):
    match_index=-1
    for i in range(0,len(event_list)):
        if(event_list[i]['date']<search_date):
            # This happened before the date we're looking for, update match_index
            match_index=i
        elif(event_list[i]['date']==search_date):
            # This is the first exact match, return i
            return(i)
        else:
            # This happened after the date we're looking for, return match_index
            return match_index
    # If we get to here, there is no event after this date.  Return match_index
    return match_index

# This takes an existing event list and either inserts a given event_map and returns True, or 
# decides that the event is a duplicate and returns false
def insert_event(event_list, event_map):
    # Find the index of the element already in the list which precedes the date in event_map.  
    # If not found, match_index is -1, so we should insert it at the beginning of event_list and return True
    # If found, check if the record matches what we already have (TODO: see if we need something
    # more sophisticated than ==).  If it matches what we already have, return False and do not modify the list.
    # If it doesn't match what we have insert it.  Note that list.insert takes the arg of the element to insert
    # before, so we use match_index+1
    match_index = find_event_index_by_date(event_list, event_map['date'])
    if(match_index==-1):
        #print "No match, insert at beginning"
        # No match.  Insert at the beginning, return True
        event_list.insert(0, event_map)
        return True
    # We've got a matching or preceeding event, compare it with this and any other potentially 
    # simultaneous events
    #print "Time match at %d" % (match_index)
    for i in range(match_index, len(event_list)):
        if(event_map == event_list[i]):
            # This is a duplicate, ignore it
            #print "Found duplicate match at %d (%r)" % (i,event_list[i])
            return False
        elif(event_map['date']<event_list[i]['date']):
            # We're past any simultaneous events that might have matched, continue
            #print "Hit a later item at %d (%r)" % (i,match_index)
            break
        #print "Didn't match at %d (%r), keep trying" % (i,event_list[i])
        match_index=i
    # We've got a preceeding or simultaneous event that doesnt match.  Insert this 
    # new item just past match_index
    #print "Inserting after %d" % (match_index)
    event_list.insert(match_index+1, event_map)
    return True
        
# Clean up multiple whitespace to a single space
def cleanup_str(str_in):
    str_out = re.sub( '\s+', ' ', str_in).strip()
    return(str_out)

# Try to normalize changeaddr to be more likely to match
re_zp4 = re.compile('(.+) (\d\d\d\d\d)-(\d\d\d\d)?')
re_cscz = re.compile('(.+) (\w\w), (\d\d\d\d\d)')
# Check for change address with no alphanumerics in it
re_noalnum = re.compile('^([^\w]+)$')
def get_owner_changeaddr(apd, i):
    c_list = []
    c1=apd['CHANGENOTICEADDRESS1'].iloc[i]
    c2=apd['CHANGENOTICEADDRESS2'].iloc[i]
    c3=apd['CHANGENOTICEADDRESS3'].iloc[i]
    c4=apd['CHANGENOTICEADDRESS4'].iloc[i]
    if(not (pandas.isnull(c1) or c1=='')):
        c = string.strip(str(c1))
        if(c!=''):
            c_list.append(c)
    if(not (pandas.isnull(c2) or c2=='')):
        c = string.strip(str(c2))
        if(c!=''):
            c_list.append(c)
    if(not (pandas.isnull(c3) or c3=='')):
        c = string.strip(str(c3))
        if(c!=''):
            c_list.append(c)
    if(not (pandas.isnull(c4) or c4=='')):
        c = string.strip(str(c4))
        if(c!=''):
            c_list.append(c)
    ret_str = cleanup_str(", ".join(c_list))
    
    # Check to make sure it's got at least one alphanumeric
    m = re_noalnum.match(ret_str)
    if(m!=None):
        print "%d (%s): Skipping invalid changeaddr %r" % (i, apd.index[i],ret_str)
        return ''
    
    # Check for zip+4, and if so cut it down to regular zip.  2009 didn't have zip+4
    m = re_zp4.match(ret_str)
    if(m!=None):
        #print "Getting rid of zip+4 %r" %(ret_str)
        ret_str = "%s %s" % (m.group(1),m.group(2))
        #print "  Result = %r" %(ret_str)
        
    # Check for city state, zip.  If so, replace with city, state zip
    m = re_cscz.match(ret_str)
    if(m!=None):
        #print "Getting rid of city state, zip %r" %(ret_str)
        ret_str = "%s, %s %s" % (m.group(1),m.group(2),m.group(3))
        #print "  Result = %r" %(ret_str)
        
    # Check for PGH, change to PITTSBURGH
    ret_str = re.sub( ' PGH,', ' PITTSBURGH,', ret_str)
    
    return(ret_str)

# First arg is a pandas table, second is an index for what to process.
# This updates property_map, owner_map, and changeaddr_map appropriately
# Returns True on success, False on failure
def process_assessment_record(apd, i):
    # Extract PARID, saledate, owner_name, and changeaddr from the record
    par_id = apd.index[i]
    
    # Process saledate
    saledate_raw = apd['SALEDATE'].iloc[i]
    # If saledate is valid, keep it.  Otherwise return False
    if(saledate_raw == '' or (isinstance(saledate_raw, numbers.Number) and math.isnan(saledate_raw))):
        #print "%s: Missing saledate" % (par_id)
        return False
    
    # Convert saledatet YMD so it sorts properly 
    saledate = SaledateToYMD(saledate_raw)
    
    owner_name = cleanup_str(apd['PROPERTYOWNER'].iloc[i])
    owner_changeaddr = get_owner_changeaddr(apd,i)
        
    if(owner_changeaddr == ''):
        #print "%s: Missing owner_changeaddr" % (par_id)
        # skip this one
        return False
    
    # We have valid par_id, owner_name, and owner_changeaddr.  Update the maps.
    property_event={'date': saledate, 'event_type':'PURCHASE', 'ownername':owner_name, 'changeaddr': owner_changeaddr}
    owner_event={'date':saledate, 'event_type':'PURCHASE', 'parid':par_id}

    # Check for nice-to-have columns, if valid value, add to property_event
    try:
        saledesc = apd['SALEDESC'].iloc[i]
        if(saledesc != '' and not (isinstance(saledesc, numbers.Number) and math.isnan(saledate))):
            property_event['saledesc']=saledesc
    except:
        pass
 
    try:
        homesteadflag = apd['HOMESTEADFLAG'].iloc[i]
        if(homesteadflag != '' and not (isinstance(homesteadflag, numbers.Number) and math.isnan(homesteadflag))):
            property_event['homesteadflag']=homesteadflag
    except:
        pass

    try:
        ownerdesc = apd['OWNERDESC'].iloc[i]
        if(ownerdesc != '' and not (isinstance(ownerdesc, numbers.Number) and math.isnan(saledate))):
            property_event['ownerdesc']=ownerdesc
    except:
        pass

    #print "-------\nProcessing %s: %r %r %r" % (par_id, saledate, owner_name, owner_changeaddr)

    if(not property_map.has_key(par_id)):
        # We don't have an entry for this property yet, create an empty list
        property_map[par_id]=[]
    
    # Insert the property event
    pret = insert_event(property_map[par_id], property_event)
 
    if(not owner_map.has_key(owner_name)):
        # We don't have an entry for this owner name yet, create an empty list
        owner_map[owner_name]=[]
    
    # Insert the owner event to owner map
    oret = insert_event(owner_map[owner_name], owner_event)
   
    if(not changeaddr_map.has_key(owner_changeaddr)):
        # We don't have an entry for this changeaddr yet, create an empty list
        changeaddr_map[owner_changeaddr]=[]
    
    # Insert the owner event
    cret = insert_event(changeaddr_map[owner_changeaddr], owner_event)
    
    # Print diagnostics
    #print "%s: %r %r %r = %r %r %r" % (par_id, saledate, owner_name, owner_changeaddr, pret, oret, cret)
    return(pret or oret or cret)

# Clear out the cache that stores what sets of properties are associated with each owner changeaddr at a given time.  
# We need to do this whenever we process in a new dataset.
def reset_cache():
    owned_parids_cache={}
    
# For a common parcel id, check if two events are both for the same transaction.  
# This happens when the dates are the same and/or when the set of owner names overlap
def is_same_transaction(e1,e2):
    if(e1['event_type']!='PURCHASE' or e2['event_type']!='PURCHASE'):
        return False
    if(e1['date']==e2['date']):
        return True
    if((e1['ownername'] in e2['ownername']) or (e2['ownername'] in e1['ownername'])):
        return True
    return False
    
def register_sale_dates(parid):
    prop_ev_list = property_map[parid]
    if(len(prop_ev_list)<2):
        # No sales have happened that we know about, only purchases
        return

    # Keep a list of the equivalent transactions so any subsequent sale can 
    # be applied to all.  Seed it with the 0'th item in prop_ev_list
    equiv_transactions=[prop_ev_list[0]]
    #print "Starting with %r" %(equiv_transactions)
    for i in range(1,len(prop_ev_list)):
        # See if this new transaction is equivalent to the last item in the current list
        if(is_same_transaction(prop_ev_list[i],equiv_transactions[-1])):
            equiv_transactions.append(prop_ev_list[i])
            #print "  %d still equivalent %r" %(i, equiv_transactions)
        else:
            #print "  %d not equivalent %r, record sale date" %(i,prop_ev_list[i])
            sell_event={'date':prop_ev_list[i]['date'],'event_type':'SALE','parid':parid}
            
            for j in range(0,len(equiv_transactions)):
                    dup_o = insert_event(owner_map[equiv_transactions[j]['ownername']], sell_event)
                    dup_c = insert_event(changeaddr_map[equiv_transactions[j]['changeaddr']], sell_event)
                    #print "    %d %r: %r %r" % (j, equiv_transactions[j], dup_o, dup_c)
            # Reset equiv_transactions and keep going if any more transactions are left
            equiv_transactions=[prop_ev_list[i]]
        
# This needs to be called once after imports are complete and before get_owned_parids
def register_all_sales():
    # Reset the owner and changeaddr caches since this will change the results
    reset_cache()
    
    # Add SALE records each time a property goes into different hands
    for parid in property_map.keys():
        register_sale_dates(parid)
   
def get_next_sale_date(parid, start_date):
    event_list = property_map[parid]
    # Get the index of the first transaction at or before this date
    start_index=find_event_index_by_date(event_list, start_date)

    # Starting with the transaction at or before the initial start date, 
    # go past all the equivalent transactions to the next one that's a real transfer.
    # If no future transfers, then return None
    equiv_transactions=[event_list[start_index]]

    for i in range(start_index,len(event_list)):
        # See if this new transaction is equivalent to the last item in the current list
        if(is_same_transaction(event_list[i],equiv_transactions[-1])):
            equiv_transactions.append(event_list[i])
            #print "  %d still equivalent %r" %(i, equiv_transactions)
        else:
            # This is a real tranfer, return the event date
            return event_list[i]['date']
    # If we get to here, there is no event after this date.  Return None
    return None

    
def get_owned_parids(name, event_list, eval_date):
    # Check if this name:date pair is in the cache.  If so, return the 
    # stored map of {'parids':owned_parids,'next_date':next_date}
    name_date_str=u"%s:%s"%(name,eval_date)
    if(name_date_str in owned_parids_cache):
        return(owned_parids_cache[name_date_str])

    # Return value wasn't in the cache, compute it
    
    # Use a set for accumulating parids since we don't want duplicates
    owned_parids=set()
    
    # In case eval_date is at or after the last date in the list, 
    # default next_date to today
    next_date = this_date
    for i in range(0,len(event_list)):
        if(event_list[i]['date']<= eval_date):
            # This event happened on or before the date we're asking about, process the event
            if(event_list[i]['event_type']=='PURCHASE'):
                # Add purchased property to owned_parids
                owned_parids.add(event_list[i]['parid'])
            else:
                # This must be a sale, remove it from owned_parids
                # Note that this can potentially fail in the case where 
                # two subsequent sales of the same property involve the same 
                # change address if the transactions happen as (add, 
                # (add, remove) from the same day, remove).  This actually 
                # happens with PARID == '0104R00158000000'.  So, we put this
                # in a try/catch block.  Bleh...
                try:
                    owned_parids.remove(event_list[i]['parid'])
                except:
                    pass
        else:
            # This and subsequent events happened after the date we're looking for.
            # Set next_date and return owned_parids
            next_date = event_list[i]['date']
            break

    ret_val = {'parids':owned_parids,'next_date':next_date}
    # Store in cache for next time
    owned_parids_cache[name_date_str]=ret_val
    return ret_val

# For a given parid and eval_date, get a map back containing:
#   'parids' = the set of parids owned by the same owners/changeaddrs as of eval_date, and
#   'next_date' = the next date that something changes within that set of owners/changeaddrs
# When the last of the related events is complete, 'next_date'==this_date
def get_related_parids(parid, eval_date):
    prop_ev_list = property_map[parid]
    parid_set = set()
    start_date = eval_date
    next_date = this_date

    # Keep a list of the equivalent transactions so we can accumulate the 
    # parids for all.  Seed it with the 0'th item in prop_ev_list
    equiv_transactions=[prop_ev_list[0]]
    # If date of the first event is later than eval_date, set eval_date to 
    # the first event.  We don't want to iterate over the earlier purchases by
    # the original owners we know about.  Just start at the first purchase
    # of this parcel we know about.
    if(eval_date<prop_ev_list[0]['date']):
        eval_date=prop_ev_list[0]['date']
        start_date=eval_date
    #print "%s: starting with %r" %(parid, equiv_transactions)
    for i in range(1,len(prop_ev_list)):
        # See if this new transaction is equivalent to the last item in the current list
        if(is_same_transaction(prop_ev_list[i],equiv_transactions[-1]) and 
           prop_ev_list[i]['date']<=eval_date):
            equiv_transactions.append(prop_ev_list[i])
            #print "  %d still equivalent %r" %(i, equiv_transactions)
        elif(prop_ev_list[i]['date']>eval_date):
            #print "  %d after timespan (%r), process current set" %(i,prop_ev_list[i])
            break
        else:
            # We haven't hit the end of the timespan yet, but the ownership has changed.
            # Reset equiv_transactions starting from the current event
            equiv_transactions=[prop_ev_list[i]]
            
    # We've got all the equivalent transactions, process them
    changeaddr_set=set()
    owner_set=set()
    for j in range(0,len(equiv_transactions)):
        owner_set.add(equiv_transactions[j]['ownername'])
        changeaddr_set.add(equiv_transactions[j]['changeaddr'])

    #print "    Owner set= %r\n    Changeaddr set= %r" % (owner_set, changeaddr_set)
    
    for owner in owner_set:
        ret_map=get_owned_parids(owner, owner_map[owner],eval_date)
        parid_set = parid_set.union(ret_map['parids'])
        if(ret_map['next_date']<next_date):
            next_date = ret_map['next_date']
    for changeaddr in changeaddr_set:
        ret_map=get_owned_parids(changeaddr, changeaddr_map[changeaddr],eval_date)
        parid_set = parid_set.union(ret_map['parids'])
        if(ret_map['next_date']<next_date):
            next_date = ret_map['next_date']

    ret_val={"parids":parid_set, "parcount":len(parid_set), "start_date":start_date, "next_date":next_date}
    #print "    Returning %r" % (ret_val)
    return ret_val

# Call this once for each assessment spreadsheet/database file.  It adds PURCHASE records if they don't already exist.
# It's safe to call multiple times on the same input file if necessary, but takes a lot of processing time
def process_all_assessment_records(apd):
    start=arrow.now()
    addcnt=0
    chunk_start_time=arrow.now()
    chunk_size=10000

    # Clear the owner and changeaddr caches as this will cause changes in the ownership profiles
    reset_cache()
    
    # Iterate over each record in the data frame and add to the maps
    for i in range(0,len(apd)):
        if(process_assessment_record(apd,i)):
            addcnt=addcnt+1
        if((i%chunk_size)==0 and i>0):
            print "%d-%d: %d added, %s time elapsed" %(i-(chunk_size-1), i, addcnt, arrow.now()-chunk_start_time)
            #break
            addcnt=0
            chunk_start_time=arrow.now()
    end=arrow.now()
    print "Processing took %s" % (str(end-start))

In [587]:
this_date = arrow.now().format('YYYY-MM-DD')

# Read in 2017 data and generate series dwelling_col which is True for residental PARIDs

In [6]:
# Read in assessments data as TDF, set index to PARID
path = "assessments/cd/AC Property Assessments_10012017.xls"
raw_pd_2017 = pd.read_csv(path,sep='\t', index_col='PARID',dtype={'PROPERTYHOUSENUM':numpy.str,'CHANGENOTICEADDRESS4':numpy.str, 'PROPERTYZIP':numpy.str})

  interactivity=interactivity, compiler=compiler, result=result)


In [262]:
# Consider a parcel a dwelling if either residential or apartments (usedesc includes APART)
# dwelling_col is indexed on PARID, has TRUE for dwellings, and FALSE for non-dwellings
vacant_col = raw_pd_2017.USEDESC.str.contains('VACANT')

# Tried to filter out vacant lots, but it led to less dramatic results
#dwelling_col = ((raw_pd_2017.CLASSDESC == 'RESIDENTIAL') | (raw_pd_2017.USEDESC.str.contains('APART'))) & (~vacant_col.astype('bool'))
dwelling_col = ((raw_pd_2017.CLASSDESC == 'RESIDENTIAL') | (raw_pd_2017.USEDESC.str.contains('APART')))
dwelling_col.name='is_dwelling'

In [263]:
# Only keep the subset that are dwellings
apd_2017 = pd.concat([raw_pd_2017, dwelling_col], axis=1)[dwelling_col]
len(apd_2017)

519728

In [422]:
# Process 15213 entries from apd_2017 into the property, owner, and changeaddr maps
#process_all_assessment_records(apd_2017[apd_2017.PROPERTYZIP=='15213'])

Processing took 0:00:02.110833


In [540]:
# Process entries from apd_2017 into the property, owner, and changeaddr maps
process_all_assessment_records(apd_2017)

1-10000: 0 added, 0:00:05.059451 time elapsed
10001-20000: 0 added, 0:00:04.587906 time elapsed
20001-30000: 0 added, 0:00:06.278301 time elapsed
30001-40000: 0 added, 0:00:04.796491 time elapsed
40001-50000: 0 added, 0:00:04.555983 time elapsed
50001-60000: 0 added, 0:00:05.036547 time elapsed
60001-70000: 0 added, 0:00:04.728418 time elapsed
70001-80000: 0 added, 0:00:05.532626 time elapsed
80001-90000: 0 added, 0:00:04.883779 time elapsed
90001-100000: 0 added, 0:00:05.080045 time elapsed
100001-110000: 0 added, 0:00:04.929137 time elapsed
110001-120000: 0 added, 0:00:04.312086 time elapsed
120001-130000: 0 added, 0:00:04.834830 time elapsed
130001-140000: 0 added, 0:00:04.253594 time elapsed
140001-150000: 0 added, 0:00:04.839972 time elapsed
150001-160000: 0 added, 0:00:05.017164 time elapsed
160001-170000: 0 added, 0:00:04.910971 time elapsed
170001-180000: 0 added, 0:00:05.015537 time elapsed
180001-190000: 0 added, 0:00:04.886419 time elapsed
190001-200000: 0 added, 0:00:04.482

# Deal with older versions of the assessment database from the ACCDB files from Bob Gradeck

In [485]:
# From https://stackoverflow.com/questions/17123550/extract-and-sort-data-from-mdb-file-using-mdbtools-in-python?utm_medium=organic&utm_source=google_rich_qa&utm_campaign=google_rich_qa
import sys, subprocess, os
from io import StringIO
import pandas as pd
VERBOSE = True
def mdb_to_pandas(database_path):
    subprocess.call(["mdb-schema", database_path, "mysql"])
    # Get the list of table names with "mdb-tables"
    table_names = subprocess.Popen(["mdb-tables", "-1", database_path],
                                   stdout=subprocess.PIPE).communicate()[0]
    tables = table_names.splitlines()
    sys.stdout.flush()
    # Dump each table as a stringio using "mdb-export",
    out_tables = {}
    for rtable in tables:
        table = rtable.decode()
        if VERBOSE: print('running table:',table)
        if table != '':
            if VERBOSE: print("Dumping " + table)
            contents = subprocess.Popen(["mdb-export", database_path, table],
                                        stdout=subprocess.PIPE).communicate()[0]
            temp_io = StringIO(contents.decode('utf8'))
            print(table, temp_io)
            out_tables[table] = pd.read_csv(temp_io, encoding='utf-8')
    return out_tables
pd.options.display.max_colwidth = 300
pd.set_option('display.max_columns', 500)


In [343]:
# ACCDB files are from Bob Gradeck
# 2009 is missing YEARBLT, CLASSDESC, and CLASS

# A number of files after 2009 seem to have a common set of column names
post_2009_colmap = {'mapblolot':'PARID',
                    'FairMarketTotal':'FAIRMARKETTOTAL',
                    'UseDesc':'USEDESC',
                    'PropertyZip':'PROPERTYZIP',
                    'PropertyOwner2':'PROPERTYOWNER',
                    'ChangeNoticeFullAddress1':'CHANGENOTICEADDRESS1',
                    'ChangeNoticeFullAddress2':'CHANGENOTICEADDRESS2',
                    'ChangeNoticeFullAddress3':'CHANGENOTICEADDRESS3',
                    'ChangeNoticeFullAddress4':'CHANGENOTICEADDRESS4',
                    'HomesteadFlag':'HOMESTEADFLAG',
                    'OwnerDesc':'OWNERDESC',
                    'SaleDate':'SALEDATE',
                    'SalePrice':'SALEPRICE',
                    'SaleCode': 'SALECODE',
                    'SaleDesc': 'SALEDESC'
                    }
accdb_info = {'2009':{'fname':'assessments/gradeck/AssessmentSep09.mdb',
                      'tname':'pncis',
                     'col_remap': {'PIN':'PARID',
                                    'FAIRMARK_2':'FAIRMARKETTOTAL',
                                    'USEDESC':'USEDESC',
                                    'PROPERTYZI':'PROPERTYZIP',
                                    'PROPERTYOW':'PROPERTYOWNER',
                                    'CHANGENOTI':'CHANGENOTICEADDRESS1',
                                    'CHANGENO_1':'CHANGENOTICEADDRESS2',
                                    'CHANGENO_2':'CHANGENOTICEADDRESS3',
                                    'CHANGENO_3':'CHANGENOTICEADDRESS4',
                                    'HOMESTEADF':'HOMESTEADFLAG',
                                    'OWNERDESC':'OWNERDESC',
                                    'SALEDATE':'SALEDATE',
                                    'SALEPRICE':'SALEPRICE',
                                  }},
               '2010':{'fname':'assessments/gradeck/June2010AssessNew.accdb',
                       'tname':'RawDataAssessment',
                       'col_remap': post_2009_colmap
                      },
               '2011':{'fname':'assessments/gradeck/AssessMarch2011Data3.mdb.accdb',
                       'tname':'AssessMarch2011',
                       'col_remap': {'PIN':'PARID',
                                    'PropertyOwnerNew':'PROPERTYOWNER',
                                    'CHANGENOTICEFULLADDRESS1':'CHANGENOTICEADDRESS1',
                                    'CHANGENOTICEFULLADDRESS2':'CHANGENOTICEADDRESS2',
                                    'CHANGENOTICEFULLADDRESS3':'CHANGENOTICEADDRESS3',
                                    'CHANGENOTICEFULLADDRESS4':'CHANGENOTICEADDRESS4',
                                  }
                      },
               '2012':{'fname':'assessments/gradeck/AssessNov2012.mdb',
                       'tname':'AssessmentOct2_2012',
                       'col_remap': {}
                      },
               '2013':{'fname':'assessments/gradeck/Dec2013Assess.accdb',
                       'tname':'RawDataAssessment',
                       'col_remap': post_2009_colmap
                      },
               '2014':{'fname':'assessments/gradeck/June2014Assess.accdb',
                       'tname':'RawDataAssessment',
                       'col_remap': post_2009_colmap
                      },
               '2015':{'fname':'assessments/gradeck/May2015Assess.accdb',
                       'tname':'RawDataAssessment',
                       'col_remap': post_2009_colmap,
                      }
             }

In [231]:
# Uses accdb_info map to import a file for a given year and return a dataframe.  
# This doesn't remap the column names (we might not know them yet)
def import_accdb_file(year):
    # Import the accdb file for this year.  The returned value is a map of table names to tables
    accdb_table_map = mdb_to_pandas(accdb_info[year]['fname'])     
    apd_df = accdb_table_map[accdb_info[year]['tname']]
    return apd_df

# Takes a dataframe imported by import_accdb_file, renames columns, sets the index to PARID, 
# filters out rows that aren't dwellings, sets zipcode to be type string
def cleanup_accdb_import(df, year):
    r_df = df.rename(index=str, columns=accdb_info[year]['col_remap']).set_index('PARID')
    r_df = pd.merge(r_df,dwelling_col.to_frame(),on='PARID', left_on=None, right_on=None,
                     left_index=False, right_index=False, sort=False,
                     suffixes=('_x', '_y'), copy=True, indicator=False,
                     validate=None)
    print "After merge, size = %d" % (len(r_df))

    r_df = r_df[r_df.is_dwelling]
    print "After filtering to only include dwellings, size = %d" % (len(r_df))

    # Change ',  -' in CHANGENOTICEADDRESS3 to nan (that's a common pattern in these accdb files)
    bad_changeaddr = r_df.CHANGENOTICEADDRESS3==',  -'
    r_df.loc[bad_changeaddr,'CHANGENOTICEADDRESS3']= numpy.nan

    # Filter out empty owner names
    empty_owner = r_df.PROPERTYOWNER.isna()

    # Filter out empty change addresses
    p1=pandas.isna(r_df.CHANGENOTICEADDRESS1) 
    p2=pandas.isna(r_df.CHANGENOTICEADDRESS2) 
    p3=pandas.isna(r_df.CHANGENOTICEADDRESS3)
    p4=pandas.isna(r_df.CHANGENOTICEADDRESS4)
    r_df = r_df[~((p1&p2&p3&p4)|empty_owner)]
    print "After filtering to remove empty owner names and change addresses, size = %d" % (len(r_df))

    # Setup column types where needed
    r_df['PROPERTYZIP'] = r_df['PROPERTYZIP'].astype(basestring)
    
    # Fix up <BR> to be &
    r_df.PROPERTYOWNER = r_df.PROPERTYOWNER.str.replace('\s*<BR>\s*', ' & ', regex=True)
    return r_df

# Process 2015

In [496]:
apd_2015_raw = import_accdb_file('2015') 

('running table:', u'LandUseCodes')
Dumping LandUseCodes
(u'LandUseCodes', <_io.StringIO object at 0x7f9cf2f4a050>)
('running table:', u'MasterListPotentialREO')
Dumping MasterListPotentialREO
(u'MasterListPotentialREO', <_io.StringIO object at 0x7f9d6aaa8350>)
('running table:', u'Name AutoCorrect Save Failures')
Dumping Name AutoCorrect Save Failures
(u'Name AutoCorrect Save Failures', <_io.StringIO object at 0x7f9d6aaa8750>)
('running table:', u'OwnerCodeDefinition')
Dumping OwnerCodeDefinition
(u'OwnerCodeDefinition', <_io.StringIO object at 0x7f9ccdb652d0>)
('running table:', u'RawDataAssessment')
Dumping RawDataAssessment
(u'RawDataAssessment', <_io.StringIO object at 0x7f9d6aaa8350>)
('running table:', u'RawDataAssessment Table Layout')
Dumping RawDataAssessment Table Layout
(u'RawDataAssessment Table Layout', <_io.StringIO object at 0x7f9ccd3b38d0>)
('running table:', u'SaleDataFull')
Dumping SaleDataFull
(u'SaleDataFull', <_io.StringIO object at 0x7f9c823c1bd0>)
('running tabl

In [497]:
apd_2015 = cleanup_accdb_import(apd_2015_raw, '2015')

After merge, size = 577753
After filtering to only include dwellings, size = 519223
After filtering to remove empty owner names and change addresses, size = 506872


In [None]:
list(apd_2015)

In [498]:
process_all_assessment_records(apd_2015)

1-10000: 1517 added, 0:00:05.867339 time elapsed
10001-20000: 1407 added, 0:00:04.489289 time elapsed
20001-30000: 1427 added, 0:00:04.819603 time elapsed
30001-40000: 1334 added, 0:00:04.969159 time elapsed
40001-50000: 1274 added, 0:00:05.379638 time elapsed
50001-60000: 1384 added, 0:00:05.167290 time elapsed
60001-70000: 1399 added, 0:00:05.008074 time elapsed
70001-80000: 1293 added, 0:00:04.626940 time elapsed
80001-90000: 1321 added, 0:00:04.544026 time elapsed
90001-100000: 1572 added, 0:00:05.238416 time elapsed
100001-110000: 1307 added, 0:00:05.137790 time elapsed
110001-120000: 1388 added, 0:00:04.622946 time elapsed
120001-130000: 1272 added, 0:00:05.403689 time elapsed
130001-140000: 1098 added, 0:00:05.451560 time elapsed
140001-150000: 1124 added, 0:00:05.181060 time elapsed
150001-160000: 1398 added, 0:00:05.196724 time elapsed
160001-170000: 1300 added, 0:00:04.792961 time elapsed
170001-180000: 1203 added, 0:00:05.596729 time elapsed
180001-190000: 1221 added, 0:00:0

# Process 2014

In [499]:
apd_2014_raw = import_accdb_file('2014') 

('running table:', u'LandUseCodes')
Dumping LandUseCodes
(u'LandUseCodes', <_io.StringIO object at 0x7f9c3dc874d0>)
('running table:', u'MasterListPotentialREO')
Dumping MasterListPotentialREO
(u'MasterListPotentialREO', <_io.StringIO object at 0x7f9c15490250>)
('running table:', u'Name AutoCorrect Save Failures')
Dumping Name AutoCorrect Save Failures
(u'Name AutoCorrect Save Failures', <_io.StringIO object at 0x7f9c15490650>)
('running table:', u'OwnerCodeDefinition')
Dumping OwnerCodeDefinition
(u'OwnerCodeDefinition', <_io.StringIO object at 0x7f9c14e3fad0>)
('running table:', u'RawDataAssessment')
Dumping RawDataAssessment
(u'RawDataAssessment', <_io.StringIO object at 0x7f9c15490250>)
('running table:', u'RawDataAssessment Table Layout')
Dumping RawDataAssessment Table Layout
(u'RawDataAssessment Table Layout', <_io.StringIO object at 0x7f9c14424a50>)
('running table:', u'SaleDataFull')
Dumping SaleDataFull
(u'SaleDataFull', <_io.StringIO object at 0x7f9ab6d1ccd0>)
('running tabl

In [500]:
apd_2014 = cleanup_accdb_import(apd_2014_raw, '2014')

After merge, size = 576705
After filtering to only include dwellings, size = 518307
After filtering to remove empty owner names and change addresses, size = 505834


In [501]:
process_all_assessment_records(apd_2014)

1-10000: 266 added, 0:00:05.618758 time elapsed
10001-20000: 226 added, 0:00:04.789025 time elapsed
20001-30000: 223 added, 0:00:05.533156 time elapsed
30001-40000: 205 added, 0:00:05.346532 time elapsed
40001-50000: 256 added, 0:00:05.375026 time elapsed
50001-60000: 225 added, 0:00:05.805101 time elapsed
60001-70000: 245 added, 0:00:05.383229 time elapsed
70001-80000: 239 added, 0:00:05.206946 time elapsed
80001-90000: 251 added, 0:00:05.517752 time elapsed
90001-100000: 285 added, 0:00:05.431181 time elapsed
100001-110000: 243 added, 0:00:05.211462 time elapsed
110001-120000: 261 added, 0:00:05.428997 time elapsed
120001-130000: 258 added, 0:00:04.613174 time elapsed
130001-140000: 161 added, 0:00:04.925088 time elapsed
140001-150000: 211 added, 0:00:05.429947 time elapsed
150001-160000: 273 added, 0:00:04.913482 time elapsed
160001-170000: 240 added, 0:00:04.764811 time elapsed
170001-180000: 204 added, 0:00:06.020577 time elapsed
180001-190000: 205 added, 0:00:05.418098 time elaps

# Process 2013

In [502]:
apd_2013_raw = import_accdb_file('2013') 

('running table:', u'Name AutoCorrect Save Failures')
Dumping Name AutoCorrect Save Failures
(u'Name AutoCorrect Save Failures', <_io.StringIO object at 0x7f9c3dc873d0>)
('running table:', u'OwnerCodeDefinition')
Dumping OwnerCodeDefinition
(u'OwnerCodeDefinition', <_io.StringIO object at 0x7f9b2e4ab0d0>)
('running table:', u'RawDataAssessment')
Dumping RawDataAssessment
(u'RawDataAssessment', <_io.StringIO object at 0x7f9b2e4ab750>)
('running table:', u'RawDataAssessment Table Layout')
Dumping RawDataAssessment Table Layout
(u'RawDataAssessment Table Layout', <_io.StringIO object at 0x7f9b2ddff1d0>)
('running table:', u'SaleDataFull')
Dumping SaleDataFull
(u'SaleDataFull', <_io.StringIO object at 0x7f9a4a166bd0>)
('running table:', u'SaleDetails')
Dumping SaleDetails
(u'SaleDetails', <_io.StringIO object at 0x7f9a498b3dd0>)
('running table:', u'tblOwnerAddressCount')
Dumping tblOwnerAddressCount
(u'tblOwnerAddressCount', <_io.StringIO object at 0x7f9a481572d0>)
('running table:', u'Un

In [488]:
apd_2013 = cleanup_accdb_import(apd_2013_raw, '2013')

After merge, size = 575195
After filtering to only include dwellings, size = 517123
After filtering to remove empty owner names and change addresses, size = 504628


In [489]:
apd_2013.loc['0191H00034000000'].PROPERTYOWNER

u'ROBERT E KELLY JR REVOCABLE TRUST       ROBERT E KELLY JR \u0393\xc7\xf4 TRUSTEE & BRI'

In [491]:
process_all_assessment_records(apd_2013)

1-10000: 9797 added, 0:00:05.047437 time elapsed
10001-20000: 9897 added, 0:00:04.349531 time elapsed
20001-30000: 9805 added, 0:00:04.347125 time elapsed
30001-40000: 9922 added, 0:00:04.748191 time elapsed
40001-50000: 9957 added, 0:00:04.252485 time elapsed
50001-60000: 9881 added, 0:00:04.917269 time elapsed
60001-70000: 9915 added, 0:00:04.663241 time elapsed
70001-80000: 9971 added, 0:00:05.381502 time elapsed
80001-90000: 9901 added, 0:00:05.187847 time elapsed
90001-100000: 9896 added, 0:00:05.320142 time elapsed
100001-110000: 9900 added, 0:00:04.745531 time elapsed
110001-120000: 9980 added, 0:00:04.537871 time elapsed
120001-130000: 9835 added, 0:00:05.404307 time elapsed
130001-140000: 9923 added, 0:00:05.861283 time elapsed
140001-150000: 9873 added, 0:00:04.975491 time elapsed
150001-160000: 9884 added, 0:00:04.953024 time elapsed
160001-170000: 9909 added, 0:00:05.259866 time elapsed
170001-180000: 9938 added, 0:00:05.342598 time elapsed
180001-190000: 9951 added, 0:00:0

# SKIP 2012

In [None]:
# PROBLEM:
#  SALEDATE field is MM?DDYYYY, which is being interpreted as a float
#    9042007  = '2007-09-04'
#  This is from property_map['0467M00177000000']
# SKIP FOR NOW


In [None]:
apd_2012_raw = import_accdb_file('2012') 

In [291]:
apd_2012 = cleanup_accdb_import(apd_2012_raw, '2012')


After merge, size = 572899
After filtering to only include dwellings, size = 515052
After filtering to remove empty owner names and change addresses, size = 515052


In [292]:
process_all_assessment_records(apd_2012)

TypeError: buffer size mismatch

# Process 2011

In [503]:
apd_2011_raw = import_accdb_file('2011') 

('running table:', u'AssessMarch2011')
Dumping AssessMarch2011
(u'AssessMarch2011', <_io.StringIO object at 0x7f9c23829c50>)
('running table:', u'MasterListPotentialREO')
Dumping MasterListPotentialREO
(u'MasterListPotentialREO', <_io.StringIO object at 0x7f9c39c75550>)
('running table:', u'OwnerCodeDefinition')
Dumping OwnerCodeDefinition
(u'OwnerCodeDefinition', <_io.StringIO object at 0x7f9a29da0ad0>)
('running table:', u'Universe of Potential Public Owners')
Dumping Universe of Potential Public Owners
(u'Universe of Potential Public Owners', <_io.StringIO object at 0x7f9a296b2550>)
('running table:', u'ChangeNoticeCount')
Dumping ChangeNoticeCount
(u'ChangeNoticeCount', <_io.StringIO object at 0x7f9c39c75550>)


In [504]:
apd_2011 = cleanup_accdb_import(apd_2011_raw, '2011')

After merge, size = 566424
After filtering to only include dwellings, size = 509082
After filtering to remove empty owner names and change addresses, size = 508830


In [505]:
process_all_assessment_records(apd_2011)

1-10000: 9341 added, 0:00:04.501821 time elapsed
10001-20000: 9450 added, 0:00:04.474968 time elapsed
20001-30000: 9756 added, 0:00:04.939104 time elapsed
30001-40000: 9638 added, 0:00:05.174120 time elapsed
40001-50000: 9763 added, 0:00:04.502422 time elapsed
50001-60000: 9632 added, 0:00:04.686311 time elapsed
60001-70000: 9763 added, 0:00:05.465233 time elapsed
70001-80000: 9735 added, 0:00:05.216360 time elapsed
80001-90000: 9644 added, 0:00:05.685269 time elapsed
90001-100000: 9535 added, 0:00:05.639264 time elapsed
100001-110000: 9420 added, 0:00:05.574873 time elapsed
110001-120000: 9681 added, 0:00:04.861739 time elapsed
120001-130000: 9725 added, 0:00:04.739321 time elapsed
130001-140000: 9813 added, 0:00:04.588998 time elapsed
140001-150000: 9708 added, 0:00:04.881982 time elapsed
150001-160000: 9509 added, 0:00:04.705842 time elapsed
160001-170000: 9777 added, 0:00:05.290132 time elapsed
170001-180000: 9580 added, 0:00:05.027809 time elapsed
180001-190000: 9698 added, 0:00:0

# Process 2010

In [506]:
apd_2010_raw = import_accdb_file('2010') 

('running table:', u'LandUseCodes')
Dumping LandUseCodes
(u'LandUseCodes', <_io.StringIO object at 0x7f9c3dc871d0>)
('running table:', u'MasterListPotentialREO')
Dumping MasterListPotentialREO
(u'MasterListPotentialREO', <_io.StringIO object at 0x7f9a2806a6d0>)
('running table:', u'Name AutoCorrect Save Failures')
Dumping Name AutoCorrect Save Failures
(u'Name AutoCorrect Save Failures', <_io.StringIO object at 0x7f9a2806aad0>)
('running table:', u'OwnerCodeDefinition')
Dumping OwnerCodeDefinition
(u'OwnerCodeDefinition', <_io.StringIO object at 0x7f9a27af30d0>)
('running table:', u'RawDataAssessment Table Layout')
Dumping RawDataAssessment Table Layout
(u'RawDataAssessment Table Layout', <_io.StringIO object at 0x7f9a2806a6d0>)
('running table:', u'SaleDataFull')
Dumping SaleDataFull
(u'SaleDataFull', <_io.StringIO object at 0x7f9a27af31d0>)
('running table:', u'SaleDataFullYear2013')
Dumping SaleDataFullYear2013
(u'SaleDataFullYear2013', <_io.StringIO object at 0x7f9a27af3f50>)
('run

In [507]:
apd_2010 = cleanup_accdb_import(apd_2010_raw, '2010')

After merge, size = 564759
After filtering to only include dwellings, size = 507726
After filtering to remove empty owner names and change addresses, size = 500552


In [508]:
process_all_assessment_records(apd_2010)

1-10000: 4532 added, 0:00:04.662122 time elapsed
10001-20000: 4356 added, 0:00:04.342665 time elapsed
20001-30000: 4820 added, 0:00:04.886731 time elapsed
30001-40000: 4525 added, 0:00:05.122695 time elapsed
40001-50000: 4208 added, 0:00:05.165239 time elapsed
50001-60000: 4327 added, 0:00:04.827004 time elapsed
60001-70000: 4598 added, 0:00:04.906871 time elapsed
70001-80000: 4333 added, 0:00:04.789660 time elapsed
80001-90000: 4465 added, 0:00:05.401239 time elapsed
90001-100000: 4456 added, 0:00:05.064490 time elapsed
100001-110000: 4832 added, 0:00:05.279100 time elapsed
110001-120000: 5215 added, 0:00:05.139622 time elapsed
120001-130000: 4756 added, 0:00:05.344353 time elapsed
130001-140000: 3706 added, 0:00:05.494263 time elapsed
140001-150000: 4523 added, 0:00:05.081339 time elapsed
150001-160000: 4177 added, 0:00:04.780596 time elapsed
160001-170000: 4644 added, 0:00:05.251841 time elapsed
170001-180000: 5021 added, 0:00:05.077111 time elapsed
180001-190000: 4715 added, 0:00:0

# Process 2009

In [509]:
apd_2009_raw = import_accdb_file('2009') 

('running table:', u'pncis')
Dumping pncis
(u'pncis', <_io.StringIO object at 0x7f9d2fd9ef50>)


In [510]:
apd_2009 = cleanup_accdb_import(apd_2009_raw, '2009')

After merge, size = 562378
After filtering to only include dwellings, size = 505806
After filtering to remove empty owner names and change addresses, size = 500042


In [511]:
process_all_assessment_records(apd_2009)

1-10000: 1304 added, 0:00:05.463582 time elapsed
10001-20000: 1365 added, 0:00:05.568963 time elapsed
20001-30000: 1530 added, 0:00:04.924514 time elapsed
30001-40000: 1387 added, 0:00:05.150892 time elapsed
40001-50000: 1458 added, 0:00:05.091715 time elapsed
50001-60000: 1519 added, 0:00:05.092926 time elapsed
60001-70000: 1485 added, 0:00:05.329167 time elapsed
70001-80000: 1434 added, 0:00:05.349812 time elapsed
80001-90000: 1445 added, 0:00:05.081766 time elapsed
90001-100000: 1351 added, 0:00:04.474183 time elapsed
100001-110000: 1348 added, 0:00:04.453870 time elapsed
110001-120000: 1535 added, 0:00:05.402858 time elapsed
120001-130000: 1233 added, 0:00:05.536183 time elapsed
130001-140000: 2023 added, 0:00:05.021823 time elapsed
140001-150000: 1590 added, 0:00:04.993244 time elapsed
150001-160000: 1430 added, 0:00:05.553492 time elapsed
160001-170000: 1387 added, 0:00:05.301996 time elapsed
170001-180000: 1485 added, 0:00:04.794184 time elapsed
180001-190000: 1292 added, 0:00:0

# After input processing is done, register all sales so we know when owners stop owning given parcels

In [512]:
start=arrow.now()
register_all_sales()
end=arrow.now()
print "Processing took %s" % (str(end-start))

Processing took 0:00:13.707817


# Debugging

In [440]:
apd_20.SALEDESC.value_counts()

VALID SALE                          141204
LOVE AND AFFECTION SALE             109656
UNVERIFIED DECLARED VALID SALE       40437
OTHER INVALID SALES INDICATED        36991
BRAND NEW SALE NOT ANALYZED          24141
MULTI-PARCEL SALE                    18471
THE FLAG CANNOT BE DETERMINED        15589
NOT APPLICABLE                        9730
SHERIFF SALE                          5224
OUTLIER SALE                          3255
COMBINATIONS AND SPLITS               2155
CITY TREASURER SALE                   1446
VACANT LAND SALE                      1110
CHANGED AFTER SALE                     862
UNVERIFIED DECLARED VALID SALE X       436
INTERIM                                193
OWNER SUPPLIED AND VALID               167
VALID SALE excluded from MKT            95
??                                      94
OWNER SUPPLIED SALE                      4
Name: SALEDESC, dtype: int64

In [340]:
apd_2015.SALECODE.value_counts()

AttributeError: 'DataFrame' object has no attribute 'SALECODE'

In [327]:
apd_2017.loc['0084J00185000000']

PROPERTYOWNER                   SEABROOKE J THOMAS
PROPERTYHOUSENUM                              5825
PROPERTYFRACTION                                  
PROPERTYADDRESS                          PIERCE ST
PROPERTYCITY                            PITTSBURGH
PROPERTYSTATE                                   PA
PROPERTYUNIT                                      
PROPERTYZIP                                  15232
MUNICODE                                       107
MUNIDESC                     7th Ward - PITTSBURGH
SCHOOLCODE                                      47
SCHOOLDESC                      City Of Pittsburgh
LEGAL1                PT 51 LOT 16.18X63 PIERCE ST
LEGAL2                         2 STY BRK HSE #5825
LEGAL3                                         NaN
NEIGHCODE                                    10703
NEIGHDESC                                SHADYSIDE
TAXCODE                                          T
TAXDESC                               20 - Taxable
TAXSUBCODE                     

In [325]:
property_map['0029C00050000000']

[{'changeaddr': '1914 WATSON ST, PITTSBURGH, PA 15219',
  'date': u'1977-01-20',
  'event_type': 'PURCHASE',
  'ownername': 'PRYOR WILLIAM D'},
 {'changeaddr': '414 GRANT ST RM 200, CITY-COUNTY BUILDING, PITTSBURGH, PA 15219',
  'date': u'2017-04-28',
  'event_type': 'PURCHASE',
  'ownername': 'CITY OF PITTSBURGH'}]

In [321]:
property_map['0084J00185000000']

[{'changeaddr': '4124 BUTLER ST, PITTSBURGH, PA 15201',
  'date': u'2003-09-11',
  'event_type': 'PURCHASE',
  'ownername': 'SEABROOKE JOSEPH A & J THOMAS SEABROOKE'},
 {'changeaddr': '4124 BUTLER ST STE A, PITTSBURGH, PA 15201',
  'date': u'2009-10-06',
  'event_type': 'PURCHASE',
  'ownername': 'SEABROOKE J THOMAS'},
 {'changeaddr': '4124 BUTLER ST, PITTSBURGH, PA 15201',
  'date': u'2009-10-06',
  'event_type': 'PURCHASE',
  'ownername': 'SEABROOKE J THOMAS'}]

In [315]:
property_map['0028H00014000000']=[{'changeaddr': '3535 BLVD OF THE ALLIES, PITTSBURGH, PA 15213',
  'date': u'1986-08-25',
  'event_type': 'PURCHASE',
  'ownername': 'KELLY ROBERT E JR & KELLY ROBERT E JR WILLIAM W RIELLY ROBERT B NELL JR & HOMER E STOTLER'},
 {'changeaddr': '3535 BLVD OF THE ALLIES, PITTSBURGH, PA 15213',
  'date': u'1986-08-25',
  'event_type': 'PURCHASE',
  'ownername': 'KELLY ROBERT E JR & RIELLY WILLIAM W & NELL ROBE'},
 {'changeaddr': '3535 BLVD OF THE ALLIES, PITTSBURGH, PA 15213',
  'date': u'2012-12-18',
  'event_type': 'PURCHASE',
  'ownername': 'ROBERT E KELLY JR REVOCABLE TRUST'},
 {'changeaddr': '3535 BLVD OF THE ALLIES, PITTSBURGH, PA 15213',
  'date': u'2012-12-18',
  'event_type': 'PURCHASE',
  'ownername': 'ROBERT E KELLY JR REVOCABLE TRUST & WILLIAM W RIELLY REVOCABLE TRU'},
 {'changeaddr': '3535 BLVD OF THE ALLIES, PITTSBURGH, PA 15213',
  'date': u'2012-12-18',
  'event_type': 'PURCHASE',
  'ownername': 'ROBERT E KELLY JR REVOCABLE TRUST ROBERT E KELLY JR TRUSTEE & WIL'}]

In [316]:
owner_map['ROBERT E KELLY JR REVOCABLE TRUST ROBERT E KELLY JR TRUSTEE & WIL']=owner_map['ROBERT E KELLY JR REVOCABLE TRUST ROBERT E KELLY JR \xce\x93\xc3\x87\xc3\xb4 TRUSTEE & WIL']

In [275]:
changeaddr_map['8 WATSON ST, CARNEGIE, PA 15106']

[{'date': u'2003-01-08',
  'event_type': 'PURCHASE',
  'parid': '0104R00160000000'},
 {'date': u'2007-09-12',
  'event_type': 'PURCHASE',
  'parid': '0104R00162000000'},
 {'date': u'2010-02-01',
  'event_type': 'PURCHASE',
  'parid': '0104R00158000000'},
 {'date': u'2012-07-20',
  'event_type': 'PURCHASE',
  'parid': '0104R00158000000'},
 {'date': u'2012-07-20', 'event_type': 'SALE', 'parid': '0104R00158000000'},
 {'date': u'2016-09-09', 'event_type': 'SALE', 'parid': '0104R00158000000'}]

In [323]:
owner_map['STANDARD REALTY GROUP LP']

[{'date': u'2003-12-12',
  'event_type': 'PURCHASE',
  'parid': '0084J00135000000'},
 {'date': u'2003-12-12',
  'event_type': 'PURCHASE',
  'parid': '0084J00206000000'},
 {'date': u'2004-11-19',
  'event_type': 'PURCHASE',
  'parid': '0084J00153000000'},
 {'date': u'2004-12-23',
  'event_type': 'PURCHASE',
  'parid': '0025D00155000400'},
 {'date': u'2004-12-23',
  'event_type': 'PURCHASE',
  'parid': '0025D00155000900'},
 {'date': u'2004-12-23',
  'event_type': 'PURCHASE',
  'parid': '0048M00024000000'},
 {'date': u'2004-12-23',
  'event_type': 'PURCHASE',
  'parid': '0084J00172000000'},
 {'date': u'2004-12-23',
  'event_type': 'PURCHASE',
  'parid': '0084J00207000000'},
 {'date': u'2004-12-23',
  'event_type': 'PURCHASE',
  'parid': '0084J00178000000'},
 {'date': u'2004-12-23',
  'event_type': 'PURCHASE',
  'parid': '0084J00210000000'},
 {'date': u'2004-12-23',
  'event_type': 'PURCHASE',
  'parid': '0084J00211000000'},
 {'date': u'2004-12-23',
  'event_type': 'PURCHASE',
  'parid': '

In [324]:
# Sanity check it's working
next_date = '1970-01-01'
parid='0084J00135000000'
print property_map[parid]
while(next_date<this_date):
    ret=get_related_parids(parid, next_date)
    print "%s: %s - %s = %d" % (parid, ret['start_date'], ret['next_date'], ret['parcount'])
    next_date=ret['next_date']

[{'date': u'2003-12-12', 'changeaddr': '4124 BUTLER ST, PITTSBURGH, PA 15201', 'event_type': 'PURCHASE', 'ownername': 'STANDARD REALTY GROUP LP'}]
0084J00135000000: 2003-12-12 - 2004-02-24 = 103
0084J00135000000: 2004-02-24 - 2004-11-19 = 104
0084J00135000000: 2004-11-19 - 2004-12-23 = 85
0084J00135000000: 2004-12-23 - 2005-03-28 = 117
0084J00135000000: 2005-03-28 - 2006-05-17 = 118
0084J00135000000: 2006-05-17 - 2006-06-07 = 123
0084J00135000000: 2006-06-07 - 2007-04-11 = 124
0084J00135000000: 2007-04-11 - 2008-10-02 = 125
0084J00135000000: 2008-10-02 - 2009-09-29 = 126
0084J00135000000: 2009-09-29 - 2009-09-30 = 125
0084J00135000000: 2009-09-30 - 2009-10-01 = 115
0084J00135000000: 2009-10-01 - 2009-10-02 = 101
0084J00135000000: 2009-10-02 - 2009-10-05 = 92
0084J00135000000: 2009-10-05 - 2009-10-06 = 84
0084J00135000000: 2009-10-06 - 2009-10-07 = 74
0084J00135000000: 2009-10-07 - 2009-10-09 = 53
0084J00135000000: 2009-10-09 - 2010-11-18 = 52
0084J00135000000: 2010-11-18 - 2011-02-02 =

In [241]:
# TORKEO ROBERT V
next_date = '1970-01-01'
parid='0029H00175000000'
print property_map[parid]
while(next_date<this_date):
    ret=get_related_parids(parid, next_date)
    print "%s: %s - %s = %d" % (parid, ret['start_date'], ret['next_date'], ret['parcount'])
    next_date=ret['next_date']

[{'date': u'2001-09-13', 'changeaddr': '3152 GRIFFITH ST, PITTSBURGH, PA 15213', 'event_type': 'PURCHASE', 'ownername': 'TORKEO ROBERT V'}]
0029H00175000000: 2001-09-13 - 2002-06-07 = 11
0029H00175000000: 2002-06-07 - 2002-10-30 = 12
0029H00175000000: 2002-10-30 - 2011-03-09 = 13
0029H00175000000: 2011-03-09 - 2013-01-29 = 14
0029H00175000000: 2013-01-29 - 2013-09-16 = 15
0029H00175000000: 2013-09-16 - 2018-08-09 = 16


In [377]:
property_map[property_map.keys()[1]]

[{'changeaddr': '4 OAKLAND SQ, PITTSBURGH, PA 15213',
  'date': u'1964-09-11',
  'event_type': 'PURCHASE',
  'ownername': 'SCIULLI ERRICO & OLIMPIA ITALI'}]

# Process geometries to be able to record lat/lon for properties

In [19]:
#with open("assessments/Allegheny_County_Parcel_Boundaries.geojson") as f:
#    parcel_json = json.load(f)
#len(parcel_json['features'])
g = gpd.read_file('assessments/Allegheny_County_Parcel_Boundaries.geojson')

g=g.set_index('PIN')

# Calculate the centroid for each row and store in a new column called centroid
g['centroid']=g['geometry'].centroid

In [96]:
# Create a map from PARID to centroid
parid2centroid = {g.index[i]:g['centroid'].iloc[i] for i in range(0, len(g.index))}

In [20]:
c = g.loc['0029G00269000000']['centroid']
print "(%f, %f)" % (c.x,c.y)

(-79.953716, 40.430757)


In [21]:
PointToPixelXY(g.loc['0029G00269000000']['centroid'])

[71.14402403468608, 96.51512692091366]

# New volume animation

In [22]:
def color_from_floats(r,g,b):
    return r + g * 256.0 + b * 256.0 * 256.0

def pack_color(color):
    return color['r'] + color['g'] * 256.0 + color['b'] * 256.0 * 256.0;

def parse_color(color):
    color = color.strip()
    c = color
    try:
        if c[0] == '#':
            c = c[1:]
        if len(c) == 3:
            return pack_color({'r': 17 * int(c[0:1], 16),
                               'g': 17 * int(c[1:2], 16),
                               'b': 17 * int(c[2:3], 16)})
        if len(c) == 6:
            return pack_color({'r': int(c[0:2], 16),
                               'g': int(c[2:4], 16),
                               'b': int(c[4:6], 16)})
    except:
        pass
    raise InvalidUsage('Cannot parse color <code><b>%s</b></code> from spreadsheet.<br><br>Color must be in standard web form, <code><b>#RRGGBB</b></code>, where RR, GG, and BB are each two-digit hexadecimal numbers between 00 and FF.<br><br>See <a href="https://www.w3schools.com/colors/colors_picker.asp">HTML Color Picker</a>' % color)

def parse_colors(colors):
    packed = [parse_color(color) for color in colors]
    return numpy.array(packed, dtype = numpy.float32)

In [465]:
out_suffix="_09_10_11_13_14_15_17"
#out_suffix="_15213_17"

In [423]:
start=arrow.now()

# Use the property_map, owner_map, and changeaddr_map to generate a new type of volume animation
# Write out a binary file with the volume colors
vol_colors = ['#a50026','#cd2827','#e75436','#f7804b','#fdad61','#fed788','#ffffbf','#b9e0ed','#8dc0db','#699fca','#4d7db9','#3e5aa7','#313695']
def volume_to_color(volume):
    if (volume < 2):
        return parse_color(vol_colors[0]) 
    #elif (volume < 3):
    #    return parse_color(vol_colors[1]) 
    #elif (volume < 4):
    #    return parse_color(vol_colors[2]) 
    elif (volume < 5):
        return parse_color(vol_colors[3]) 
    elif (volume < 10):
        return parse_color(vol_colors[4]) 
    elif (volume < 20):
        return parse_color(vol_colors[5]) 
    elif (volume < 40):
        return parse_color(vol_colors[6]) 
    elif (volume < 60):
        return parse_color(vol_colors[7]) 
    elif (volume < 80):
        return parse_color(vol_colors[8]) 
    elif (volume < 150):
        return parse_color(vol_colors[9]) 
    elif (volume < 300):
        return parse_color(vol_colors[10]) 
    elif (volume < 500):
        return parse_color(vol_colors[11]) 
    else:
        return parse_color(vol_colors[12]) 

def output_volume_dots():
    # Write out volume of ownership for each residential non-vacant land property
    points = []
    i=0
    start=arrow.now()
    chunk_start_time=arrow.now()
    chunk_size=1000

    for parid in property_map.keys():
        centroid=None
        try:
            centroid = parid2centroid[parid]
        except:
            print "%s is missing from centroids, skipping" % (parid)
            continue

        try:
            # Handle periodic debug message
            if((i%chunk_size)==0 and i>0):
                print "%d-%d: processing %r, %s time elapsed" %(i-(chunk_size-1), i, parid, arrow.now()-chunk_start_time)
                addcnt=0
                chunk_start_time=arrow.now()

            # Nominally start at 1950
            next_date = '1950-01-01'

            while(next_date<this_date):
                volume_map=get_related_parids(parid, next_date)

                next_date=volume_map['next_date']
                color = volume_to_color(volume_map['parcount'])
                if((i%chunk_size)==0):
                    print "  %s: %s - %s = %d" % (parid, volume_map['start_date'], volume_map['next_date'], volume_map['parcount'])
                saledate = SaledateToEpoch(volume_map['start_date'])
                enddate = SaledateToEpoch(next_date)
                if(color != None):
                    points += PointToPixelXY(centroid)     
                    points.append(color)
                    # Put epoch time for SALEDATE as start valid time, and next_date as end valid time
                    points.append(float(saledate))
                    points.append(float(enddate))
                else:
                    print "Color of " + str(volume_map['parcount']) + " is None"
        except:
            print "Unexpected error processing %s, next_date=%s:" % (parid, next_date), sys.exc_info()[0]
            #raise

        #Increment debug message counter
        i=i+1

    array.array('f', points).tofile(open(('assessments/res_volume_color_m_epoch%s.bin'%out_suffix), 'wb'))

    end=arrow.now()
    print "Processing took %s" % (str(end-start))
    
#import cProfile
#cProfile.run('output_volume_dots()')
output_volume_dots()


  0011F00188000000: 2008-11-17 - 2009-10-06 = 34
  0011F00188000000: 2009-10-06 - 2010-02-16 = 35
  0011F00188000000: 2010-02-16 - 2011-02-08 = 36
  0011F00188000000: 2011-02-08 - 2012-01-11 = 37
  0011F00188000000: 2012-01-11 - 2012-09-12 = 48
  0011F00188000000: 2012-09-12 - 2012-10-26 = 52
  0011F00188000000: 2012-10-26 - 2013-04-26 = 57
  0011F00188000000: 2013-04-26 - 2013-10-25 = 59
  0011F00188000000: 2013-10-25 - 2014-10-24 = 60
  0011F00188000000: 2014-10-24 - 2015-02-04 = 66
  0011F00188000000: 2015-02-04 - 2015-09-22 = 69
  0011F00188000000: 2015-09-22 - 2015-10-30 = 70
  0011F00188000000: 2015-10-30 - 2016-04-29 = 72
  0011F00188000000: 2016-04-29 - 2016-08-19 = 75
  0011F00188000000: 2016-08-19 - 2017-04-28 = 79
  0011F00188000000: 2017-04-28 - 2018-08-11 = 80
1-1000: processing '0051P00166000000', 0:00:00.399189 time elapsed
  0051P00166000000: 2001-07-20 - 2006-08-04 = 2
  0051P00166000000: 2006-08-04 - 2010-08-02 = 3
  0051P00166000000: 2010-08-02 - 2013-11-20 = 4
  005

In [601]:
start=arrow.now()

single_property_color = '#a50026'
corporate_color = '#ffffbf'
unknown_color = '#303030'

def ownertype_to_color(parid, property_event):
    ownerdesc = 'REGULAR'
    if('ownerdesc' in property_event.keys()):
       ownerdesc = property_event['ownerdesc']
    volume_map=get_related_parids(parid, property_event['date'])
    volume = volume_map['parcount']
    if ('CORPORATION' in ownerdesc or 'Corporation' in ownerdesc):
        # Ivory 
        return corporate_color
    elif ('homesteadflag' in property_event.keys() and 
          (property_event['homesteadflag'].strip()=='HOM' or property_event['homesteadflag'].strip()=='C')):
        # If homestead flag set, use single property color regardless of volume
        return single_property_color
    elif ('REGULAR' in ownerdesc or 'Regular' in ownerdesc):
        # Regular owner, what volume?
        if(volume == 1):
            # Same color as 1 in volume view
            return single_property_color
        else:
            # Same color as other in class view
            return '#02ca75'
    else:
        print "Unrecognized owner type: %s, %d" % (ownerdesc,volume)
        return unknown_color

def output_ownertype_dots(parid_arr, suffix):
    # If parid_arr not specified, do all of the keys in property_map
    if(len(parid_arr)==0):
        parid_arr=property_map.keys()
        
    # Write out volume of ownership for each residential non-vacant land property
    points = []
    start=arrow.now()
    chunk_start_time=arrow.now()
    chunk_size=1000
    chunk_cnt=0
    
    for parid in parid_arr:
        centroid=None
        did_output_dot=False
        try:
            centroid = parid2centroid[parid]
        except:
            print "%s is missing from centroids, skipping" % (parid)
            continue

        try:
            # Handle periodic debug message
            if((chunk_cnt%chunk_size)==0 and chunk_cnt>0):
                print "%d-%d: processing %r, %s time elapsed" %(chunk_cnt-(chunk_size-1), chunk_cnt, parid, arrow.now()-chunk_start_time)
                addcnt=0
                chunk_start_time=arrow.now()

            # Get list of property events or this property
            property_events = property_map[parid]

            # Keep track of the date of the last datapoint. 
            last_date = '1900-01-01'
            last_color=0
            
            for i in range(0,len(property_events)):
                event_date = property_events[i]['date']
                # Get color for this property event
                color = ownertype_to_color(parid, property_events[i])
                # Check if the date and color haven't changed, if so skip to the next event
                if(color == last_color and last_date == event_date):
                    # Nothing new here, move along
                    continue
                elif(last_date==event_date and i>0):
                    # Color changed without the sale date changing, possibly flag this as an issue
                    # If this new color is unknown, skip it
                    # If the last color was unknown, delete it and use this one
                    # Otherwise, this might be a change in homesteadflag.  If so, single_property_color takes precedence.
                    # Corporate takes priority between multi and corporate
                    # If neither has single_property_color, raise an exception
                    if(color==unknown_color):
                        # Ok, just skip this one
                        continue
                    elif(last_color==unknown_color):
                        # Pop the previous point off (5 floats) and use this new one
                        for j in range(0,5):
                            del points[-1]
                        #print "  REMOVED %s: %s = %s (len %d)" % (parid, event_date,last_color, len(points))
                    elif(last_color==single_property_color):
                        # Ok, just skip this one
                        continue
                    elif(color == single_property_color):
                        # Pop the previous point off (5 floats) and use this new one
                        for j in range(0,5):
                            del points[-1]
                        #print "  REMOVED %s: %s = %s (len %d)" % (parid, event_date,last_color, len(points))
                    elif(last_color==corporate_color):
                        # Ok, just skip this one
                        continue
                    elif(color==corporate_color):
                        # Pop the previous point off (5 floats) and use this new one
                        for j in range(0,5):
                            del points[-1]
                        #print "  REMOVED %s: %s = %s (len %d)" % (parid, event_date,last_color, len(points))
                    else:
                        print " PROBLEM %s: %s changed color on same date %s->%s" % (parid, event_date, last_color, color)
                        raise
                        
                if((chunk_cnt%chunk_size)==0):
                    print "  %s: %s = %s" % (parid, event_date, color)
                
                # Get range of dates for this color
                saledate = SaledateToEpoch(event_date)
                next_date = get_next_sale_date(parid, event_date)
                if(next_date == None):
                    # No new owner after this, set end date to the end of time
                    enddate = float(1e38)
                    next_date=this_date
                else:
                    enddate = SaledateToEpoch(next_date)
                if(color != None):
                    points += PointToPixelXY(centroid)     
                    points.append(parse_color(color))
                    # Put epoch time for SALEDATE as start valid time, and next_date as end valid time
                    points.append(float(saledate))
                    points.append(float(enddate))
                    #print "  %s: %s - %s = %s (len %d)" % (parid, event_date, next_date, color, len(points))
                else:
                    print "Color of " + str(volume_map['parcount']) + " is None"

                # Set last_date and last_color for next loop
                last_date = event_date
                last_color= color
                did_output_dot=True
        except:
            print "Unexpected error processing %s:" % (parid), sys.exc_info()[0]
            raise

        #Increment debug message counter
        if(did_output_dot):
            chunk_cnt = chunk_cnt+1

    array.array('f', points).tofile(open(('assessments/ownertype_color_m_epoch%s.bin'%suffix), 'wb'))

    end=arrow.now()
    print "Processing took %s for %d dots" % (str(end-start), chunk_cnt)
    

In [600]:
#import cProfile
#cProfile.run('output_volume_dots()')
output_ownertype_dots([],out_suffix)

  0177N00269000000: 2007-03-02 = #a50026
  0177N00269000000: 2011-02-28 = #a50026
  REMOVED 0104P00058000000: 1999-09-08 = #02ca75 (len 185)
  REMOVED 0023L00150000000: 2001-01-04 = #02ca75 (len 235)
  REMOVED 0046A00067000000: 2004-06-03 = #02ca75 (len 595)
  REMOVED 0117A00030000000: 2012-07-11 = #02ca75 (len 1035)
  REMOVED 0445J00217000000: 2004-11-10 = #02ca75 (len 1045)
  REMOVED 0826D00101000000: 2013-07-02 = #02ca75 (len 1115)
  REMOVED 0568K00082000000: 1982-02-03 = #02ca75 (len 1415)
  REMOVED 0048F00113000000: 2007-04-24 = #02ca75 (len 1550)
  REMOVED 0071G00130000000: 2013-01-31 = #02ca75 (len 1590)
  REMOVED 0070M00241000000: 2011-09-20 = #02ca75 (len 2850)
  REMOVED 0775K00070000000: 2013-09-13 = #02ca75 (len 3040)
  REMOVED 0432L00244000000: 2012-04-12 = #02ca75 (len 4905)
  REMOVED 0165B00095000000: 2008-10-07 = #02ca75 (len 4910)
  REMOVED 0381B00012000000: 2001-01-29 = #02ca75 (len 5380)
  REMOVED 0004A00164000000: 1978-05-25 = #02ca75 (len 7415)
1-1000: processing u'

  REMOVED 0457N00256000000: 2012-05-29 = #02ca75 (len 67415)
  REMOVED 0372M00260000000: 2008-10-08 = #02ca75 (len 67430)
  REMOVED 0736D00013000000: 2013-09-06 = #02ca75 (len 67945)
  REMOVED 0386H00233000000: 2013-08-02 = #02ca75 (len 68370)
  REMOVED 0082A00234000000: 2010-02-01 = #02ca75 (len 68560)
  REMOVED 1906B00172000000: 2014-05-30 = #02ca75 (len 68630)
  REMOVED 0454K00252000000: 2010-02-24 = #02ca75 (len 68725)
  REMOVED 0276K00076000000: 2005-09-08 = #02ca75 (len 69085)
  REMOVED 0849R000250H1A00: 2013-10-21 = #02ca75 (len 69975)
  REMOVED 0124K00170000000: 2014-05-29 = #02ca75 (len 70125)
  REMOVED 0033N00073000000: 2013-06-11 = #02ca75 (len 70615)
  REMOVED 0375B00092000000: 2009-02-12 = #02ca75 (len 71475)
  REMOVED 0023K00291000000: 2008-11-19 = #02ca75 (len 71685)
  REMOVED 0944N00358000000: 2005-10-05 = #02ca75 (len 71790)
  REMOVED 0305R00228000000: 1994-02-17 = #02ca75 (len 71855)
  REMOVED 0131G00098000000: 2011-01-19 = #02ca75 (len 71940)
  REMOVED 0144N000140000

  REMOVED 0098K00286000000: 2008-08-05 = #02ca75 (len 136060)
  REMOVED 0088M00146000000: 2013-07-09 = #02ca75 (len 136290)
  REMOVED 1206H00269000000: 1984-02-23 = #02ca75 (len 137035)
  REMOVED 0062E00112000000: 2010-02-12 = #02ca75 (len 137285)
  REMOVED 0168M00189000000: 2014-12-19 = #02ca75 (len 137375)
  REMOVED 0162R00054000000: 2010-12-29 = #02ca75 (len 138635)
  REMOVED 0094F00245000000: 2004-11-01 = #02ca75 (len 138745)
  REMOVED 0421J00249000600: 2014-12-23 = #02ca75 (len 139240)
  REMOVED 0448A00261000000: 2004-09-09 = #02ca75 (len 139785)
  REMOVED 0167F00250000000: 1999-06-16 = #02ca75 (len 140180)
  REMOVED 0230S00029000000: 2012-11-09 = #02ca75 (len 140905)
  REMOVED 0099K00047000000: 2013-04-24 = #02ca75 (len 141065)
0351B00248000000 is missing from centroids, skipping
18001-19000: processing u'0279R00265002500', 0:00:00.262302 time elapsed
  0279R00265002500: 2010-10-29 = #ffffbf
  REMOVED 0354D00097000000: 2010-12-29 = #02ca75 (len 142330)
  REMOVED 0386S00033000000:

  REMOVED 1134G00225000000: 2010-10-13 = #02ca75 (len 206005)
  REMOVED 0060J00305000000: 2009-07-17 = #02ca75 (len 206600)
  REMOVED 0111L00232000000: 1998-12-09 = #02ca75 (len 207365)
  REMOVED 8000T02333000000: 2010-01-06 = #02ca75 (len 207830)
  REMOVED 0012J00435000000: 2011-08-19 = #02ca75 (len 208375)
0299F00066000200 is missing from centroids, skipping
  REMOVED 0032E00147000000: 2009-06-12 = #02ca75 (len 208625)
27001-28000: processing u'1271A00349000000', 0:00:00.294742 time elapsed
  1271A00349000000: 1950-05-05 = #a50026
  1271A00349000000: 1971-07-14 = #a50026
  REMOVED 0163A00143000000: 2012-01-12 = #02ca75 (len 209995)
  REMOVED 0660N00107000000: 2005-08-08 = #02ca75 (len 211640)
  REMOVED 0251B00216000000: 2010-10-15 = #02ca75 (len 213780)
  REMOVED 0116J00281000000: 2005-07-21 = #02ca75 (len 215785)
  REMOVED 0667B00072000000: 2010-03-01 = #02ca75 (len 216725)
28001-29000: processing u'0180F00081000000', 0:00:00.292779 time elapsed
  0180F00081000000: 1950-05-05 = #a50

  REMOVED 0383M00093000000: 2009-08-24 = #02ca75 (len 275150)
0564S00033000000 is missing from centroids, skipping
36001-37000: processing u'0061B00061000000', 0:00:00.301289 time elapsed
  0061B00061000000: 2003-12-24 = #02ca75
  REMOVED 0081C00232000000: 2010-01-20 = #02ca75 (len 278430)
  REMOVED 0628A00067000000: 1969-08-01 = #02ca75 (len 278630)
  REMOVED 0430A00319000000: 1978-07-12 = #02ca75 (len 279655)
  REMOVED 0071S00192000000: 2014-12-30 = #02ca75 (len 279670)
  REMOVED 1828L00001000000: 2012-12-28 = #02ca75 (len 280090)
0851E00032000000 is missing from centroids, skipping
  REMOVED 0821N00085070100: 2014-01-06 = #02ca75 (len 281315)
  REMOVED 0097E00154000000: 2011-12-08 = #02ca75 (len 281375)
  REMOVED 0321R00194000000: 2009-02-24 = #02ca75 (len 281395)
  REMOVED 0279G00144000000: 2008-01-11 = #02ca75 (len 281405)
  REMOVED 0060A00163000000: 1983-05-23 = #02ca75 (len 282480)
  REMOVED 0022B00238000000: 2013-10-11 = #02ca75 (len 282595)
  REMOVED 1352P00041000000: 2011-11-

  REMOVED 0701B00058000000: 2009-01-20 = #02ca75 (len 338515)
  REMOVED 0419D00289000000: 2010-03-12 = #02ca75 (len 339145)
  REMOVED 0180D00200000000: 2008-05-19 = #02ca75 (len 340210)
  REMOVED 0006R00206000000: 2013-05-17 = #02ca75 (len 341805)
  REMOVED 2204D00037000000: 2007-02-05 = #02ca75 (len 342375)
  REMOVED 1211R00003004400: 2012-12-21 = #02ca75 (len 342640)
  REMOVED 0059P00209000000: 2014-05-29 = #02ca75 (len 342930)
  REMOVED 0181M00206000000: 2010-10-06 = #02ca75 (len 342975)
45001-46000: processing u'0450M00261000000', 0:00:00.293602 time elapsed
  0450M00261000000: 2003-09-16 = #a50026
  REMOVED 0768K00125000000: 2002-07-01 = #02ca75 (len 344250)
  REMOVED 0115P00207000000: 1985-09-20 = #02ca75 (len 344280)
  REMOVED 0488M00005000000: 2011-06-23 = #02ca75 (len 344400)
Unrecognized owner type: Utility, 1
  REMOVED 0492M00070000000: 1994-08-09 = #02ca75 (len 345245)
  REMOVED 0632R00149000000: 2004-04-19 = #02ca75 (len 346330)
0004B00226000D00 is missing from centroids, 

  REMOVED 0389G00072000000: 2003-09-23 = #02ca75 (len 408395)
  REMOVED 0463A00095000000: 2010-04-05 = #02ca75 (len 410380)
  REMOVED 0094S00090000000: 2009-02-05 = #02ca75 (len 410640)
  REMOVED 0123N00296000000: 2010-05-07 = #02ca75 (len 410905)
54001-55000: processing u'0072M00101000000', 0:00:00.289605 time elapsed
  0072M00101000000: 2009-01-07 = #a50026
  0072M00101000000: 2017-05-01 = #a50026
  REMOVED 1517C00248000000: 1999-12-27 = #02ca75 (len 411890)
  REMOVED 0630M00122000000: 2010-04-14 = #02ca75 (len 412260)
  REMOVED 0231D00062000000: 2007-05-24 = #ffffbf (len 412745)
  REMOVED 0114P00124000000: 2010-05-05 = #02ca75 (len 413030)
  REMOVED 0050S00209000000: 2012-10-10 = #02ca75 (len 413285)
  REMOVED 0260M00002000000: 2010-07-26 = #02ca75 (len 413660)
  REMOVED 0530K00278000000: 2003-06-26 = #02ca75 (len 413685)
  REMOVED 0523P00028000000: 2013-06-14 = #02ca75 (len 413865)
  REMOVED 1068S00171000000: 2012-06-06 = #02ca75 (len 413980)
  REMOVED 2007R00032000000: 2013-05-02 

  REMOVED 0174B00323000000: 1989-10-05 = #02ca75 (len 469195)
  REMOVED 0447J00037000000: 1987-12-03 = #02ca75 (len 469670)
  REMOVED 1657D00002000000: 2006-05-09 = #02ca75 (len 469860)
  REMOVED 0507L00070000000: 2009-04-24 = #02ca75 (len 470070)
  REMOVED 0238P00032000000: 1999-09-09 = #02ca75 (len 470510)
  REMOVED 0180G00383000000: 2001-01-25 = #02ca75 (len 470900)
  REMOVED 0821N00085090100: 2004-10-14 = #02ca75 (len 471015)
62001-63000: processing u'0255R00242000000', 0:00:00.291339 time elapsed
  0255R00242000000: 1991-05-28 = #a50026
  0255R00242000000: 2015-09-29 = #a50026
1203C00035000300 is missing from centroids, skipping
  REMOVED 0318R00104000000: 2014-04-01 = #02ca75 (len 472370)
8000T03171000000 is missing from centroids, skipping
  REMOVED 0156H00038000000: 2015-02-26 = #02ca75 (len 473100)
  REMOVED 0249L00198000000: 2010-04-20 = #02ca75 (len 473530)
  REMOVED 0249L00198000000: 2013-08-30 = #02ca75 (len 473535)
  REMOVED 0627S00026000000: 2002-08-05 = #02ca75 (len 473

  REMOVED 0547H00260000000: 2009-07-16 = #02ca75 (len 528025)
  REMOVED 0305N00341000000: 2009-07-28 = #02ca75 (len 528430)
  REMOVED 0180C00052000000: 2010-05-24 = #02ca75 (len 528445)
  REMOVED 0219D00130000000: 2009-07-24 = #02ca75 (len 528575)
  REMOVED 0080G00283000C00: 2002-06-24 = #02ca75 (len 528870)
  REMOVED 0887L00091000000: 1998-11-25 = #02ca75 (len 529430)
  REMOVED 0667C00089000000: 2014-12-18 = #02ca75 (len 530285)
  REMOVED 1353H00034000000: 2003-04-29 = #02ca75 (len 530745)
  REMOVED 0632N00209000000: 2006-12-19 = #02ca75 (len 530800)
70001-71000: processing u'0358A00152000000', 0:00:00.286497 time elapsed
  0358A00152000000: 1975-11-06 = #a50026
  0358A00152000000: 2011-08-04 = #ffffbf
  0358A00152000000: 2015-01-09 = #a50026
  REMOVED 0422D00227000000: 2009-04-27 = #02ca75 (len 532115)
  REMOVED 0041B00160000000: 2010-04-19 = #02ca75 (len 532390)
  REMOVED 0374B00136000000: 2008-04-08 = #02ca75 (len 532435)
  REMOVED 0062H00037000000: 2011-03-25 = #02ca75 (len 532515

  REMOVED 0296C00160000000: 2007-11-13 = #02ca75 (len 580705)
  REMOVED 0033G00227000000: 1983-05-27 = #02ca75 (len 580725)
  REMOVED 0028K00021000000: 2009-02-17 = #02ca75 (len 581005)
  REMOVED 0476N00258000000: 2004-01-23 = #02ca75 (len 581250)
  REMOVED 0062G00132000000: 2001-11-29 = #02ca75 (len 581285)
  REMOVED 0088H00147000000: 2013-07-23 = #02ca75 (len 582415)
  REMOVED 0305M00345000000: 2009-09-11 = #02ca75 (len 582815)
77001-78000: processing u'0120S00246000000', 0:00:00.272525 time elapsed
  0120S00246000000: 2006-07-12 = #a50026
  0120S00246000000: 2017-01-17 = #a50026
  REMOVED 0262G00002000000: 2009-04-01 = #02ca75 (len 585090)
  REMOVED 0050N00333010100: 2013-05-09 = #02ca75 (len 586090)
  REMOVED 0254C00108000000: 2009-09-28 = #02ca75 (len 586260)
  REMOVED 0176A00159000000: 2009-08-31 = #02ca75 (len 587105)
  REMOVED 0161K00285000000: 2007-07-12 = #02ca75 (len 587270)
  REMOVED 0001N00177000000: 2010-01-25 = #02ca75 (len 587660)
  REMOVED 0558F00142000000: 2014-02-21 

  REMOVED 0419D00335000000: 2004-10-14 = #02ca75 (len 643980)
  REMOVED 0039J00305000000: 2009-11-03 = #02ca75 (len 644460)
  REMOVED 0075H00147000000: 2009-03-10 = #02ca75 (len 645600)
  REMOVED 0051B00058000000: 2009-07-06 = #02ca75 (len 646040)
  REMOVED 0029D00038000000: 2010-01-27 = #02ca75 (len 646340)
  REMOVED 0058N00126000000: 2005-11-03 = #02ca75 (len 647570)
  REMOVED 0003M00366000000: 2009-08-20 = #02ca75 (len 647580)
  REMOVED 0615N00188000000: 2010-02-18 = #02ca75 (len 647600)
  REMOVED 0094C00160000000: 2010-02-12 = #02ca75 (len 649300)
  REMOVED 0042L00063000000: 2010-04-08 = #02ca75 (len 649955)
  REMOVED 0165G00211000000: 1987-12-01 = #02ca75 (len 649970)
  REMOVED 1906L00078000000: 2010-01-22 = #02ca75 (len 650575)
  REMOVED 0024J00252000A00: 2009-06-30 = #02ca75 (len 650810)
86001-87000: processing u'0236M00308000000', 0:00:00.285276 time elapsed
  0236M00308000000: 2002-07-17 = #a50026
  REMOVED 2192A00365000000: 1987-03-19 = #02ca75 (len 652205)
  REMOVED 0742G003

  REMOVED 0544S00298000000: 2007-12-20 = #02ca75 (len 706520)
  REMOVED 0162J00071000000: 2009-08-04 = #02ca75 (len 708630)
  REMOVED 1206D00102000000: 2013-06-21 = #02ca75 (len 709005)
  REMOVED 0253M00029000000: 2010-12-13 = #02ca75 (len 710735)
94001-95000: processing u'0372G00178000000', 0:00:00.284516 time elapsed
  0372G00178000000: 1950-05-05 = #a50026
  0372G00178000000: 1964-08-27 = #a50026
  REMOVED 0030A00300000000: 2008-02-22 = #02ca75 (len 711565)
  REMOVED 0072F00222000000: 2012-07-02 = #02ca75 (len 711985)
  REMOVED 0004J00040000000: 1974-11-26 = #02ca75 (len 712065)
  REMOVED 0040F00077000000: 2006-12-13 = #02ca75 (len 712135)
  REMOVED 1655S00045000000: 2006-02-21 = #02ca75 (len 713495)
  REMOVED 0762R00040000000: 2009-03-30 = #02ca75 (len 714955)
  REMOVED 0546R00203000000: 2011-01-20 = #02ca75 (len 715030)
  REMOVED 0018E00016000000: 1989-02-03 = #02ca75 (len 715765)
  REMOVED 0232J00258000000: 2011-01-21 = #02ca75 (len 716015)
  REMOVED 1272B00074000000: 2013-03-18 

0011A00173000000 is missing from centroids, skipping
  REMOVED 0003M00255000A00: 2006-07-13 = #02ca75 (len 779000)
  REMOVED 0460R00142000000: 2009-05-05 = #02ca75 (len 779380)
  REMOVED 0460R00142000000: 2011-04-04 = #02ca75 (len 779385)
  REMOVED 0523B00302000000: 2015-02-04 = #02ca75 (len 779470)
0154S00030000000 is missing from centroids, skipping
103001-104000: processing u'0732J00186000000', 0:00:00.294256 time elapsed
  0732J00186000000: 2004-08-05 = #a50026
  0732J00186000000: 2012-10-09 = #a50026
  REMOVED 0097G00223000000: 2010-06-24 = #02ca75 (len 779675)
  REMOVED 0321S00060013200: 2003-08-15 = #02ca75 (len 779895)
  REMOVED 0711P00021000000: 2013-10-31 = #02ca75 (len 781545)
  REMOVED 0457N00163000000: 2005-05-20 = #02ca75 (len 781720)
  REMOVED 0150S00096023100: 2007-01-12 = #02ca75 (len 782630)
  REMOVED 0047A00287000000: 2007-04-24 = #02ca75 (len 782800)
  REMOVED 0198M00006000000: 2013-04-30 = #02ca75 (len 783560)
  REMOVED 1658G00097000000: 2006-11-27 = #02ca75 (len 7

  REMOVED 0509R00045000000: 2007-07-11 = #02ca75 (len 845220)
  REMOVED 0208H00056000000: 2012-04-17 = #02ca75 (len 845235)
  REMOVED 0331G00008000000: 2000-05-04 = #02ca75 (len 845335)
  REMOVED 0124N00309000000: 2009-01-20 = #02ca75 (len 845885)
  REMOVED 0275G00012000000: 2014-05-27 = #02ca75 (len 846335)
  REMOVED 0173K00076000000: 1995-06-20 = #02ca75 (len 846780)
  REMOVED 0241L00137000000: 2009-03-12 = #02ca75 (len 847090)
112001-113000: processing u'0375F00260000000', 0:00:00.288542 time elapsed
  0375F00260000000: 1979-03-09 = #a50026
  REMOVED 0072F00208000000: 2000-06-15 = #02ca75 (len 847195)
  REMOVED 0711G00280000000: 2010-02-16 = #02ca75 (len 847730)
  REMOVED 0086K00212000000: 2013-07-16 = #02ca75 (len 848415)
  REMOVED 0188E00200000000: 2014-05-22 = #02ca75 (len 848725)
  REMOVED 0549P00028000000: 2004-11-08 = #02ca75 (len 849460)
  REMOVED 0314N00222000000: 2014-05-22 = #02ca75 (len 849565)
  REMOVED 0432K00092000000: 2015-02-25 = #02ca75 (len 849645)
  REMOVED 0917H0

  REMOVED 0033H00143000000: 1989-04-26 = #02ca75 (len 905275)
  REMOVED 0381K00218000000: 2009-03-18 = #02ca75 (len 905810)
  REMOVED 1820P00005000000: 2007-01-23 = #02ca75 (len 906705)
120001-121000: processing u'0194M00182000000', 0:00:00.277436 time elapsed
  0194M00182000000: 1987-02-06 = #a50026
  REMOVED 0063A00043000000: 2009-05-27 = #02ca75 (len 907310)
  REMOVED 0059L00063000000: 2010-04-08 = #02ca75 (len 907390)
1365G00112000000 is missing from centroids, skipping
  REMOVED 0373E00119000000: 2009-06-11 = #02ca75 (len 907600)
  REMOVED 1737A00076000000: 2007-04-18 = #ffffbf (len 907775)
  REMOVED 0522C00286000000: 2010-10-27 = #02ca75 (len 908085)
0382B00395000000 is missing from centroids, skipping
  REMOVED 0856A00161000000: 2010-02-04 = #02ca75 (len 909945)
1077E00156000001 is missing from centroids, skipping
  REMOVED 0029N00095000000: 2010-03-31 = #02ca75 (len 910585)
  REMOVED 0798K00010000000: 2012-02-13 = #02ca75 (len 912295)
  REMOVED 0024G00151000000: 2010-12-30 = #0

  REMOVED 0261G00002000000: 2004-12-06 = #02ca75 (len 964020)
  REMOVED 0040E00212000000: 1996-02-27 = #02ca75 (len 964435)
  REMOVED 0277P00090000000: 2004-08-10 = #02ca75 (len 964845)
  REMOVED 0002F00175003100: 2013-11-18 = #02ca75 (len 965180)
  REMOVED 0381S00219000000: 2007-10-31 = #02ca75 (len 965350)
  REMOVED 0732C00160000000: 2001-04-25 = #02ca75 (len 966280)
  REMOVED 0653F00263000000: 2014-01-07 = #02ca75 (len 966440)
128001-129000: processing u'0690D00062000000', 0:00:00.293001 time elapsed
  0690D00062000000: 1950-05-05 = #ffffbf
  0690D00062000000: 1998-04-08 = #ffffbf
  REMOVED 0112J00177000000: 2009-06-24 = #02ca75 (len 967320)
  REMOVED 0180G00164000000: 2003-07-21 = #02ca75 (len 968370)
  REMOVED 0364L00396000000: 1974-09-05 = #02ca75 (len 969235)
  REMOVED 0712C00388000000: 2007-01-19 = #02ca75 (len 969315)
  REMOVED 0704F00278000000: 2009-09-12 = #02ca75 (len 969605)
  REMOVED 0235E00332000000: 2009-08-10 = #02ca75 (len 970640)
  REMOVED 0825R00167000000: 2012-08-0

  REMOVED 0180R00123000000: 1961-11-08 = #02ca75 (len 1038970)
  REMOVED 1348C00026000000: 2010-03-24 = #02ca75 (len 1040290)
  REMOVED 0549G00125000000: 2004-11-23 = #02ca75 (len 1040940)
  REMOVED 0046K00151000000: 2000-05-12 = #02ca75 (len 1041115)
  REMOVED 0879L00235000000: 2009-03-30 = #02ca75 (len 1041365)
  REMOVED 0206J01006000000: 2006-10-04 = #02ca75 (len 1041385)
0081A00128000000 is missing from centroids, skipping
  REMOVED 0074P00054000000: 2012-07-05 = #02ca75 (len 1041580)
  REMOVED 0252N00067000000: 2009-09-04 = #02ca75 (len 1041795)
  REMOVED 0035G00086000000: 2012-08-24 = #02ca75 (len 1042005)
  REMOVED 0629L00065000000: 2015-03-18 = #02ca75 (len 1042025)
138001-139000: processing u'1242C00314000000', 0:00:00.279634 time elapsed
  1242C00314000000: 2003-07-08 = #a50026
  1242C00314000000: 2013-12-02 = #a50026
  REMOVED 0039E00179000000: 2014-01-13 = #02ca75 (len 1042320)
  REMOVED 1367R00363000000: 2015-04-20 = #02ca75 (len 1043445)
  REMOVED 0713D00611420900: 2014-1

  REMOVED 0609H00077000000: 2014-03-14 = #02ca75 (len 1104770)
  REMOVED 0117A00086000000: 1990-05-29 = #02ca75 (len 1105310)
  REMOVED 0649H00214000000: 2011-02-10 = #02ca75 (len 1106925)
  REMOVED 0429E00295000000: 2004-04-01 = #02ca75 (len 1107170)
0746G00016000000 is missing from centroids, skipping
  REMOVED 0835F00096000000: 2010-03-10 = #02ca75 (len 1107885)
  REMOVED 0219M00170000000: 2002-02-22 = #02ca75 (len 1108010)
147001-148000: processing u'0573L00019920200', 0:00:00.285651 time elapsed
  0573L00019920200: 2006-09-21 = #a50026
  REMOVED 0082K00154000000: 2005-06-23 = #02ca75 (len 1110295)
  REMOVED 0317G00202000000: 2012-10-29 = #02ca75 (len 1112455)
  REMOVED 0013L00256000000: 1987-04-30 = #ffffbf (len 1112735)
  REMOVED 0520L00018000000: 2010-03-16 = #02ca75 (len 1113045)
  REMOVED 0876G00195000000: 2012-12-13 = #02ca75 (len 1114760)
  REMOVED 0298N00058000000: 2014-05-23 = #02ca75 (len 1115180)
  REMOVED 0454J00051000000: 2005-09-07 = #02ca75 (len 1115485)
  REMOVED 07

  REMOVED 0255A00018000000: 1996-06-13 = #02ca75 (len 1178715)
0024N00056000A00 is missing from centroids, skipping
  REMOVED 1655C00008000000: 2006-12-12 = #02ca75 (len 1180385)
  REMOVED 0367F00356000000: 2012-02-27 = #02ca75 (len 1180545)
  REMOVED 0627G00038000000: 1985-02-26 = #02ca75 (len 1181335)
  REMOVED 0118L00287000000: 2007-10-04 = #02ca75 (len 1182735)
  REMOVED 0276L00003000000: 2000-04-24 = #02ca75 (len 1182880)
  REMOVED 0051E00320000000: 2010-01-26 = #02ca75 (len 1184135)
  REMOVED 0227N00075040000: 1981-11-10 = #02ca75 (len 1184570)
157001-158000: processing u'0381B00082000000', 0:00:00.278642 time elapsed
  0381B00082000000: 1976-11-08 = #a50026
  REMOVED 0280P00380000000: 2002-11-06 = #02ca75 (len 1185040)
  REMOVED 0423E00040000000: 2013-08-29 = #02ca75 (len 1185145)
  REMOVED 0012S00090000000: 2000-11-29 = #02ca75 (len 1186400)
  REMOVED 0099D00278000000: 2009-03-10 = #02ca75 (len 1186795)
  REMOVED 0572F00025000000: 2012-03-21 = #02ca75 (len 1187905)
  REMOVED 00

  REMOVED 0193B00204000000: 2010-03-01 = #02ca75 (len 1243365)
  REMOVED 0874P00183000000: 2011-01-25 = #02ca75 (len 1243455)
  REMOVED 0251L00068000000: 2014-12-19 = #02ca75 (len 1243790)
165001-166000: processing '0422H00604000000', 0:00:00.277364 time elapsed
  0422H00604000000: 2014-11-28 = #ffffbf
  REMOVED 0015D00064000000: 2009-05-22 = #02ca75 (len 1244630)
  REMOVED 0121G00094000000: 2012-01-27 = #02ca75 (len 1244845)
  REMOVED 0430J00092000000: 2015-02-06 = #02ca75 (len 1245725)
0152M00265000000 is missing from centroids, skipping
  REMOVED 0322M00178000000: 2010-08-05 = #02ca75 (len 1247135)
  REMOVED 1672N00261000000: 2012-04-27 = #02ca75 (len 1248240)
  REMOVED 0855A00044000000: 2011-01-04 = #02ca75 (len 1248655)
  REMOVED 0503S00315000000: 2008-09-23 = #02ca75 (len 1248685)
  REMOVED 0014N00046000000: 2003-12-05 = #02ca75 (len 1248900)
  REMOVED 0876R00393000000: 1981-07-24 = #02ca75 (len 1249245)
  REMOVED 0713J00338000000: 2004-11-19 = #02ca75 (len 1249535)
  REMOVED 059

  REMOVED 0083E00212000000: 2001-05-08 = #02ca75 (len 1300345)
  REMOVED 0093E00207000000: 2008-01-29 = #02ca75 (len 1301195)
  REMOVED 0036R00204000000: 2009-05-14 = #02ca75 (len 1301240)
  REMOVED 0054J00099000000: 2011-02-22 = #02ca75 (len 1301895)
  REMOVED 0307N00291000000: 1962-01-31 = #02ca75 (len 1302030)
  REMOVED 0517L00193000000: 2010-03-15 = #02ca75 (len 1302370)
  REMOVED 0098J00292000000: 2006-11-08 = #02ca75 (len 1302470)
  REMOVED 1203N00133000000: 2007-06-06 = #02ca75 (len 1303535)
  REMOVED 0448N00193000000: 2012-07-19 = #02ca75 (len 1304095)
  REMOVED 0558F00080000000: 2014-01-14 = #02ca75 (len 1304210)
  REMOVED 0122C00122000000: 2010-03-01 = #02ca75 (len 1304270)
  REMOVED 0458P00247000000: 2009-03-11 = #02ca75 (len 1304535)
  REMOVED 0039D00376000000: 2008-09-23 = #02ca75 (len 1304560)
173001-174000: processing u'0977B00476000000', 0:00:00.290710 time elapsed
  0977B00476000000: 1994-09-23 = #a50026
  REMOVED 0060C00181000000: 2010-07-19 = #02ca75 (len 1305255)
  

  REMOVED 0290R00108000000: 2014-04-01 = #02ca75 (len 1359315)
  REMOVED 0006M00312114000: 2006-12-06 = #02ca75 (len 1361385)
0464L00135000000 is missing from centroids, skipping
  REMOVED 0054M00304000000: 2014-03-03 = #02ca75 (len 1361475)
  REMOVED 0219M00265000000: 2013-04-16 = #02ca75 (len 1362535)
  REMOVED 0296H00165000000: 2009-06-01 = #02ca75 (len 1362620)
  REMOVED 0532P00172000000: 2015-02-05 = #02ca75 (len 1362865)
  REMOVED 0537D00151000000: 2015-03-06 = #02ca75 (len 1362895)
181001-182000: processing u'0077B00151000000', 0:00:00.289729 time elapsed
  0077B00151000000: 2008-08-22 = #a50026
  REMOVED 0179R00142000000: 2010-01-26 = #02ca75 (len 1364935)
  REMOVED 0069S00160000000: 2012-11-13 = #02ca75 (len 1366250)
  REMOVED 0160C00173000000: 2014-05-05 = #02ca75 (len 1367590)
  REMOVED 0035N00038000000: 2006-08-17 = #02ca75 (len 1367855)
  REMOVED 0714J00183000000: 2000-11-14 = #02ca75 (len 1368460)
  REMOVED 1837K00301000000: 2012-12-12 = #02ca75 (len 1368560)
  REMOVED 02

190001-191000: processing u'0517K00247000000', 0:00:00.281601 time elapsed
  0517K00247000000: 2001-09-18 = #a50026
  0517K00247000000: 2013-09-10 = #a50026
  0517K00247000000: 2016-08-22 = #02ca75
  REMOVED 0012J00020000000: 2004-04-06 = #02ca75 (len 1432045)
  REMOVED 0773M00100000000: 2007-03-01 = #02ca75 (len 1432395)
  REMOVED 1132F00090000000: 2010-03-29 = #02ca75 (len 1432490)
  REMOVED 0281E00211000000: 2009-08-26 = #02ca75 (len 1432860)
  REMOVED 0455M00221000000: 2015-04-07 = #02ca75 (len 1432905)
  REMOVED 0193K00124000000: 2006-06-13 = #02ca75 (len 1433105)
  REMOVED 0002F00175006100: 2014-03-31 = #02ca75 (len 1433175)
  REMOVED 0839P00035000000: 2005-06-30 = #02ca75 (len 1434300)
0006L00324000000 is missing from centroids, skipping
  REMOVED 0174A00098000000: 2004-10-14 = #02ca75 (len 1435890)
  REMOVED 0087L00049010900: 2011-02-10 = #02ca75 (len 1436320)
0011D00161000000 is missing from centroids, skipping
  REMOVED 0306A00210000000: 2011-10-17 = #02ca75 (len 1437460)
  R

  REMOVED 0233F00115000000: 1996-08-30 = #02ca75 (len 1489880)
  REMOVED 0055J00172000000: 2003-11-13 = #02ca75 (len 1490480)
  REMOVED 0160H00172000000: 1998-07-20 = #02ca75 (len 1490535)
  REMOVED 1203N00137000000: 2013-01-09 = #02ca75 (len 1490680)
  REMOVED 0426D00258000000: 2011-11-10 = #02ca75 (len 1491550)
0154S00042000000 is missing from centroids, skipping
198001-199000: processing u'0045P00153000100', 0:00:00.279208 time elapsed
  0045P00153000100: 2008-12-10 = #a50026
  REMOVED 0878J00121000000: 2003-07-08 = #02ca75 (len 1492740)
  REMOVED 0026D00190000A00: 2010-12-30 = #02ca75 (len 1492920)
  REMOVED 0090R00212000000: 2008-01-22 = #02ca75 (len 1493600)
  REMOVED 0236B00024000000: 2007-08-28 = #02ca75 (len 1494760)
  REMOVED 0734B00240000L00: 2003-08-28 = #02ca75 (len 1497665)
  REMOVED 0065A00174000000: 2010-09-03 = #02ca75 (len 1497955)
  REMOVED 0960K00095000000: 2009-05-21 = #02ca75 (len 1498060)
  REMOVED 0160A00132060300: 2012-06-29 = #02ca75 (len 1498125)
  REMOVED 04

207001-208000: processing u'0054S00104000000', 0:00:00.271573 time elapsed
  0054S00104000000: 2005-09-16 = #a50026
  REMOVED 0808S00042000000: 2013-01-14 = #02ca75 (len 1559600)
  REMOVED 0060E00120000000: 2015-03-20 = #02ca75 (len 1559995)
  REMOVED 1507H00315000000: 2013-05-30 = #02ca75 (len 1560580)
  REMOVED 0837R00222000000: 2015-02-23 = #02ca75 (len 1560685)
  REMOVED 0981G00005000000: 2010-09-16 = #02ca75 (len 1563130)
  REMOVED 0088C00022000000: 2001-10-01 = #02ca75 (len 1563730)
  REMOVED 0114F00068000000: 2014-12-29 = #02ca75 (len 1564965)
  REMOVED 0263L00015000000: 2012-10-09 = #02ca75 (len 1564990)
  REMOVED 0019B00133000000: 2007-07-31 = #02ca75 (len 1565305)
  REMOVED 0034A00169000000: 2009-08-20 = #02ca75 (len 1565605)
  REMOVED 0004F00051000000: 2004-01-13 = #02ca75 (len 1566360)
  REMOVED 0392M00056000000: 2012-12-07 = #02ca75 (len 1566675)
208001-209000: processing u'1223L00237000000', 0:00:00.272387 time elapsed
  1223L00237000000: 1994-01-21 = #a50026
  1223L00237

  REMOVED 0591K00038000000: 2010-04-20 = #02ca75 (len 1622975)
  REMOVED 0003R00323000000: 2009-01-29 = #02ca75 (len 1623800)
8000T04710000000 is missing from centroids, skipping
  REMOVED 1996C00010000000: 2013-09-16 = #02ca75 (len 1624900)
  REMOVED 0493F00006000000: 2007-05-31 = #02ca75 (len 1626810)
216001-217000: processing u'0850J00385000000', 0:00:00.295690 time elapsed
  0850J00385000000: 2000-09-01 = #a50026
  REMOVED 0450C00196000000: 2006-07-10 = #02ca75 (len 1627060)
  REMOVED 0383S00161000000: 2000-04-26 = #02ca75 (len 1627490)
  REMOVED 0049B00378000A00: 1993-08-19 = #02ca75 (len 1629180)
  REMOVED 0295G00204000000: 2010-05-11 = #02ca75 (len 1630165)
  REMOVED 1218H00231000000: 1997-05-05 = #02ca75 (len 1630790)
  REMOVED 0544L00311000500: 2004-08-13 = #02ca75 (len 1631190)
  REMOVED 0663R00022000000: 2011-07-29 = #02ca75 (len 1631825)
  REMOVED 0075R00016000000: 2005-12-21 = #02ca75 (len 1631945)
  REMOVED 0320C00017000000: 2015-02-05 = #02ca75 (len 1633515)
  REMOVED 03

8000T02530000000 is missing from centroids, skipping
  REMOVED 0004A00136000000: 2015-02-06 = #02ca75 (len 1690350)
  REMOVED 0023N00177081400: 2012-07-02 = #02ca75 (len 1691270)
  REMOVED 0197J00001000000: 2010-07-13 = #02ca75 (len 1691530)
  REMOVED 0026D00188000000: 2004-08-30 = #02ca75 (len 1693270)
  REMOVED 0173B00202000000: 2009-06-03 = #02ca75 (len 1693590)
1834D00046000000 is missing from centroids, skipping
225001-226000: processing u'0344L00060000000', 0:00:00.278354 time elapsed
  0344L00060000000: 1996-05-06 = #a50026
  0344L00060000000: 2013-10-22 = #ffffbf
  REMOVED 0422K00153000000: 2015-02-06 = #02ca75 (len 1695715)
  REMOVED 0321A00024000000: 2009-06-18 = #02ca75 (len 1696340)
  REMOVED 0275L00089000000: 2011-01-20 = #02ca75 (len 1696915)
  REMOVED 0016P00148000000: 2009-01-23 = #02ca75 (len 1697215)
  REMOVED 0004F00057000000: 2010-05-03 = #02ca75 (len 1697440)
  REMOVED 0026R00164000000: 2010-08-09 = #02ca75 (len 1697720)
  REMOVED 0464F00127000000: 2014-03-11 = #02

  REMOVED 1418B00133000000: 1988-05-10 = #02ca75 (len 1758900)
  REMOVED 0202F00033000000: 2013-10-01 = #02ca75 (len 1759410)
  REMOVED 0051J00089000000: 1978-10-25 = #02ca75 (len 1760335)
  REMOVED 0300D00205030400: 2002-05-15 = #02ca75 (len 1761385)
  REMOVED 0040K00324000000: 2004-05-07 = #02ca75 (len 1761430)
234001-235000: processing u'0370B00226000000', 0:00:00.284281 time elapsed
  0370B00226000000: 1989-10-27 = #a50026
  REMOVED 0214E00053000000: 2014-05-13 = #02ca75 (len 1763115)
  REMOVED 0015M00248000000: 2002-05-15 = #02ca75 (len 1763570)
  REMOVED 1320K00378000000: 2012-04-10 = #02ca75 (len 1763680)
  REMOVED 1671B00139000000: 2005-07-26 = #02ca75 (len 1763700)
  REMOVED 0380C00115000000: 1997-09-22 = #02ca75 (len 1764395)
  REMOVED 0086R00094000000: 1998-07-27 = #02ca75 (len 1764640)
  REMOVED 0029G00197000000: 2014-05-06 = #02ca75 (len 1764660)
  REMOVED 0175E00217000000: 1992-10-13 = #02ca75 (len 1765395)
  REMOVED 0072L00059000000: 1997-04-29 = #02ca75 (len 1766120)
  

  REMOVED 0935B00110000000: 2011-09-16 = #02ca75 (len 1820555)
  REMOVED 0173N00276000000: 1985-12-16 = #02ca75 (len 1820775)
  REMOVED 2469C00057000000: 2014-03-21 = #02ca75 (len 1821765)
8000T00459000000 is missing from centroids, skipping
242001-243000: processing '0156S00225000000', 0:00:00.290348 time elapsed
  0156S00225000000: 2016-05-19 = #a50026
  REMOVED 0052H00130010300: 2000-09-26 = #02ca75 (len 1822950)
  REMOVED 0396A00138000000: 2014-02-11 = #02ca75 (len 1823960)
0050K00057000000 is missing from centroids, skipping
  REMOVED 0173N00020000000: 2001-07-10 = #02ca75 (len 1825385)
  REMOVED 0155N00020000000: 2011-01-20 = #02ca75 (len 1825480)
  REMOVED 0773L00352000000: 2010-05-17 = #02ca75 (len 1825725)
  REMOVED 0481R00055000B00: 2010-12-09 = #02ca75 (len 1825955)
0180D00279000000 is missing from centroids, skipping
8000T03184000000 is missing from centroids, skipping
  REMOVED 0419H00323000000: 2005-02-17 = #02ca75 (len 1827560)
  REMOVED 0054M00037000000: 2003-07-28 = #0

  REMOVED 0012E00110000500: 1997-07-29 = #02ca75 (len 1891500)
  REMOVED 0136L00070000000: 2005-11-03 = #02ca75 (len 1893595)
  REMOVED 0188G00146000000: 1995-12-29 = #02ca75 (len 1894015)
  REMOVED 0233D00014000000: 2008-09-09 = #02ca75 (len 1894195)
  REMOVED 0072G00348000000: 2008-05-13 = #02ca75 (len 1894340)
  REMOVED 0023H00156000000: 2013-11-01 = #02ca75 (len 1894455)
  REMOVED 0592R00256000000: 2013-02-01 = #02ca75 (len 1894530)
0248N00160000000 is missing from centroids, skipping
252001-253000: processing u'1087H00004000000', 0:00:00.281448 time elapsed
  1087H00004000000: 1950-05-05 = #ffffbf
  1087H00004000000: 1962-08-23 = #ffffbf
  REMOVED 0502N00051000000: 2014-01-21 = #02ca75 (len 1897020)
0269S00017000000 is missing from centroids, skipping
  REMOVED 0214G00070000000: 2013-09-30 = #02ca75 (len 1897535)
  REMOVED 0059L00020000000: 2009-09-01 = #02ca75 (len 1897540)
8000T03103000000 is missing from centroids, skipping
  REMOVED 1006N00087000000: 2012-05-24 = #02ca75 (len 

  REMOVED 0522C00105000000: 2014-04-29 = #02ca75 (len 1953150)
  REMOVED 1070N00245000000: 2006-08-04 = #02ca75 (len 1953905)
  REMOVED 0102A00082000000: 2003-02-04 = #02ca75 (len 1954555)
  REMOVED 0160P00183000000: 2015-03-09 = #02ca75 (len 1954605)
0198H00031000000 is missing from centroids, skipping
  REMOVED 0729P00100000000: 2008-11-10 = #02ca75 (len 1954685)
  REMOVED 0080D00280000A00: 2009-05-09 = #02ca75 (len 1955050)
  REMOVED 0159D00185000000: 2015-03-17 = #02ca75 (len 1955425)
  REMOVED 0296G00029000000: 2010-01-25 = #02ca75 (len 1955780)
  REMOVED 0015D00217000000: 2011-09-27 = #02ca75 (len 1955885)
  REMOVED 0184N00120000000: 1998-08-03 = #02ca75 (len 1956145)
Unrecognized owner type: Utility, 3
260001-261000: processing u'0734F00234000000', 0:00:00.288537 time elapsed
  0734F00234000000: 1984-03-06 = #a50026
  REMOVED 0074N00299000000: 2006-07-25 = #02ca75 (len 1956980)
  REMOVED 0040H00269000000: 2009-07-28 = #02ca75 (len 1956985)
  REMOVED 0365B00015000000: 2006-04-03 

267001-268000: processing u'1997R00008000000', 0:00:00.281807 time elapsed
  1997R00008000000: 1991-12-19 = #a50026
  1997R00008000000: 2017-07-01 = #a50026
  REMOVED 0704L00144000000: 2009-03-24 = #02ca75 (len 2009440)
  REMOVED 0176F00070000000: 2005-07-07 = #02ca75 (len 2010105)
  REMOVED 0350M00010000000: 2010-03-30 = #02ca75 (len 2010305)
  REMOVED 0063H00031000000: 2002-10-04 = #02ca75 (len 2010585)
  REMOVED 0292F00076000000: 2010-03-02 = #02ca75 (len 2011185)
  REMOVED 0014N00247000000: 2010-01-05 = #02ca75 (len 2011205)
0228J00210050300 is missing from centroids, skipping
  REMOVED 0124R00279000000: 2015-01-23 = #02ca75 (len 2011575)
  REMOVED 1007J00101000000: 2010-04-09 = #02ca75 (len 2012830)
  REMOVED 0175D00087000000: 2009-06-18 = #02ca75 (len 2013625)
  REMOVED 0050P00056000000: 2010-12-30 = #02ca75 (len 2013860)
  REMOVED 0073E00187000000: 2004-06-24 = #02ca75 (len 2013915)
  REMOVED 0026F00254000000: 2013-04-17 = #02ca75 (len 2015390)
  REMOVED 0034M00127000000: 1950-0

  REMOVED 8000T01058000000: 2014-09-15 = #02ca75 (len 2077350)
2001G00005000000 is missing from centroids, skipping
  REMOVED 0013M00265000000: 2005-08-24 = #02ca75 (len 2078875)
0414G00034000000 is missing from centroids, skipping
  REMOVED 0550J00303000000: 2003-05-29 = #02ca75 (len 2079170)
  REMOVED 0550J00303000000: 2013-06-27 = #02ca75 (len 2079175)
  REMOVED 0567A00042000000: 2014-02-13 = #02ca75 (len 2079900)
  REMOVED 0744H00091000000: 2013-12-27 = #02ca75 (len 2080495)
  REMOVED 0220K00070000000: 2015-03-27 = #02ca75 (len 2081380)
  REMOVED 0946M00237000000: 2013-10-18 = #02ca75 (len 2081605)
  REMOVED 0255R00323000000: 2005-03-16 = #02ca75 (len 2081925)
  REMOVED 0319B00070000000: 2011-10-06 = #02ca75 (len 2083305)
277001-278000: processing u'0235A00113000000', 0:00:00.277359 time elapsed
  0235A00113000000: 2003-10-07 = #02ca75
  0235A00113000000: 2016-08-16 = #a50026
  REMOVED 0657K00077000000: 2003-10-28 = #02ca75 (len 2084245)
1202H00200000000 is missing from centroids, 

  REMOVED 0746P00151000001: 2009-05-21 = #02ca75 (len 2140820)
  REMOVED 0716E00312000000: 2015-03-13 = #02ca75 (len 2141005)
  REMOVED 0163M00283000000: 2001-06-19 = #02ca75 (len 2141720)
  REMOVED 0063R00285000000: 2004-10-07 = #02ca75 (len 2141970)
  REMOVED 0050L00325000B00: 1995-10-16 = #02ca75 (len 2142255)
  REMOVED 0609F00343000000: 2010-12-07 = #02ca75 (len 2143110)
  REMOVED 1825A00017000000: 2009-02-17 = #02ca75 (len 2143965)
285001-286000: processing u'0720J00282000000', 0:00:00.272329 time elapsed
  0720J00282000000: 1982-08-27 = #a50026
  0720J00282000000: 2016-06-06 = #a50026
  REMOVED 1272E00211000000: 2010-12-01 = #02ca75 (len 2144165)
8000T03178000000 is missing from centroids, skipping
  REMOVED 0448P00260000000: 1997-08-08 = #02ca75 (len 2144420)
  REMOVED 0997L00151000000: 2009-05-04 = #02ca75 (len 2145320)
  REMOVED 0124N00144000000: 2009-06-09 = #02ca75 (len 2145335)
  REMOVED 0101A00172000000: 2001-12-14 = #02ca75 (len 2145895)
  REMOVED 1203B00063000300: 2014-1

  REMOVED 0942P00291000000: 2011-10-07 = #02ca75 (len 2204715)
  REMOVED 0042M00095000000: 2014-04-24 = #02ca75 (len 2207060)
  REMOVED 0617S00110000000: 2013-09-12 = #02ca75 (len 2208415)
  REMOVED 0551P00059000000: 2013-04-16 = #02ca75 (len 2208630)
  REMOVED 0087L00144000000: 2009-08-31 = #02ca75 (len 2208645)
  REMOVED 0292E00009000000: 1982-03-23 = #02ca75 (len 2209505)
294001-295000: processing u'0045K00169000000', 0:00:00.287138 time elapsed
  0045K00169000000: 2006-10-03 = #a50026
  REMOVED 0006M00312241000: 1993-04-02 = #02ca75 (len 2210590)
  REMOVED 1657C00120000000: 1989-07-31 = #02ca75 (len 2210885)
  REMOVED 1078G00196000000: 2011-09-16 = #02ca75 (len 2212305)
  REMOVED 0261L00036000000: 1996-02-06 = #02ca75 (len 2212325)
  REMOVED 1826M00001000000: 2012-12-17 = #02ca75 (len 2212425)
  REMOVED 0046K00241000000: 2009-02-23 = #02ca75 (len 2212485)
  REMOVED 0998G00208000000: 1964-01-31 = #02ca75 (len 2215690)
  REMOVED 0520P00016000000: 2014-05-30 = #02ca75 (len 2215745)
  

  REMOVED 1666F00349000000: 2014-04-03 = #02ca75 (len 2264830)
  REMOVED 0051B00204000000: 1992-06-25 = #02ca75 (len 2264845)
  REMOVED 0419B00082000000: 2009-09-15 = #02ca75 (len 2265435)
  REMOVED 0543C00090000000: 2009-07-21 = #02ca75 (len 2265450)
  REMOVED 0449C00221000000: 2008-06-04 = #02ca75 (len 2265690)
  REMOVED 0033K00076000000: 2010-01-27 = #02ca75 (len 2265740)
  REMOVED 0115D00014000000: 2012-05-15 = #02ca75 (len 2266605)
  REMOVED 1182C00265000000: 1983-06-08 = #02ca75 (len 2266610)
  REMOVED 0154D00392000000: 2009-08-05 = #02ca75 (len 2267070)
  REMOVED 0815H00162000000: 2015-02-26 = #02ca75 (len 2267380)
  REMOVED 0024F00001000000: 2007-11-20 = #02ca75 (len 2267785)
  REMOVED 1132A00239000000: 2008-05-13 = #02ca75 (len 2267820)
  REMOVED 0070R00180000000: 2008-03-20 = #02ca75 (len 2267975)
  REMOVED 0195C00135000000: 2012-02-29 = #02ca75 (len 2267985)
  REMOVED 0384L00093000000: 2009-08-25 = #02ca75 (len 2268190)
  REMOVED 0440N00054000000: 2013-09-18 = #02ca75 (len 2

  REMOVED 0035M00025000000: 1999-08-10 = #02ca75 (len 2322445)
  REMOVED 0738E00330000000: 1998-10-13 = #02ca75 (len 2322685)
  REMOVED 0026R00054000000: 1995-01-24 = #02ca75 (len 2322730)
  REMOVED 0087M00060000000: 2014-01-27 = #02ca75 (len 2322795)
309001-310000: processing u'2387M00109000000', 0:00:00.286482 time elapsed
  2387M00109000000: 1950-05-05 = #a50026
  2387M00109000000: 1967-06-26 = #a50026
  2387M00109000000: 2012-12-20 = #a50026
  2387M00109000000: 2014-08-07 = #a50026
  REMOVED 0115P00160000000: 2006-09-28 = #02ca75 (len 2322965)
1658M00026000000 is missing from centroids, skipping
  REMOVED 0066K00220000000: 2012-09-29 = #02ca75 (len 2323895)
  REMOVED 0596F00205202800: 2004-07-08 = #02ca75 (len 2324335)
  REMOVED 0382R00121000000: 1998-09-30 = #02ca75 (len 2324805)
  REMOVED 0077S00184000000: 1966-05-09 = #02ca75 (len 2324850)
  REMOVED 0157B00028004600: 2013-05-17 = #02ca75 (len 2325005)
  REMOVED 0131M00058000000: 2010-03-04 = #02ca75 (len 2325230)
  REMOVED 0846L

  REMOVED 0219N00263000000: 2010-02-03 = #02ca75 (len 2378675)
  REMOVED 1660B00001000000: 2010-02-22 = #02ca75 (len 2379180)
  REMOVED 0454L00204000000: 2010-02-18 = #02ca75 (len 2379345)
  REMOVED 0084K00265000000: 1999-06-04 = #ffffbf (len 2380285)
  REMOVED 0032C00107000000: 2006-01-17 = #02ca75 (len 2380770)
  REMOVED 0051F00198000000: 2009-02-26 = #02ca75 (len 2381960)
  REMOVED 0051A00040000000: 1981-09-09 = #02ca75 (len 2382410)
  REMOVED 0447C00115000000: 2011-05-06 = #02ca75 (len 2382625)
317001-318000: processing u'0944M00316000000', 0:00:00.287820 time elapsed
  0944M00316000000: 2009-05-28 = #02ca75
  REMOVED 0094C00096000000: 2011-04-15 = #02ca75 (len 2383175)
  REMOVED 0315E00035000000: 1983-11-04 = #02ca75 (len 2383750)
  REMOVED 1833J00262000000: 2015-04-10 = #02ca75 (len 2383955)
  REMOVED 1519D00117000000: 2009-07-13 = #02ca75 (len 2384030)
  REMOVED 0276K00286000000: 2000-09-07 = #02ca75 (len 2384080)
  REMOVED 0255H00113000000: 2006-08-29 = #02ca75 (len 2385180)
  

  REMOVED 0127J00214000000: 2003-10-29 = #02ca75 (len 2436730)
  REMOVED 0942E00087000000: 2002-11-26 = #02ca75 (len 2437145)
  REMOVED 0057G00186000000: 2009-04-08 = #02ca75 (len 2437175)
  REMOVED 0733A00160000000: 1997-07-01 = #02ca75 (len 2437200)
  REMOVED 0014A00047000000: 2008-01-25 = #02ca75 (len 2437295)
  REMOVED 1345L00107160100: 2009-09-04 = #02ca75 (len 2437510)
  REMOVED 0254R00009000000: 2012-02-10 = #02ca75 (len 2437620)
  REMOVED 0941A00261000000: 2010-02-23 = #02ca75 (len 2437630)
  REMOVED 0704S00081000000: 2009-05-18 = #02ca75 (len 2437695)
0198H00030000000 is missing from centroids, skipping
  REMOVED 0062S00145000000: 2000-07-18 = #02ca75 (len 2438525)
  REMOVED 0523H00153000000: 1995-08-17 = #02ca75 (len 2438570)
  REMOVED 0502F00109000000: 2009-08-17 = #02ca75 (len 2438690)
  REMOVED 0051C00151000000: 2011-02-18 = #02ca75 (len 2439370)
  REMOVED 0019J00090000000: 2007-04-25 = #02ca75 (len 2439870)
  REMOVED 0043A00056000000: 2012-10-25 = #02ca75 (len 2440630)
  

  REMOVED 0023J00132000000: 2001-04-18 = #02ca75 (len 2502160)
  REMOVED 0086P00118000000: 2013-02-21 = #02ca75 (len 2502180)
  REMOVED 0022S00078000000: 1992-10-02 = #02ca75 (len 2502405)
0035R00120000001 is missing from centroids, skipping
  REMOVED 1661M00008000000: 2009-12-10 = #02ca75 (len 2502830)
  REMOVED 0173N00123000000: 1979-04-03 = #02ca75 (len 2502840)
  REMOVED 1668M00013000000: 2003-07-09 = #02ca75 (len 2503115)
  REMOVED 1844M00265000000: 2010-03-05 = #02ca75 (len 2503175)
333001-334000: processing u'0024E00183000000', 0:00:00.273201 time elapsed
  0024E00183000000: 1950-05-05 = #a50026
  0024E00183000000: 1964-03-23 = #a50026
  REMOVED 0507S00104000000: 2013-04-15 = #02ca75 (len 2503370)
8000T03144000000 is missing from centroids, skipping
  REMOVED 0049P00372000000: 2003-05-29 = #02ca75 (len 2503845)
  REMOVED 0663H00066000000: 2013-03-18 = #02ca75 (len 2504470)
  REMOVED 0439G00148000000: 2013-12-18 = #02ca75 (len 2504625)
  REMOVED 0027H00074020100: 2013-09-24 = #02

341001-342000: processing u'2015J00264000000', 0:00:00.289139 time elapsed
  2015J00264000000: 1976-08-06 = #a50026
  REMOVED 0068J00050000000: 2007-01-02 = #02ca75 (len 2563525)
  REMOVED 0376B00044000000: 2010-01-22 = #02ca75 (len 2564515)
  REMOVED 1570A00098000000: 2009-05-12 = #02ca75 (len 2564735)
  REMOVED 0022H00040000000: 2003-05-08 = #02ca75 (len 2564940)
  REMOVED 0050H00128000000: 2015-04-10 = #02ca75 (len 2565455)
  REMOVED 0185S00265000000: 2015-03-19 = #02ca75 (len 2565925)
  REMOVED 0174L00011000000: 2014-04-28 = #02ca75 (len 2566070)
  REMOVED 0136A00238000000: 2014-05-29 = #02ca75 (len 2566185)
  REMOVED 1209K00136000000: 2000-08-31 = #02ca75 (len 2566915)
  REMOVED 0003R00079000000: 2009-09-03 = #02ca75 (len 2567040)
  REMOVED 0244F00050000000: 2010-05-06 = #02ca75 (len 2567530)
  REMOVED 1659R00006000000: 2014-03-11 = #02ca75 (len 2568085)
  REMOVED 1359M00109000000: 2004-11-30 = #02ca75 (len 2569000)
  REMOVED 0126B00298000000: 2009-01-02 = #02ca75 (len 2569650)
  

0083F00269000000 is missing from centroids, skipping
  REMOVED 0734B00057000000: 2001-11-15 = #02ca75 (len 2629955)
350001-351000: processing u'0194J00202000000', 0:00:00.214816 time elapsed
  0194J00202000000: 1950-05-05 = #a50026
  0194J00202000000: 1956-06-26 = #a50026
  0194J00202000000: 2015-10-19 = #a50026
  REMOVED 0349P00015000000: 2012-12-05 = #02ca75 (len 2631730)
  REMOVED 0100H00323000000: 2012-10-31 = #02ca75 (len 2631865)
  REMOVED 0072K00073000000: 2014-05-30 = #02ca75 (len 2632295)
  REMOVED 0012E00057000000: 2010-03-04 = #02ca75 (len 2632670)
  REMOVED 0299G00030000000: 2013-07-11 = #02ca75 (len 2633140)
  REMOVED 0269A00046000000: 2013-02-04 = #02ca75 (len 2633210)
  REMOVED 0351S00181000000: 2012-05-01 = #02ca75 (len 2634010)
  REMOVED 0318K00127000000: 2013-08-08 = #02ca75 (len 2634070)
  REMOVED 0369B00150000000: 1999-09-09 = #02ca75 (len 2634240)
1999P00028000000 is missing from centroids, skipping
  REMOVED 0541B00020000000: 2014-05-12 = #02ca75 (len 2635110)
  R

  REMOVED 0194R00106000000: 2013-11-06 = #02ca75 (len 2696520)
0228J00210030500 is missing from centroids, skipping
  REMOVED 0414E00012000000: 2013-12-05 = #02ca75 (len 2697485)
  REMOVED 0043K00234000000: 2001-11-07 = #02ca75 (len 2697790)
  REMOVED 0296K00006000000: 2014-12-19 = #02ca75 (len 2698030)
  REMOVED 0305M00170000000: 1982-02-24 = #02ca75 (len 2698270)
359001-360000: processing u'0099C00180008700', 0:00:00.266423 time elapsed
  0099C00180008700: 2000-08-14 = #a50026
  REMOVED 0942C00089000000: 1992-04-16 = #02ca75 (len 2699010)
  REMOVED 0131G00163000000: 2013-12-20 = #02ca75 (len 2699295)
  REMOVED 0083G00248000000: 2005-10-20 = #02ca75 (len 2701010)
  REMOVED 0126F00194000000: 2012-06-01 = #02ca75 (len 2701100)
  REMOVED 0279J00060000000: 2009-01-30 = #02ca75 (len 2701290)
  REMOVED 0828J00141000000: 2009-02-19 = #02ca75 (len 2701510)
  REMOVED 2190K00062000000: 2012-04-30 = #02ca75 (len 2701785)
  REMOVED 0547G00285000000: 2014-04-24 = #02ca75 (len 2702740)
  REMOVED 02

  REMOVED 0205G00165000000: 2010-03-05 = #02ca75 (len 2755650)
  REMOVED 0464C00116000000: 2011-08-15 = #02ca75 (len 2755820)
  REMOVED 0082A00208000000: 2012-12-06 = #02ca75 (len 2756125)
  REMOVED 0030E00258000000: 2015-01-29 = #02ca75 (len 2756210)
0060N00106000000 is missing from centroids, skipping
367001-368000: processing u'0449H00224000000', 0:00:00.283728 time elapsed
  0449H00224000000: 1997-03-21 = #a50026
  REMOVED 0356C00096000000: 2013-04-01 = #02ca75 (len 2759510)
  REMOVED 0039J00053000000: 2015-04-03 = #02ca75 (len 2759660)
  REMOVED 0878N00218000000: 2007-04-20 = #02ca75 (len 2759735)
  REMOVED 0445F00271000000: 1998-12-31 = #02ca75 (len 2760110)
  REMOVED 0116P00165000000: 2012-09-14 = #02ca75 (len 2760815)
  REMOVED 0296G00118000000: 2007-04-13 = #02ca75 (len 2760945)
  REMOVED 0970N00136000000: 2006-05-24 = #02ca75 (len 2762270)
  REMOVED 1367G00096000000: 2009-03-30 = #02ca75 (len 2763010)
  REMOVED 0230S00020000000: 2014-01-02 = #02ca75 (len 2763565)
  REMOVED 00

  REMOVED 0541K00054000000: 1992-01-06 = #02ca75 (len 2824445)
0364G00249000000 is missing from centroids, skipping
  REMOVED 0324S00044000000: 2013-04-18 = #02ca75 (len 2825835)
  REMOVED 0134C00014000000: 2009-02-17 = #02ca75 (len 2826050)
  REMOVED 0013J00081000000: 1990-11-28 = #02ca75 (len 2826405)
376001-377000: processing u'0086G00236000000', 0:00:00.287023 time elapsed
  0086G00236000000: 2006-11-21 = #a50026
  REMOVED 0142C00106000000: 2009-11-02 = #02ca75 (len 2826835)
  REMOVED 0099R00201000000: 2014-01-30 = #02ca75 (len 2827650)
  REMOVED 1518M00198000000: 2010-02-24 = #02ca75 (len 2827795)
  REMOVED 2372L00044000000: 2013-04-12 = #02ca75 (len 2829820)
  REMOVED 0710C00398000000: 2014-01-08 = #02ca75 (len 2831315)
  REMOVED 0175K00214000400: 2002-12-06 = #02ca75 (len 2831335)
  REMOVED 0255M00308000000: 2010-01-26 = #02ca75 (len 2832595)
  REMOVED 0368A00240000000: 2010-03-26 = #02ca75 (len 2832650)
  REMOVED 0713C00604120300: 2009-03-20 = #02ca75 (len 2832705)
  REMOVED 03

  REMOVED 0508P00115000000: 2011-01-20 = #02ca75 (len 2883535)
  REMOVED 0162S00097000000: 2006-05-10 = #02ca75 (len 2885055)
  REMOVED 0013A00139000000: 2006-03-31 = #02ca75 (len 2885215)
  REMOVED 0608D00360000000: 2010-05-25 = #02ca75 (len 2885250)
384001-385000: processing u'0176A00077000000', 0:00:00.276793 time elapsed
  0176A00077000000: 1950-05-05 = #a50026
  0176A00077000000: 1988-06-22 = #a50026
  REMOVED 0033R00186000000: 2014-12-23 = #02ca75 (len 2887115)
  REMOVED 0560M00074000000: 2008-01-31 = #02ca75 (len 2887420)
  REMOVED 0764B00285000000: 2004-06-18 = #02ca75 (len 2888290)
  REMOVED 0020N00098000000: 2007-03-01 = #02ca75 (len 2888555)
  REMOVED 0178L00070000000: 2012-11-20 = #02ca75 (len 2888955)
  REMOVED 0975N00079000000: 2011-09-16 = #02ca75 (len 2890265)
  REMOVED 0094K00137000000: 2008-10-06 = #02ca75 (len 2890635)
  REMOVED 0217B00056000000: 2005-11-15 = #02ca75 (len 2891545)
  REMOVED 0944K00405000000: 2010-04-06 = #02ca75 (len 2892125)
  REMOVED 0098A000460000

  REMOVED 0080P00261000000: 2009-04-01 = #02ca75 (len 2950080)
  REMOVED 0035G00042000000: 2004-12-22 = #02ca75 (len 2950160)
  REMOVED 0046R00051000000: 2010-01-15 = #02ca75 (len 2950935)
  REMOVED 0654J00128000000: 1992-03-30 = #02ca75 (len 2953425)
  REMOVED 0126C00264000000: 2011-02-17 = #02ca75 (len 2954245)
393001-394000: processing u'0173F00233000000', 0:00:00.271371 time elapsed
  0173F00233000000: 1950-05-05 = #02ca75
  0173F00233000000: 1971-07-19 = #02ca75
  REMOVED 0859M00320000000: 2011-02-01 = #02ca75 (len 2955245)
  REMOVED 1068H00231000000: 2011-09-14 = #02ca75 (len 2955405)
  REMOVED 0028E00053000000: 2010-05-05 = #02ca75 (len 2956855)
  REMOVED 0004N00145000000: 2004-04-05 = #02ca75 (len 2956930)
0549L00079000000 is missing from centroids, skipping
  REMOVED 0055S00074000000: 1975-10-28 = #02ca75 (len 2957455)
  REMOVED 0879L00247000000: 2005-02-22 = #02ca75 (len 2958085)
  REMOVED 0445N00316000000: 1989-11-29 = #02ca75 (len 2958725)
  REMOVED 0180B00202000000: 2009-0

  REMOVED 0071K00009000000: 2001-06-28 = #02ca75 (len 3014310)
  REMOVED 0568J00320000000: 2009-02-20 = #02ca75 (len 3014445)
  REMOVED 0815J00016000000: 2014-12-12 = #02ca75 (len 3014695)
  REMOVED 0829B00001R00400: 2014-04-16 = #02ca75 (len 3014990)
  REMOVED 1367M00058000000: 2009-01-28 = #02ca75 (len 3015230)
  REMOVED 0348F00020000000: 2012-09-24 = #02ca75 (len 3016770)
  REMOVED 0977C00113000000: 2011-02-16 = #02ca75 (len 3017105)
  REMOVED 0306B00256000000: 2007-10-25 = #02ca75 (len 3017135)
  REMOVED 0645L00062000000: 2000-02-18 = #02ca75 (len 3017325)
  REMOVED 0507C00373000000: 2015-04-10 = #02ca75 (len 3018185)
  REMOVED 1203H00001000000: 2000-09-12 = #02ca75 (len 3018205)
  REMOVED 0828S00241000000: 2012-06-29 = #02ca75 (len 3020685)
0050K00009000000 is missing from centroids, skipping
  REMOVED 0941M00089000000: 2008-12-03 = #02ca75 (len 3021050)
402001-403000: processing u'0047C00019000000', 0:00:00.286804 time elapsed
  0047C00019000000: 2006-02-16 = #02ca75
  REMOVED 04

  REMOVED 0236R00137000000: 1977-09-19 = #02ca75 (len 3075410)
  REMOVED 0047J00165000000: 2010-09-02 = #02ca75 (len 3076705)
  REMOVED 0712F00065000000: 2010-01-28 = #02ca75 (len 3077080)
  REMOVED 1822M00020000000: 2013-02-07 = #02ca75 (len 3077090)
  REMOVED 0191J00256000000: 2003-12-12 = #02ca75 (len 3077150)
  REMOVED 0087J00127000000: 2014-05-27 = #02ca75 (len 3077160)
  REMOVED 0023F00006000000: 2006-07-28 = #02ca75 (len 3077680)
  REMOVED 0236B00293000000: 2009-08-03 = #02ca75 (len 3078125)
  REMOVED 1657C00141000000: 1989-07-31 = #02ca75 (len 3078340)
  REMOVED 0126G00257000000: 2014-04-01 = #02ca75 (len 3078715)
  REMOVED 0043E00113000000: 2009-09-15 = #02ca75 (len 3078895)
  REMOVED 0082K00052000000: 2010-05-03 = #02ca75 (len 3079975)
  REMOVED 0194K00202000000: 2006-08-29 = #02ca75 (len 3080235)
  REMOVED 0229S00160000000: 2009-01-02 = #02ca75 (len 3080455)
  REMOVED 0543G00098000000: 2008-08-22 = #02ca75 (len 3080715)
410001-411000: processing u'0177R00272000000', 0:00:00.

  REMOVED 0085A00222008100: 2000-05-22 = #02ca75 (len 3136295)
  REMOVED 1844N00058000000: 2010-01-20 = #02ca75 (len 3136370)
  REMOVED 0060E00278000000: 2002-05-01 = #02ca75 (len 3138350)
  REMOVED 0526K00191000000: 2008-07-11 = #02ca75 (len 3139365)
  REMOVED 0046K00189000000: 1970-07-28 = #02ca75 (len 3139540)
  REMOVED 1130K00011000000: 2003-01-15 = #02ca75 (len 3139980)
  REMOVED 0645R00213000000: 2007-04-02 = #02ca75 (len 3140895)
  REMOVED 0023K00306000000: 2011-01-27 = #02ca75 (len 3141205)
418001-419000: processing u'1507J00325000000', 0:00:00.279029 time elapsed
  1507J00325000000: 1983-02-18 = #a50026
  1507J00325000000: 2016-04-22 = #a50026
  REMOVED 0013C00301000000: 1995-08-23 = #02ca75 (len 3141815)
  REMOVED 0062F00137000000: 1999-06-16 = #02ca75 (len 3143150)
  REMOVED 0126A00167000000: 2009-04-27 = #02ca75 (len 3143360)
  REMOVED 1354J00216000000: 2009-06-16 = #02ca75 (len 3143725)
  REMOVED 0174A00362000000: 2009-02-13 = #02ca75 (len 3144615)
  REMOVED 0041C003410000

  REMOVED 0077F00013000000: 2012-01-11 = #02ca75 (len 3202530)
  REMOVED 0295R00013000000: 2008-04-16 = #02ca75 (len 3203145)
  REMOVED 0118H00194000500: 2010-02-03 = #02ca75 (len 3203255)
  REMOVED 0023D00234000000: 2001-05-17 = #02ca75 (len 3203345)
  REMOVED 0876R00079000000: 2009-12-21 = #02ca75 (len 3204890)
  REMOVED 0970K00138000000: 2013-05-14 = #02ca75 (len 3205020)
  REMOVED 0439P00015000000: 2007-09-28 = #02ca75 (len 3205175)
  REMOVED 0094B00205000000: 2013-10-18 = #02ca75 (len 3205465)
  REMOVED 0187G00062000000: 2000-05-09 = #02ca75 (len 3206750)
  REMOVED 0960N00095000000: 2003-12-02 = #02ca75 (len 3206770)
  REMOVED 0530E00025000000: 2009-06-11 = #02ca75 (len 3206865)
  REMOVED 0103K00182000000: 2009-02-11 = #02ca75 (len 3207480)
427001-428000: processing u'0015N00034000000', 0:00:00.273598 time elapsed
  0015N00034000000: 1993-06-11 = #a50026
  REMOVED 0373B00083000000: 2010-05-20 = #02ca75 (len 3208150)
  REMOVED 1519J00103000000: 1990-06-06 = #02ca75 (len 3208355)
  

  REMOVED 0352C00011000000: 2007-03-27 = #02ca75 (len 3263335)
  REMOVED 0432S00221000000: 2004-07-27 = #02ca75 (len 3264105)
  REMOVED 0449B00041000000: 2006-09-20 = #02ca75 (len 3264155)
  REMOVED 0301K00230000000: 2009-06-12 = #02ca75 (len 3264285)
  REMOVED 0083M00016000000: 1990-02-07 = #02ca75 (len 3265535)
  REMOVED 0015M00110000000: 1999-09-29 = #02ca75 (len 3265855)
  REMOVED 0880M00180000000: 2008-10-15 = #02ca75 (len 3266390)
  REMOVED 1421C00292000000: 2009-01-08 = #02ca75 (len 3267150)
435001-436000: processing u'0065K00090002300', 0:00:00.293853 time elapsed
  0065K00090002300: 1999-03-31 = #a50026
  0065K00090002300: 2017-08-22 = #02ca75
  REMOVED 0430E00070000000: 2015-02-02 = #02ca75 (len 3268110)
  REMOVED 0250N00056000000: 2009-03-17 = #02ca75 (len 3268290)
  REMOVED 0015B00316000000: 2014-01-13 = #02ca75 (len 3270180)
  REMOVED 0132R00047000000: 1976-09-10 = #02ca75 (len 3270855)
  REMOVED 0972L00069000000: 2012-05-11 = #02ca75 (len 3270915)
  REMOVED 0041J001440000

  REMOVED 0747B00248000000: 2001-01-09 = #02ca75 (len 3328785)
  REMOVED 0386F00176000000: 2003-07-30 = #02ca75 (len 3329495)
  REMOVED 0015D00302000000: 1994-06-10 = #02ca75 (len 3329960)
  REMOVED 0241K00141000000: 2005-10-14 = #02ca75 (len 3330020)
  REMOVED 0364H00394000000: 1999-10-18 = #02ca75 (len 3330040)
  REMOVED 0167M00046000000: 1994-07-27 = #02ca75 (len 3330360)
  REMOVED 0572C00101022A00: 2009-05-07 = #02ca75 (len 3330505)
  REMOVED 0584N00010000000: 2009-12-15 = #02ca75 (len 3331410)
  REMOVED 1837M00312000000: 2009-10-21 = #02ca75 (len 3331600)
  REMOVED 0381A00135000000: 2009-05-21 = #02ca75 (len 3331730)
  REMOVED 0230D00040000000: 2014-02-07 = #02ca75 (len 3332375)
  REMOVED 0173L00044000000: 2010-08-26 = #02ca75 (len 3332400)
  REMOVED 0114G00286000000: 1990-05-09 = #02ca75 (len 3332530)
  REMOVED 0156H00224000000: 2013-02-01 = #02ca75 (len 3333940)
  REMOVED 0024B00100000000: 2014-03-19 = #02ca75 (len 3334570)
444001-445000: processing u'0270K00011000000', 0:00:00.

  REMOVED 0305R00338000000: 2009-09-02 = #02ca75 (len 3402990)
  REMOVED 0466J00169000000: 2010-05-04 = #02ca75 (len 3404665)
  REMOVED 0829A00096000000: 2002-01-24 = #02ca75 (len 3404770)
  REMOVED 0620K00109000000: 2009-04-22 = #02ca75 (len 3405625)
  REMOVED 0547B00301000000: 2014-12-04 = #02ca75 (len 3405655)
  REMOVED 0516K00214000000: 1995-05-18 = #02ca75 (len 3407065)
  REMOVED 0176K00115000000: 2014-01-10 = #02ca75 (len 3407885)
  REMOVED 0419H00191000000: 2012-07-13 = #02ca75 (len 3407960)
  REMOVED 0046F00090000000: 2009-05-05 = #02ca75 (len 3409725)
  REMOVED 1659J00003000000: 2012-09-17 = #02ca75 (len 3409895)
  REMOVED 0071E00056000000: 2014-01-31 = #02ca75 (len 3410010)
454001-455000: processing u'0463D00315000000', 0:00:00.292828 time elapsed
  0463D00315000000: 1998-11-04 = #a50026
  0463D00315000000: 2014-04-25 = #a50026
  REMOVED 1740B00368000000: 1995-08-14 = #02ca75 (len 3410170)
  REMOVED 1506K00065000000: 1987-06-11 = #02ca75 (len 3410305)
  REMOVED 0188P000200000

0004B00224000200 is missing from centroids, skipping
  REMOVED 0250A00048000000: 1999-03-16 = #02ca75 (len 3459400)
  REMOVED 0305D00034000000: 2009-03-31 = #02ca75 (len 3459710)
  REMOVED 0746R00232000000: 2011-01-14 = #02ca75 (len 3460655)
  REMOVED 0957G00005080B00: 2003-12-29 = #ffffbf (len 3460710)
  REMOVED 0098F00195000000: 2012-12-31 = #02ca75 (len 3461825)
  REMOVED 0221M00127000000: 2009-03-18 = #02ca75 (len 3462430)
  REMOVED 0082L00061000000: 2009-06-16 = #02ca75 (len 3462470)
461001-462000: processing u'0464F00039000000', 0:00:00.286347 time elapsed
  0464F00039000000: 1996-02-21 = #a50026
  REMOVED 0053D00104000000: 2012-06-29 = #02ca75 (len 3463115)
  REMOVED 0627H00130000000: 2010-05-19 = #02ca75 (len 3463820)
  REMOVED 1361C00195000000: 2010-05-11 = #02ca75 (len 3465095)
  REMOVED 0235N00011000000: 2012-12-28 = #02ca75 (len 3465340)
  REMOVED 1132A00249000000: 2008-05-13 = #02ca75 (len 3465635)
  REMOVED 0176G00368000000: 2014-01-03 = #02ca75 (len 3467860)
  REMOVED 00

  REMOVED 0238H00040000000: 2010-01-22 = #02ca75 (len 3532205)
  REMOVED 0168J00077000000: 2010-10-01 = #02ca75 (len 3532425)
  REMOVED 0178G00168000000: 2010-04-29 = #02ca75 (len 3533330)
0050F00004000000 is missing from centroids, skipping
  REMOVED 0036D00100000000: 2013-08-23 = #02ca75 (len 3535450)
  REMOVED 0184A00278000000: 1997-03-14 = #02ca75 (len 3535550)
  REMOVED 0167L00061000000: 2003-07-01 = #02ca75 (len 3536170)
  REMOVED 0084J00100000000: 2009-05-04 = #02ca75 (len 3536440)
  REMOVED 0467S00245000000: 2009-01-14 = #02ca75 (len 3536570)
  REMOVED 0654J00055000000: 2009-08-11 = #02ca75 (len 3536825)
  REMOVED 0028P00126027100: 2013-08-01 = #02ca75 (len 3537075)
471001-472000: processing u'0039J00076000000', 0:00:00.286632 time elapsed
  0039J00076000000: 1950-05-05 = #a50026
  0039J00076000000: 1962-05-14 = #a50026
  REMOVED 0115A00097000000: 2011-10-12 = #02ca75 (len 3538265)
  REMOVED 0470D00292000000: 2014-01-30 = #02ca75 (len 3539445)
  REMOVED 0297H00020000000: 2005-0

  REMOVED 0301N00323000000: 2012-08-24 = #02ca75 (len 3590375)
478001-479000: processing u'0522C00109000000', 0:00:00.289314 time elapsed
  0522C00109000000: 2009-01-22 = #a50026
  REMOVED 0469D00137000000: 2002-09-09 = #02ca75 (len 3591230)
  REMOVED 0951M00227000000: 2013-03-07 = #02ca75 (len 3591660)
  REMOVED 0143P00210000000: 2009-10-21 = #02ca75 (len 3591705)
  REMOVED 1367G00149000000: 1975-12-03 = #02ca75 (len 3591735)
  REMOVED 0980B00112000000: 2010-03-26 = #02ca75 (len 3591805)
  REMOVED 0980B00112000000: 2012-07-26 = #02ca75 (len 3591810)
  REMOVED 1270G00013000000: 2008-07-23 = #ffffbf (len 3592695)
1365G00118000000 is missing from centroids, skipping
  REMOVED 0828N00343000000: 2002-06-25 = #02ca75 (len 3593005)
  REMOVED 0279A00332000000: 2010-04-27 = #02ca75 (len 3593795)
  REMOVED 0176J00172000000: 2013-06-10 = #02ca75 (len 3594070)
  REMOVED 0717J00052000000: 2011-12-14 = #02ca75 (len 3594770)
  REMOVED 1996A00030000000: 2011-06-22 = #02ca75 (len 3594875)
  REMOVED 02

  REMOVED 0049J00272000000: 2012-01-06 = #02ca75 (len 3653465)
  REMOVED 0181B00150000000: 2009-02-09 = #02ca75 (len 3653605)
  REMOVED 1067B00004000000: 2013-08-19 = #02ca75 (len 3653735)
  REMOVED 0513S00023000000: 2013-07-18 = #02ca75 (len 3655080)
  REMOVED 0454L00188000000: 2007-07-18 = #02ca75 (len 3656520)
0084M00346000000 is missing from centroids, skipping
  REMOVED 0045E00203000000: 2011-03-02 = #02ca75 (len 3657170)
  REMOVED 0019S00008000000: 1978-12-31 = #02ca75 (len 3657240)
  REMOVED 0084A00275000000: 2005-11-03 = #02ca75 (len 3657635)
  REMOVED 0100C00050800500: 2004-01-14 = #02ca75 (len 3658025)
487001-488000: processing u'0649C00004000000', 0:00:00.280211 time elapsed
  0649C00004000000: 1998-07-16 = #a50026
  REMOVED 1216H00187000000: 2014-11-03 = #02ca75 (len 3659020)
  REMOVED 0429M00231000000: 2012-12-07 = #02ca75 (len 3659420)
  REMOVED 0311N00312000000: 2015-01-29 = #02ca75 (len 3659740)
  REMOVED 0191P00044000000: 2009-05-26 = #02ca75 (len 3659990)
  REMOVED 03

  REMOVED 0467F00079000000: 2014-05-22 = #02ca75 (len 3712725)
  REMOVED 0252K00306000000: 1961-11-28 = #02ca75 (len 3712820)
0119S00157000000 is missing from centroids, skipping
  REMOVED 0464L00027000000: 1987-01-28 = #02ca75 (len 3715920)
  REMOVED 0145D00027000000: 2008-02-26 = #02ca75 (len 3715970)
  REMOVED 0746H00376000000: 1980-09-04 = #02ca75 (len 3716310)
  REMOVED 0167F00312000000: 2013-10-02 = #02ca75 (len 3718070)
  REMOVED 0100K00172000000: 2012-01-06 = #02ca75 (len 3718185)
  REMOVED 0879N00046000200: 2007-04-02 = #02ca75 (len 3718370)
495001-496000: processing u'0102K00142000000', 0:00:00.282301 time elapsed
  0102K00142000000: 1993-10-15 = #ffffbf
  REMOVED 1355F00138510900: 2012-04-04 = #02ca75 (len 3718795)
  REMOVED 1203N00018000000: 2012-04-13 = #02ca75 (len 3721840)
2012M00049000000 is missing from centroids, skipping
  REMOVED 0467R00021000000: 2013-04-04 = #02ca75 (len 3721865)
  REMOVED 0033L00039000000: 2004-07-30 = #02ca75 (len 3721910)
  REMOVED 0162J0018300

  REMOVED 0138C00034000000: 2011-02-18 = #02ca75 (len 3774180)
  REMOVED 0737D00328000000: 2014-05-27 = #02ca75 (len 3774715)
  REMOVED 0464G00034000000: 1998-02-27 = #02ca75 (len 3774800)
  REMOVED 0054R00249000000: 2008-08-28 = #02ca75 (len 3776035)
  REMOVED 0596D00031000000: 2013-08-12 = #02ca75 (len 3776090)
  REMOVED 0691E00006000000: 2014-05-29 = #02ca75 (len 3776330)
  REMOVED 0124N00371000000: 2006-03-01 = #02ca75 (len 3776675)
  REMOVED 1494L00003000000: 2009-08-11 = #02ca75 (len 3777010)
  REMOVED 0741K00017000000: 2012-10-23 = #02ca75 (len 3777230)
  REMOVED 0823S00076000000: 2012-12-05 = #02ca75 (len 3777625)
  REMOVED 0229S00393000000: 2015-01-27 = #02ca75 (len 3777655)
0051L00239000000 is missing from centroids, skipping
503001-504000: processing u'0232R00300000000', 0:00:00.287264 time elapsed
  0232R00300000000: 1950-05-05 = #02ca75
  REMOVED 0341D00135000000: 2014-03-10 = #02ca75 (len 3779535)
  REMOVED 0464G00383000000: 2009-08-25 = #02ca75 (len 3780030)
  REMOVED 06

In [536]:
parids_15213=apd_2017[apd_2017.PROPERTYZIP=='15213'].index

In [581]:
valid_parids_15213=list(set(parids_15213) & set(property_map.keys()) & set(parid2centroid.keys()))

In [583]:
len(valid_parids_15213)

4797

In [571]:
process_assessment_record(apd_2017, 2742)

0028G00056000000: Missing saledate


False

In [596]:
output_ownertype_dots(valid_parids_15213,"_15213")

  0011F00188000000: 2008-11-17 = #ffffbf
  0011F00188000000: 2008-11-17 - 2018-08-11 = #ffffbf (len 5)
  0028M00263000000: 1950-05-05 - 2018-08-11 = #a50026 (len 10)
  0028M00263000000: 1964-09-11 - 2018-08-11 = #a50026 (len 15)
  0026F00070000000: 1982-09-24 - 2018-08-11 = #a50026 (len 20)
  0027H00117110700: 1982-11-17 - 2014-11-10 = #a50026 (len 25)
  0027H00117110700: 2014-11-10 - 2018-08-11 = #a50026 (len 30)
  0052J00198000000: 1994-08-15 - 2016-10-11 = #02ca75 (len 35)
  0052J00198000000: 2016-10-11 - 2018-08-11 = #ffffbf (len 40)
  0028A00093000000: 2006-06-23 - 2018-08-11 = #02ca75 (len 45)
  0026M00263000000: 2004-06-29 - 2010-07-16 = #a50026 (len 50)
  0026M00263000000: 2010-07-16 - 2017-08-09 = #a50026 (len 55)
  0026M00263000000: 2017-08-09 - 2018-08-11 = #a50026 (len 60)
  0053C00170000000: 1926-08-11 - 2018-08-11 = #ffffbf (len 65)
  0053C00170000000: 1950-05-05 - 2018-08-11 = #ffffbf (len 70)
  0027C00230000000: 1955-03-28 - 2013-06-03 = #02ca75 (len 75)
  0027C00230000

  0028P00126026200: 1999-05-28 - 2018-08-11 = #02ca75 (len 1515)
  0027H00144007500: 1995-04-05 - 2011-02-02 = #a50026 (len 1520)
  0027H00144007500: 2011-02-02 - 2018-08-11 = #a50026 (len 1525)
  0027H00144007500: 2011-06-27 - 2018-08-11 = #a50026 (len 1530)
  0027H00144007500: 2016-05-03 - 2018-08-11 = #a50026 (len 1535)
  0027M00246070400: 2000-03-01 - 2018-08-11 = #a50026 (len 1540)
  0026S00054000400: 1995-06-05 - 2017-08-08 = #02ca75 (len 1545)
  0026S00054000400: 2017-08-08 - 2018-08-11 = #ffffbf (len 1550)
  0029G00208000000: 1990-04-23 - 2014-05-13 = #02ca75 (len 1555)
  0029G00208000000: 2014-05-13 - 2018-08-11 = #02ca75 (len 1560)
  0027G00167000001: 1995-08-09 - 2018-08-11 = #a50026 (len 1565)
  0052E00033007300: 2008-05-14 - 2014-02-21 = #a50026 (len 1570)
  0052E00033007300: 2014-02-21 - 2018-08-11 = #a50026 (len 1575)
  0028P00249000000: 1997-02-28 - 2018-08-11 = #ffffbf (len 1580)
  0029C00143000000: 2006-03-03 - 2010-04-27 = #02ca75 (len 1585)
  0029C00143000000: 2010-

  0028R00264000000: 2003-12-10 - 2011-03-21 = #a50026 (len 3320)
  0028R00264000000: 2011-03-21 - 2018-08-11 = #02ca75 (len 3325)
  0029C00270002500: 1988-05-23 - 2018-08-11 = #a50026 (len 3330)
  0027H00117050100: 1978-05-02 - 2018-08-11 = #a50026 (len 3335)
  0029C00270004500: 2007-06-07 - 2013-08-15 = #a50026 (len 3340)
  0029C00270004500: 2013-08-15 - 2018-08-11 = #a50026 (len 3345)
  0026R00061000000: 1998-10-30 - 2018-08-11 = #02ca75 (len 3350)
  0011K00078000000: 1979-12-07 - 2018-08-11 = #a50026 (len 3355)
  0051J00122000000: 2000-01-10 - 2014-07-21 = #02ca75 (len 3360)
  0051J00122000000: 2014-07-21 - 2018-08-11 = #ffffbf (len 3365)
  0011K00059000000: 1987-03-18 - 2018-08-11 = #ffffbf (len 3370)
  0011M00124000000: 2001-06-18 - 2018-08-11 = #ffffbf (len 3375)
  0052B00098031100: 1991-10-07 - 2011-06-21 = #a50026 (len 3380)
  0052B00098031100: 2011-06-21 - 2016-08-05 = #a50026 (len 3385)
  0052B00098031100: 2016-08-05 - 2018-08-11 = #02ca75 (len 3390)
  0026L00296000000: 1984-

  0028P00196000000: 2012-10-26 - 2018-08-11 = #ffffbf (len 5260)
  0052E00257001100: 1997-08-19 - 2015-06-10 = #02ca75 (len 5265)
  REMOVED 0052E00257001100: 1997-08-19 = #02ca75 (len 5260)
  0052E00257001100: 1997-08-19 - 2015-06-10 = #a50026 (len 5265)
  0052E00257001100: 2015-06-10 - 2018-08-11 = #a50026 (len 5270)
  0029C00280000000: 2003-06-12 - 2018-08-11 = #02ca75 (len 5275)
  0011D00019000000: 2005-03-18 - 2018-08-11 = #a50026 (len 5280)
  0028E00036000000: 1990-04-19 - 2018-08-11 = #ffffbf (len 5285)
  0027H00160008600: 1986-03-28 - 2015-10-14 = #a50026 (len 5290)
  0027H00160008600: 2015-10-14 - 2018-08-11 = #a50026 (len 5295)
  0027H00117040700: 2005-03-16 - 2011-07-05 = #a50026 (len 5300)
  0027H00117040700: 2011-07-05 - 2018-08-11 = #a50026 (len 5305)
  0028H00270000000: 2008-07-15 - 2018-08-11 = #a50026 (len 5310)
  0028H00270000000: 2014-08-06 - 2018-08-11 = #a50026 (len 5315)
  0052A00193000000: 1999-12-15 - 2013-08-02 = #a50026 (len 5320)
  0052A00193000000: 2013-08-02

  0011H00046000000: 2016-08-19 - 2018-08-11 = #ffffbf (len 7240)
  0028P00165046100: 2009-09-10 - 2013-05-24 = #a50026 (len 7245)
  0028P00165046100: 2013-05-24 - 2018-08-11 = #a50026 (len 7250)
  0026S00026000000: 2004-10-12 - 2018-08-11 = #ffffbf (len 7255)
  0028R00106000000: 1998-02-18 - 2018-08-11 = #a50026 (len 7260)
  0028R00303000000: 1950-05-05 - 2018-08-11 = #a50026 (len 7265)
  0028R00303000000: 1973-04-18 - 2018-08-11 = #a50026 (len 7270)
  0028M00251000000: 2005-02-25 - 2012-10-15 = #a50026 (len 7275)
  0028M00251000000: 2012-10-15 - 2018-08-11 = #02ca75 (len 7280)
  0027M00246080900: 1995-04-28 - 2013-12-09 = #a50026 (len 7285)
  0027M00246080900: 2013-12-09 - 2018-08-11 = #a50026 (len 7290)
  0011D00095000000: 2003-02-19 - 2015-12-11 = #a50026 (len 7295)
  0011D00095000000: 2015-12-11 - 2018-08-11 = #ffffbf (len 7300)
  0028R00276000000: 2000-05-23 - 2018-08-11 = #ffffbf (len 7305)
  0028G00265000000: 2009-07-16 - 2018-08-11 = #02ca75 (len 7310)
  0052B00098031800: 2004-

  0029D00107000000: 2013-04-18 - 2014-04-09 = #a50026 (len 9215)
  0029D00107000000: 2014-04-09 - 2018-08-11 = #02ca75 (len 9220)
  0028P00165018200: 2007-09-07 - 2018-08-11 = #a50026 (len 9225)
  0029H00191000000: 2002-12-27 - 2018-08-11 = #02ca75 (len 9230)
  0029H00191000000: 2014-10-16 - 2018-08-11 = #02ca75 (len 9235)
  0052E00047060100: 1991-06-05 - 2018-08-11 = #a50026 (len 9240)
  0027L00194000000: 1950-05-05 - 2017-05-01 = #ffffbf (len 9245)
  0027L00194000000: 1960-09-14 - 2017-05-01 = #ffffbf (len 9250)
  0027L00194000000: 2017-05-01 - 2018-08-11 = #02ca75 (len 9255)
  0052B00098050500: 1986-09-08 - 2011-07-28 = #a50026 (len 9260)
  0052B00098050500: 2011-07-28 - 2018-08-11 = #a50026 (len 9265)
  0029C00139000000: 2007-02-09 - 2012-03-21 = #02ca75 (len 9270)
  0029C00139000000: 2012-03-21 - 2018-08-11 = #02ca75 (len 9275)
  0028G00043000000: 1999-11-05 - 2014-06-23 = #02ca75 (len 9280)
  0028G00043000000: 2014-06-23 - 2017-02-21 = #ffffbf (len 9285)
  0028G00043000000: 2017-

  0029D00063000000: 2011-08-10 - 2018-08-11 = #a50026 (len 11240)
  0027M00159000000: 1981-10-21 - 2018-08-11 = #ffffbf (len 11245)
  0052F00284004100: 1999-06-23 - 2013-05-08 = #a50026 (len 11250)
  0052F00284004100: 2013-05-08 - 2018-08-11 = #a50026 (len 11255)
  0028G00235000000: 1984-10-18 - 2018-08-11 = #02ca75 (len 11260)
  0028P00165023100: 1986-07-18 - 2018-08-11 = #a50026 (len 11265)
  0028P00165023100: 2012-07-20 - 2018-08-11 = #02ca75 (len 11270)
  0026M00197000000: 1950-05-05 - 2016-02-23 = #a50026 (len 11275)
  0026M00197000000: 1967-03-27 - 2016-02-23 = #a50026 (len 11280)
  0026M00197000000: 2016-02-23 - 2018-08-11 = #a50026 (len 11285)
  0027H00047120100: 1999-09-22 - 2018-08-11 = #a50026 (len 11290)
  0052B00098040700: 1994-08-02 - 2000-08-31 = #a50026 (len 11295)
  0052B00098040700: 2000-08-31 - 2018-08-11 = #ffffbf (len 11300)
  0029D00065000000: 2004-09-10 - 2018-08-11 = #a50026 (len 11305)
  0026F00334000000: 1976-01-09 - 2016-12-21 = #02ca75 (len 11310)
  0026F003

  0026L00298000000: 1974-04-01 - 2012-12-03 = #a50026 (len 14095)
  0026L00298000000: 2012-12-03 - 2018-08-11 = #a50026 (len 14100)
  0029C00268000000: 2004-12-14 - 2017-04-27 = #ffffbf (len 14105)
  0029C00268000000: 2017-04-27 - 2018-08-11 = #a50026 (len 14110)
  0026L00271000000: 2008-03-18 - 2013-11-22 = #a50026 (len 14115)
  0026L00271000000: 2013-11-22 - 2018-08-11 = #a50026 (len 14120)
  0028J00020000000: 1994-10-05 - 2018-08-11 = #02ca75 (len 14125)
  0052E00033009400: 2001-09-28 - 2011-08-03 = #a50026 (len 14130)
  0052E00033009400: 2011-08-03 - 2018-08-11 = #a50026 (len 14135)
  0028J00001000000: 2005-12-30 - 2007-09-28 = #ffffbf (len 14140)
  0028J00001000000: 2007-09-28 - 2011-09-27 = #ffffbf (len 14145)
  0028J00001000000: 2011-09-27 - 2018-08-11 = #ffffbf (len 14150)
  0028E00061000000: 1994-06-16 - 2018-08-11 = #02ca75 (len 14155)
  0029C00113000000: 2004-05-14 - 2018-08-11 = #02ca75 (len 14160)
  0026K00111000000: 1950-05-05 - 2018-08-11 = #a50026 (len 14165)
  0026K001

  0028G00232000000: 1972-11-07 - 2012-01-31 = #a50026 (len 16970)
  0028G00232000000: 2012-01-31 - 2018-08-11 = #02ca75 (len 16975)
  0028J00014000000: 2001-05-22 - 2018-08-11 = #a50026 (len 16980)
  0028J00014000000: 2017-08-28 - 2018-08-11 = #a50026 (len 16985)
  0052A00288000000: 1985-12-16 - 2018-08-11 = #a50026 (len 16990)
  0027C00128000000: 1999-07-30 - 2018-08-11 = #a50026 (len 16995)
  0028N00060000000: 2006-09-13 - 2010-11-03 = #a50026 (len 17000)
  0028N00060000000: 2010-11-03 - 2018-08-11 = #02ca75 (len 17005)
  0029C00270000700: 2006-10-03 - 2014-01-06 = #a50026 (len 17010)
  0029C00270000700: 2014-01-06 - 2018-08-11 = #a50026 (len 17015)
  0029C00270000700: 2014-08-12 - 2018-08-11 = #a50026 (len 17020)
  0026F00061000000: 2006-01-17 - 2018-08-11 = #a50026 (len 17025)
  0026F00221000000: 2004-07-21 - 2018-08-11 = #a50026 (len 17030)
  0011H00013000000: 1994-08-24 - 2018-08-11 = #a50026 (len 17035)
  0028P00126047100: 2007-05-10 - 2011-08-01 = #a50026 (len 17040)
  0028P001

  0028L00352000000: 2001-10-01 - 2018-08-11 = #02ca75 (len 19440)
  0028R00144000000: 2006-08-03 - 2018-08-11 = #a50026 (len 19445)
  0052K00202000000: 1983-08-26 - 2018-08-11 = #a50026 (len 19450)
  0052E00105000000: 1990-04-12 - 2018-08-11 = #ffffbf (len 19455)
  0027H00144004300: 2009-05-29 - 2018-08-11 = #a50026 (len 19460)
  0052E00160000100: 2009-03-16 - 2018-08-11 = #02ca75 (len 19465)
  0029C00270005200: 1991-08-19 - 2018-08-11 = #a50026 (len 19470)
  0052F00284002000: 2005-05-18 - 2018-08-11 = #a50026 (len 19475)
  0028P00318000000: 2005-12-22 - 2012-12-11 = #02ca75 (len 19480)
  0028P00318000000: 2012-12-11 - 2018-08-11 = #02ca75 (len 19485)
  0052B00180000000: 1998-04-29 - 2018-08-11 = #a50026 (len 19490)
  0052F00284005500: 2008-01-24 - 2012-03-12 = #ffffbf (len 19495)
  0052F00284005500: 2012-03-12 - 2018-08-11 = #ffffbf (len 19500)
  0029C00256000A00: 2001-09-13 - 2018-08-11 = #02ca75 (len 19505)
  0029D00056000000: 2002-03-20 - 2011-07-28 = #02ca75 (len 19510)
  0029D000

  0026F00084000000: 1983-11-22 - 2013-10-25 = #a50026 (len 21950)
  0026F00084000000: 2013-10-25 - 2017-01-30 = #ffffbf (len 21955)
  0026F00084000000: 2017-01-30 - 2018-08-11 = #02ca75 (len 21960)
  0027H00117070400: 2007-03-20 - 2018-08-11 = #a50026 (len 21965)
  0028G00263000000: 1989-02-01 - 2018-08-11 = #a50026 (len 21970)
  0052B00098091800: 1997-04-07 - 2018-08-11 = #a50026 (len 21975)
  0026F00246000100: 1979-03-19 - 2018-08-11 = #a50026 (len 21980)
  0028M00016000000: 2006-02-23 - 2018-08-11 = #ffffbf (len 21985)
  0011D00072000000: 1985-01-15 - 2018-08-11 = #a50026 (len 21990)
  0052F00284004700: 2009-03-11 - 2011-06-10 = #ffffbf (len 21995)
  0052F00284004700: 2011-06-10 - 2015-12-18 = #a50026 (len 22000)
  0052F00284004700: 2015-12-18 - 2018-08-11 = #a50026 (len 22005)
  0052B00098032100: 1998-02-11 - 2014-06-25 = #a50026 (len 22010)
  0052B00098032100: 2014-06-25 - 2016-06-10 = #a50026 (len 22015)
  0052B00098032100: 2016-06-10 - 2018-08-11 = #a50026 (len 22020)
  0028A001

  0028P00194000000: 1950-05-05 - 2013-04-26 = #a50026 (len 24730)
  0028P00194000000: 2013-04-26 - 2018-08-11 = #ffffbf (len 24735)
  0028P00012000000: 2002-10-17 - 2018-08-11 = #02ca75 (len 24740)
  0029C00054000000: 1975-01-23 - 2010-12-03 = #a50026 (len 24745)
  0029C00054000000: 2010-12-03 - 2018-08-11 = #02ca75 (len 24750)
  0028P00156000000: 2008-04-21 - 2018-08-11 = #02ca75 (len 24755)
  0028S00150000000: 2007-04-04 - 2018-08-11 = #a50026 (len 24760)
  0028A00267000000: 2007-09-07 - 2015-03-11 = #a50026 (len 24765)
  0028A00267000000: 2015-03-11 - 2018-08-11 = #a50026 (len 24770)
  0051J00185000000: 2000-07-31 - 2018-08-11 = #a50026 (len 24775)
  0028R00135000000: 1994-05-02 - 2015-08-04 = #02ca75 (len 24780)
  0028R00135000000: 2015-08-04 - 2018-08-11 = #02ca75 (len 24785)
  0028G00346000000: 2004-05-21 - 2018-08-11 = #ffffbf (len 24790)
  0027D00295000000: 2000-10-23 - 2012-08-20 = #02ca75 (len 24795)
  0027D00295000000: 2012-08-20 - 2018-08-11 = #ffffbf (len 24800)
  0027H001

  0052E00011090300: 1976-05-04 - 2017-03-01 = #a50026 (len 27560)
  0052E00011090300: 2017-03-01 - 2018-08-11 = #a50026 (len 27565)
  0028N00061000000: 1994-10-07 - 2010-09-21 = #a50026 (len 27570)
  0028N00061000000: 2010-09-21 - 2014-11-04 = #ffffbf (len 27575)
  0028N00061000000: 2014-11-04 - 2018-08-11 = #ffffbf (len 27580)
  0051N00237000000: 2001-12-21 - 2018-08-11 = #ffffbf (len 27585)
  0028J00016000000: 1950-05-05 - 2010-07-29 = #a50026 (len 27590)
  0028J00016000000: 2010-07-29 - 2018-08-11 = #02ca75 (len 27595)
  0029C00048000A00: 2004-12-08 - 2015-12-15 = #ffffbf (len 27600)
  0029C00048000A00: 2015-12-15 - 2018-08-11 = #ffffbf (len 27605)
  0028P00165029100: 2006-05-09 - 2018-08-11 = #ffffbf (len 27610)
  0052E00257000700: 2006-03-16 - 2011-08-19 = #a50026 (len 27615)
  0052E00257000700: 2011-08-19 - 2017-06-08 = #a50026 (len 27620)
  0052E00257000700: 2017-06-08 - 2018-08-11 = #a50026 (len 27625)
  0027C00134000000: 1950-05-05 - 2018-08-11 = #02ca75 (len 27630)
  0027C001

  0028G00291000000: 1982-06-17 - 2018-08-11 = #ffffbf (len 30395)
  0052B00098020400: 2007-08-09 - 2010-09-29 = #a50026 (len 30400)
  0052B00098020400: 2010-09-29 - 2018-08-11 = #a50026 (len 30405)
  0028R00019000100: 2005-04-27 - 2018-08-11 = #a50026 (len 30410)
  0029B00193000000: 2007-02-02 - 2018-08-11 = #02ca75 (len 30415)
  0029C00270006100: 1997-12-18 - 2009-10-23 = #a50026 (len 30420)
  0029C00270006100: 2009-10-23 - 2018-08-11 = #a50026 (len 30425)
  0028A00356000000: 2009-05-20 - 2015-10-29 = #ffffbf (len 30430)
  0028A00356000000: 2015-10-29 - 2018-08-11 = #ffffbf (len 30435)
  0026S00030000000: 1996-06-20 - 2018-08-11 = #ffffbf (len 30440)
  0011D00341000000: 2000-07-17 - 2012-12-10 = #a50026 (len 30445)
  0011D00341000000: 2012-12-10 - 2018-08-11 = #a50026 (len 30450)
  0027M00246060200: 1982-09-21 - 2011-05-31 = #a50026 (len 30455)
  0027M00246060200: 2011-05-31 - 2018-08-11 = #ffffbf (len 30460)
  0026F00224000000: 1950-05-05 - 2018-08-11 = #a50026 (len 30465)
  0026F002

  0029D00036000000: 1986-12-05 - 2018-08-11 = #02ca75 (len 32525)
  0027M00246070200: 2004-09-14 - 2014-09-08 = #a50026 (len 32530)
  0027M00246070200: 2014-09-08 - 2018-08-11 = #a50026 (len 32535)
  0027H00074120300: 2003-06-02 - 2018-08-11 = #a50026 (len 32540)
  0028R00189000000: 1950-05-05 - 2013-03-11 = #a50026 (len 32545)
  0028R00189000000: 1956-05-01 - 2013-03-11 = #a50026 (len 32550)
  0028R00189000000: 2013-03-11 - 2018-08-11 = #a50026 (len 32555)
  0052E00047030200: 1950-05-05 - 2011-10-28 = #a50026 (len 32560)
  0052E00047030200: 1973-03-15 - 2011-10-28 = #a50026 (len 32565)
  0052E00047030200: 2011-10-28 - 2018-08-11 = #a50026 (len 32570)
  0026N00206000000: 1950-05-05 - 2010-11-15 = #02ca75 (len 32575)
  0026N00206000000: 2010-11-15 - 2018-08-11 = #02ca75 (len 32580)
  0029C00270002000: 1988-08-11 - 2014-04-18 = #a50026 (len 32585)
  0029C00270002000: 2014-04-18 - 2018-08-11 = #a50026 (len 32590)
  0029H00171000000: 2002-06-07 - 2018-08-11 = #02ca75 (len 32595)
  0027H001

  0052B00298000000: 2004-05-03 - 2018-08-11 = #a50026 (len 35080)
  0029C00194000000: 1950-05-05 - 1970-09-10 = #a50026 (len 35085)
  0029C00194000000: 1970-09-10 - 2018-08-11 = #a50026 (len 35090)
  0028E00237000000: 2007-10-31 - 2018-08-11 = #ffffbf (len 35095)
  0028P00181000000: 1997-01-02 - 2018-08-11 = #a50026 (len 35100)
  0052E00033009300: 1984-05-15 - 2017-08-14 = #a50026 (len 35105)
  0052E00033009300: 2017-08-14 - 2018-08-11 = #a50026 (len 35110)
  0028H00074000000: 1950-05-05 - 2012-05-02 = #a50026 (len 35115)
  0028H00074000000: 1972-02-10 - 2012-05-02 = #a50026 (len 35120)
  0028H00074000000: 2012-05-02 - 2018-08-11 = #ffffbf (len 35125)
  0028L00046000A00: 2008-08-19 - 2018-08-11 = #02ca75 (len 35130)
  0052N00065000000: 2007-01-16 - 2018-08-11 = #ffffbf (len 35135)
  0029C00247000A00: 1994-02-28 - 2018-08-11 = #02ca75 (len 35140)
  0029B00142000000: 2003-03-24 - 2015-08-17 = #a50026 (len 35145)
  0029B00142000000: 2015-08-17 - 2018-08-11 = #a50026 (len 35150)
  0027H000

In [482]:
apd_2013.loc['0191H00034000000'].PROPERTYOWNER

'ROBERT E KELLY JR REVOCABLE TRUST       ROBERT E KELLY JR \xce\x93\xc3\x87\xc3\xb4 TRUSTEE & BRI'

In [449]:
ownertype_to_color('0188A00063000000', property_map['0188A00063000000'][2])

'#02ca75'

In [637]:
start=arrow.now()

single_property_color = '#a50026'
corporate_color = '#ffffbf'
unknown_color = '#303030'

# Distressed sales are:
distressed_saledescs=['SHERIFF SALE', 'CITY TREASURER SALE','CITY TREASURER',
                      'PREV FORECLOSE','GOVT SALE','BANK SALE']
#affection_saledescs=['LOVE AND AFFECTION SALE','LOVE&AFFECTION']
affection_saledescs=[]

#distressed_colors=["#ff0000","#00ff00"]
#affection_colors=["#FA00D4","#15FA00"]
distressed_colors=["#FA00D4","#15FA00"]
affection_colors=[]

single_property_colors=[single_property_color, "#ff0000", "#FA00D4"]
def saletype_to_color(parid, property_event):
    try:
        ownerdesc = 'REGULAR'
        if('ownerdesc' in property_event.keys()):
           ownerdesc = property_event['ownerdesc']
        saledesc = 'VALID'
        if('saledesc' in property_event.keys()):
            saledesc = property_event['saledesc']
        volume_map=get_related_parids(parid, property_event['date'])
        volume = volume_map['parcount']

        # Check if homestead flag is set
        homesteadflag=False
        if('homesteadflag' in property_event.keys() and 
          (property_event['homesteadflag'].strip()=='HOM' or property_event['homesteadflag'].strip()=='C')):
            homesteadflag=True
        # Check if property went to a single owner or not
        single_owner = True
        if ('CORPORATION' in ownerdesc or 'Corporation' in ownerdesc or (volume>1 and not homesteadflag)):
            single_owner=False
        
        
        # Check for distressed sale
        if(saledesc in distressed_saledescs):
            # Is distressed sale, check if it went to a single owner or not
            if(single_owner):
                # Set to bright pink
                return "#FA00D4"
            else:
                # Return fluorescent green
                return "#15FA00"
#            if(single_owner):
#                # Set to bright red
#                return "#ff0000"
#            else:
#                # Return bright green
#                return "#00ff00"
#        elif(saledesc in affection_saledescs):
#            # Is affection sale, check if it went to a single owner or not
#            if(single_owner):
#                # Set to bright pink
#                return "#FA00D4"
#            else:
#                # Return fluorescent green
#                return "#15FA00"
        else:
            # Normal sale
#            if ('CORPORATION' in ownerdesc or 'Corporation' in ownerdesc):
#                # Ivory 
#                return '#ffffbf'
            if (single_owner):
                # Same color as 1 in volume view
                return single_property_color
            else:
                # Non-corporate multi-owner, reguarl sale
                # Same color as other in class view
                return '#02ca75'
    except:
        print "Exception processing saletype: %s" % (parid), sys.exc_info()[0]
        raise
        return '#303030'

def output_saletype_dots(parid_arr, suffix):
    # If parid_arr not specified, do all of the keys in property_map
    if(len(parid_arr)==0):
        parid_arr=property_map.keys()
    # Write out volume of ownership for each residential non-vacant land property
    points = []
    start=arrow.now()
    chunk_start_time=arrow.now()
    chunk_size=1000
    chunk_cnt=0
    
    for parid in parid_arr:
        centroid=None
        did_output_dot=False
        try:
            centroid = parid2centroid[parid]
        except:
            print "%s is missing from centroids, skipping" % (parid)
            continue

        try:
            # Handle periodic debug message
            if((chunk_cnt%chunk_size)==0 and chunk_cnt>0):
                print "%d-%d: processing %r, %s time elapsed" %(chunk_cnt-(chunk_size-1), chunk_cnt, parid, arrow.now()-chunk_start_time)
                addcnt=0
                chunk_start_time=arrow.now()

            # Get list of property events or this property
            property_events = property_map[parid]

            # Keep track of the date of the last datapoint. 
            last_date = '1900-01-01'
            last_color=0
            
            for i in range(0,len(property_events)):
                event_date = property_events[i]['date']
                # Get color for this property event
                color = saletype_to_color(parid, property_events[i])
                # Check if the date and color haven't changed, if so skip to the next event
                if(color == last_color and last_date == event_date):
                    # Nothing new here, move along
                    continue
                elif(last_date==event_date and i>0):
                    # Color changed without the sale date changing, possibly flag this as an issue
                    # If this new color is unknown, skip it
                    # If the last color was unknown, delete it and use this one
                    # Otherwise, distressed_colors take precedence if present, 
                    # if not, this might be a change in homesteadflag.  If so, single_property_color takes precedence.
                    # Corporate takes priority between multi and corporate
                    # If neither has single_property_color, raise an exception
                    if(color==unknown_color):
                        # Ok, just skip this one
                        continue
                    elif(last_color==unknown_color):
                        # Pop the previous point off (5 floats) and use this new one
                        for j in range(0,5):
                            del points[-1]
                    elif(last_color in distressed_colors):
                        # Ok, just skip this one
                        continue
                    elif(color in distressed_colors):
                        # Pop the previous point off (5 floats) and use this new one
                        for j in range(0,5):
                            del points[-1]
                    elif(last_color in affection_colors):
                        # Ok, just skip this one
                        continue
                    elif(color in affection_colors):
                        # Pop the previous point off (5 floats) and use this new one
                        for j in range(0,5):
                            del points[-1]
                    elif(color in single_property_colors):
                        # Pop the previous point off (5 floats) and use this new one
                        for j in range(0,5):
                            del points[-1]
                    elif(last_color in single_property_colors):
                        # Ok, just skip this one
                        continue
                    elif(last_color==corporate_color):
                        # Ok, just skip this one
                        continue
                    elif(color==corporate_color):
                        # Pop the previous point off (5 floats) and use this new one
                        for j in range(0,5):
                            del points[-1]
                    else:
                        print " PROBLEM %s: %s changed color on same date %s->%s" % (parid, event_date, last_color, color)
                        raise

                if((chunk_cnt%chunk_size)==0):
                    print "  %s: %s = %s" % (parid, event_date, color)
                
                # Get range of dates for this color
                saledate = SaledateToEpoch(event_date)
                next_date = get_next_sale_date(parid, event_date)
                if(next_date == None):
                    # No new owner after this, set end date to the end of time
                    enddate = float(1e38)
                    next_date=this_date
                else:
                    enddate = SaledateToEpoch(next_date)
                if(color != None):
                    points += PointToPixelXY(centroid)     
                    points.append(parse_color(color))
                    # Put epoch time for SALEDATE as start valid time, and next_date as end valid time
                    points.append(float(saledate))
                    points.append(float(enddate))
                    #print "  %s: %s - %s = %s (len %d)" % (parid, event_date, next_date, color, len(points))
                else:
                    print "Color of " + str(volume_map['parcount']) + " is None"

                # Set last_date and last_color for next loop
                last_date = event_date
                last_color= color
                did_output_dot=True
        except:
            print "Unexpected error processing %s:" % (parid), sys.exc_info()[0]
            raise

        #Increment debug message counter
        if(did_output_dot):
            chunk_cnt = chunk_cnt+1

    array.array('f', points).tofile(open(('assessments/saletype_color_m_epoch%s.bin'%suffix), 'wb'))

    end=arrow.now()
    print "Processing took %s for %d dots" % (str(end-start), chunk_cnt)
    

In [639]:
output_saletype_dots([],out_suffix)

  0177N00269000000: 2007-03-02 = #a50026
  0177N00269000000: 2011-02-28 = #a50026
1-1000: processing u'0981J00102000000', 0:00:00.430800 time elapsed
  0981J00102000000: 2008-04-21 = #02ca75
  0981J00102000000: 2010-07-08 = #a50026
1001-2000: processing u'0252H00202000000', 0:00:00.331908 time elapsed
  0252H00202000000: 2004-03-02 = #a50026
2001-3000: processing u'0362G00145000000', 0:00:00.329856 time elapsed
  0362G00145000000: 2000-05-03 = #02ca75
  0362G00145000000: 2015-04-10 = #02ca75
0178J00009000000 is missing from centroids, skipping
3001-4000: processing u'0878N00182000000', 0:00:00.314119 time elapsed
  0878N00182000000: 1999-12-29 = #02ca75
0024N00055000000 is missing from centroids, skipping
4001-5000: processing u'0435G00198000000', 0:00:00.313595 time elapsed
  0435G00198000000: 2002-05-14 = #a50026
  0435G00198000000: 2010-03-19 = #a50026
  0435G00198000000: 2013-05-23 = #a50026
0175C00169000000 is missing from centroids, skipping
0564S00037000000 is missing from centr

46001-47000: processing u'0517H00030240400', 0:00:00.282306 time elapsed
  0517H00030240400: 2009-08-06 = #a50026
0084M00362000000 is missing from centroids, skipping
47001-48000: processing u'0129G00202000000', 0:00:00.292338 time elapsed
  0129G00202000000: 1985-03-28 = #a50026
  0129G00202000000: 2014-05-21 = #a50026
0010C00013000000 is missing from centroids, skipping
48001-49000: processing u'0255E00082000000', 0:00:00.282780 time elapsed
  0255E00082000000: 1956-06-19 = #a50026
0178J00017000000 is missing from centroids, skipping
49001-50000: processing u'0551C00277000000', 0:00:00.287053 time elapsed
  0551C00277000000: 1950-05-05 = #a50026
  0551C00277000000: 1959-07-06 = #a50026
  0551C00277000000: 2011-12-13 = #02ca75
0349F00197000000 is missing from centroids, skipping
50001-51000: processing u'0692R00044000000', 0:00:00.285093 time elapsed
  0692R00044000000: 1950-05-05 = #02ca75
  0692R00044000000: 2003-08-22 = #02ca75
51001-52000: processing u'0634D00110000000', 0:00:00.2

89001-90000: processing u'0025N00080021900', 0:00:00.307087 time elapsed
  0025N00080021900: 2011-01-14 = #02ca75
  0025N00080021900: 2017-05-12 = #a50026
0175C00167000000 is missing from centroids, skipping
90001-91000: processing u'1357C00166000000', 0:00:00.287104 time elapsed
  1357C00166000000: 2001-02-27 = #a50026
1999L00012000000 is missing from centroids, skipping
91001-92000: processing u'0426D00233000000', 0:00:00.294100 time elapsed
  0426D00233000000: 2012-03-23 = #a50026
  0426D00233000000: 2015-06-18 = #a50026
8000T03141000000 is missing from centroids, skipping
1203F00095000300 is missing from centroids, skipping
0049C00209000000 is missing from centroids, skipping
92001-93000: processing '0268K00080000000', 0:00:00.290802 time elapsed
  0268K00080000000: 2014-12-31 = #a50026
  0268K00080000000: 2015-12-31 = #a50026
0024N00048000000 is missing from centroids, skipping
93001-94000: processing u'0087H00011000000', 0:00:00.292285 time elapsed
  0087H00011000000: 1998-07-15 

133001-134000: processing u'0014E00220000000', 0:00:00.280660 time elapsed
  0014E00220000000: 2006-08-29 = #02ca75
  0014E00220000000: 2013-03-05 = #02ca75
134001-135000: processing u'0341H00269000000', 0:00:00.286062 time elapsed
  0341H00269000000: 2007-08-29 = #02ca75
135001-136000: processing u'0042N00177000000', 0:00:00.289147 time elapsed
  0042N00177000000: 2003-03-05 = #02ca75
136001-137000: processing u'0003N00066000000', 0:00:00.286086 time elapsed
  0003N00066000000: 1991-01-02 = #a50026
  0003N00066000000: 2010-11-18 = #02ca75
137001-138000: processing u'0507S00164000000', 0:00:00.281678 time elapsed
  0507S00164000000: 2005-08-18 = #a50026
  0507S00164000000: 2014-06-02 = #a50026
  0507S00164000000: 2017-04-26 = #a50026
0023M00269000000 is missing from centroids, skipping
0081A00128000000 is missing from centroids, skipping
138001-139000: processing u'1242C00314000000', 0:00:00.280577 time elapsed
  1242C00314000000: 2003-07-08 = #a50026
  1242C00314000000: 2013-12-02 = #

181001-182000: processing u'0077B00151000000', 0:00:00.286298 time elapsed
  0077B00151000000: 2008-08-22 = #a50026
182001-183000: processing u'1354E00255000000', 0:00:00.278794 time elapsed
  1354E00255000000: 1997-08-11 = #02ca75
8000T03070000000 is missing from centroids, skipping
1735P00025000000 is missing from centroids, skipping
183001-184000: processing u'1355F00138512900', 0:00:00.291710 time elapsed
  1355F00138512900: 2008-12-05 = #a50026
0023M00247000000 is missing from centroids, skipping
0131E00042000000 is missing from centroids, skipping
0050K00060000000 is missing from centroids, skipping
184001-185000: processing u'0040B00232000000', 0:00:00.287649 time elapsed
  0040B00232000000: 1976-06-09 = #a50026
  0040B00232000000: 2016-09-30 = #02ca75
185001-186000: processing u'0299F00151000000', 0:00:00.302115 time elapsed
  0299F00151000000: 2006-07-13 = #a50026
0001N00281004B00 is missing from centroids, skipping
186001-187000: processing u'0305N00043000000', 0:00:00.277306

1834D00046000000 is missing from centroids, skipping
225001-226000: processing u'0344L00060000000', 0:00:00.284510 time elapsed
  0344L00060000000: 1996-05-06 = #a50026
  0344L00060000000: 2013-10-22 = #02ca75
226001-227000: processing u'0019D00211000000', 0:00:00.279635 time elapsed
  0019D00211000000: 2007-07-05 = #02ca75
1999P00022000000 is missing from centroids, skipping
0512L00014000100 is missing from centroids, skipping
227001-228000: processing u'0516B00165000000', 0:00:00.247704 time elapsed
  0516B00165000000: 1950-05-05 = #a50026
8000T00534000000 is missing from centroids, skipping
228001-229000: processing u'0206C00007000000', 0:00:00.218144 time elapsed
  0206C00007000000: 1993-12-21 = #a50026
  0206C00007000000: 2010-08-10 = #a50026
  0206C00007000000: 2017-06-26 = #a50026
229001-230000: processing u'0364M00249000000', 0:00:00.228667 time elapsed
  0364M00249000000: 1971-12-08 = #a50026
9946X50070000000 is missing from centroids, skipping
230001-231000: processing u'0960

0049N00131000000 is missing from centroids, skipping
268001-269000: processing u'0590H00007000000', 0:00:00.290801 time elapsed
  0590H00007000000: 2004-04-06 = #a50026
  0590H00007000000: 2010-04-08 = #a50026
  0590H00007000000: 2012-09-12 = #a50026
  0590H00007000000: 2014-10-03 = #a50026
0094C00246000000 is missing from centroids, skipping
269001-270000: processing u'0393H00086000000', 0:00:00.283025 time elapsed
  0393H00086000000: 1984-03-06 = #a50026
0361B00060000000 is missing from centroids, skipping
270001-271000: processing u'1498E00005000000', 0:00:00.285776 time elapsed
  1498E00005000000: 2000-06-23 = #a50026
  1498E00005000000: 2011-09-14 = #a50026
  1498E00005000000: 2015-08-10 = #a50026
0738H00044000000 is missing from centroids, skipping
271001-272000: processing u'0316S00012000000', 0:00:00.273254 time elapsed
  0316S00012000000: 2006-05-30 = #a50026
8000T02397000000 is missing from centroids, skipping
8000T00558000000 is missing from centroids, skipping
272001-273000

0119S00102000000 is missing from centroids, skipping
0086N00185000200 is missing from centroids, skipping
308001-309000: processing u'0301J00080000000', 0:00:00.279322 time elapsed
  0301J00080000000: 1993-02-26 = #02ca75
1658M00433000000 is missing from centroids, skipping
0851E00028000000 is missing from centroids, skipping
0692A00109000000 is missing from centroids, skipping
0034K00043000000 is missing from centroids, skipping
309001-310000: processing u'2387M00109000000', 0:00:00.283428 time elapsed
  2387M00109000000: 1950-05-05 = #a50026
  2387M00109000000: 1967-06-26 = #a50026
  2387M00109000000: 2012-12-20 = #a50026
  2387M00109000000: 2014-08-07 = #a50026
1658M00026000000 is missing from centroids, skipping
310001-311000: processing u'0101P00043000000', 0:00:00.264802 time elapsed
  0101P00043000000: 1996-02-08 = #02ca75
8000T03078000000 is missing from centroids, skipping
9946X50494000000 is missing from centroids, skipping
311001-312000: processing u'0447G00028000000', 0:00:

350001-351000: processing u'0194J00202000000', 0:00:00.274987 time elapsed
  0194J00202000000: 1950-05-05 = #a50026
  0194J00202000000: 1956-06-26 = #a50026
  0194J00202000000: 2015-10-19 = #a50026
1999P00028000000 is missing from centroids, skipping
351001-352000: processing u'0098R00100000000', 0:00:00.285733 time elapsed
  0098R00100000000: 2002-07-02 = #a50026
2181M00444000000 is missing from centroids, skipping
352001-353000: processing u'0101F00305000000', 0:00:00.269826 time elapsed
  0101F00305000000: 1984-09-05 = #a50026
0564M00201000000 is missing from centroids, skipping
353001-354000: processing u'0438E00065000000', 0:00:00.282291 time elapsed
  0438E00065000000: 2000-04-03 = #a50026
354001-355000: processing u'0087N00022000000', 0:00:00.277699 time elapsed
  0087N00022000000: 1986-04-29 = #a50026
  0087N00022000000: 2015-11-04 = #a50026
0050F00015000000 is missing from centroids, skipping
355001-356000: processing u'0698C00184002000', 0:00:00.282094 time elapsed
  0698C001

394001-395000: processing u'0088C00235000000', 0:00:00.281529 time elapsed
  0088C00235000000: 1950-05-05 = #02ca75
  0088C00235000000: 1952-07-23 = #02ca75
  0088C00235000000: 2014-05-15 = #a50026
0524M00300000000 is missing from centroids, skipping
395001-396000: processing u'0003S00087000000', 0:00:00.278894 time elapsed
  0003S00087000000: 1950-05-05 = #a50026
  0003S00087000000: 1961-08-17 = #a50026
  0003S00087000000: 2011-04-04 = #a50026
396001-397000: processing u'0517N00087000000', 0:00:00.301600 time elapsed
  0517N00087000000: 1950-05-05 = #a50026
  0517N00087000000: 1969-10-21 = #a50026
0084M00360000000 is missing from centroids, skipping
0430A00007000000 is missing from centroids, skipping
0737A00256000000 is missing from centroids, skipping
397001-398000: processing u'0449E00254000000', 0:00:00.282263 time elapsed
  0449E00254000000: 2006-03-02 = #a50026
1203C00036000300 is missing from centroids, skipping
398001-399000: processing u'0042N00323000000', 0:00:00.289951 time

9946X83322000000 is missing from centroids, skipping
439001-440000: processing u'0566E00044000000', 0:00:00.279421 time elapsed
  0566E00044000000: 2001-03-05 = #a50026
440001-441000: processing u'0227N00075041500', 0:00:00.271357 time elapsed
  0227N00075041500: 2002-06-20 = #a50026
  0227N00075041500: 2014-05-23 = #a50026
0638S00018000000 is missing from centroids, skipping
2011A00326161400 is missing from centroids, skipping
0363E00366000000 is missing from centroids, skipping
441001-442000: processing u'0014J00278000000', 0:00:00.279421 time elapsed
  0014J00278000000: 1985-04-04 = #a50026
1735P00011000000 is missing from centroids, skipping
442001-443000: processing u'0390P00312000000', 0:00:00.276759 time elapsed
  0390P00312000000: 2000-05-16 = #02ca75
1202D00220000000 is missing from centroids, skipping
443001-444000: processing u'0159D00258000000', 0:00:00.218172 time elapsed
  0159D00258000000: 1950-05-05 = #a50026
  0159D00258000000: 1970-11-25 = #a50026
444001-445000: proce

482001-483000: processing u'0301J00296000000', 0:00:00.276532 time elapsed
  0301J00296000000: 1950-05-05 = #a50026
  0301J00296000000: 1954-09-01 = #a50026
  0301J00296000000: 2012-03-13 = #02ca75
2011A00326161200 is missing from centroids, skipping
0668L00040000000 is missing from centroids, skipping
483001-484000: processing u'0300D00205040200', 0:00:00.279787 time elapsed
  0300D00205040200: 1995-06-30 = #02ca75
  0300D00205040200: 2012-03-29 = #a50026
484001-485000: processing u'0602L00010000000', 0:00:00.281338 time elapsed
  0602L00010000000: 2005-10-04 = #a50026
485001-486000: processing u'0320D00178000000', 0:00:00.280653 time elapsed
  0320D00178000000: 2007-04-05 = #a50026
0461L00003000000 is missing from centroids, skipping
8000T03755000000 is missing from centroids, skipping
0030E00307000000 is missing from centroids, skipping
486001-487000: processing u'0876P00020000000', 0:00:00.284632 time elapsed
  0876P00020000000: 1988-04-29 = #02ca75
  0876P00020000000: 2015-04-03 =

In [625]:
parid='0028L00264000000'
print property_map[parid]
for j in range(0,len(property_map[parid])):
    print saletype_to_color(parid, property_map[parid][j])

[{'event_type': 'PURCHASE', 'changeaddr': '1461 PROSPECT RD, PITTSBURGH, PA 15227', 'ownername': u'KHALIL ELIAS & YASSEMINE (W)', 'date': u'1986-06-11', 'saledesc': u'OTHER INVALID SALES INDICATED', 'ownerdesc': u'Regular'}, {'event_type': 'PURCHASE', 'changeaddr': '2777 SOULIER ST, PITTSBURGH, PA 15227', 'ownername': u'KHALIL JAMIL', 'date': u'2010-07-30', 'saledesc': u'LOVE AND AFFECTION SALE', 'ownerdesc': u'Regular'}, {'event_type': 'PURCHASE', 'changeaddr': '345 LINHART LN, PITTSBURGH, PA 15236', 'ownername': 'KHALIL JAMIL', 'date': u'2010-07-30', 'saledesc': 'LOVE&AFFECTION', 'ownerdesc': 'REGULAR'}, {'event_type': 'PURCHASE', 'changeaddr': '345 LINHART LN, PITTSBURGH, PA 15236', 'ownername': u'KHALIL JAMIL', 'date': u'2010-07-30', 'saledesc': u'LOVE AND AFFECTION SALE', 'ownerdesc': u'Regular'}, {'event_type': 'PURCHASE', 'homesteadflag': u'   ', 'changeaddr': '2777 SOULIER ST, PITTSBURGH, PA 15227', 'ownername': u'KHALIL JAMIL', 'date': u'2010-07-30', 'saledesc': u'BRAND NEW SA

In [628]:
'#a50026'  in single_property_colors

True

In [632]:
output_saletype_dots(['0028L00264000000'],"_d1")

  0028L00264000000: 1986-06-11 = #a50026
  0028L00264000000: 1986-06-11 - 2010-07-30 = #a50026 (len 5)
  0028L00264000000: 2010-07-30 = #15FA00
  0028L00264000000: 2010-07-30 - 2018-08-11 = #15FA00 (len 10)
Processing took 0:00:00.005889 for 1 dots


In [638]:
output_saletype_dots(valid_parids_15213,"_15213")

  0011F00188000000: 2008-11-17 = #15FA00
1-1000: processing '0026L00164000000', 0:00:00.363045 time elapsed
  0026L00164000000: 1978-10-02 = #02ca75
  0026L00164000000: 2013-10-04 = #02ca75
  0026L00164000000: 2013-12-26 = #02ca75
1001-2000: processing '0029B00145000000', 0:00:00.322082 time elapsed
  0029B00145000000: 1989-12-21 = #02ca75
  0029B00145000000: 2013-10-25 = #15FA00
2001-3000: processing '0028H00016000000', 0:00:00.305576 time elapsed
  0028H00016000000: 1950-05-05 = #a50026
  0028H00016000000: 1967-08-22 = #a50026
3001-4000: processing '0028G00064000000', 0:00:00.314717 time elapsed
  0028G00064000000: 1988-12-19 = #02ca75
Processing took 0:00:01.555014 for 4797 dots


In [None]:
# Below is Anne's attempt to put floating point representations of colors into the .bin file for a new 
# shader Randy's writing

In [None]:
def incrementKey(adict,key):
    if(key == '' or math.isnan(key)):
       return
    if not adict.has_key(key):
        adict[key] = 1
    else:
        adict[key] += 1

In [None]:
year_built_colors = ['#ffffe0','#ffdaa3','#ffb27c','#fb8768','#eb5f5b','#d3394a','#b3152f','#8b0000']
def year_built_to_color(year):
    if (year <= 1900.0):
        return color_from_floats(64,64,64)
    elif (year < 1916.0):
        return parse_color(year_built_colors[7])       
    elif (year < 1932.0):
        return parse_color(year_built_colors[6])
    elif (year < 1948.0):
        return parse_color(year_built_colors[5])
    elif (year < 1964.0):
        return parse_color(year_built_colors[4])
    elif (year < 1980.0):
        return parse_color(year_built_colors[3])
    elif (year < 1996.0):
        return parse_color(year_built_colors[2])
    elif  (year < 2012.0):
        return parse_color(year_built_colors[1])
    elif  (year < 2016.0):
        return parse_color(year_built_colors[0])
    else:
        return color_from_floats(255,255,255)
 
   
points = []
for i in range(0, len(apd.index)):
    par_id = apd['PARID'][i]
    year_built = apd['YEARBLT'][i]
    if year_built == '' or year_built == '0' or year_built == '0000' or math.isnan(year_built):
        year_built = '0001' # Null Value, -62135596800.0 Epoch
    elif isinstance(year_built, numbers.Number):
        if(year_built < 1):
            year_built = '0001' # Null Value, -62135596800.0 Epoch
        else:
            year_built = "%04d" % (year_built)
    if centroids.has_key(par_id):
        centroid = centroids[par_id]
        points += LonLatToPixelXY(centroid)        
        points.append(year_built_to_color(float(year_built)))
        # Set start valid time as year_built, and max positive float as end valid time
        date = datetime.datetime.strptime(year_built, '%Y')        
        points.append(GetEpoch(date))
        points.append(float(1e38))
array.array('f', points).tofile(open(('assessments/parcels_bltyr_color_epoch%s.bin'%out_suffix), 'wb'))

In [None]:
# Hightlight based on likelyhood of lead
# <1950 = Probably lead
# 1950-1978 = Maybe lead (not illegal, but being phased out)
# >1978 = Probably not lead (illegal)
year_built_colors = ['#ffffe0','#ffdaa3','#ffb27c','#fb8768','#eb5f5b','#d3394a','#b3152f','#8b0000']
def year_built_to_lead_color(year):
    if (year <= 1900.0):
        return color_from_floats(64,64,64)
    elif (year < 1950.0):
        return parse_color(year_built_colors[7])       
    elif (year < 1979.0):
        return parse_color(year_built_colors[3])
    else:
        return parse_color(year_built_colors[0])
    
points = []
for i in range(0, len(apd.index)):
    par_id = apd['PARID'][i]
    year_built = apd['YEARBLT'][i]
    if year_built == '' or year_built == '0' or year_built == '0000' or math.isnan(year_built):
        year_built = '0001' # Null Value, -62135596800.0 Epoch
    elif isinstance(year_built, numbers.Number):
        if(year_built < 1):
            year_built = '0001' # Null Value, -62135596800.0 Epoch
        else:
            year_built = "%04d" % (year_built)
    if centroids.has_key(par_id):
        centroid = centroids[par_id]
        points += LonLatToPixelXY(centroid)        
        points.append(year_built_to_lead_color(float(year_built)))
        # Set start valid time as year_built, and max positive float as end valid time
        date = datetime.datetime.strptime(year_built, '%Y')        
        points.append(GetEpoch(date))
        points.append(float(1e38))
array.array('f', points).tofile(open(('assessments/parcels_bltyr_lead_color_epoch%s.bin'%out_suffix), 'wb'))

In [None]:
                                     
fm_colors = ['#a50026','#cd2827','#e75436','#f7804b','#fdad61','#fed788','#ffffbf','#b9e0ed','#8dc0db','#699fca','#4d7db9','#3e5aa7','#313695']
def fm_total_to_color(fm_total):
    if (fm_total < 25000.):
        return parse_color(fm_colors[0]) 
    elif (fm_total < 50000.):
        return parse_color(fm_colors[1]) 
    elif (fm_total < 75000.):
        return parse_color(fm_colors[2]) 
    elif (fm_total < 100000.):
        return parse_color(fm_colors[3]) 
    elif (fm_total < 125000.):
        return parse_color(fm_colors[4]) 
    elif (fm_total < 150000.):
        return parse_color(fm_colors[5]) 
    elif (fm_total < 200000.):
        return parse_color(fm_colors[6]) 
    elif (fm_total < 250000.):
        return parse_color(fm_colors[7]) 
    elif (fm_total < 300000.):
        return parse_color(fm_colors[8]) 
    elif (fm_total < 400000.):
        return parse_color(fm_colors[9]) 
    elif (fm_total < 500000.):
        return parse_color(fm_colors[10]) 
    elif (fm_total < 750000.):
        return parse_color(fm_colors[11]) 
    else:
        return parse_color(fm_colors[12]) 
 
points = []
for i in range(0, len(apd.index)):
    par_id = apd['PARID'][i]
    fm_total = float(apd['FAIRMARKETTOTAL'][i])   
    
    year_built = apd['YEARBLT'][i]
    if year_built == '' or year_built == '0' or year_built == '0000' or math.isnan(year_built):
        year_built = '0001' # Null Value, -62135596800.0 Epoch
    elif isinstance(year_built, numbers.Number):
        if(year_built < 1):
            year_built = '0001' # Null Value, -62135596800.0 Epoch
        else:
            year_built = "%04d" % (year_built)

    if centroids.has_key(par_id):
        centroid = centroids[par_id]
        points += LonLatToPixelXY(centroid)        
        points.append(fm_total_to_color(fm_total))
        # Set start valid time as year_built, and max positive float as end valid time
        date = datetime.datetime.strptime(year_built, '%Y')        
        points.append(GetEpoch(date))
        points.append(float(1e38))
array.array('f', points).tofile(open(('assessments/parcels_fm_color_epoch%s.bin'%out_suffix), 'wb'))

In [None]:
parcel_colors = ['#fb3059','#fe6b2d','#d1947a','#c6a900','#02ca75','#00a2de','#9529b1']
class_descriptions = sorted(class_descs.keys())

def class_desc_to_color(class_description):
    index = class_descriptions.index(class_description)
    return(parse_color(parcel_colors[index]))

points = []
for i in range(0, len(apd.index)):
    par_id = apd['PARID'][i]
    class_description = apd['CLASSDESC'][i] 
    year_built = apd['YEARBLT'][i]

    if year_built == '' or year_built == '0' or year_built == '0000' or math.isnan(year_built):
        year_built = '0001' # Null Value, -62135596800.0 Epoch
    elif isinstance(year_built, numbers.Number):
        if(year_built < 1):
            year_built = '0001' # Null Value, -62135596800.0 Epoch
        else:
            year_built = "%04d" % (year_built)

    if centroids.has_key(par_id):
        centroid = centroids[par_id]
        points += LonLatToPixelXY(centroid)        
        points.append(class_desc_to_color(class_description))
        # Set start valid time as year_built, and max positive float as end valid time
        date = datetime.datetime.strptime(year_built, '%Y')        
        points.append(GetEpoch(date))
        points.append(float(1e38))
array.array('f', points).tofile(open(('assessments/parcels_class_color_epoch%s.bin'%out_suffix), 'wb'))

In [None]:
# Highlight vacant lots
points = []
for i in range(0, len(apd.index)):
    par_id = apd['PARID'][i]
    class_description = apd['CLASSDESC'][i] 
    usedesc = apd['USEDESC'][i]

    # default to grey
    color = "#555555"
    if(usedesc =='VACANT LAND'):
        # vacant lot, make it green
        color = "#02ca75"

    if centroids.has_key(par_id):
        centroid = centroids[par_id]
        points += LonLatToPixelXY(centroid)        
        points.append(parse_color(color))
        # Set start valid time as forever, and max positive float as end valid time
        points.append(0)
        points.append(float(1e38))
array.array('f', points).tofile(open(('assessments/vacant_lots%s.bin'%out_suffix), 'wb'))

In [None]:
# Here are Anne's efforts using the version with owner names to generate volume of properties owned by current owner

In [None]:
# SALEDATE field is in '%m-%d-%Y' format (ex 10-26-2012) when present
def SaledateToEpoch(datestr):
    return calendar.timegm(time.strptime(datestr, '%m-%d-%Y'))

In [None]:
# Parse apd table into dictionaries from owner names and change addresses to parid, and from parid to other info
# about the property
owner_names_to_parids = {}
owner_changeaddrs_to_parids = {}
parid_info = {}
volume_counts = {}

# For now, only look at residential properties with known centroids in 15213
for i in range(0, len(apd.index)):
    par_id = apd['PARID'][i]
    zipcode = apd['PROPERTYZIP'][i]
    class_description = apd['CLASSDESC'][i] 
    usedesc = apd['USEDESC'][i] 
    if (usedesc == 'VACANT LAND' or not centroids.has_key(par_id)):
        # skip this one
        continue
    
    # We want to include residential, apartments (usedesc includes APART)
    if(class_description != 'RESIDENTIAL' and (isinstance(usedesc, numbers.Number) or 'APART' not in usedesc)):
        # skip this one
        continue
        
    # This one meets our criteria
    owner_name = apd['PROPERTYOWNER'][i]
    owner_changeaddr = string.strip(str(apd['CHANGENOTICEADDRESS1'][i]))
    c2 = string.strip(str(apd['CHANGENOTICEADDRESS2'][i]))
    c3 = string.strip(str(apd['CHANGENOTICEADDRESS3'][i]))
    c4 = apd['CHANGENOTICEADDRESS4'][i]
    if(c2 != ''):
        owner_changeaddr+=", " + c2
    if(c3 != ''):
        owner_changeaddr+=", " + c3
    if(c4 != ''):
        if(isinstance(c4, numbers.Number)):
            if(not math.isnan(float(c4))):
                owner_changeaddr+=", " + str(int(c4))
        else:
            owner_changeaddr+=", " + c4
        
    if(owner_changeaddr == ''):
        # skip this one
        continue
        
    if(owner_names_to_parids.has_key(owner_name)):
        owner_names_to_parids[owner_name].append(par_id)
    else:
        owner_names_to_parids[owner_name]=[par_id]
        
    if(owner_changeaddrs_to_parids.has_key(owner_changeaddr)):
        owner_changeaddrs_to_parids[owner_changeaddr].append(par_id)
    else:
        owner_changeaddrs_to_parids[owner_changeaddr]=[par_id]

    parid_info[par_id]={'owner_name':owner_name,
                        'owner_changeaddr':owner_changeaddr,
                        'centroid':centroids[par_id],
                        'HOMESTEADFLAG':apd['HOMESTEADFLAG'][i],
                        'USEDESC':apd['USEDESC'][i],
                        'OWNERDESC':apd['OWNERDESC'][i],
                        'SALEDATE':apd['SALEDATE'][i],
                        'OWNERDESC':apd['OWNERDESC'][i],
                        'CLASSDESC':apd['CLASSDESC'][i]
                       }

In [None]:
# Add counts to parid_info based on max of owner_names_to_parids or owner_changeaddrs_to_parids
# Special case properties with HOMESTEADFLAG set to HOM to be volume=1
for par_id in parid_info.keys():
    volume=1
    if(parid_info[par_id]['HOMESTEADFLAG']!='HOM'):
        ownername_num = len(owner_names_to_parids[parid_info[par_id]['owner_name']])
        ownerchangeaddr_num = len(owner_changeaddrs_to_parids[parid_info[par_id]['owner_changeaddr']])
        volume = max(ownername_num,ownerchangeaddr_num)
    parid_info[par_id]['volume'] = volume
    incrementKey(volume_counts,volume)

In [None]:
volume_counts

In [None]:
for ownername in owner_names_to_parids.keys():
    ownername_num = len(owner_names_to_parids[ownername])
    if(ownername_num>10):
        print "%s: %d" % (ownername,ownername_num)

In [None]:
for ownerchangeaddr in owner_changeaddrs_to_parids.keys():
    ownerchangeaddr_num = len(owner_changeaddrs_to_parids[ownerchangeaddr])
    if(ownerchangeaddr_num>10):
        print "%s: %d" % (ownerchangeaddr,ownerchangeaddr_num)

In [None]:
for par_id in parid_info.keys():
    if(parid_info[par_id]['volume']>10):
        print parid_info[par_id]

In [None]:
# Write out a binary file with the volume colors
vol_colors = ['#a50026','#cd2827','#e75436','#f7804b','#fdad61','#fed788','#ffffbf','#b9e0ed','#8dc0db','#699fca','#4d7db9','#3e5aa7','#313695']
def volume_to_color(volume):
    if (volume < 2):
        return parse_color(vol_colors[0]) 
    #elif (volume < 3):
    #    return parse_color(vol_colors[1]) 
    #elif (volume < 4):
    #    return parse_color(vol_colors[2]) 
    elif (volume < 5):
        return parse_color(vol_colors[3]) 
    elif (volume < 10):
        return parse_color(vol_colors[4]) 
    elif (volume < 20):
        return parse_color(vol_colors[5]) 
    elif (volume < 40):
        return parse_color(vol_colors[6]) 
    elif (volume < 60):
        return parse_color(vol_colors[7]) 
    elif (volume < 80):
        return parse_color(vol_colors[8]) 
    elif (volume < 150):
        return parse_color(vol_colors[9]) 
    elif (volume < 300):
        return parse_color(vol_colors[10]) 
    elif (volume < 500):
        return parse_color(vol_colors[11]) 
    else:
        return parse_color(vol_colors[12]) 

# Write out volume of ownership for each residential non-vacant land property
points = []
for par_id in parid_info.keys():
    centroid = parid_info[par_id]['centroid']
    color = volume_to_color(parid_info[par_id]['volume'])
    saledate = SaledateToEpoch('01-01-1900')
    saledate_raw = parid_info[par_id]['SALEDATE']
    if(not(saledate_raw == '' or (isinstance(saledate_raw, numbers.Number) and math.isnan(saledate_raw)))):
        # If not valid, leave as 1900, otherwise parse it into an epoch time
        saledate = SaledateToEpoch(saledate_raw)
        
    if(color != None):
        points += LonLatToPixelXY(centroid)        
        points.append(color)
        # Put epoch time for SALEDATE as start valid time, and max positive float as end valid time
        points.append(float(saledate))
        points.append(float(1e38))
    else:
        print "Color of " + str(parid_info[par_id]['volume']) + " is None"
array.array('f', points).tofile(open(('assessments/res_volume_color_epoch%s.bin'%out_suffix), 'wb'))

In [None]:
#original version
legend = '<svg width="400" height="30"><rect fill="#a50026" x="0" height="10" width="30"></rect><rect fill="#cd2827" x="30" height="10" width="30"></rect><rect fill="#e75436" x="60" height="10" width="30"></rect><rect fill="#f7804b" x="90" height="10" width="30"></rect><rect fill="#fdad61" x="120" height="10" width="30"></rect><rect fill="#fed788" x="150" height="10" width="30"></rect><rect fill="#ffffbf" x="180" height="10" width="30"></rect><rect fill="#b9e0ed" x="210" height="10" width="30"></rect><rect fill="#8dc0db" x="240" height="10" width="30"></rect><rect fill="#699fca" x="270" height="10" width="30"></rect><rect fill="#4d7db9" x="300" height="10" width="30"></rect><rect fill="#3e5aa7" x="330" height="10" width="30"></rect><text font-size="10.5px" y="29" x="0" fill="#ffffff">1</text><text font-size="10.5px" y="29" x="20"  fill="#ffffff">&nbsp;2</text><text font-size="10.5px" y="29" x="50"  fill="#ffffff">&nbsp;3</text><text font-size="10.5px" y="29" x="80"  fill="#ffffff"> 4</text><text font-size="10.5px" y="29" x="110"  fill="#ffffff"> 5</text><text font-size="10.5px" y="29" x="140"  fill="#ffffff">10</text><text font-size="10.5px" y="29" x="170"  fill="#ffffff">20</text><text font-size="10.5px" y="29" x="200"  fill="#ffffff">40</text><text font-size="10.5px" y="29" x="230"  fill="#ffffff">60</text><text font-size="10.5px" y="29" x="260"  fill="#ffffff">80</text><text font-size="10.5px" y="29" x="290"  fill="#ffffff">150</text><text font-size="10.5px" y="29" x="320"  fill="#ffffff">300</text><text font-size="10.5px" y="29" x="350"  fill="#ffffff">500</text>'
HTML(legend)

In [None]:
#modified version
legend = '<svg width="400" height="30"><rect fill="#a50026" x="0" height="10" width="30"></rect><rect fill="#f7804b" x="30"  height="10" width="30"></rect><rect fill="#fdad61" x="60"  height="10" width="30"></rect><rect fill="#fed788" x="90"  height="10" width="30"></rect><rect fill="#ffffbf" x="120" height="10" width="30"></rect><rect fill="#b9e0ed" x="150" height="10" width="30"></rect><rect fill="#8dc0db" x="180" height="10" width="30"></rect><rect fill="#699fca" x="210" height="10" width="30"></rect><rect fill="#4d7db9" x="240" height="10" width="30"></rect><rect fill="#3e5aa7" x="270" height="10" width="30"></rect><text font-size="10.5px" y="29" x="0" fill="#ffffff">1</text><text font-size="10.5px" y="29" x="20"  fill="#ffffff">&nbsp;2</text><text Font-size="10.5px" y="29" x="50"   fill="#ffffff"> 5</text><text font-size="10.5px" y="29" x="80"   fill="#ffffff">10</text><text font-size="10.5px" y="29" x="110"  fill="#ffffff">20</text><text font-size="10.5px" y="29" x="140"  fill="#ffffff">40</text><text font-size="10.5px" y="29" x="170"  fill="#ffffff">60</text><text font-size="10.5px" y="29" x="200"  fill="#ffffff">80</text><text font-size="10.5px" y="29" x="230"  fill="#ffffff">150</text><text font-size="10.5px" y="29" x="260"  fill="#ffffff">300</text><text font-size="10.5px" y="29" x="290"  fill="#ffffff">500</text>'
HTML(legend)

In [None]:
# Write out a binary file with the colors for residential owned by corporate, government, or regular owners
def ownertype_to_color(par_id):
    ownerdesc = parid_info[par_id]['OWNERDESC']
    classdesc = parid_info[par_id]['CLASSDESC']
    volume = parid_info[par_id]['volume']
    if (classdesc == "GOVERNMENT"):
        print "Government: %s, %s, %d" % (ownerdesc, classdesc,volume)
        # Same color as government in property class view
        return parse_color("#d1947a") 
    elif ('CORPORATION' in ownerdesc):
        # Same color as 500+ properties in volume view
        #return parse_color('#313695') 
        return parse_color('#ffffbf')
    elif ('REGULAR' in ownerdesc):
        # Regular owner, what volume?
        if(volume == 1):
            # Same color as 1 in volume view
            return parse_color('#a50026')
        else:
            # Same color as other in class view
            return parse_color('#02ca75')
    else:
        print "Unrecognized owner type: %s, %s, %d" % (ownerdesc, classdesc,volume)
        return parse_color('#303030') 

# Write out volume of ownership for each residential non-vacant land property
points = []
for par_id in parid_info.keys():
    centroid = parid_info[par_id]['centroid']
    color = ownertype_to_color(par_id)
    saledate = SaledateToEpoch('01-01-1900')
    saledate_raw = parid_info[par_id]['SALEDATE']
    if(not(saledate_raw == '' or (isinstance(saledate_raw, numbers.Number) and math.isnan(saledate_raw)))):
        # If not valid, leave as 1900, otherwise parse it into an epoch time
        saledate = SaledateToEpoch(saledate_raw)
        
    if(color != None):
        points += LonLatToPixelXY(centroid)        
        points.append(color)
        # Put epoch time for SALEDATE as start valid time, and max positive float as end valid time
        points.append(float(saledate))
        points.append(float(1e38))
    else:
        print "Color of " + str(parid_info[par_id]['volume']) + " is None"
array.array('f', points).tofile(open(('assessments/ownertype_color_epoch%s.bin'%out_suffix), 'wb'))

In [None]:
parid_info['0002K00028000000']

In [None]:
# Foreclosures

In [None]:
# SALEDATE field is in '%m-%d-%Y' format (ex 10-26-2012) when present
def FilingdateToEpoch(datestr):
    return calendar.timegm(time.strptime(datestr, '%Y-%m-%d'))

In [None]:
# Read in foreclosure data 
fpath = "assessments/foreclosures-180327.csv"
fapd = pandas.read_csv(fpath)

In [None]:
# Write out volume of ownership for each residential non-vacant land property
points = []
missing_pin_c=0
found_pin_c=0
missing_pin_a=0
found_pin_a=0
foreclosure_color = '#ff00ff'

for i in range(0, len(fapd.index)):
    par_id = fapd['pin'][i]
    filing_date_raw = fapd['filing_date'][i]
    
    if(filing_date_raw == '' or (isinstance(filing_date_raw, numbers.Number) and math.isnan(filing_date_raw))):
        print "%d: Can't read filing_date %s" % (i, filing_date_raw)
        continue
    filing_date = FilingdateToEpoch(filing_date_raw)
    
    if par_id not in parid_info:
        missing_pin_c+=1
        continue
    found_pin_c+= 1
    
    centroid = parid_info[par_id]['centroid']
    
    if par_id not in parid_info:
        missing_pin_a+=1
    found_pin_a+=1
    
    saledate = SaledateToEpoch('01-01-1900')
    saledate_raw = parid_info[par_id]['SALEDATE']
    if(not(saledate_raw == '' or (isinstance(saledate_raw, numbers.Number) and math.isnan(saledate_raw)))):
        # If not valid, leave as 1900, otherwise parse it into an epoch time
        saledate = SaledateToEpoch(saledate_raw)
        
    startdate = filing_date
    enddate = saledate
    
    if(filing_date<saledate):
        # Sold after foreclosure, who bought it?
        points += LonLatToPixelXY(centroid)        
        points.append(ownertype_to_color(par_id))
        # Put epoch time for SALEDATE as start valid time, and max positive float as end valid time
        points.append(float(filing_date))
        points.append(float(1e38))
    else:
        # Sold before foreclosure, who owned it before?
        points += LonLatToPixelXY(centroid)        
        points.append(ownertype_to_color(par_id))
        # Put epoch time for SALEDATE as start valid time, and max positive float as end valid time
        points.append(float(saledate))
        points.append(float(filing_date))
        # Not sold after foreclosure, show ownership before sale and end foreclosure at end of time
        enddate=float(1e38)
        
    # Output foreclosure dot
    points += LonLatToPixelXY(centroid)        
    points.append(parse_color(foreclosure_color))
    # Put epoch time for SALEDATE as start valid time, and max positive float as end valid time
    points.append(float(startdate))
    points.append(float(enddate))

array.array('f', points).tofile(open(('assessments/foreclosure_color_epoch%s.bin'%out_suffix), 'wb'))

In [None]:
# Below here are modified versions of the blocks from Gabriel's original python notebook,
# none of which are needed anymore

In [None]:
sale_prices = {}
for i in range(0, len(apd.index)):
    sale_price = apd['SALEPRICE'][i]
    if not sale_prices.has_key(sale_price):
        sale_prices[sale_price] = 1
    else:
        sale_prices[sale_price] += 1


In [None]:
len(sale_prices.keys())


In [None]:
years_built = {}
sale_dates = {}
fairmarket_totals = {}
classes = {}
class_descs = {}

def incrementKey(adict,key):
    if(key == '' or math.isnan(key)):
       return
    if not adict.has_key(key):
        adict[key] = 1
    else:
        adict[key] += 1
    
for i in range(0, len(apd.index)):
    year_built = apd['YEARBLT'][i]
    incrementKey(years_built, year_built)
    sale_date = apd['SALEDATE'][i]
    incrementKey(sale_dates, sale_date)
    fairmarket_total = apd['FAIRMARKETTOTAL'][i]
    incrementKey(fairmarket_totals, fairmarket_total)
    class_ = apd['CLASS'][i]
    incrementKey(classes, class_)
    class_desc = apd['CLASSDESC'][i]
    incrementKey(class_descs, class_desc)


In [None]:
i

In [None]:
len(apd.index)

In [None]:
len(years_built.keys())
years_built['']

In [None]:
years_built

In [None]:
points = []
for i in range(0, len(apd.index)):
    par_id = apd['PARID'][i]
    year_built = apd['YEARBLT'][i]
    if year_built == '' or year_built == '0' or year_built == '0000' or math.isnan(year_built):
        year_built = '0001' # Null Value, -62135596800.0 Epoch
    elif isinstance(year_built, numbers.Number):
        if(year_built < 1):
            year_built = '0001' # Null Value, -62135596800.0 Epoch
        else:
            year_built = "%04d" % (year_built)
    if centroids.has_key(par_id):
        centroid = centroids[par_id]
        points += LonLatToPixelXY(centroid)
        date = datetime.datetime.strptime(year_built, '%Y')        
        points.append(GetEpoch(date))
array.array('f', points).tofile(open(('assessments/parcels_yr%s.bin'%out_suffix), 'wb'))


In [None]:
year_built
date = datetime.datetime.strptime('2000', '%Y')   
GetEpoch(date)

In [None]:
fairmarket_totals['0']

In [None]:
classes

In [None]:
class_descs

In [None]:
sorted(class_descs.keys())

In [None]:
no_year_built = 0
no_= 0
for i in range(0, len(apd.index)):
    year_built = apd['YEARBLT'][i]
    class_ = apd['CLASS'][i]
    if year_built == '' and class_ == 'R':
        no_year_built += 1
    elif year_built == '':
        no_ += 1
print no_year_built
print no_

In [None]:
class_descriptions = sorted(class_descs.keys())
points = []
for i in range(0, len(apd.index)):
    par_id = apd['PARID'][i]
    year_built = apd['YEARBLT'][i]
    class_description = apd['CLASSDESC'][i]  
    if year_built == '' or year_built == '0' or year_built == '0000' or math.isnan(year_built):
        year_built = '0001' # Null Value, -62135596800.0 Epoch
    elif isinstance(year_built, numbers.Number):
        if(year_built < 1):
            year_built = '0001' # Null Value, -62135596800.0 Epoch
        else:
            year_built = "%04d" % (year_built)
    if centroids.has_key(par_id):
        centroid = centroids[par_id]
        points += LonLatToPixelXY(centroid)
        date = datetime.datetime.strptime(year_built, '%Y')        
        points.append(GetEpoch(date))
        points.append(class_descriptions.index(class_description))
array.array('f', points).tofile(open(('assessments/parcels_yrblt%s.bin'%out_suffix), 'wb'))


In [None]:
foo = sorted(class_descs.keys())

In [None]:
len(foo)

In [None]:
parcel_colors = ['#e41a1c','#377eb8','#4daf4a','#984ea3','#ff7f00','#ffff33','#a65628']

In [None]:
for color in hex_colors:
    rgb_color = HexToRgb(color)
    print "%s,%s,%s" % (rgb_color[0]/255.0, rgb_color[1]/255.0, rgb_color[2]/255.0) 

In [None]:
hex_colors = ["#fb3059",
"#fe6b2d",
"#d1947a",
"#c6a900",
"#02ca75",
"#00a2de",
"#9529b1"]
for color in hex_colors:
    rgb_color = HexToRgb(color)
    print "%.4f,%.4f,%.4f" % (rgb_color[0]/255.0, rgb_color[1]/255.0, rgb_color[2]/255.0) 

In [None]:
hex_colors = ['#ffffff','#fffffa','#fffdf5','#fffdf1','#fffcef','#fffbed','#fffbea','#fff9e6','#fff9e3','#fff7e0','#fff7dd','#fff6db','#fff5d8','#fff4d6','#fff3d4','#fff3d2','#fff2d0','#fff0cd','#fff0cb','#ffefc8','#ffeec6','#ffedc3','#ffecc1','#ffeabe','#ffeabc','#ffe9b9','#ffe7b7','#ffe7b4','#ffe6b2','#ffe4b0','#ffe4ad','#ffe3ab','#ffe1a8','#ffe1a6','#ffe0a4','#ffdea1','#ffde9f','#ffdc9d','#ffdc9b','#ffda98','#ffd996','#ffd994','#ffd891','#ffd68f','#ffd68d','#ffd58a','#ffd388','#ffd386','#ffd284','#ffd081','#ffcf7f','#ffcf7d','#ffcd7b','#ffcc78','#ffcc76','#ffca74','#ffc971','#ffc96f','#ffc76d','#ffc76b','#ffc568','#ffc566','#ffc464','#ffc361','#ffc15d','#ffc05a','#ffbf58','#ffbe56','#ffbc53','#ffbc51','#ffba4e','#ffba4c','#ffb849','#ffb747','#ffb644','#ffb53f','#ffb43d','#ffb23a','#ffb237','#ffb134','#ffaf31','#ffaf2e','#ffad2a','#ffab24','#ffab21','#ffa91c','#ffa818','#ffa813','#ffa60b','#ffa400','#ffa300','#ffa200','#ffa100','#ff9f00','#ff9e00','#ff9d00','#ff9c00','#ff9b00','#ff9900','#ff9800','#ff9700','#ff9500','#ff9500','#ff9300','#ff9100','#ff9000','#ff8f00','#ff8d00','#ff8c00','#ff8a00','#ff8a00','#fe8800','#fe8800','#fe8700','#fe8400','#fd8400','#fd8300','#fd8200','#fd8000','#fc7f00','#fc7d00','#fc7c00','#fc7b00','#fb7a00','#fb7900','#fb7800','#fa7700','#fa7400','#fa7400','#f97200','#f97100','#f87000','#f86e00','#f86e00','#f76d00','#f76c00','#f66a00','#f66900','#f56800','#f56600','#f46600','#f46400','#f36400','#f36200','#f36100','#f26000','#f15e00','#f15d00','#f05d00','#ef5b00','#ef5900','#ef5900','#ee5800','#ed5700','#ed5500','#ec5500','#eb5400','#ea5200','#ea5100','#e95000','#e84f00','#e84e00','#e74c00','#e74b00','#e64a00','#e54900','#e44800','#e44700','#e34600','#e34500','#e14400','#e14300','#e04100','#df4000','#de4000','#de3e00','#dd3d00','#dc3d00','#db3c00','#db3a00','#da3900','#d83800','#d83700','#d73600','#d63500','#d63401','#d43301','#d43201','#d23101','#d13001','#d12e01','#d02e01','#d02d01','#ce2c01','#ce2b01','#cd2a01','#cc2901','#ca2801','#c92701','#c92601','#c82501','#c72401','#c62301','#c52201','#c42102','#c32002','#c21f02','#c01e02','#bf1d02','#bf1c02','#be1b02','#bd1a02','#bc1902','#ba1902','#ba1802','#b81702','#b81602','#b71502','#b51402','#b51302','#b41202','#b21202','#b11102','#b01002','#af0e02','#ae0d02','#ac0d02','#ac0c02','#aa0b02','#a90a02','#a80902','#a80902','#a60802','#a50702','#a30602','#a30602','#a20502','#a00502','#a00402','#9e0302','#9c0302','#9b0302','#9b0201','#9a0201','#990201','#970101','#950101','#950101','#940101','#920001','#920001','#900001','#8f0000','#8d0000','#8d0000','#8b0000']

img = Image.new( 'RGB', (255,255), "white") # create a new image
pixels = img.load() # create the pixel map
for i in range(img.size[0]):    # for every pixel:
    for j in range(img.size[1]):
        pixels[i,j] = hex_to_rgb(hex_colors[i]) # set the colour accordingly
img.show
img.save("assessments/year-built-color-map.png")


In [None]:
hex_stops =  ['#1d1e4e','#482045','#6f1f4b','#8d2747','#aa3f4c','#a9513f','#ba663a','#cf8139','#e59f44','#e9bb76']
hex_colors = ['#1d1e4e','#201e4e','#241e4d','#261e4d','#291e4c','#291f4c','#2b1f4c','#2d1f4b','#301f4b','#331f4a','#341f4a','#371f49','#381f49','#3b1f48','#3d1f48','#3d1f48','#3f2047','#402047','#432046','#452046','#462046','#482045','#492045','#4b2045','#4d2046','#4d2046','#502046','#502046','#522047','#532047','#552047','#562047','#572047','#5a2048','#5b2048','#5c2048','#5d2048','#5e2048','#602049','#622049','#632049','#652049','#65204a','#67204a','#69204a','#6b1f4a','#6b1f4a','#6c1f4b','#6f1f4b','#701f4b','#711f4b','#72204b','#72204b','#75204a','#75214a','#76214a','#77214a','#78214a','#7a224a','#7a224a','#7d2249','#7d2249','#7e2349','#7f2349','#802449','#812449','#832448','#842548','#852548','#862548','#882548','#882648','#892648','#8b2647','#8b2747','#8d2747','#8e2847','#8e2847','#902947','#902948','#912a48','#922b48','#932c48','#932c48','#952d48','#952d48','#952e49','#972f49','#973049','#983049','#993149','#9a3249','#9a3249','#9c334a','#9c334a','#9d354a','#9d354a','#9f364a','#9f364a','#a0374a','#a1384a','#a2384b','#a2394b','#a33a4b','#a43a4b','#a43b4b','#a63c4b','#a73c4b','#a83d4c','#a83d4c','#a93e4c','#a93f4c','#aa414b','#aa424a','#aa4349','#aa4448','#aa4747','#aa4846','#a94b44','#a94b43','#a94d43','#a94e41','#a94f40','#a9513f','#a9523f','#aa523f','#ab533e','#ac543e','#ac543e','#ad563e','#ad563e','#ae573e','#ae583e','#af593d','#b05a3d','#b05a3d','#b15b3d','#b15b3d','#b25d3c','#b35d3c','#b35e3c','#b45f3c','#b55f3c','#b5603c','#b6623b','#b6623b','#b7623b','#b8643b','#b9643b','#b9653a','#ba653a','#ba673a','#bb683a','#bb683a','#bc683a','#bd693a','#bd6a3a','#bd6a3a','#be6c3a','#bf6c3a','#c06d3a','#c16e3a','#c16e3a','#c16f3a','#c2713a','#c2713a','#c3723a','#c4733a','#c5743a','#c5753a','#c5753a','#c6753a','#c7773a','#c8773a','#c87939','#c97939','#c97939','#ca7a39','#ca7b39','#cb7c39','#cc7d39','#cc7e39','#cd7e39','#ce7f39','#ce8039','#cf8139','#cf8139','#d08239','#d0833a','#d1843a','#d2843a','#d2853b','#d2863b','#d3873b','#d4883b','#d5883c','#d5893c','#d58a3c','#d68b3c','#d78c3d','#d78d3d','#d88d3d','#d88d3e','#d98f3e','#da8f3e','#da903f','#da913f','#db923f','#dc933f','#dc9340','#dd9340','#dd9440','#de9641','#df9641','#df9741','#e09741','#e09942','#e19942','#e19a42','#e29a42','#e29c43','#e39d43','#e49d43','#e59e44','#e59e44','#e59f45','#e5a047','#e6a149','#e6a24b','#e6a44c','#e6a54e','#e6a650','#e6a652','#e6a652','#e7a754','#e7a855','#e7aa57','#e7ab59','#e7ab5b','#e7ac5d','#e8ae5e','#e8af60','#e8af62','#e8b063','#e8b165','#e8b265','#e8b367','#e8b469','#e8b56b','#e9b66c','#e9b76e','#e9b870','#e9b872','#e9ba73','#e9ba75','#e9bb76']

In [None]:
hex_colors.index(hex_stops[9])

In [None]:
years_built

In [None]:
hex_stops = ['#f7fbff','#deebf7','#c6dbef','#9ecae1','#6baed6','#4292c6','#2171b5','#08519c','#08306b']

In [None]:
for i in hex_stops:
  print i + ","

In [None]:
hex_stops = ['#ffffe0','#dcfac5','#b8f4ab','#90ee90','#6dc88c','#46a386','#008080','#265a81','#233381','#000080']
for color in hex_stops:
    rgb_color = HexToRgb(color)
    print "%.4f,%.4f,%.4f" % (rgb_color[0]/255.0, rgb_color[1]/255.0, rgb_color[2]/255.0) 

In [None]:
hex_stops = ['#1d1e4e','#5f2049','#8c2747','#aa414b','#ba653a','#d0823a','#e5a045','#ebc07f','#f7e0af','#ffffe0']
years = [1880,1895,1910,1925,1940,1955,1970,1985,2000,2015]

In [None]:
for i in range(0,10):
    print "if (year < %s) {" % (years[i])
    rgb_color = HexToRgb(hex_stops[i])
    print "  color = vec4(%.4f,%.4f,%.4f,1.0)" % (rgb_color[0]/255.0, rgb_color[1]/255.0, rgb_color[2]/255.0)
    print "}"

In [None]:
len(years)

In [None]:
hex_colors = ['#ffffe0','#ffeec1','#ffdea7','#ffcb91','#ffb880','#ffa474','#fe906a','#f87d64','#f06a5e','#e75758','#db4551','#cf3447','#c0223b','#b0122c','#9e051b','#8b0000']
img = Image.new( 'RGB', (255,255), "white") # create a new image
pixels = img.load() # create the pixel map
for i in range(img.size[0]):    # for every pixel:
    for j in range(img.size[1]):
        pixels[i,j] = hex_to_rgb(hex_colors[i/16]) # set the colour accordingly
img.show()
img.save("assessments/year-built-color-map.png")



In [None]:
parcel_colors = ['#e41a1c','#377eb8','#4daf4a','#984ea3','#ff7f00','#ffff33','#a65628']
parcel_colors = ['#fb3059','#fe6b2d','#d1947a','#c6a900','#02ca75','#00a2de','#9529b1']

year_built_colors = ['#ffffe0','#ffeec1','#ffdea7','#ffcb91','#ffb880','#ffa474','#fe906a','#f87d64','#f06a5e','#e75758','#db4551','#cf3447','#c0223b','#b0122c','#9e051b','#8b0000']
year_built_colors = ['#ffffe0','#ffdaa3','#ffb27c','#fb8768','#eb5f5b','#d3394a','#b3152f','#8b0000']
img = Image.new( 'RGB', (255,255), "white") # create a new image
pixels = img.load() # create the pixel map
for i in range(0,len(parcel_colors)):
    pixels[0,i] = HexToRgb(parcel_colors[i]) # set the colour accordingly
j = 0
for i in list(reversed(range(len(year_built_colors)))):
    pixels[1,j] = HexToRgb(year_built_colors[i]) # set the colour accordingly
    j += 1
#img.show()
img.save("assessments/color-map.png")


In [None]:
class_descriptions = sorted(class_descs.keys())
points = []
array.array('f', points).tofile(open('parcels_class.bin', 'wb'))
    par_id = apd['PARID'][i]
    year_built = apd['YEARBLT'][i]
    class_description = apd['CLASSDESC'][i]   
    if year_built == '' or year_built == '0':
        year_built = '0001' # Null Value, -62135596800.0 Epoch
    if centroids.has_key(par_id):
        centroid = centroids[par_id]
        points += LonLatToPixelXY(centroid)        
        points.append(float(year_built))
        points.append(class_descriptions.index(class_description)+1.0)
array.array('f', points).tofile(open('assessments/parcels_class.bin', 'wb'))


In [None]:
sorted(class_descs.keys())

In [None]:
class_descriptions.index('RESIDENTIAL')

In [None]:
points[10000:10005]

In [None]:
def scale_year(year):
    if year < 1800.0: 
        year = 1800.0
    return int(((year - 1800.0) * 8.0) / (2016.0-1800.))


In [None]:
scale_year(1800.0)

In [None]:
len(year_built_colors)

In [None]:
j = 0
for i in list(reversed(range(len(year_built_colors)))):
    print '<rect fill="%s" x="%s" height="10" width="25" stroke-width="1px" stroke="#666"></rect>' % (year_built_colors[i],j*25)
    j += 1
    
j = 0
for i in list(reversed(range(len(year_built_colors)))):
    print '<text font-size="10.5px" fill="%s" y="29" x="%s">%s</text>' % (year_built_colors[i], j*25, 1800)
    j += 1


In [None]:
list(reversed(range(10)))

In [None]:
len(fairmarket_totals)

In [None]:
fm_keys = sorted(map(int,fairmarket_totals.keys()))

In [None]:
numpy.average(fm_keys)

In [None]:
numpy.mean(fm_keys)

In [None]:
numpy.std(fm_keys)

In [None]:
fairmarket_totals['50000']

In [None]:
parcel_colors = ['#fb3059','#fe6b2d','#d1947a','#c6a900','#02ca75','#00a2de','#9529b1']
year_built_colors = ['#ffffe0','#ffdaa3','#ffb27c','#fb8768','#eb5f5b','#d3394a','#b3152f','#8b0000']
fm_colors = ['#a50026','#cd2827','#e75436','#f7804b','#fdad61','#fed788','#ffffbf','#b9e0ed','#8dc0db','#699fca','#4d7db9','#3e5aa7','#313695'
]

img = Image.new( 'RGB', (255,255), "white") # create a new image
pixels = img.load() # create the pixel map
for i in range(0,len(parcel_colors)):
    pixels[0,i] = HexToRgb(parcel_colors[i]) # set the colour accordingly
j = 0
for i in list(reversed(range(len(year_built_colors)))):
    pixels[1,j] = HexToRgb(year_built_colors[i]) # set the colour accordingly
    j += 1
for i in range(0,len(fm_colors)):
    pixels[2,i] = HexToRgb(fm_colors[i]) # set the colour accordingly
    
#img.show()
img.save("assessments/color-map.png")


In [None]:
class_descriptions = sorted(class_descs.keys())
points = []
for i in range(0, len(apd.index)):
    par_id = apd['PARID'][i]
    year_built = apd['YEARBLT'][i]
    class_description = apd['CLASSDESC'][i] 
    fm_total = float(apd['FAIRMARKETTOTAL'][i])    
    if year_built == '' or year_built == '0' or year_built == '0000' or math.isnan(year_built):
        year_built = '0001' # Null Value, -62135596800.0 Epoch
    elif isinstance(year_built, numbers.Number):
        if(year_built < 1):
            year_built = '0001' # Null Value, -62135596800.0 Epoch
        else:
            year_built = "%04d" % (year_built)
    if centroids.has_key(par_id):
        centroid = centroids[par_id]
        points += LonLatToPixelXY(centroid)        
        points.append(float(year_built))
        points.append(class_descriptions.index(class_description)+1.0)
        points.append(fm_total)
array.array('f', points).tofile(open(('assessments/parcels_all%s.bin'%out_suffix), 'wb'))


In [None]:
sorted(class_descs.keys())

In [None]:
# Below is Anne's attempt to put floating point representations of colors into the .bin file for a new 
# shader Randy's writing

In [None]:
parcel_colors = ['#fb3059','#fe6b2d','#d1947a','#c6a900','#02ca75','#00a2de','#9529b1']
class_descriptions = sorted(class_descs.keys())

def class_desc_to_color(class_description):
    index = class_descriptions.index(class_description)
    return(parse_color(parcel_colors[index]))

points = []
for i in range(0, len(apd.index)):
    par_id = apd['PARID'][i]
    class_description = apd['CLASSDESC'][i] 
    if centroids.has_key(par_id):
        centroid = centroids[par_id]
        points += LonLatToPixelXY(centroid)        
        points.append(class_desc_to_color(class_description))
array.array('f', points).tofile(open(('assessments/parcels_class_color%s.bin'%out_suffix), 'wb'))

In [None]:
len(fm_colors)

In [None]:
fairmarket_totals['0']

In [None]:
plt.plot(map(int,fairmarket_totals.keys()), fairmarket_totals.values(), 'ro')
plt.show()

In [None]:
fm_keys[len(fm_keys) - 100:len(fm_keys)]

In [None]:
fm_values = ["0", "25K", "50K", "75K", "100K", "125K", "150K", "200K", "250K", "300K", "400K", "500K", "+500K"]
for i in range(len(fm_colors)):
    print '<rect fill="%s" x="%s" height="10" width="30"></rect>' % (fm_colors[i],i*30)
    
for i in range(len(fm_values)):
    print '<text font-size="10.5px" y="29" x="%s">%s</text>' % (i*30, fm_values[i])
