# Analysis of vacancy and foreclosures, transactions, sheriff's auctions, and armslength sales

In [2]:
import matplotlib.pyplot as plt
import datetime as dt
import seaborn as sns
import pandas as pd
import numpy as np
import os
import re

%matplotlib inline

path = '/'.join(os.getcwd().split('/')[:-2])
print(path)

/Volumes/Dropbox/largetransfer/luc/carter


In [3]:
# import data

tci = pd.read_csv(path+'/data/model_data/tci_1_0.csv', parse_dates=['Date'])
tci = tci[['parcel','vacant','SPA_NAME','Date']]

dates = dict(zip(tci.parcel, tci.Date))
print(tci.shape)

(113132, 4)


  interactivity=interactivity, compiler=compiler, result=result)


In [4]:
# assume real estate owned (by companies or banks) are organizations that have these series of letters in their name.
def find_REO(s):
    if isinstance(s,str):
        s = s.lower()
        if re.search("llc", s):
            return True
        if re.search("bank", s):
            return True
        if re.search("mortg", s):
            return True
        if re.search("mort.", s):
            return True
        if re.search("comp", s):
            return True
        if re.search("corp", s):
            return True
        if re.search("fannie", s):
            return True
        if re.search("housing", s):
            return True
        if re.search("sec.", s):
            return True
        if re.search("loan", s):
            return True
        if re.search("inc", s):
            return True
        if re.search("ohio", s):
            return True
        if re.search("cleveland", s):
            return True
        if re.search("estate", s):
            return True
        if re.search("organization", s):
            return True
        if re.search("develop", s):
            return True
        if re.search("ltd", s):
            return True
        if re.search("hsg", s):
            return True
        if re.search("limited", s):
            return True
        if re.search("cuyahoga", s):
            return True
        if re.search("propert", s):
            return True
        if re.search("invest", s):
            return True
        if re.search("realt", s):
            return True
        if re.search("homes", s):
            return True
        if re.search("neighbor", s):
            return True
        return False
    else:
        return False

In [5]:
#
# METHODOLOGY:

# Data included:
# Foreclosures
# Transfers: filtered for sheriff's deeds (assumed to be sheriff's sales)
# Armslength sales

# All three datasets are combined such that the dates, parcels, and types are included in the final dataset 
# Should be encoded as follows: 
# 0: parcels with no history
# 1: foreclosure
# 2: sold at sheriff's auction (to a REO entity) – considered to be vacant
# 3: sold at armslength (or sold at sheriff's auction to non-REO)

# At each date, assign each parcel a number based on the above rubric. The numbers may depend on previous date's 
# numbers and continue to increment, such that a parcel with the number 4 is in its second foreclosure process


fc = pd.read_csv(path+'/data/clean_data/foreclosure_filings2.csv', parse_dates=[2])
# fc = fc.sort_values('filedate').groupby('caseno').last()
fc = fc.rename(columns={'filedate':'date'})
fc['type'] = 'fc'
fc = fc[['date','type','parcel']]


t = pd.read_csv(path+'/data/clean_data/transfers.csv', parse_dates=['mdate'], dtype=str)
t['REO'] = t.GRANTEE1.apply(find_REO)
t = t[(t.DEED_TYPE.isin(['Sheriffs Deed',
 'Sheriffs Deed Ex']))]
t = t[['PROPERTY_NUMBER','mdate','REO']].rename(columns={'PROPERTY_NUMBER':'parcel','mdate':'date'})
t['type'] = 't'

t.loc[t.REO==False,'type'] = 'al'

al = pd.read_csv(path+'/data/clean_data/armslength.csv', dtype=str)
al = al.drop_duplicates()

# parse armslength dates
months = dict(zip(['JAN','FEB','MAR','APR','MAY','JUN','JUL','AUG','SEP','OCT','NOV','DEC'],range(1,13)))
def parse_date(x):
    day = int(x[0:2])
    month = months[x[2:5]]
    year = int(x[5:])
    return dt.datetime(year,month,day)

al['date'] = al.mdate.apply(parse_date)
al = al.sort_values('date').rename(columns={'PROPERTY_NUMBER':'parcel'})

def parse_amount(x):
    return float(str(x)[1:].replace(',',''))

al['type'] = 'al'

# copies of datasets for finding the vacancy status according to the TCI survey

t_copy = pd.merge(t, tci[['parcel','Date']].set_index('parcel'), how='left',left_on='parcel',right_index=True)
t_copy = t_copy.loc[(t_copy.date < t_copy.Date)]

fc_copy = pd.merge(fc, tci[['parcel','Date']].set_index('parcel'), how='left',left_on='parcel',right_index=True)
fc_copy = fc_copy.loc[(fc_copy.date < fc_copy.Date)]

al_copy = pd.merge(al, tci[['parcel','Date']].set_index('parcel'), how='left',left_on='parcel',right_index=True)
al_copy = al_copy.loc[(al_copy.date < al_copy.Date)]

In [6]:
df = fc[['parcel','date','type']].append(t[['parcel','date','type','REO']]).append(al[['parcel','date','type']])
df2 = fc_copy[['parcel','date','type']].append(t_copy[['parcel','date','type','REO']]).append(al_copy[['parcel','date','type']])
# df = df.fillna(False)
df = df.sort_values(by='date')
df2 = df2.sort_values(by='date')

In [7]:
s = {0:'al', 1:'fc', 2:'t'}

def get_number(x):
    for col in range(1,23):
        if x.iloc[col] == s[x.iloc[col-1]%3]:
            x.iloc[col] = x.iloc[col-1]
        elif (s[x.iloc[col-1]%3] == 'fc') & (x.iloc[col] == 't'):
            x.iloc[col] =  x.iloc[col-1]+1
        elif (s[x.iloc[col-1]%3] == 'fc') & (x.iloc[col] == 'al'):
            x.iloc[col] =  x.iloc[col-1]+2
        elif (s[x.iloc[col-1]%3] == 't') & (x.iloc[col] == 'al'):
            x.iloc[col] =  x.iloc[col-1]+1
        elif (s[x.iloc[col-1]%3] == 't') & (x.iloc[col] == 'fc'):
            x.iloc[col] =  x.iloc[col-1]+2
        elif (s[x.iloc[col-1]%3] == 'al') & (x.iloc[col] == 'fc'):
            x.iloc[col] =  x.iloc[col-1]+1
        elif (s[x.iloc[col-1]%3] == 'al') & (x.iloc[col] == 't'):
            x.iloc[col] =  x.iloc[col-1]+2
        else:
            x.iloc[col] =  0
    return x

In [11]:
tem.head()

Unnamed: 0_level_0,REO,date,type
parcel,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
001-01-003,,2005-12-01,al
001-01-004,,2007-03-08,al
001-01-005,,2015-02-04,al
001-01-008,,2003-12-11,al
001-01-009,,2014-06-16,al


In [10]:
tem = df2.groupby('parcel').nth(-1)
status.loc[status.iloc[:,0]=='fc', status.columns[0]]  = 1
status.loc[status.iloc[:,0]=='t', status.columns[0]] = 2
status.loc[status.iloc[:,0]=='al', status.columns[0]] = 0
status.loc[status.iloc[:,0].isnull(), status.columns[0]]  = 0

In [7]:
rng = pd.date_range('10/1/2009', periods=23, freq='3MS')
status = tci[['parcel']].copy()

for date in rng:
    tem = df[df.date < date].groupby('parcel').nth(-1)
    status = pd.merge(status, tem[['type']], how='left',left_on='parcel', right_index=True)
    status = status.rename(columns={'type':date})
    print(date, len(tem[tem.REO==True]))

2009-10-01 00:00:00 2738
2010-01-01 00:00:00 2697
2010-04-01 00:00:00 2764
2010-07-01 00:00:00 2751
2010-10-01 00:00:00 2799
2011-01-01 00:00:00 3024
2011-04-01 00:00:00 3099
2011-07-01 00:00:00 3049
2011-10-01 00:00:00 3117
2012-01-01 00:00:00 3217
2012-04-01 00:00:00 3200
2012-07-01 00:00:00 3308
2012-10-01 00:00:00 3434
2013-01-01 00:00:00 3523
2013-04-01 00:00:00 3611
2013-07-01 00:00:00 3556
2013-10-01 00:00:00 3554
2014-01-01 00:00:00 3605
2014-04-01 00:00:00 3770
2014-07-01 00:00:00 3903
2014-10-01 00:00:00 4024
2015-01-01 00:00:00 4153
2015-04-01 00:00:00 4433


In [8]:
status = status.set_index('parcel')
status_copy = status.copy()
status.loc[status.iloc[:,0]=='fc', status.columns[0]]  = 1
status.loc[status.iloc[:,0]=='t', status.columns[0]] = 2
status.loc[status.iloc[:,0]=='al', status.columns[0]] = 0
status.loc[status.iloc[:,0].isnull(), status.columns[0]]  = 0

In [11]:
s = {0:'al', 1:'fc', 2:'t'}

def get_number(x):
    for col in range(1,23):
        if x.iloc[col] == s[x.iloc[col-1]%3]:
            x.iloc[col] = x.iloc[col-1]
        elif (s[x.iloc[col-1]%3] == 'fc') & (x.iloc[col] == 't'):
            x.iloc[col] =  x.iloc[col-1]+1
        elif (s[x.iloc[col-1]%3] == 'fc') & (x.iloc[col] == 'al'):
            x.iloc[col] =  x.iloc[col-1]+2
        elif (s[x.iloc[col-1]%3] == 't') & (x.iloc[col] == 'al'):
            x.iloc[col] =  x.iloc[col-1]+1
        elif (s[x.iloc[col-1]%3] == 't') & (x.iloc[col] == 'fc'):
            x.iloc[col] =  x.iloc[col-1]+2
        elif (s[x.iloc[col-1]%3] == 'al') & (x.iloc[col] == 'fc'):
            x.iloc[col] =  x.iloc[col-1]+1
        elif (s[x.iloc[col-1]%3] == 'al') & (x.iloc[col] == 't'):
            x.iloc[col] =  x.iloc[col-1]+2
        else:
            x.iloc[col] =  0
    return x

In [12]:
status2 = status.apply(get_number, axis=1).copy()

In [13]:
status2.to_csv(path+'/data/clean_data/parcel_status.csv')

In [4]:
dfs = pd.read_csv(path+'/data/clean_data/parcel_status.csv')

In [7]:
dfs = dfs.set_index('parcel')

In [14]:
((dfs%3)==2).apply(sum, axis=0)

2009-10-01    2738
2010-01-01    2697
2010-04-01    2764
2010-07-01    2751
2010-10-01    2799
2011-01-01    3024
2011-04-01    3099
2011-07-01    3049
2011-10-01    3117
2012-01-01    3217
2012-04-01    3200
2012-07-01    3308
2012-10-01    3434
2013-01-01    3523
2013-04-01    3611
2013-07-01    3556
2013-10-01    3554
2014-01-01    3605
2014-04-01    3770
2014-07-01    3903
2014-10-01    4024
2015-01-01    4153
2015-04-01    4433
dtype: int64