# Analysis of vacancy and foreclosures, transactions, sheriff's auctions, and armslength sales

In [171]:
import matplotlib.pyplot as plt
import datetime as dt
import seaborn as sns
import pandas as pd
import numpy as np
import os
import re

%matplotlib inline

path = '/'.join(os.getcwd().split('/')[:-1])
print(path)

/Volumes/Dropbox/largetransfer/luc/carter


In [175]:
# import data

tci = pd.read_csv(path+'/data/model_data/tci_1_0.csv', parse_dates=['Date'])
tci = tci[['parcel','vacant','SPA_NAME','Date']]

dates = dict(zip(tci.parcel, tci.Date))
print(tci.shape)

(113132, 4)


  interactivity=interactivity, compiler=compiler, result=result)


## Foreclosures

In [125]:
# import all foreclosures

fc = pd.read_csv(path+'/data/clean_data/foreclosure_filings2.csv', parse_dates=[2])
fc = fc.sort_values('filedate').groupby('caseno').last()
fc = fc.rename(columns={'filedate':'date'}).reset_index()

print(fc.columns)

Index(['caseno', 'parcel', 'status', 'date', 'defendant', 'parcel_address',
       'dateid', 'case_title', 'plaintiff', 'FORE', 'LATITUDE', 'LONGITUDE',
       'zip_fore'],
      dtype='object')


In [126]:
# crosstabulation looking at whether or not vacancies have ever been foreclosed

fc_parcel = set(fc.parcel)
tci['fc'] = tci.apply(lambda x: x.parcel in fc_parcel, axis=1)
pd.crosstab(tci.fc, tci.vacant)

vacant,0,1
fc,Unnamed: 1_level_1,Unnamed: 2_level_1
False,79154,3836
True,23940,6202


#### Has it been foreclosed before?

So of the 30,142 parcels that have been foreclosed between 1/3/2006 and 11/6/2015, 6,202 are currently vacant, of the 10,038 total residential vacancies as measured by TCI. This means that 3,836 parcels were determined to be vacant but never have been foreclosed. 

#### Active vs. inactive foreclosures

In [127]:
fc_parcel = set(fc[fc.status=='Inactive'].parcel)
tci['fc'] = tci.apply(lambda x: x.parcel in fc_parcel, axis=1)
pd.crosstab(tci.fc, tci.vacant)

vacant,0,1
fc,Unnamed: 1_level_1,Unnamed: 2_level_1
False,79967,4135
True,23127,5903


In [128]:
fc_parcel = set(fc[fc.status=='Active'].parcel)
tci['fc'] = tci.apply(lambda x: x.parcel in fc_parcel, axis=1)
pd.crosstab(tci.fc, tci.vacant)

vacant,0,1
fc,Unnamed: 1_level_1,Unnamed: 2_level_1
False,101689,9455
True,1405,583


#### Number of times foreclosed vs. vacant

In [129]:
tem = pd.merge(fc[['parcel','status']].groupby('parcel').count(), tci[['parcel','vacant']], how='right', left_index=True, right_on='parcel').fillna(0)
tem = tem[['status','vacant']].groupby('status').agg([sum,len])
tem['percent'] = tem.vacant['sum']/tem.vacant['len']
tem

Unnamed: 0_level_0,vacant,vacant,percent
Unnamed: 0_level_1,sum,len,Unnamed: 3_level_1
status,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2
0,3836,82990,0.046222
1,4193,22045,0.190202
2,1579,6255,0.252438
3,330,1422,0.232068
4,81,326,0.248466
5,14,71,0.197183
6,4,15,0.266667
7,1,8,0.125


## Sheriff's Auctions

Merging sheriff's auctions, transfers, and foreclosures to see order of events for each parcel.

In [130]:
sa = pd.read_csv(path+'/data/clean_data/sheriff_auction.csv', parse_dates=[2])#, encoding="ISO-8859-1")
sa = sa.rename(columns={'salesdt':'date'})

In [131]:
sa = sa.loc[:,['date','parcel','sold_amt','purchaser']]
sa['REO'] = 0
sa['type'] = 'sa'

In [132]:
t = pd.read_csv(path+'/data/clean_data/transfers.csv', parse_dates=['mdate'], dtype=str)
t = t.rename(columns={'PROPERTY_NUMBER':'parcel','mdate':'date'})
t['type'] = 't'
t['REO'] = 0

t.loc[(t.DEED_TYPE.isin(['Sheriffs Deed',
 'Sheriffs Deed Ex'])),'shf'] = 1

In [133]:
fc = fc.loc[:,['date','parcel']]
fc['type'] = 'fc'
fc['REO'] = 0

In [189]:
# assume real estate owned (by companies or banks) are organizations that have these series of letters in their name.

def find_REO(s):
    if isinstance(s,str):
        s = s.lower()
        if re.search("llc", s):
            return True
        if re.search("bank", s):
            return True
        if re.search("mortg", s):
            return True
        if re.search("mort.", s):
            return True
        if re.search("comp", s):
            return True
        if re.search("corp", s):
            return True
        if re.search("fannie", s):
            return True
        if re.search("housing", s):
            return True
        if re.search("sec.", s):
            return True
        if re.search("loan", s):
            return True
        if re.search("inc.", s):
            return True
        return False
    else:
        return False

In [190]:
sa['REO'] = sa.purchaser.apply(find_REO)
sa['GRANTEE1'] = sa.purchaser
t['REO'] = t.GRANTEE1.apply(find_REO)

In [212]:
# append the datasets and eliminate dates from before they were searched, then sort by date
df = fc.append(sa.loc[sa.sold_amt.notnull(),['date','REO','parcel','type','GRANTEE1']]).append(t[['date','parcel','type','REO','DEED_TYPE','GRANTEE1']])
df = df.loc[df.apply(lambda x: x.date < dates[x.parcel], axis=1)]
df = df.sort_values('date')

In [217]:
# get the last transaction
last = df.groupby('parcel').nth(-1)

In [218]:
# merge with the vacancy status
tem = pd.merge(last, tci[['parcel','vacant']], how='left', left_index=True, right_on='parcel')

In [220]:
tem[tem.vacant==1].sample(30)

Unnamed: 0,DEED_TYPE,GRANTEE1,REO,date,type,parcel,vacant
102604,Quit Claim Deed Ex,"WHITE, DOUGLAS LAVON",0,2014-08-04,t,138-12-073,1
101511,Quit Claim Deed,"Millhouse, Edith A.",0,2001-01-17,t,138-01-040,1
68717,Forfeiture Ex,STATE OF OHIO (FORF) CASE NO. CV 800207,0,2014-09-16,t,112-24-182,1
67090,Quit Claim Deed,GARRETT BILLY,0,2012-12-18,t,111-26-012,1
53067,,,0,2015-01-08,fc,028-26-012,1
74236,Sheriffs Deed Ex,LAKESIDE REO VENTURES LLC,1,2014-06-26,t,116-20-071,1
67425,,,0,2012-08-23,fc,112-02-051,1
37126,Quit Claim Deed Ex,"ANDERSON, DEVAN G.",0,2014-10-14,t,020-12-087,1
60134,Fiscal Officer Ex,"ELLIS, LAMONT",0,2009-04-29,t,108-18-017,1
88906,Quit Claim Deed Ex,"WILLIAMS, ROBBIE L",0,2005-07-27,t,130-09-013,1


In [221]:
# look at the number of vacant, 'real estate owned' parcels vs the total REO
sum(tci[(tci.parcel.isin(last[(last.REO==1)].index))].vacant), len(tci[(tci.parcel.isin(last[(last.REO==1)].index))].vacant)

(2037, 9336)

Compare with Pittsburgh's measure, which is a house that has been foreclosed and not yet sold in sheriff's auction or transferred.

In [226]:
sub = set(last[last['type'] == 'fc'].index)
sum(tci[tci.parcel.isin(sub)].vacant), len(tci[tci.parcel.isin(sub)].vacant)
# second_last = df.groupby('parcel').nth(-2)
# sub2 = set(second_last[second_last['type']=='fc'].index)

(2665, 11337)

So only 2665 out of 11337 parcels that meet this criteria are actually vacant, which is not a very good percentage.

## Postal vacancies

In [227]:
pv = pd.read_csv(path+'/data/clean_data/postal_vacancy.csv', parse_dates=3, index_col=0)
pv.date = pv.date.apply(lambda x: dt.datetime(int(x[0:4]),int(x[5:7]),int(x[8:10])))
pv = pv.sort_values('date',ascending=False)
pv = pv.loc[pv.apply(lambda x: x.date < dates[x.PARCEL], axis=1)]

In [228]:
# count the number of consecutive times a parcel is postal vacant, working backwards
def fun(x):
    total = 0
    recent = pv_dates[pv_dates<dates[x.parcel]]

    for i,j in enumerate(x[recent]):
        if x[recent][-i-1] == 'Y':
            total += 1
        else:
            return total
    return total

pv_dates = pv.groupby('date').last().index
pv2 = pv.pivot(index='PARCEL',columns='date').fillna('N').reset_index()
pv2.columns = pv2.columns.droplevel()
pv2.columns.values[0] = 'parcel'
pv2['pv_count'] = pv2.apply(fun, axis=1)
pv2 = pv2.set_index('parcel')

Compare the number of postal vacant homes (at least 1 term in a row) to the actually vacant homes.

In [230]:
sum(tci[tci.parcel.isin(pv2[pv2.pv_count>0].index)].vacant),len(tci[tci.parcel.isin(pv2[pv2.pv_count>0].index)].vacant)

(4426, 8904)

About half of the postal vacant homes are actually vacant right now. That's a lot better than the other measures, but still far away from being as accurate as we'd like.