# Analysis of vacancy and foreclosures, transactions, sheriff's auctions, and armslength sales

In [66]:
import matplotlib.pyplot as plt
import datetime as dt
import seaborn as sns
import pandas as pd
import numpy as np
import os

%matplotlib inline

path = '/'.join(os.getcwd().split('/')[:-1])
print(path)

/Volumes/Dropbox/largetransfer/luc/carter


In [67]:
df = pd.read_csv(path+'/data/model_data/tci_2_0.csv', index_col = 0)
tci = pd.read_csv(path+'/data/model_data/tci_2_2.csv', parse_dates=['Date'], dtype={'Parcel ID':str})
tci = pd.merge(tci, df[['NAME10','SPA_NAME']], how='left', right_index=True, left_on='parcel')
ppns = set(tci['parcel'])
dates = dict(zip(tci.parcel, tci.Date))
print(tci.shape)

(113132, 39)


## Foreclosures

In [68]:
fc = pd.read_csv(path+'/data/clean_data/foreclosure_filings2.csv', parse_dates=[2])
fc = fc.sort_values('filedate').groupby('caseno').last()
fc = fc.rename(columns={'filedate':'date'}).reset_index()

fc.columns

Index([u'caseno', u'parcel', u'status', u'date', u'defendant',
       u'parcel_address', u'dateid', u'case_title', u'plaintiff', u'FORE',
       u'LATITUDE', u'LONGITUDE', u'zip_fore'],
      dtype='object')

In [69]:
fc_parcel = set(fc.parcel)
tci['fc'] = tci.apply(lambda x: x.parcel in fc_parcel, axis=1)
pd.crosstab(tci.fc, tci.vacant)

vacant,0,1
fc,Unnamed: 1_level_1,Unnamed: 2_level_1
False,79154,3836
True,23940,6202


In [70]:
sum(tci.vacant), sum(tci.fc)

(10038, 30142)

#### Has it been foreclosed before?

So of the 30,142 parcels that have been foreclosed between 1/3/2006 and 11/6/2015, 6,202 are currently vacant, of the 10,038 total residential vacancies as measured by TCI. This means that 3,836 parcels were determined to be vacancy but never have been foreclosed. 

In [71]:
fc_parcel = set(fc[fc.status=='Inactive'].parcel)
tci['fc'] = tci.apply(lambda x: x.parcel in fc_parcel, axis=1)
pd.crosstab(tci.fc, tci.vacant)

vacant,0,1
fc,Unnamed: 1_level_1,Unnamed: 2_level_1
False,79967,4135
True,23127,5903


In [72]:
fc_parcel = set(fc[fc.status=='Active'].parcel)
tci['fc'] = tci.apply(lambda x: x.parcel in fc_parcel, axis=1)
pd.crosstab(tci.fc, tci.vacant)

vacant,0,1
fc,Unnamed: 1_level_1,Unnamed: 2_level_1
False,101689,9455
True,1405,583


In [73]:
tem = pd.merge(fc[['parcel','status']].groupby('parcel').count(), tci[['parcel','vacant']], how='right', left_index=True, right_on='parcel').fillna(0)
tem = tem[['status','vacant']].groupby('status').agg([sum,len])
tem['percent'] = tem.vacant['sum']/tem.vacant['len']
tem.T

Unnamed: 0,status,0.0,1.0,2.0,3.0,4.0,5.0,6.0,7.0
vacant,sum,3836.0,4193.0,1579.0,330.0,81.0,14.0,4.0,1.0
vacant,len,82990.0,22045.0,6255.0,1422.0,326.0,71.0,15.0,8.0
percent,,0.046222,0.190202,0.252438,0.232068,0.248466,0.197183,0.266667,0.125


## Sheriff's Auctions

In [74]:
sa = pd.read_csv(path+'/data/clean_data/sheriff_auction.csv', parse_dates=[2], encoding="ISO-8859-1")

In [75]:
reo = pd.read_csv(path+'/data/extra_data/reo.csv', encoding="utf-8")
reo = reo.groupby('GRANTEE1').last()

In [76]:
purchaser = sa.groupby('scaseno').last().groupby('purchaser').count().sort_values('parcel',ascending=False)[['parcel']]
purchaser = purchaser.rename(columns={'parcel':'purchaser_count'})
sa = pd.merge(sa, purchaser, how='left', left_on='purchaser',right_index=True)
# sa = sa.sort_values('salesdt').groupby('parcel').last()
sa = sa.rename(columns={'salesdt':'date'})
# sa = sa[['date','plaintiff_count']]
# sa['type'] = 'sa'
# sa = sa.reset_index()
sa['purchaser_count'] = sa['purchaser_count'].fillna(0)
sa.columns

Index([         u'parcel',         u'address',            u'date',
                u'saleno',        u'sold_amt',       u'appraisal',
                u'minbid',         u'scaseno',       u'withdrawn',
             u'plaintiff',         u'sdefend',       u'purchaser',
              u'paddress',          u'rparty',        u'raddress',
              u'attorney',        u'descript',        u'location',
            u'defend_att',            u'with',          u'status',
                 u'ssold',             u'SHF',        u'LATITUDE',
             u'LONGITUDE', u'purchaser_count'],
      dtype='object')

In [None]:
# al = pd.read_csv(path+'/data/clean_data/armslength.csv',dtype=str)
# al = al.drop_duplicates()

# months = dict(zip(['JAN','FEB','MAR','APR','MAY','JUN','JUL','AUG','SEP','OCT','NOV','DEC'],range(1,13)))

# def parse_date(x):
#     day = int(x[0:2])
#     month = months[x[2:5]]
#     year = int(x[5:])
#     return dt.datetime(year,month,day)

# al['date'] = al.mdate.apply(parse_date)
# # al = al[al.date < dt.datetime(2015,6,1)]

# al = al.rename(columns={'PROPERTY_NUMBER':'parcel'})
# al = al[['date','parcel']]
# al['type'] = 'al'
# al['plaintiff_count'] = 0

In [78]:
t2 = pd.read_csv(path+'/data/clean_data/transfers.csv', parse_dates=['mdate'], dtype=str)

In [80]:
t = pd.read_csv(path+'/data/clean_data/transfers.csv', parse_dates=['mdate'], dtype=str)

t = t.rename(columns={'PROPERTY_NUMBER':'parcel','mdate':'date'})
t['type'] = 't'
t['REO'] = 0

In [81]:
t = pd.merge(t, reo, how='left',left_on='GRANTEE1', right_index=True)

In [82]:
t.loc[(t.DEED_TYPE.isin(['Sheriffs Deed',
 'Sheriffs Deed Ex'])),'REO'] = 1

In [83]:
temp = tci[tci.parcel.isin(t[(t.REO==1)&(t.date > dt.datetime(2013,1,1))].parcel)]
sum(temp.vacant), len(temp)

(930, 3581)

In [85]:
fc2 = fc[['date','parcel']]
fc2['type'] = 'fc'
fc2['REO'] = 0

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  from ipykernel import kernelapp as app
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  app.launch_new_instance()


In [86]:
sa2 = sa[['date','parcel','sold_amt']]
sa2['REO'] = 0
sa2['type'] = 'sa'

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  from ipykernel import kernelapp as app
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  app.launch_new_instance()


In [None]:
df = fc2.append(sa2.loc[sa2.sold_amt.notnull(),['date','REO','parcel','type']]).append(t)
df = df.sort_values('date')
df = df.loc[df.apply(lambda x: x.date < dates[x.parcel], axis=1)]

In [None]:
last = df.groupby('parcel').last()
sub = last[(last['type'] == 'sa')]#&(last['plaintiff_count']>-1)]

In [None]:
t = tci[tci.parcel.isin(sa[sa.REO].parcel)].vacant
sum(t),len(t)

In [None]:
test = set(last[(last['type'] == 'sa') & (last['REO']==True)].index)
sum(tci[tci.parcel.isin(test)].vacant), len(tci[tci.parcel.isin(test)].vacant)

In [None]:
test = set(sub.index)
sum(tci[tci.parcel.isin(test)].vacant), len(tci[tci.parcel.isin(test)].vacant)

In [100]:
import re

def find_REO(s):
    if isinstance(s,str) or isinstance(s,unicode):
        s = s.lower()
        if re.search("llc", s):
            return True
        if re.search("bank", s):
            return True
        if re.search("mortg", s):
            return True
        if re.search("mort.", s):
            return True
        if re.search("comp", s):
            return True
        if re.search("corp", s):
            return True
        if re.search("fannie", s):
            return True
        if re.search("housing", s):
            return True
        if re.search("sec.", s):
            return True
        if re.search("loan", s):
            return True
        return False
    else:
        return False

In [None]:
sa.groupby('purchaser').count().sort_values('date',ascending=False)

In [106]:
sa['REO'] = sa.purchaser.apply(find_REO)

In [103]:
find_REO(sa.purchaser.iloc[6])

True

In [107]:
s = sa.groupby('scaseno').last()

In [113]:
ts = tci[tci.parcel.isin(s[(s.REO) & (s.date > dt.datetime(2011,1,1))].parcel)]
sum(ts.vacant), len(ts)

(1289, 5716)

In [None]:
spas_sa = tci[tci.parcel.isin(test)][['SPA_NAME','vacant']].groupby('SPA_NAME').agg([sum,len])
spas_sa['percent'] = spas_sa.vacant['sum']*1.0/spas_sa.vacant['len']
print(spas_sa.sort_values('percent'))

In [None]:
for i in spas_sa.index:
    a = sa[sa.parcel.isin(test.intersection(tci[tci.SPA_NAME==i].parcel))]
    spas_sa.loc[spas_sa.index==i,'median_sa'] = a.sort_values('date')['date'].iloc[round(len(a)/2)]
#     print(i,a.sort_values('date')['date'].iloc[round(len(a)/2)])
#     print(np.median(sa[sa.parcel.isin(test.intersection(tci[tci.SPA_NAME==i].parcel))].date))

In [None]:
spas_sa

In [None]:
tci[tci.parcel.isin(set(fc.parcel))][['SPA_NAME','vacant']].groupby('SPA_NAME').agg([sum,len])

In [None]:
len(tci.loc[tci.vacant==1,'parcel'][tci.loc[tci.vacant==1,'parcel'].isin(set(sa.parcel))])

In [None]:
sum(tci['vacant']), len(tci)

In [None]:
sum(tci[tci.parcel.isin(sub)].vacant), len(tci[tci.parcel.isin(sub)].vacant)

In [None]:
last = df.groupby('parcel').last()
sub = set(last[last['type'] == 'sa'].index)

In [None]:
second_last = df.groupby('parcel').nth(-2)
sub2 = set(second_last[second_last['type']=='fc'].index)

In [None]:
len(sub.intersection(sub2)), len(sub), len(sub2)

In [None]:
fc_to_sa = sub.intersection(sub2)

In [None]:
sum(tci[tci.parcel.isin(fc_to_sa)].vacant), len(tci[tci.parcel.isin(fc_to_sa)].vacant)

In [None]:
df[df.parcel=='137-25-057']

In [None]:
a = second_last[second_last.index.isin(sub)]

In [None]:
pitt_vacant = set(second_last[second_last.index.isin(sub)].index)

In [None]:
pv = pd.read_csv(path+'/data/clean_data/postal_vacancy.csv', parse_dates=3, index_col=0)
pv.date = pv.date.apply(lambda x: dt.datetime(int(x[0:4]),int(x[5:7]),int(x[8:10])))
pv = pv.sort_values('date',ascending=False)
pv = pv.loc[pv.apply(lambda x: x.date < dates[x.PARCEL], axis=1)]

In [None]:
pv = pv[pv.vindall=='Y']
pv_copy = pv.copy().groupby('PARCEL').count().reset_index()

pv_copy['pv_count'] = pv_copy.apply(lambda x: \
        len(pv[(pv.PARCEL == x.PARCEL) & (pv.date > dates[x.PARCEL]-pd.DateOffset(years=2))]), axis=1)

In [None]:
pv2 = pv.groupby('PARCEL').last()

In [None]:
tci[tci.parcel.isin(pv2[pv2.vindall=='Y'].index)].groupby('vacant').count()

In [None]:
sns.distplot(df2[df2.vacant==1].date,bins=np.linspace(0,30,30));
sns.distplot(df2[df2.vacant==0].date,bins=np.linspace(0,30,30));

In [None]:
df2 = pd.merge(df.groupby('parcel').last(), tci[['vacant','ppn']], how='left',left_index=True, right_on='ppn')

In [None]:
df2[['type','vacant']].groupby('type').agg([sum,len])

In [None]:
d = df2[df2['type']=='fc']
d['year'] = d.date.apply(lambda x: x.year)

In [None]:
d[['year','vacant']].groupby('year').agg([sum,len])

In [None]:
plt.plot(fc.set_index('date').groupby(['type']).resample('m', 'count'))

In [None]:
t = tci[tci.ppn.isin(fc[fc.date > dt.datetime(2015,3,1)].parcel)].vacant

In [None]:
sum(t),len(t)

In [None]:
fc['year'] = fc['date']