In [25]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from neighborhoods import nearest_neighborhood
from datetime import date
from dateutil.relativedelta import relativedelta



In [26]:
df = pd.read_csv('eviction_notices.csv', parse_dates=['File Date'], encoding = "ISO-8859-1")

  interactivity=interactivity, compiler=compiler, result=result)


In [27]:
len(df)

37736

In [28]:
df.head()

Unnamed: 0,Eviction ID,Address,City,State,Eviction Notice Source Zipcode,File Date,Non Payment,Breach,Nuisance,Illegal Use,...,Roommate Same Unit,Other Cause,Late Payments,Lead Remediation,Development,Good Samaritan Ends,Constraints Date,Supervisor District,Neighborhoods - Analysis Boundaries,Location
0,M161957,300 Block Of Park Street,San Francisco,CA,94110,2016-06-06,False,False,False,False,...,False,False,False,False,False,False,,9.0,Bernal Heights,"(37.7364374645373, -122.417989910582)"
1,M162256,200 Block Of Seneca Avenue,San Francisco,CA,94112,2016-06-29,False,False,False,False,...,False,False,False,False,False,False,8/25/2021,11.0,Outer Mission,"(37.7205429316262, -122.443264852669)"
2,M162135,200 Block Of Dolores Street,San Francisco,CA,94103,2016-06-28,False,True,False,False,...,False,False,False,False,False,False,,8.0,Castro/Upper Market,"(37.7652067507312, -122.426591617441)"
3,M161901,1200 Block Of 9th Avenue,San Francisco,CA,94122,2016-06-02,False,False,False,False,...,False,False,False,False,False,False,,5.0,Inner Sunset,"(37.764977785911, -122.4664456379)"
4,M162428,1400 Block Of Larkin Street,San Francisco,CA,94109,2016-02-28,False,False,False,False,...,False,False,False,False,False,False,,3.0,Nob Hill,"(37.7912373166567, -122.419197310472)"


In [29]:
df = df.dropna(subset=['Location'])
len(df)

36289

In [42]:
mindate = date(2007, 8, 15) # This is the minimum date for which we have home values from Zillow.
lookbackperiod_mos = 12 # This is the number of lookback months to find change in home values prior to an eviction
df = df[df['File Date'] > (mindate + relativedelta(months=+lookbackperiod_mos)) ]
len(df)

14510

In [None]:
# Function appends data from Zillow API and data file using neighborhood()  
def addPricingData(row, lookback):
    latlng = row['Location']
    lat, lng = latlng.split(',')
    lat = float(lat.replace('(','')) # Reformat lat, lng to separate arguments.
    lng = float(lng.replace(')',''))
    evictdate = row['File Date'].date() # Convert eviction date and create date keys for eviciton date and starting valuation date. 
    startdate = evictdate + relativedelta(months=-lookback)
    evictdate_key = date.strftime(evictdate,"%b%Y")
    startdate_key = date.strftime(startdate, "%b%Y")
    neighborhood = nearest_neighborhood(lat, lng) # Create neighborhood object for lat, lng.
    name = neighborhood.name
    meanvalue = neighborhood.meanvalue
    s = neighborhood.prices[startdate_key]
    c = neighborhood.prices[evictdate_key]
    valueincrease = (c - s)/s
    return pd.Series({'Eviction_id':row['Eviction ID'],'Zillow_Neighborhood':name, 'Z_Mean_Value':meanvalue, 'Z_Value_Increase':valueincrease}) 
    return name

# Merge the results of our calls to neighborhood() back to our dataframe.
df = df.merge(df.apply(lambda row: addPricingData(row, lookbackperiod_mos), axis = 1), left_on='Eviction ID', right_on='Eviction_id')
df


In [46]:
df.dtypes

Eviction ID                                    object
Address                                        object
City                                           object
State                                          object
Eviction Notice Source Zipcode                 object
File Date                              datetime64[ns]
Non Payment                                      bool
Breach                                           bool
Nuisance                                         bool
Illegal Use                                      bool
Failure to Sign Renewal                          bool
Access Denial                                    bool
Unapproved Subtenant                             bool
Owner Move In                                    bool
Demolition                                       bool
Capital Improvement                              bool
Substantial Rehab                                bool
Ellis Act WithDrawal                             bool
Condo Conversion            

In [49]:
aggregation = {
    'Owner Move In' : {
        'total_move_in' : 'sum'
    },
    'Condo Conversion' : {
         'total_condo' : 'sum'
    },
    'Ellis Act WithDrawal' : {
         'total_ellis' : 'sum'
    },
    'Eviction ID' : {
        'total_evictions' : 'count'
    }
}
grouped = df.groupby(['Zillow_Neighborhood']).agg(aggregation).reset_index()

grouped

Unnamed: 0_level_0,Zillow_Neighborhood,Owner Move In,Condo Conversion,Ellis Act WithDrawal,Eviction ID
Unnamed: 0_level_1,Unnamed: 1_level_1,total_move_in,total_condo,total_ellis,total_evictions
0,Alamo Square,26.0,2.0,17.0,195
1,Anza Vista,38.0,2.0,6.0,181
2,Balboa Terrace,0.0,0.0,0.0,6
3,Bayview,37.0,0.0,6.0,319
4,Bernal Heights,109.0,7.0,68.0,490
5,Buena Vista Park,15.0,0.0,16.0,120
6,Central Richmond,80.0,2.0,33.0,305
7,Central Sunset,82.0,0.0,24.0,229
8,Central Waterfront - Dogpatch,4.0,1.0,0.0,16
9,Chinatown,5.0,0.0,5.0,189


In [50]:
grouped['pct_move_in'] = grouped[('Owner Move In','total_move_in')]/grouped[('Eviction ID', 'total_evictions')]
grouped['pct_condo'] = grouped[('Condo Conversion','total_condo')]/grouped[('Eviction ID', 'total_evictions')]
grouped['pct_ellis'] = grouped[('Ellis Act WithDrawal','total_ellis')]/grouped[('Eviction ID', 'total_evictions')]
grouped['pct_suspect'] = (grouped[('Owner Move In','total_move_in')] + grouped[('Ellis Act WithDrawal','total_ellis')] + grouped[('Condo Conversion','total_condo')] 
                         )/grouped[('Eviction ID', 'total_evictions')]

grouped


Unnamed: 0_level_0,Zillow_Neighborhood,Owner Move In,Condo Conversion,Ellis Act WithDrawal,Eviction ID,pct_move_in,pct_condo,pct_ellis,pct_suspect
Unnamed: 0_level_1,Unnamed: 1_level_1,total_move_in,total_condo,total_ellis,total_evictions,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
0,Alamo Square,26.0,2.0,17.0,195,0.133333,0.010256,0.087179,0.230769
1,Anza Vista,38.0,2.0,6.0,181,0.209945,0.011050,0.033149,0.254144
2,Balboa Terrace,0.0,0.0,0.0,6,0.000000,0.000000,0.000000,0.000000
3,Bayview,37.0,0.0,6.0,319,0.115987,0.000000,0.018809,0.134796
4,Bernal Heights,109.0,7.0,68.0,490,0.222449,0.014286,0.138776,0.375510
5,Buena Vista Park,15.0,0.0,16.0,120,0.125000,0.000000,0.133333,0.258333
6,Central Richmond,80.0,2.0,33.0,305,0.262295,0.006557,0.108197,0.377049
7,Central Sunset,82.0,0.0,24.0,229,0.358079,0.000000,0.104803,0.462882
8,Central Waterfront - Dogpatch,4.0,1.0,0.0,16,0.250000,0.062500,0.000000,0.312500
9,Chinatown,5.0,0.0,5.0,189,0.026455,0.000000,0.026455,0.052910


In [48]:
ax = sns.barplot(x='Neighborhoods - Analysis Boundaries', y='pct_suspect', data=grouped)
ax.set_xticklabels(labels=grouped['Neighborhoods - Analysis Boundaries'], rotation=90)
plt.show()

ValueError: Could not interpret input 'Neighborhoods - Analysis Boundaries'

In [None]:
ax = sns.barplot(x='Neighborhoods - Analysis Boundaries', y='pct_ellis', data=grouped)
ax.set_xticklabels(labels=grouped['Neighborhoods - Analysis Boundaries'], rotation=90)
plt.show()

In [None]:
ax = sns.barplot(x='Neighborhoods - Analysis Boundaries', y='pct_move_in', data=grouped)
ax.set_xticklabels(labels=grouped['Neighborhoods - Analysis Boundaries'], rotation=90)
plt.show()

In [45]:
byhood = pd.DataFrame(df.groupby(['Zillow_Neighborhood']).agg({'Eviction ID' : ['count']})).reset_index()
byhood

Unnamed: 0_level_0,Zillow_Neighborhood,Eviction ID
Unnamed: 0_level_1,Unnamed: 1_level_1,count
0,Alamo Square,195
1,Anza Vista,181
2,Balboa Terrace,6
3,Bayview,319
4,Bernal Heights,490
5,Buena Vista Park,120
6,Central Richmond,305
7,Central Sunset,229
8,Central Waterfront - Dogpatch,16
9,Chinatown,189
