In [101]:
from shapely.geometry import Point, Polygon
import fiona 
import geopandas as gpd
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import statsmodels.api as sm

gpd.io.file.fiona.drvsupport.supported_drivers['KML'] = 'rw'
pd.set_option('display.max_colwidth', -1)
pd.set_option('display.max_columns', None)

%run ../python_files/feature_selection

In [102]:
# import data
puds = pd.read_csv('../data/final_datasets/master_puds_tracts.csv')

In [103]:
# feature engineering
puds = create_demo_col(puds)
minipuds = count_puds(puds)

In [153]:
# set up dependent var
outcome = 'eviction-rate'

In [154]:
# set up single linear regression
x_cols = minipuds['pud_count']

X = minipuds['pud_count'].values
y = minipuds[outcome]

# # fit model01
X = sm.add_constant(X)
model = sm.OLS(y, X, hasconst=True )
result = model.fit()
labels = ['intercept'] + ['pud_count']
result.summary(xname=labels)

0,1,2,3
Dep. Variable:,eviction-rate,R-squared:,0.003
Model:,OLS,Adj. R-squared:,-0.003
Method:,Least Squares,F-statistic:,0.5687
Date:,"Thu, 28 Nov 2019",Prob (F-statistic):,0.452
Time:,21:40:38,Log-Likelihood:,-426.08
No. Observations:,173,AIC:,856.2
Df Residuals:,171,BIC:,862.5
Df Model:,1,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
intercept,2.9364,0.253,11.627,0.000,2.438,3.435
pud_count,-0.0596,0.079,-0.754,0.452,-0.216,0.096

0,1,2,3
Omnibus:,36.424,Durbin-Watson:,0.897
Prob(Omnibus):,0.0,Jarque-Bera (JB):,51.618
Skew:,1.253,Prob(JB):,6.19e-12
Kurtosis:,3.937,Cond. No.,3.82


In [155]:
# set up multiple linear regression
x_cols = ['pct-non-white','poverty-rate', 'pct-renter-occupied','pud_count']

minitest = minipuds[x_cols]

X = minitest.values
y = minipuds[outcome]

# fit model02
X = sm.add_constant(X)
model = sm.OLS(y, X, hasconst=True )
result = model.fit()
labels = ['intercept'] + x_cols
result.summary(xname=labels)

0,1,2,3
Dep. Variable:,eviction-rate,R-squared:,0.548
Model:,OLS,Adj. R-squared:,0.537
Method:,Least Squares,F-statistic:,50.93
Date:,"Thu, 28 Nov 2019",Prob (F-statistic):,5.01e-28
Time:,21:41:02,Log-Likelihood:,-357.67
No. Observations:,173,AIC:,725.3
Df Residuals:,168,BIC:,741.1
Df Model:,4,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
intercept,-1.9855,0.660,-3.009,0.003,-3.288,-0.683
pct-non-white,0.0553,0.007,7.500,0.000,0.041,0.070
poverty-rate,0.0382,0.019,2.056,0.041,0.002,0.075
pct-renter-occupied,0.0106,0.010,1.092,0.276,-0.009,0.030
pud_count,-0.0136,0.057,-0.236,0.814,-0.127,0.100

0,1,2,3
Omnibus:,63.725,Durbin-Watson:,1.799
Prob(Omnibus):,0.0,Jarque-Bera (JB):,246.414
Skew:,1.378,Prob(JB):,3.0999999999999995e-54
Kurtosis:,8.157,Cond. No.,420.0


In [150]:
# set up single linear regression
x_cols = 'pct-non-white'

X = minipuds[x_cols].values
y = minipuds[outcome]

# # fit model03
X = sm.add_constant(X)
model = sm.OLS(y, X, hasconst=True )
result = model.fit()
labels = ['intercept'] + [x_cols]
result.summary(xname=labels)

0,1,2,3
Dep. Variable:,eviction-rate,R-squared:,0.511
Model:,OLS,Adj. R-squared:,0.508
Method:,Least Squares,F-statistic:,178.9
Date:,"Thu, 28 Nov 2019",Prob (F-statistic):,2.19e-28
Time:,21:33:58,Log-Likelihood:,-364.43
No. Observations:,173,AIC:,732.9
Df Residuals:,171,BIC:,739.2
Df Model:,1,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
intercept,-1.7911,0.378,-4.737,0.000,-2.537,-1.045
pct-non-white,0.0693,0.005,13.376,0.000,0.059,0.080

0,1,2,3
Omnibus:,33.245,Durbin-Watson:,1.64
Prob(Omnibus):,0.0,Jarque-Bera (JB):,66.18
Skew:,0.892,Prob(JB):,4.26e-15
Kurtosis:,5.449,Cond. No.,181.0


In [156]:
# set up single linear regression
x_cols = 'poverty-rate'

X = minipuds[x_cols].values
y = minipuds[outcome]

# # fit model04
X = sm.add_constant(X)
model = sm.OLS(y, X, hasconst=True )
result = model.fit()
labels = ['intercept'] + [x_cols]
result.summary(xname=labels)

0,1,2,3
Dep. Variable:,eviction-rate,R-squared:,0.381
Model:,OLS,Adj. R-squared:,0.377
Method:,Least Squares,F-statistic:,105.3
Date:,"Thu, 28 Nov 2019",Prob (F-statistic):,1.51e-19
Time:,21:41:11,Log-Likelihood:,-384.87
No. Observations:,173,AIC:,773.7
Df Residuals:,171,BIC:,780.1
Df Model:,1,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
intercept,1.0474,0.245,4.283,0.000,0.565,1.530
poverty-rate,0.1256,0.012,10.260,0.000,0.101,0.150

0,1,2,3
Omnibus:,82.147,Durbin-Watson:,1.578
Prob(Omnibus):,0.0,Jarque-Bera (JB):,395.424
Skew:,1.753,Prob(JB):,1.3600000000000001e-86
Kurtosis:,9.525,Cond. No.,28.6


In [157]:
# set up single linear regression
x_cols = 'pct-renter-occupied'

X = minipuds[x_cols].values
y = minipuds[outcome]

# # fit model05
X = sm.add_constant(X)
model = sm.OLS(y, X, hasconst=True )
result = model.fit()
labels = ['intercept'] + [x_cols]
result.summary(xname=labels)

0,1,2,3
Dep. Variable:,eviction-rate,R-squared:,0.092
Model:,OLS,Adj. R-squared:,0.087
Method:,Least Squares,F-statistic:,17.41
Date:,"Thu, 28 Nov 2019",Prob (F-statistic):,4.78e-05
Time:,21:41:16,Log-Likelihood:,-417.98
No. Observations:,173,AIC:,840.0
Df Residuals:,171,BIC:,846.3
Df Model:,1,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
intercept,0.4370,0.612,0.714,0.476,-0.771,1.645
pct-renter-occupied,0.0421,0.010,4.173,0.000,0.022,0.062

0,1,2,3
Omnibus:,50.426,Durbin-Watson:,1.019
Prob(Omnibus):,0.0,Jarque-Bera (JB):,99.462
Skew:,1.367,Prob(JB):,2.52e-22
Kurtosis:,5.514,Cond. No.,179.0


In [160]:
# set up co-linearity check

y_vif = minipuds[outcome]
## remove Passenger from predictor list
## prepare data for the linear model

X_vif = minipuds[x_cols]
## add intercept term
X_vif = sm.add_constant(X_vif.values)
## fit model
model_vif = sm.OLS(y_vif, X_vif, hasconst=True)
result_vif = model_vif.fit()
## check the r2-score
result_vif.summary()
## calculate vif score directly from r2-score
passenger_vif = 1/(1 - result_vif.rsquared)
passenger_vif

1.1018357699538996

In [116]:
## standard scaling 
# for col in x_cols:
#     ## Here we don't have to do this but still it is a good practice
#     if (type(minipuds[col]) == int) | (type(minipuds[col]) == float):
#         minipuds[col] = (minipuds[col] - minipuds[col].mean())/minipuds[col].std()

In [None]:
test['ward'] = [int(el[-1]) for el in minipuds.ward]