In [3]:
import pysal as pysal
import numpy as np

In [8]:
# Locations of the grid shapefile geometry (.shp) and its attribute table (.dbf).
shp_path, dbf_path = (
    "../../data/clean/gridDataCurrent.shp",
    "../../data/clean/gridDataCurrent.dbf",
)

In [9]:
# Open the grid's DBF attribute table through pysal. The handle `f` is kept
# open on purpose: later cells read individual columns from it via f.by_col(...).
f = pysal.open(dbf_path)
# List the attribute column names available in the table.
print(f.header)

['OBJECTID_1', 'OBJECTID', 'PageName', 'PageNumber', 'area_km', 'pop16', 'nhblk16', 'blk16', 'pov16', 'pov_det16', 'hu16', 'oohu16', 'pop70', 'black70', 'pov_det70', 'pov70', 'p_nhblk16', 'p_blk16', 'p_pov16', 'p_oohu16', 'p_black70', 'p_pov70', 'p_popChg', 'p_blkChg', 'p_povChg', 'pop80', 'nhblk80', 'dst_bk80', 'barriers', 'v_crimes', 'vc_rate', 'OBJECTID_2', 'p_vac16', 'p_vac80', 'p_vac70', 'p_vacChg80', 'p_vacChg70', 'Shape_Area', 'Shape_Leng', 'Shape_Ar_1']


In [11]:
# Build Rook-contiguity spatial weights from the grid shapefile.
w = pysal.weights.Rook.from_shapefile(shp_path)
# Row-standardize explicitly so every later use of `w` (spatial lag models and
# spatial diagnostics) sees the same weights regardless of which cells ran
# before it. Without this line the transformation is only changed as a side
# effect of pysal.Moran, which applies transformation='r' to the W object it
# is given by default.
w.transform = 'r'
print(w.n) # view number of observations
print(w.histogram) # view histogram of neighbors

205
[(2, 33), (3, 22), (4, 150)]


In [13]:
# Dependent variables read from the DBF table and shaped as (n, 1) column
# vectors: St. Louis violent crime counts (y1) and barrier counts (y2).
y1 = np.array(f.by_col('v_crimes')).reshape(-1, 1)
y2 = np.array(f.by_col('barriers')).reshape(-1, 1)
print(y1.shape)
print(y2.shape)

(205, 1)
(205, 1)


In [14]:
# Global Moran's I for each dependent variable, using the Rook weights `w`.
mi1 = pysal.Moran(y1, w, two_tailed=False)
mi2 = pysal.Moran(y2, w, two_tailed=False)

# Print the statistic and its p_norm value for violent crime, then barriers.
for heading, stat in (
    ("Global Moran's I For Violent Crime Counts ", mi1),
    ("Global Moran's I For Barrier Counts ", mi2),
):
    print(heading + "%.5f" % stat.I)
    print("P-Value " + "%.5f" % stat.p_norm)

Global Moran's I For Violent Crime Counts 0.58347
P-Value 0.00000
Global Moran's I For Barrier Counts 0.45146
P-Value 0.00000


## Regression

In [15]:
# Explanatory-variable sets: one list of DBF column names per specification.
x1_names = ['p_nhblk16', 'p_pov16', 'p_popChg', 'p_vac16', 'p_vacChg70', 'barriers']
x2_names = ['p_nhblk16', 'p_pov16', 'p_popChg', 'p_vac16', 'pop16', 'barriers']
x3_names = ['p_nhblk16', 'p_pov16', 'p_popChg', 'p_vac16', 'pop16']
x4_names = ['barriers']

# Turn each name list into an (n_observations, n_variables) array by reading
# the columns from the DBF table and transposing.
x1, x2, x3, x4 = (
    np.array([f.by_col(column) for column in columns]).T
    for columns in (x1_names, x2_names, x3_names, x4_names)
)

print(x1.shape)
print(x2.shape)
print(x3.shape)
print(x4.shape)

(205, 6)
(205, 6)
(205, 5)
(205, 1)


In [16]:
# Labels passed as name_y to the regression calls (y1 -> crimes, y2 -> barriers).
y1_name, y2_name = 'v_crimes', 'barriers'

In [18]:
# Crime Count Models - OLS and GMM Spatial Lag

# Keyword arguments common to every crime-count model, plus the extra
# diagnostics that are only passed to the OLS calls.
vc_common = dict(w=w, name_y=y1_name, spat_diag=True, robust='white')
vc_ols_extra = dict(vc_common, moran=True, white_test=True)

# OLS with barriers as the only regressor.
ols_vc_count_bar = pysal.spreg.OLS(y1, x4, name_x=x4_names, **vc_ols_extra)
print(ols_vc_count_bar.summary)

# OLS with the x2 regressor set.
ols_vc_count = pysal.spreg.OLS(y1, x2, name_x=x2_names, **vc_ols_extra)
print(ols_vc_count.summary)

# GM_Lag spatial lag model on the same x2 regressor set.
gmm_lag_vc_count = pysal.spreg.twosls_sp.GM_Lag(
    y1, x2, name_x=x2_names, w_lags=1, **vc_common
)
print(gmm_lag_vc_count.summary)

REGRESSION
----------
SUMMARY OF OUTPUT: ORDINARY LEAST SQUARES
-----------------------------------------
Data set            :     unknown
Weights matrix      :     unknown
Dependent Variable  :    v_crimes                Number of Observations:         205
Mean dependent var  :     28.0732                Number of Variables   :           2
S.D. dependent var  :     33.3659                Degrees of Freedom    :         203
R-squared           :      0.1028
Adjusted R-squared  :      0.0984
Sum squared residual:  203769.198                F-statistic           :     23.2526
Sigma-square        :    1003.789                Prob(F-statistic)     :   2.783e-06
S.E. of regression  :      31.683                Log likelihood        :    -998.310
Sigma-square ML     :     993.996                Akaike info criterion :    2000.620
S.E of regression ML:     31.5277                Schwarz criterion     :    2007.266

White Standard Errors
-------------------------------------------------------

In [20]:
## Barrier Location Models - OLS and GMM Spatial Lag

# Keyword arguments shared by both barrier models (x3 regressors, y2 outcome).
barrier_common = dict(
    w=w, name_y=y2_name, name_x=x3_names, spat_diag=True, robust='white'
)

# OLS, with the Moran and White diagnostics requested in addition.
ols_barriers = pysal.spreg.OLS(
    y2, x3, moran=True, white_test=True, **barrier_common
)
print(ols_barriers.summary)

# GM_Lag spatial lag model on the same variables.
gmm_lag_barriers = pysal.spreg.twosls_sp.GM_Lag(
    y2, x3, w_lags=1, **barrier_common
)
print(gmm_lag_barriers.summary)

REGRESSION
----------
SUMMARY OF OUTPUT: ORDINARY LEAST SQUARES
-----------------------------------------
Data set            :     unknown
Weights matrix      :     unknown
Dependent Variable  :    barriers                Number of Observations:         205
Mean dependent var  :      1.3659                Number of Variables   :           6
S.D. dependent var  :      2.7756                Degrees of Freedom    :         199
R-squared           :      0.1899
Adjusted R-squared  :      0.1696
Sum squared residual:    1273.084                F-statistic           :      9.3312
Sigma-square        :       6.397                Prob(F-statistic)     :   5.306e-08
S.E. of regression  :       2.529                Log likelihood        :    -478.067
Sigma-square ML     :       6.210                Akaike info criterion :     968.133
S.E of regression ML:      2.4920                Schwarz criterion     :     988.071

White Standard Errors
-------------------------------------------------------