## Experimenting with Baltimore 911 Call Data

In [1]:
import numpy as np
import pandas as pd

import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline
sns.set_style("darkgrid")

from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import KFold

In [2]:
# Input file for the Baltimore 911 call data.
# The location may differ on your machine: adjust `f_loc` accordingly.
fname = '911_Police_Calls_for_Service.csv'

# Directory prefix; keep the trailing slash, the path is built as f_loc + fname.
f_loc = 'dat/'

In [3]:
# Read the whole CSV in one pass (low_memory=False) so each column gets a single inferred dtype.
dat = pd.read_csv(f"{f_loc}{fname}", low_memory=False)

In [4]:
# Optional: uncomment the next line to keep only the first 1000 rows,
# which reduces overhead while messing with the data.
#dat = dat.head(1000)

# Preview the first rows of the loaded frame.
dat.head()

Unnamed: 0,RecordID,CallNumber,CallDateTime,Priority,District,Description,IncidentLocation,ZipCode,Neighborhood,PoliceDistrict,PolicePost,CouncilDistrict,SheriffDistricts,Community_Statistical_Areas,Census_Tracts,VRIZones,Location,2010 Census Neighborhoods,2010 Census Wards Precincts,Zip Codes
0,2,P190531375,02/22/2019 10:08:00 AM,Non-Emergency,WD,Hot Spot Check,1600 BLK N SMALLWOOD ST,21216.0,Coppin Heights/Ash-Co-East,Western,723.0,7.0,D9,Greater Rosemont,Census Tract 1503,Western,"1600 BLK N SMALLWOOD ST\nBALTIMORE, MD",,,
1,6145754,P191011722,04/11/2019 10:45:00 AM,Non-Emergency,SW,Business Check,3400 CLIFTON AV,21216.0,Mount Holly,Southwestern,812.0,7.0,D9,Greater Rosemont,Census Tract 1506,,"3400 CLIFTON AV\nBALTIMORE, MD",,,
2,6145643,P191011721,04/11/2019 10:45:00 AM,Non-Emergency,SD,Foot Patrol,HOLLINS MARKET,21223.0,Hollins Market,Southern,931.0,9.0,D8,Poppleton/The Terraces/Hollins Market,Census Tract 1803,,"HOLLINS MARKET BALTIMORE, MD",,,
3,887522,P181132068,04/23/2018 05:06:00 PM,Non-Emergency,SD,Hot Spot Check,FEDERAL HILL PARK,,,,,,,,,,"FEDERAL HILL PARK BALTIMORE, MD\n(38.692018, -...",,,9656.0
4,6144900,P191010884,04/11/2019 07:22:00 AM,Non-Emergency,SD,ANIMAL DISTURBAN,2000 CHRISTIAN ST,21223.0,Carrollton Ridge,Southern,934.0,9.0,D8,Southwest Baltimore,Census Tract 2003,Tri-District,"2000 CHRISTIAN ST\nBALTIMORE, MD",,,


In [5]:
# Print a summary of the frame: number of entries and each column's name and dtype.
dat.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 8277479 entries, 0 to 8277478
Data columns (total 20 columns):
 #   Column                       Dtype  
---  ------                       -----  
 0   RecordID                     int64  
 1   CallNumber                   object 
 2   CallDateTime                 object 
 3   Priority                     object 
 4   District                     object 
 5   Description                  object 
 6   IncidentLocation             object 
 7   ZipCode                      float64
 8   Neighborhood                 object 
 9   PoliceDistrict               object 
 10  PolicePost                   float64
 11  CouncilDistrict              float64
 12  SheriffDistricts             object 
 13  Community_Statistical_Areas  object 
 14  Census_Tracts                object 
 15  VRIZones                     object 
 16  Location                     object 
 17  2010 Census Neighborhoods    float64
 18  2010 Census Wards Precincts  float64
 19  

In [6]:
# Convert priority into ordered categorical data and add its integer code.
# Values that are missing or not in the category list become NaN in
# `Priority` and are coded as -1 in `Priority_Cat`.
from pandas.api.types import CategoricalDtype

priority_cats = CategoricalDtype(categories=['Non-Emergency', 'Low', 'Medium',  'High'], ordered=True)
dat['Priority'] = dat['Priority'].astype(priority_cats)

# Keep the cell idempotent: inserting with allow_duplicates=True would add a
# second "Priority_Cat" column every time the cell is re-run, so overwrite the
# column when it already exists and otherwise place it right after "Priority".
if "Priority_Cat" in dat.columns:
    dat["Priority_Cat"] = dat['Priority'].cat.codes
else:
    dat.insert(dat.columns.get_loc("Priority") + 1, "Priority_Cat", dat['Priority'].cat.codes)

dat.head()

Unnamed: 0,RecordID,CallNumber,CallDateTime,Priority,Priority_Cat,District,Description,IncidentLocation,ZipCode,Neighborhood,...,PolicePost,CouncilDistrict,SheriffDistricts,Community_Statistical_Areas,Census_Tracts,VRIZones,Location,2010 Census Neighborhoods,2010 Census Wards Precincts,Zip Codes
0,2,P190531375,02/22/2019 10:08:00 AM,Non-Emergency,0,WD,Hot Spot Check,1600 BLK N SMALLWOOD ST,21216.0,Coppin Heights/Ash-Co-East,...,723.0,7.0,D9,Greater Rosemont,Census Tract 1503,Western,"1600 BLK N SMALLWOOD ST\nBALTIMORE, MD",,,
1,6145754,P191011722,04/11/2019 10:45:00 AM,Non-Emergency,0,SW,Business Check,3400 CLIFTON AV,21216.0,Mount Holly,...,812.0,7.0,D9,Greater Rosemont,Census Tract 1506,,"3400 CLIFTON AV\nBALTIMORE, MD",,,
2,6145643,P191011721,04/11/2019 10:45:00 AM,Non-Emergency,0,SD,Foot Patrol,HOLLINS MARKET,21223.0,Hollins Market,...,931.0,9.0,D8,Poppleton/The Terraces/Hollins Market,Census Tract 1803,,"HOLLINS MARKET BALTIMORE, MD",,,
3,887522,P181132068,04/23/2018 05:06:00 PM,Non-Emergency,0,SD,Hot Spot Check,FEDERAL HILL PARK,,,...,,,,,,,"FEDERAL HILL PARK BALTIMORE, MD\n(38.692018, -...",,,9656.0
4,6144900,P191010884,04/11/2019 07:22:00 AM,Non-Emergency,0,SD,ANIMAL DISTURBAN,2000 CHRISTIAN ST,21223.0,Carrollton Ridge,...,934.0,9.0,D8,Southwest Baltimore,Census Tract 2003,Tri-District,"2000 CHRISTIAN ST\nBALTIMORE, MD",,,


In [7]:
# Add a 24 Hour time column
# "CallDateTime" looks like "02/22/2019 10:08:00 AM"; everything from
# character 11 onwards is the 12-hour clock time.
times = dat['CallDateTime'].str[11:]
times = pd.to_datetime(times).dt.strftime("%H:%M:%S")

# Keep the cell idempotent: inserting with allow_duplicates=True would add a
# second "Time" column on re-run (which breaks the later `dat['Time'].str` access),
# so overwrite the column when it already exists and otherwise place it right
# after "CallDateTime".
if "Time" in dat.columns:
    dat["Time"] = times
else:
    dat.insert(dat.columns.get_loc("CallDateTime") + 1, "Time", times)

In [8]:
# Add categories for times of day
hours = dat['Time'].str[:2] # hour only ("HH" of "HH:MM:SS")
hours = pd.to_numeric(hours) # as a number

# Bin edges in hours. The bins are left-closed (right=False), i.e.
# [0, 6), [6, 12), [12, 18) and [18, 24). With pandas' default right-closed
# bins, hour 6 was labelled 'Early Morning', noon (12) 'Morning' and
# 18 'Afternoon'.
b = [0,6,12,18,24]
l = ['Early Morning', 'Morning', 'Afternoon',  'Evening']
timeofday = pd.cut(hours, bins=b, labels=l, right=False)

dat['TimeOfDay'] = timeofday
# Integer code of each label in the order listed above; -1 marks rows whose hour is missing.
dat['TimeOf_Cat'] = dat['TimeOfDay'].cat.codes

dat.head()

Unnamed: 0,RecordID,CallNumber,CallDateTime,Time,Priority,Priority_Cat,District,Description,IncidentLocation,ZipCode,...,SheriffDistricts,Community_Statistical_Areas,Census_Tracts,VRIZones,Location,2010 Census Neighborhoods,2010 Census Wards Precincts,Zip Codes,TimeOfDay,TimeOf_Cat
0,2,P190531375,02/22/2019 10:08:00 AM,10:08:00,Non-Emergency,0,WD,Hot Spot Check,1600 BLK N SMALLWOOD ST,21216.0,...,D9,Greater Rosemont,Census Tract 1503,Western,"1600 BLK N SMALLWOOD ST\nBALTIMORE, MD",,,,Morning,1
1,6145754,P191011722,04/11/2019 10:45:00 AM,10:45:00,Non-Emergency,0,SW,Business Check,3400 CLIFTON AV,21216.0,...,D9,Greater Rosemont,Census Tract 1506,,"3400 CLIFTON AV\nBALTIMORE, MD",,,,Morning,1
2,6145643,P191011721,04/11/2019 10:45:00 AM,10:45:00,Non-Emergency,0,SD,Foot Patrol,HOLLINS MARKET,21223.0,...,D8,Poppleton/The Terraces/Hollins Market,Census Tract 1803,,"HOLLINS MARKET BALTIMORE, MD",,,,Morning,1
3,887522,P181132068,04/23/2018 05:06:00 PM,17:06:00,Non-Emergency,0,SD,Hot Spot Check,FEDERAL HILL PARK,,...,,,,,"FEDERAL HILL PARK BALTIMORE, MD\n(38.692018, -...",,,9656.0,Afternoon,2
4,6144900,P191010884,04/11/2019 07:22:00 AM,07:22:00,Non-Emergency,0,SD,ANIMAL DISTURBAN,2000 CHRISTIAN ST,21223.0,...,D8,Southwest Baltimore,Census Tract 2003,Tri-District,"2000 CHRISTIAN ST\nBALTIMORE, MD",,,,Morning,1


In [9]:
# Build the data frames for the regression:
#   X - single-column frame holding the neighborhood name
#   y - single-column frame holding the integer priority code
X = pd.DataFrame(dat['Neighborhood'])
y = pd.DataFrame(dat['Priority_Cat'])

In [10]:
# One-hot encode the neighborhoods: one indicator column per neighborhood name,
# then swap the raw "Neighborhood" text column for those indicator columns.
onehotX = pd.get_dummies(X['Neighborhood'], prefix="N_")
X = X.drop(columns="Neighborhood").join(onehotX)

In [11]:
# Append the time-of-day code as one more feature column, aligned on the row index.
X = X.join(dat['TimeOf_Cat'], how="left")

In [12]:
# Preview the feature matrix: the neighborhood indicator columns plus TimeOf_Cat.
X.head()

Unnamed: 0,N__Abell,N__Allendale,N__Arcadia,N__Arlington,N__Armistead Gardens,N__Ashburton,N__Baltimore Highlands,N__Barclay,N__Barre Circle,N__Bayview,...,N__Woodberry,N__Woodbourne Heights,N__Woodbourne-McCabe,N__Woodmere,N__Wrenlane,N__Wyman Park,N__Wyndhurst,N__Yale Heights,N__York-Homeland,TimeOf_Cat
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,2
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1


In [None]:
# Go Man Go!
# 3-fold cross-validation of a linear regression of the priority code on the
# features in X. Each fold's score is what LinearRegression.score returns
# for the held-out rows.

kfold = KFold(n_splits=3, shuffle=True, random_state=42)
model = LinearRegression()
scores = []
for i, (train, test) in enumerate(kfold.split(X, y)):
    # `train` / `test` are positional row indices, hence .iloc
    model.fit(X.iloc[train], y.iloc[train])
    fold_score = model.score(X.iloc[test], y.iloc[test])
    scores.append(fold_score)
print(scores)