In [None]:
# Alex Tresselt
# CS 7180
# 12/13/18
# Response Time Prediction Model

In [1]:
%pylab inline
import pandas as pd
import numpy as np

from sklearn.externals import joblib
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.tree import DecisionTreeClassifier

Populating the interactive namespace from numpy and matplotlib


In [2]:
df = pd.read_csv('Call_Data.csv')

In [3]:
# Remove missing data
df.dropna(inplace = True)

In [4]:
# Drop all calls that were not 911 calls.
df = df.drop(df[df['Call Type'] != '911'].index)

In [5]:
# Format time columns
df['Arrived Time'] = pd.to_datetime(df['Arrived Time'], format="%b %d %Y %I:%M:%S:%f%p")
df['Original Time Queued'] = pd.to_datetime(df['Original Time Queued'], format="%m/%d/%Y %I:%M:%S %p")

In [6]:
# Trim dataset to events in 2017. Response time varies by year, so only selecting one year for better results.
df = df.drop(df[df['Original Time Queued'] <= '2017-01-01 00:00:00'].index)
df = df.drop(df[df['Original Time Queued'] >= '2018-01-01 00:00:00'].index)

In [7]:
# Create more time columns 
df['Response Time'] = df['Arrived Time'] - df['Original Time Queued']
df['Rmins'] = df['Response Time'].dt.total_seconds() / 60
df['Rmins'] = round(df['Rmins'])
df['Response'] = round(df['Rmins'], -1)

In [8]:
# Drop all calls with response times of more than 1 day
df = df.drop(df[df['Rmins'] >= 1440].index)
df = df.drop(df[df['Rmins'] <= 0].index)

In [9]:
# Trim all calls with a priority greater than 3
df = df.drop(df[df['Priority'] >= 4].index)

In [10]:
# Drop unneeded columns
df = df.drop(columns=['CAD Event Number', 'Event Clearance Description', 'Final Call Type',  'Arrived Time', 
                      'Response Time', 'Original Time Queued', 'Call Type', 'Rmins', 'Sector', 'Beat'])

In [11]:
# Encode and transform labels for the classifier
le = LabelEncoder()
le.fit(df['Initial Call Type'])
df['Initial Call Type'] = le.transform(df['Initial Call Type'])

In [12]:
leP = LabelEncoder()
leP.fit(df['Precinct'])
df['Precinct'] = leP.transform(df['Precinct'])

In [13]:
df.head()

Unnamed: 0,Priority,Initial Call Type,Precinct,Response
72894,2,160,4,40.0
73541,2,140,0,10.0
73777,2,140,1,40.0
74240,3,136,4,10.0
74502,2,140,0,40.0


In [14]:
# Train/Test split
X = df.iloc[:, :3]  # Priority, Initial Call Type, Precinct
y = df.iloc[:, -1]  # Response
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

In [15]:
# Fit model
clf_dtc = DecisionTreeClassifier()
clf_dtc.fit(X_train, y_train)

DecisionTreeClassifier(class_weight=None, criterion='gini', max_depth=None,
            max_features=None, max_leaf_nodes=None,
            min_impurity_decrease=0.0, min_impurity_split=None,
            min_samples_leaf=1, min_samples_split=2,
            min_weight_fraction_leaf=0.0, presort=False, random_state=None,
            splitter='best')

In [16]:
# Predict
y_pred = clf_dtc.predict(X_test)

In [17]:
# Metrics
from sklearn.metrics import accuracy_score
accuracy_score(y_test, y_pred)

0.36580965268380694

In [18]:
from sklearn.metrics import precision_recall_fscore_support
precision_recall_fscore_support(y_test, y_pred, average='weighted')

  'precision', 'predicted', average, warn_for)


(0.2772782442874055, 0.36580965268380694, 0.2750577718036385, None)

In [19]:
# Export labels
joblib.dump(le, 'model/RTPle.joblib')
joblib.dump(leP, 'model/RTPleP.joblib')

# Export classifier
joblib.dump(clf_dtc, 'model/RTPmodel.joblib')

['model/RTPmodel.joblib']