In [1]:
import pandas as pd
import numpy as np
import tensorflow as tf
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Embedding, Dropout, Concatenate, Input,Flatten
from tensorflow.keras.models import Model
from sklearn.preprocessing import LabelEncoder, MinMaxScaler
from statsmodels.tsa.seasonal import seasonal_decompose
from statsmodels.tsa.arima.model import ARIMA
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
import tensorflow.keras.backend as K
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.constraints import MaxNorm
import seaborn as sns
from sklearn.ensemble import RandomForestRegressor
from statsmodels.tsa.stattools import adfuller

In [56]:
import xgboost as xgb

In [68]:
df = pd.read_csv('dispatch_data_updated.csv')

In [69]:
df = df[['ZIPCODE', 'INITIAL_CALL_TYPE', 'POLICEPRECINCT', 'INCIDENT_RESPONSE_SECONDS_QY', 
         'INCIDENT_DATETIME','Nearest_Traffic_Vol','Distance_m']]

In [70]:
zipcode_encoder = LabelEncoder()
call_type_encoder = LabelEncoder()
borough_encoder = LabelEncoder()
precinct_encoder = LabelEncoder()
df['PRECINCT_encoded'] = precinct_encoder.fit_transform(df['POLICEPRECINCT'].astype(str))
df['ZIPCODE_encoded'] = zipcode_encoder.fit_transform(df['ZIPCODE'].astype(str))
df['CALL_TYPE_encoded'] = call_type_encoder.fit_transform(df['INITIAL_CALL_TYPE'].astype(str))

In [71]:
df['INCIDENT_DATETIME'] = pd.to_datetime(df['INCIDENT_DATETIME'])
df['hour'] = df['INCIDENT_DATETIME'].dt.hour
df['day'] = df['INCIDENT_DATETIME'].dt.day
df['month'] = df['INCIDENT_DATETIME'].dt.month
df['day_of_week'] = df['INCIDENT_DATETIME'].dt.dayofweek

In [72]:
zipcode_avg_response = df.groupby('ZIPCODE')['INCIDENT_RESPONSE_SECONDS_QY'].mean().reset_index()
zipcode_avg_response.rename(columns={'INCIDENT_RESPONSE_SECONDS_QY': 'average_response_time'}, inplace=True)

# Merge the average response time back to the main dataset
df = pd.merge(df, zipcode_avg_response[['ZIPCODE', 'average_response_time']], on='ZIPCODE', how='left')

In [73]:
df.head()

Unnamed: 0,ZIPCODE,INITIAL_CALL_TYPE,POLICEPRECINCT,INCIDENT_RESPONSE_SECONDS_QY,INCIDENT_DATETIME,Nearest_Traffic_Vol,Distance_m,PRECINCT_encoded,ZIPCODE_encoded,CALL_TYPE_encoded,hour,day,month,day_of_week,average_response_time
0,10306.0,CVAC,122.0,329.0,2016-11-17 13:23:36,11.0,719.542681,20,46,31,13,17,11,3,447.597523
1,11434.0,DIFFBR,113.0,510.0,2012-04-23 09:38:42,32.0,640.678352,15,168,34,9,23,4,0,539.729332
2,11205.0,SICK,88.0,230.0,2012-04-22 15:53:38,649.0,86.818935,73,89,91,15,22,4,6,462.810881
3,10314.0,ALTMEN,121.0,322.0,2020-12-22 10:45:37,133.0,427.442315,19,52,3,10,22,12,1,466.060138
4,11426.0,EDP,105.0,474.0,2008-05-09 09:21:50,55.0,281.350053,7,161,40,9,9,5,4,459.363636


In [74]:
X = df[['ZIPCODE_encoded', 'CALL_TYPE_encoded', 'hour', 'day', 'month', 
        'day_of_week', 'POLICEPRECINCT', 'average_response_time','Distance_m','Nearest_Traffic_Vol']]

# Define target variable
y = df['INCIDENT_RESPONSE_SECONDS_QY']

In [75]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Create DMatrix for training and testing
dtrain = xgb.DMatrix(X_train, label=y_train)
dtest = xgb.DMatrix(X_test, label=y_test)

In [76]:
params = {
    'objective': 'reg:squarederror',  
    'eval_metric': 'rmse',
    'max_depth': 6,
    'eta': 0.1,                 
    'subsample': 0.8,          
    'colsample_bytree': 0.8
}

In [77]:
num_boost_round = 100  # Number of boosting rounds
model = xgb.train(params, dtrain, num_boost_round=num_boost_round)

In [83]:
y_pred = model.predict(dtest)

# Evaluate the model
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

print(f"Root Mean Squared Error (RMSE): {np.sqrt(mse)}")
print(f"R-squared (R²): {r2}")

Root Mean Squared Error (RMSE): 243.9542447893702
R-squared (R²): 0.7575501446692506


In [84]:
mae = mean_absolute_error(y_test, y_pred)

print(f"Mean Absolute Error (MAE): {mae}")

Mean Absolute Error (MAE): 154.08577593985052


In [81]:
#df.to_csv('dispatch_data_updated.csv', index=False)

#print("DataFrame saved as 'updated_incident_data.csv'")