In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import folium
from datetime import datetime

In [None]:
# Load the CSV into `df`, then print a quick structural summary (row count,
# column names) and display the first rows as a sanity check.
# The file name lives in one constant so the load call and the log message
# cannot drift apart; the path is relative to the notebook's working directory.
DATA_PATH = 'ems_calls_routes_500.csv'

df = pd.read_csv(DATA_PATH)
print(f"✅ Loaded {len(df)} documents from {DATA_PATH}.")
print("--- DataFrame Columns ---")
print(df.columns.tolist())
print("-------------------------")
display(df.head())

✅ Loaded 500 documents from ems_calls_routes_500.csv.

--- DataFrame Columns ---
['CallNumber', 'UnitID', 'IncidentNumber', 'CallType', 'CallDate', 'WatchDate', 'ReceivedDtTm', 'EntryDtTm', 'DispatchDtTm', 'ResponseDtTm', 'OnSceneDtTm', 'TransportDtTm', 'HospitalDtTm', 'CallFinalDisposition', 'AvailableDtTm', 'Address', 'City', 'Zipcode', 'Battalion', 'StationArea', 'Box', 'OriginalPriority', 'Priority', 'FinalPriority', 'ALSUnit', 'CallTypeGroup', 'NumberOfAlarms', 'UnitType', 'UnitSequenceInCallDispatch', 'FirePreventionDistrict', 'SupervisorDistrict', 'Neighborhood', 'Location', 'RowID', 'Delay', 'Route', 'Destination', 'DestinationZip', 'DestinationName', 'Coordinates', 'IncidentLocation', 'TransportDistance', 'TransportTime', 'TravelTime', 'StartTime', 'EndTime', 'StartCoordinates', 'EndCoordinates', 'DepartureTime', 'ArrivalTime', 'TransportDuration', 'EncodedPolyline', 'Latitude', 'Longitude', 'DestinationLat', 'DestinationLon', 'Polyline']
-------------------------


Unnamed: 0,CallNumber,UnitID,IncidentNumber,CallType,CallDate
0,230010023,M23,23000201,Medical Incident,01/01/2023
1,230010156,M15,23000494,Medical Incident,01/01/2023
2,230010167,94,23000531,Medical Incident,01/01/2023
3,230010203,M05,23000632,Medical Incident,01/01/2023
4,230010204,M25,23000642,Medical Incident,01/01/2023


In [3]:
# Parse the timestamp columns and derive two duration columns (in minutes) on `df`.
# Nothing is computed when the frame is empty; only a notice is printed.
if df.empty:
    print("DataFrame is empty. Skipping cleaning.")
else:
    time_cols = ['ReceivedDtTm', 'ResponseDtTm', 'OnSceneDtTm', 'AvailableDtTm']
    # errors='coerce' turns unparseable values into NaT instead of raising, so
    # any duration built from such a value ends up NaN.
    df[time_cols] = df[time_cols].apply(pd.to_datetime, errors='coerce')

    # output column -> (start timestamp column, end timestamp column)
    duration_specs = {
        'Response_Time_min': ('ReceivedDtTm', 'ResponseDtTm'),
        'OnScene_Time_min': ('OnSceneDtTm', 'AvailableDtTm'),
    }
    for minutes_col, (start_col, end_col) in duration_specs.items():
        elapsed = df[end_col] - df[start_col]
        df[minutes_col] = elapsed.dt.total_seconds() / 60  # seconds -> minutes

    print("✅ Data cleaning and time calculations complete.")

✅ Data cleaning and time calculations complete.


In [4]:
from sklearn.model_selection import train_test_split

# Build the modelling table from `df` without mutating it, one-hot encode the
# categorical predictors, and hold out 20% of the rows for evaluation.
subset_cols = ['Response_Time_min', 'CallType', 'UnitType', 'ReceivedDtTm']
features = ['CallType', 'UnitType', 'CallHour']
target = 'Response_Time_min'

model_df = (
    df
    # Rows missing the target or any raw predictor cannot be used.
    .dropna(subset=subset_cols)
    # Re-parse defensively so `.dt` below works even if the column is not
    # already datetime-typed; values that fail to parse become NaT and are dropped.
    .assign(ReceivedDtTm=lambda frame: pd.to_datetime(frame['ReceivedDtTm'], errors='coerce'))
    .dropna(subset=['ReceivedDtTm'])
    # Hour of day (0-23) at which the call was received.
    .assign(CallHour=lambda frame: frame['ReceivedDtTm'].dt.hour)
)

X = model_df[features]
y = model_df[target]

# drop_first=True omits the first level of each categorical column, leaving
# k-1 indicator columns for k levels.
X_encoded = pd.get_dummies(X, columns=['CallType', 'UnitType'], drop_first=True)

# Fixed random_state keeps the split reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X_encoded,
    y,
    test_size=0.2,
    random_state=42,
)

print(
    "Data preparation for regression complete.",
    f"Training set shape: {X_train.shape}",
    f"Testing set shape: {X_test.shape}",
    sep="\n",
)

Data preparation for regression complete.
Training set shape: (398, 5)
Testing set shape: (100, 5)


In [None]:
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, r2_score

# Fit a random forest on the training split and score it on the held-out split.
# random_state is fixed so repeated runs build the same forest.
print("Training the Random Forest Regressor model...")
model = RandomForestRegressor(n_estimators=100, random_state=42).fit(X_train, y_train)
print("Model training complete.")

# Held-out predictions and the summary metrics reported below.
y_pred = model.predict(X_test)
mse = mean_squared_error(y_test, y_pred)
rmse = np.sqrt(mse)  # square root puts the error back in the target's units (minutes)
r2 = r2_score(y_test, y_pred)

print(
    "--- Model Performance ---",
    f"Root Mean Squared Error (RMSE): {rmse:.2f} minutes",
    f"R-squared (R²): {r2:.2f}",
    "RMSE is a measure of the typical size of the model's prediction error. A lower RMSE is better.",
    "R² measures how much of the variance in the response time is explained by the model. A score closer to 1.0 is better.",
    sep="\n",
)

Training the Random Forest Regressor model...
Model training complete.

--- Model Performance ---
Root Mean Squared Error (RMSE): 2.50 minutes
R-squared (R²): 0.11

RMSE is a measure of the typical size of the model's prediction error. A lower RMSE is better.
R² measures how much of the variance in the response time is explained by the model. A score closer to 1.0 is better.
