# Univariate Demand Forecasting of 911 Calls

## Libraries

In [26]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
import torch
from torch.utils.data import DataLoader, TensorDataset
import torch.nn as nn
import torch.optim as optim
from sklearn.metrics import mean_squared_error
from statsmodels.tsa.holtwinters import ExponentialSmoothing
import matplotlib.pyplot as plt

In [27]:
# Read CSV Data
raw_data = pd.read_csv('CLT_FY18-24.csv')

# (Per PM Directions. 'Other' has potentially relevant fire calls too)
# Filter for EMS
ems_calldata = raw_data[raw_data['CauseCategory'] == 'EMS'].reset_index()

# 'Dispatched' to hold value from pandas datetime func
ems_calldata['Dispatched'] = pd.to_datetime(ems_calldata['Dispatched'])

# Get Date from 'Dispatched'
ems_calldata['Date'] = ems_calldata['Dispatched'].dt.date

# Extract date info and time from 'Dispatched'
ems_calldata['Date'] = ems_calldata['Dispatched'].dt.date
ems_calldata['Year'] = ems_calldata['Dispatched'].dt.year
ems_calldata['Month'] = ems_calldata['Dispatched'].dt.month
ems_calldata['Day'] = ems_calldata['Dispatched'].dt.day
ems_calldata['Time'] = ems_calldata['Dispatched'].dt.time

# Sort by 'Dispatched'
ems_calldata = ems_calldata.sort_values('Dispatched')

# Floor the hour?
#ems_calldata['Time'] = ems_calldata['Dispatched'].dt.floor('H')

# Organize by 'Dispatched' (per hour) and lat&lon
ems_sorted_data = ems_calldata.groupby(['Latitude', 'Longitude', 'Dispatched']).size().reset_index(name='Demand')

# Identify Relevant attributes from Dispatched / Date-Time
ems_sorted_data['day_of_week'] = ems_sorted_data['Dispatched'].dt.dayofweek
ems_sorted_data['hour_of_day'] = ems_sorted_data['Dispatched'].dt.hour

# Normalize Demand (optional but recommended for LSTM ? Why is this?)
# standard deviation?
ems_sorted_data['Demand'] = (ems_sorted_data['Demand'] - ems_sorted_data['Demand'].mean() / ems_sorted_data['Demand'].std())

# Sequence for LSTM - Times Series Length, Hour-based
# Retrieves data as far back as sequence_len specifies
def generate_sequences(data, sequence_len):
    X, y = [], []
    for index in range(len(data) - sequence_len):
        X.append(data[index:index+sequence_len])
        y.append(data[index+sequence_len])
    return np.array(X), np.array(y)

# For 3 days needs 72 hours
current_seq_len = 72
X_seq, y_seq = generate_sequences(ems_sorted_data['Demand'].values, current_seq_len)

# Separate sets for training and testing based on the available data
X_train, X_test, y_train, y_test = train_test_split(X_seq, y_seq, test_size=0.2, random_state=42)

# Prepare for LSTM model building - params(samples, timestamps, features)
X_train = X_train.reshape((X_train.shape[0], X_train.shape[1], 1))
X_test = X_test.reshape((X_test.shape[0], X_test.shape[1], 1))

X_train = torch.tensor(X_train, dtype=torch.float32).unsqueeze(-1)  # Add feature dimension
y_train = torch.tensor(y_train, dtype=torch.float32)
X_test = torch.tensor(X_test, dtype=torch.float32).unsqueeze(-1)
y_test = torch.tensor(y_test, dtype=torch.float32)

# Create DataLoader for training
train_dataset = TensorDataset(X_train, y_train)
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)

# for batch_X, batch_y in train_loader:
#     print(f"batch_X shape: {batch_X.shape}")  # Should be (batch_size, seq_length, input_size)
#     print(f"batch_y shape: {batch_y.shape}")  # Should be (batch_size, output_size)
#     break


# Call frequency by date
freq_count = ems_calldata.groupby('Date').size().reset_index(name='Count')

freq_count


batch_X shape: torch.Size([32, 72, 1, 1])
batch_y shape: torch.Size([32])


Unnamed: 0,Date,Count
0,2018-07-01,287
1,2018-07-02,285
2,2018-07-03,279
3,2018-07-04,225
4,2018-07-05,267
...,...,...
2187,2024-06-26,220
2188,2024-06-27,240
2189,2024-06-28,218
2190,2024-06-29,207


## Model Building - LTSM

In [28]:
# Define the LSTM model
class LSTMModel(nn.Module):
    def __init__(self, input_size, hidden_size, output_size):
        super(LSTMModel, self).__init__()
        self.lstm = nn.LSTM(input_size, hidden_size, batch_first=True)
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        out, _ = self.lstm(x)  # LSTM output
        out = self.fc(out[:, -1, :])  # Use the last time step's output
        return out

# Initialize model, loss, and optimizer
model = LSTMModel(input_size=1, hidden_size=50, output_size=1)
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Train the model
epochs = 50
for epoch in range(epochs):
    model.train()
    for batch_X, batch_y in train_loader:
        # Fix shapes
        batch_X = batch_X.squeeze(-1)  # Shape: (32, 72, 1)
        batch_y = batch_y.unsqueeze(-1)  # Shape: (32, 1)

        optimizer.zero_grad()
        outputs = model(batch_X)  # Forward pass
        loss = criterion(outputs, batch_y)  # Compute loss
        loss.backward()  # Backward pass
        optimizer.step()  # Update weights
    print(f'Epoch {epoch+1}/{epochs}, Loss: {loss.item()}')

ValueError: LSTM: Expected input to be 2D or 3D, got 4D instead

## a

In [None]:
# Evaluate the model on the test set
model.eval()
with torch.no_grad():
    y_pred = model(X_test).numpy()

# Flatten predictions and true values for evaluation
y_pred = y_pred.flatten()
y_test = y_test.numpy()

# Evaluate the model
rmse = np.sqrt(mean_squared_error(y_test, y_pred))
print(f'RMSE: {rmse}')

In [None]:
# Apply exponential smoothing to the predictions
smoothed_predictions = ExponentialSmoothing(y_pred, trend='add', seasonal='add', seasonal_periods=24).fit().fittedvalues

# Plot the results
plt.figure(figsize=(12, 6))
plt.plot(y_test, label='Actual Demand')
plt.plot(y_pred, label='LSTM Predictions')
plt.plot(smoothed_predictions, label='Smoothed Predictions')
plt.legend()
plt.show()

In [None]:
# Get the last 72 hours of data
last_72_hours = ems_sorted_data['demand'].values[-current_seq_len:]

# Convert to PyTorch tensor
last_72_hours = torch.tensor(last_72_hours, dtype=torch.float32).unsqueeze(0).unsqueeze(-1)

# Predict the next 8 hours
future_predictions = []
for _ in range(8):
    with torch.no_grad():
        next_hour = model(last_72_hours).item()
    future_predictions.append(next_hour)
    last_72_hours = torch.cat([last_72_hours[:, 1:, :], torch.tensor([[[next_hour]]], dtype=torch.float32)], dim=1)

# Apply exponential smoothing to the future predictions
smoothed_future_predictions = ExponentialSmoothing(future_predictions, trend='add', seasonal='add', seasonal_periods=24).fit().fittedvalues

# Plot the forecast
plt.figure(figsize=(12, 6))
plt.plot(range(8), future_predictions, label='LSTM Forecast')
plt.plot(range(8), smoothed_future_predictions, label='Smoothed Forecast')
plt.legend()
plt.show()