# Univariate Demand Forecasting of 911 Calls

## Libraries

In [16]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
import torch
from torch.utils.data import DataLoader, TensorDataset

In [None]:
# Read CSV Data
raw_data = pd.read_csv('CLT_FY18-24.csv')

# (Per PM Directions. 'Other' has potentially relevant fire calls too)
# Keep only the rows whose cause category is EMS
ems_calldata = raw_data[raw_data['CauseCategory'] == 'EMS'].reset_index()

# Parse 'Dispatched' into pandas datetimes so the .dt accessors below work
ems_calldata['Dispatched'] = pd.to_datetime(ems_calldata['Dispatched'])

# Extract date info and time from 'Dispatched'
ems_calldata['Date'] = ems_calldata['Dispatched'].dt.date
ems_calldata['Year'] = ems_calldata['Dispatched'].dt.year
ems_calldata['Month'] = ems_calldata['Dispatched'].dt.month
ems_calldata['Day'] = ems_calldata['Dispatched'].dt.day
ems_calldata['Time'] = ems_calldata['Dispatched'].dt.time

# Sort by 'Dispatched'
ems_calldata = ems_calldata.sort_values('Dispatched')

# NOTE(review): the grouping below uses the exact 'Dispatched' timestamp, not
# an hourly bin, so 'Demand' counts calls sharing the same coordinates AND the
# same timestamp. If hourly demand is intended, floor the timestamp first
# (open question kept from the original):
#ems_calldata['Time'] = ems_calldata['Dispatched'].dt.floor('H')

# Count calls per (Latitude, Longitude, Dispatched) combination
ems_sorted_data = ems_calldata.groupby(['Latitude', 'Longitude', 'Dispatched']).size().reset_index(name='Demand')

# Identify Relevant attributes from Dispatched / Date-Time
ems_sorted_data['day_of_week'] = ems_sorted_data['Dispatched'].dt.dayofweek
ems_sorted_data['hour_of_day'] = ems_sorted_data['Dispatched'].dt.hour

# Standardize Demand to zero mean / unit variance (z-score). Keeping inputs on
# a small, centered scale helps the LSTM's sigmoid/tanh gates avoid saturation
# and keeps gradient magnitudes reasonable.
# The statistics are kept in variables so predictions can be de-normalized.
demand_mean = ems_sorted_data['Demand'].mean()
demand_std = ems_sorted_data['Demand'].std()
if demand_std > 0:
    # Parenthesize the subtraction: without it, only the mean is divided by
    # the std and the column is merely shifted by a constant.
    ems_sorted_data['Demand'] = (ems_sorted_data['Demand'] - demand_mean) / demand_std
else:
    # Zero (constant column) or NaN (fewer than two rows) std: dividing would
    # yield NaN/inf, so only center the values.
    ems_sorted_data['Demand'] = ems_sorted_data['Demand'] - demand_mean

# Sequence for LSTM - Times Series Length, Hour-based
# Retrieves data as far back as sequence_len specifies
def generate_sequences(data, sequence_len):
    """Build sliding-window samples from a 1-D series.

    Each sample is ``sequence_len`` consecutive values of ``data``; its
    target is the value that immediately follows the window.

    Args:
        data: Indexable, sliceable sequence of values (e.g. a NumPy array).
        sequence_len: Number of consecutive values in each input window.

    Returns:
        A tuple ``(X, y)`` of NumPy arrays: ``X`` holds the windows and ``y``
        the value following each window. Both are empty when ``data`` has no
        more than ``sequence_len`` elements.
    """
    window_starts = range(len(data) - sequence_len)
    windows = [data[start:start + sequence_len] for start in window_starts]
    targets = [data[start + sequence_len] for start in window_starts]
    return np.array(windows), np.array(targets)

# Window length: 72 steps, i.e. 3 days IF each row is one hour of demand
# NOTE(review): confirm the rows of ems_sorted_data are hourly before relying on this
current_seq_len = 72
X_seq, y_seq = generate_sequences(ems_sorted_data['Demand'].values, current_seq_len)

# Separate sets for training and testing based on the available data
# NOTE(review): train_test_split shuffles by default. Consecutive windows
# overlap, so a shuffled split places near-duplicate windows in both sets;
# consider shuffle=False (chronological hold-out) for time-series evaluation.
X_train, X_test, y_train, y_test = train_test_split(X_seq, y_seq, test_size=0.2, random_state=42)

# Prepare for LSTM model building - shape (samples, timesteps, features=1)
X_train = X_train.reshape((X_train.shape[0], X_train.shape[1], 1))
X_test = X_test.reshape((X_test.shape[0], X_test.shape[1], 1))

# Convert to float32 tensors. The reshape above already added the trailing
# feature axis, so no unsqueeze(-1) here: a second one would produce 4-D
# (samples, timesteps, 1, 1) tensors instead of the 3-D input an LSTM expects.
X_train = torch.tensor(X_train, dtype=torch.float32)
y_train = torch.tensor(y_train, dtype=torch.float32)
X_test = torch.tensor(X_test, dtype=torch.float32)
y_test = torch.tensor(y_test, dtype=torch.float32)

# Create DataLoader for training (mini-batches of 32, reshuffled every epoch)
train_dataset = TensorDataset(X_train, y_train)
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)

# Call frequency by date
freq_count = ems_calldata.groupby('Date').size().reset_index(name='Count')

# Bare expression: displays the table as the notebook cell's output
freq_count


## Model Building - LSTM

In [None]:
model = 