# Model Training & Experimentation Framework

This notebook implements the "Experiment Factory" for the Headway Prediction model. 
It is designed to support the ablation analysis defined in the project abstract, allowing us to vary:
1.  **Lookback Window ($L$):** 30, 45, 60 minutes.
2.  **Input Features:** With or without Terminal Headways ($T$).
3.  **Prediction Horizon:** Recursive prediction up to 60 minutes.

We start by importing the necessary libraries, including TensorFlow/Keras for the Deep Learning components.

In [None]:
import os
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
import matplotlib.pyplot as plt
import seaborn as sns

# Set random seeds for reproducibility: both the NumPy and the TensorFlow
# global generators are fixed to the same constant.
np.random.seed(42)
tf.random.set_seed(42)


In [None]:
# 1. Configuration class 
# We define an `ExperimentConfig` class to encapsulate all hyperparameters. This makes it easy to switch between different experimental setups (e.g., changing the lookback window or enabling/disabling terminal headways) without rewriting code. 

class ExperimentConfig:
    """Bundle of hyperparameters describing one experiment run.

    All durations are given in minutes. Two derived attributes,
    ``lookback_bins`` and ``forecast_bins``, express the lookback and
    forecast durations as a whole number of time bins (floor division by
    ``time_bin_size_min``).

    Args:
        lookback_mins: Length of the history window fed to the model.
        forecast_mins: Length of the prediction horizon.
        time_bin_size_min: Width of one time bin; must be positive.
        use_terminal_headway: Whether the schedule input (T) is used.
        batch_size: Mini-batch size used for training.
        epochs: Maximum number of training epochs.
        learning_rate: Optimizer learning rate.

    Raises:
        ValueError: If ``time_bin_size_min`` is not positive.
    """

    def __init__(
        self,
        lookback_mins=30, # Paper: 30 minutes
        forecast_mins=15, # Paper: 15 minutes (Single Step)
        time_bin_size_min=5,
        use_terminal_headway=True,
        batch_size=32,    # Paper: 32
        epochs=32,
        learning_rate=0.001 # Paper: 0.001
    ):
        # A zero bin width would divide by zero below and a negative one would
        # produce meaningless bin counts, so reject both with a clear message.
        if time_bin_size_min <= 0:
            raise ValueError(
                f"time_bin_size_min must be positive, got {time_bin_size_min}"
            )

        self.lookback_mins = lookback_mins
        self.forecast_mins = forecast_mins
        self.time_bin_size_min = time_bin_size_min
        self.use_terminal_headway = use_terminal_headway
        self.batch_size = batch_size
        self.epochs = epochs
        self.learning_rate = learning_rate

        # calculated properties (whole bins only; any remainder is truncated)
        self.lookback_bins = lookback_mins // time_bin_size_min
        self.forecast_bins = forecast_mins // time_bin_size_min

    def __repr__(self):
        # Use the real class name and close the parenthesis so the printed
        # form is well-formed.
        return (f"{type(self).__name__}(L={self.lookback_mins}m, "
                f"F={self.forecast_mins}m, "
                f"Use_T={self.use_terminal_headway})")

# Baseline experiment (exp-A1): Table 1 settings of a 30-minute lookback and a
# 15-minute forecast, with the terminal-headway input switched on.
config = ExperimentConfig(lookback_mins=30, forecast_mins=15, use_terminal_headway=True)

# Report the active setup together with its derived bin counts, one per line.
print(
    f"Active Configuration {config}",
    f"Lookback Bins: {config.lookback_bins}",
    f"Forecast Bins: {config.forecast_bins}",
    sep="\n",
)

## 2. Data Loading & Preparation

We load the preprocessed matrix and schedule data. We then use the `load_and_process_data` function (adapted from `create_dataset` in the EDA notebook) to generate the $(X, T, Y)$ tensors based on the active `config`.

In [None]:
# file paths (relative) and the time origin used by `load_and_process_data`
# MATRIX_PATH: .npy array loaded with np.load as the headway matrix.
MATRIX_PATH = "../data/headway_matrix_full.npy"
# SCHEDULE_PATH: CSV of terminal headways read with pd.read_csv.
SCHEDULE_PATH = "../data/target_terminal_headways.csv"
# GLOBAL_START_TIME: timestamp assigned to the first matrix row; schedule rows
# earlier than this are dropped.
GLOBAL_START_TIME = "2025-06-06 00:00:00"

def load_and_process_data(config, matrix_path=None, schedule_path=None,
                          start_time=None, scaling_factor=20.0):
    """
    Load the pre-processed matrix and build the (X, T, Y) tensors for `config`.

    NOTE: This function expects 'headway_matrix_full.npy' to contain ONLY Northbound traffic
    (filtered upstream in 3_data_merging.ipynb) to avoid directional collision.

    Args:
        config: Object exposing `time_bin_size_min`, `lookback_bins` and
            `forecast_bins`.
        matrix_path: Path of the .npy matrix, indexed as (time bin, station).
            Defaults to the module-level MATRIX_PATH.
        schedule_path: Path of the schedule CSV with columns `service_date`,
            `departure_seconds` and `scheduled_headway_min`. Defaults to the
            module-level SCHEDULE_PATH.
        start_time: Timestamp of matrix row 0. Defaults to the module-level
            GLOBAL_START_TIME.
        scaling_factor: Divisor applied to both the matrix and the schedule.

    Returns:
        Tuple (X, T, Y) with one sample per anchor bin i:
            X: (samples, lookback_bins, stations, 1) - rows [i-L, i)
            T: (samples, forecast_bins, 1)           - schedule for [i, i+F)
            Y: (samples, forecast_bins, stations, 1) - rows [i, i+F)

    Raises:
        ValueError: If the matrix is too short to hold a single
            lookback+forecast window, or no schedule rows remain at or after
            `start_time`.
    """
    # Fall back to the notebook-level constants only when no override is given.
    matrix_path = MATRIX_PATH if matrix_path is None else matrix_path
    schedule_path = SCHEDULE_PATH if schedule_path is None else schedule_path
    start_time = GLOBAL_START_TIME if start_time is None else start_time

    bin_rule = f"{config.time_bin_size_min}min"
    n_back, n_ahead = config.lookback_bins, config.forecast_bins

    print("Loading data...")

    # 1. Load Matrix (Northbound Only) and
    # 2. normalize it
    matrix = np.load(matrix_path) / scaling_factor

    # Anchors i run over [L, N - F] inclusive, so that the target window
    # [i, i+F) may end exactly on the last row.
    n_samples = len(matrix) - n_back - n_ahead + 1
    if n_samples < 1:
        raise ValueError(
            f"Matrix has {len(matrix)} time bins, but at least "
            f"{n_back + n_ahead} are needed for L={n_back} and F={n_ahead}."
        )

    # 3. load and align schedule
    # Note: target_terminal_headways.csv is verified to contain only Northbound trips (N..R)
    schedule_df = pd.read_csv(schedule_path)
    schedule_df['datetime'] = pd.to_datetime(schedule_df['service_date']) + \
                              pd.to_timedelta(schedule_df['departure_seconds'], unit='s')
    schedule_df = schedule_df.set_index('datetime').sort_index()
    schedule_df = schedule_df[~schedule_df.index.duplicated(keep='first')]
    schedule_df = schedule_df[schedule_df.index >= start_time]
    if schedule_df.empty:
        # Without this check T would silently become all-NaN.
        raise ValueError(f"No schedule rows at or after {start_time}.")

    # Fill gaps from the next known value, then normalize like the matrix.
    # Working on a Series avoids assigning into a filtered DataFrame slice.
    headway = schedule_df['scheduled_headway_min'].bfill() / scaling_factor

    # Put the schedule on the same time grid as the matrix rows; bins before
    # the first or after the last schedule entry take the nearest known value.
    time_coords = pd.date_range(start=start_time, periods=matrix.shape[0], freq=bin_rule)
    schedule_aligned = (
        headway.resample(bin_rule).ffill()
        .reindex(time_coords, method='ffill')
        .bfill()
        .to_numpy()
    )

    # 4. create tensors
    print(f"Generating tensors with L={config.lookback_bins} bins, F={config.forecast_bins} bins...")

    # One row of indices per sample; broadcasting the anchor column against
    # the relative offsets replaces the per-sample Python loop.
    anchors = np.arange(n_back, len(matrix) - n_ahead + 1)[:, np.newaxis]
    past_idx = anchors + np.arange(-n_back, 0)   # rows i-L .. i-1
    future_idx = anchors + np.arange(n_ahead)    # rows i .. i+F-1

    X = matrix[past_idx][..., np.newaxis]            #(Batch, Time, Space, 1)
    T = schedule_aligned[future_idx][..., np.newaxis]  #(Batch, Time, 1)
    Y = matrix[future_idx][..., np.newaxis]          #(Batch, Time, Space, 1)

    return X, T, Y

# Build the (X, T, Y) tensors for the active configuration.
X, T, Y = load_and_process_data(config)

# Summarise the resulting tensor shapes, one per line.
print(
    "\nData Shapes:",
    f"X (Context): {X.shape}",
    f"T (Intent): {T.shape}",
    f"Y (Target): {Y.shape}",
    sep="\n",
)

## 3. Model Architecture (ConvLSTM)

We define the `build_model` function. It constructs a Keras model using `ConvLSTM2D` layers to capture spatiotemporal dependencies.

**Key Features:**
*   **5D Input Handling:** `ConvLSTM2D` expects `(Batch, Time, Rows, Cols, Channels)`. Since our data is 1D space (Stations), we reshape it to `(Time, Stations, 1, 1)` inside the model.
*   **Dual Input Support:** If `config.use_terminal_headway` is True, the model creates a secondary input branch for the schedule ($T$), processes it, and concatenates it with the main traffic flow features.

In [None]:
def build_model(config, input_shape_x, input_shape_t=None):
    """
    Constructs a 'Stripped Down' ConvLSTM model.
    Removes the Recurrent Decoder to improve speed and convergence.

    Architecture:
    1. Encoder: ConvLSTM (Reads History) -> Summary State
    2. Projection: Conv2D (Projects State to Future Horizon)
    3. Merger: Combines Projected State with Schedule (T)

    Args:
        config: Object exposing `lookback_bins`, `forecast_bins`,
            `use_terminal_headway` and `learning_rate`.
        input_shape_x: Per-sample shape of X, (L, Space, 1).
        input_shape_t: Per-sample shape of T, (F, 1). When None, or when
            `config.use_terminal_headway` is False, the schedule branch is
            skipped and the model has a single input.

    Returns:
        A compiled `keras.Model` (Adam optimizer, MSE loss) whose output has
        shape (Batch, F, Space, 1). Its inputs are [input_x, input_t] when
        the schedule branch is active, otherwise [input_x].
    """
    n_stations = input_shape_x[1]

    # --- Encoder (Reads History X) ---
    # Input: (L, Space, 1)
    input_x = layers.Input(shape=input_shape_x, name="input_x")

    # Reshape for ConvLSTM: (L, Space, 1, 1)
    x_reshaped = layers.Reshape((config.lookback_bins, n_stations, 1, 1))(input_x)

    # Encoder: Single ConvLSTM layer
    # Collapses time dimension (return_sequences=False)
    # Output Shape: (Batch, Space, 1, 32)
    encoder_out = layers.ConvLSTM2D(
        filters=32,
        kernel_size=(3, 1),
        padding="same",
        return_state=False,
        return_sequences=False,
        activation="relu"
    )(x_reshaped)

    # --- Projection (CNN Decoder) ---
    # Instead of unrolling a loop, we project the features directly to the forecast horizon.
    # We use filters=ForecastBins to generate all time steps at once.
    # Output Shape: (Batch, Space, 1, F)
    projection = layers.Conv2D(
        filters=config.forecast_bins,
        kernel_size=(1, 1),
        activation="relu"
    )(encoder_out)

    # Permute dims: (Batch, Space, 1, F) -> (Batch, F, Space, 1)
    # Keras Permute expects 1-based index excluding batch.
    # Current dims (excluding batch): (Space=1, 1=2, F=3) -> Want (F=3, Space=1, 1=2)
    forecast_base = layers.Permute((3, 1, 2))(projection)

    # --- Merge with Schedule (T) ---
    if config.use_terminal_headway and input_shape_t is not None:
        input_t = layers.Input(shape=input_shape_t, name="input_t") # (F, 1)

        # Reshape T to (Batch, F, 1, 1)
        t_reshaped = layers.Reshape((config.forecast_bins, 1, 1))(input_t)

        # Tile T across space: (Batch, F, Space, 1).
        # Nearest-neighbour upsampling of the width-1 axis repeats each value
        # n_stations times. A built-in layer is used instead of a Lambda
        # closure so the model can be saved and reloaded safely.
        t_tiled = layers.UpSampling2D(size=(1, n_stations))(t_reshaped)

        # Concatenate: (Batch, F, Space, 2)
        merged = layers.Concatenate(axis=-1)([forecast_base, t_tiled])

        # Refine with 1x1 Conv.
        # `merged` is 4-D, so Conv2D is applied directly with (F, Space) as
        # the spatial grid. Wrapping it in TimeDistributed would hand Conv2D
        # 3-D slices, which it rejects. A 1x1 kernel acts on each
        # (time, station) cell independently with shared weights.
        outputs = layers.Conv2D(16, (1, 1), activation="relu")(merged)
        outputs = layers.Conv2D(1, (1, 1), activation="linear")(outputs)

        inputs = [input_x, input_t]
    else:
        outputs = forecast_base
        inputs = [input_x]

    model = keras.Model(inputs=inputs, outputs=outputs)
    model.compile(optimizer=keras.optimizers.Adam(learning_rate=config.learning_rate), loss="mse")
    return model

# Instantiate the network from the per-sample tensor shapes (batch axis
# dropped) and list its layers.
model = build_model(config, X.shape[1:], T.shape[1:])
model.summary()

In [None]:
# Training
print(f"Training model for {config.epochs} epochs...")

# callbacks
# Stop once validation loss has failed to improve for 5 consecutive epochs and
# restore the weights from the best epoch.
early_stopping = keras.callbacks.EarlyStopping(
    monitor="val_loss",
    patience=5,
    restore_best_weights=True
)

# build_model only adds the schedule input when terminal headways are enabled,
# so T must be passed only in that case; otherwise the single-input model would
# reject the extra array.
history = model.fit(
    x=[X, T] if config.use_terminal_headway else X,
    y=Y,
    validation_split=0.2,
    epochs=config.epochs,
    batch_size=config.batch_size,
    callbacks=[early_stopping],
    verbose=1
)

In [None]:
# Loss curves: training vs. validation MSE, one point per epoch.
fig, ax = plt.subplots(figsize=(10, 6))
ax.plot(history.history['loss'], label="Training Loss")
ax.plot(history.history['val_loss'], label="Validation Loss")
ax.set_title('Model Training History')
ax.set_xlabel('Epoch')
ax.set_ylabel('MSE Loss')
ax.legend()
ax.grid(True)
plt.show()

In [None]:
# visualize a prediction
# select a random sample (slices keep the leading batch axis of size 1)
sample_idx = np.random.randint(0, len(X))
x_sample = X[sample_idx:sample_idx+1]
t_sample = T[sample_idx:sample_idx+1] # Get corresponding T
y_true = Y[sample_idx:sample_idx+1]

# The model only has the schedule input when terminal headways are enabled,
# so pass T only in that case.
sample_inputs = [x_sample, t_sample] if config.use_terminal_headway else x_sample
y_pred = model.predict(sample_inputs)

# shapes (1, Time, Space, 1)
# lets plot the space-time heatmap for truth vs Pred
# we transpose to have time on x-axis and space (stations) on y axis

# Use one colour scale for both panels so equal colours mean equal values;
# independent scales would make the side-by-side comparison misleading.
vmin = min(np.nanmin(y_true), np.nanmin(y_pred))
vmax = max(np.nanmax(y_true), np.nanmax(y_pred))

fig, axes = plt.subplots(1, 2, figsize=(15, 6))

# Left panel: ground truth. Right panel: prediction.
panels = (("Ground Truth", y_true), ("Prediction", y_pred))
for ax, (label, field) in zip(axes, panels):
    sns.heatmap(field[0, :, :, 0].T, ax=ax, cmap="viridis", vmin=vmin, vmax=vmax)
    ax.set_title(f"{label} (Sample {sample_idx})")
    ax.set_xlabel("Time Step (Future)")
    ax.set_ylabel("Station ID")

plt.tight_layout()
plt.show()

In [None]:
from sklearn.metrics import mean_squared_error, r2_score

# 1. Re-create Validation Split (Last 20%)
# Mirrors validation_split=0.2 used during training - NOTE(review): this relies
# on Keras taking the trailing fraction of the arrays; confirm against the
# Keras version in use.
val_split_idx = int(len(X) * 0.8)
X_val = X[val_split_idx:]
T_val = T[val_split_idx:]
Y_val = Y[val_split_idx:]

print(f"Evaluating on {len(X_val)} validation samples...")

# 2. Predict
# The model only has the schedule input when terminal headways are enabled,
# so pass T_val only in that case.
val_inputs = [X_val, T_val] if config.use_terminal_headway else X_val
Y_pred_norm = model.predict(val_inputs, verbose=1)

# 3. Inverse Transform (Normalized -> Minutes -> Seconds)
# Recall: We divided by 20.0 to normalize
SCALING_FACTOR = 20.0
Y_val_sec = Y_val * SCALING_FACTOR * 60
Y_pred_sec = Y_pred_norm * SCALING_FACTOR * 60

# 4. Calculate Metrics
# Flatten arrays because metrics expect 1D arrays
rmse = np.sqrt(mean_squared_error(Y_val_sec.flatten(), Y_pred_sec.flatten()))
r2 = r2_score(Y_val_sec.flatten(), Y_pred_sec.flatten())

print("\n--- Experiment Results ---")
print(f"RMSE: {rmse:.2f} seconds")
print(f"R2 Score: {r2:.4f}")