In [None]:
# 1. Imports & Setup
import os
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
import matplotlib.pyplot as plt

# Configuration
# Window lengths are counted in rows of the time axis; the "_MINS" names and the
# alignment notes in the dataset cell suggest one row per minute (confirm against
# the notebook that produced the matrices).
LOOKBACK_MINS = 30      # history window fed to the model
FORECAST_MINS = 15      # forecast window (targets and future schedule)
BATCH_SIZE = 128
EPOCHS = 20
LEARNING_RATE = 0.001
SEED = 42               # fixed so weight initialisation is repeatable between runs

# Seed Python's `random`, NumPy and TensorFlow in one call so that a
# Restart & Run All starts training from the same initial weights.
keras.utils.set_random_seed(SEED)

# File Paths
DATA_DIR = "../data"
HEADWAY_MATRIX_FILE = os.path.join(DATA_DIR, "headway_matrix_full.npy")
SCHEDULE_MATRIX_FILE = os.path.join(DATA_DIR, "schedule_matrix_full.npy")

print("Imports complete.")
print(f"TensorFlow Version: {tf.__version__}")
print(f"GPU Available: {len(tf.config.list_physical_devices('GPU')) > 0}")

In [None]:
# 2. Load Data
print(f"Loading Headway Matrix from {HEADWAY_MATRIX_FILE}...")
headway_matrix = np.load(HEADWAY_MATRIX_FILE)
print(f"Headway Matrix Shape: {headway_matrix.shape}") # (Time, Stations, Directions, Channels)

print(f"Loading Schedule Matrix from {SCHEDULE_MATRIX_FILE}...")
schedule_matrix = np.load(SCHEDULE_MATRIX_FILE)
print(f"Schedule Matrix Shape: {schedule_matrix.shape}") # (Time, Directions, Channels)

# Both arrays are windowed with the same time indices later on, so their
# first (time) axes must have the same length.
assert headway_matrix.shape[0] == schedule_matrix.shape[0], "Time dimensions do not match!"

# Normalize Data (if not already normalized)
# Data from Notebook 3 is normalized to [0, 1] using MAX_HEADWAY = 30 minutes.
SCALER = 30.0 # Minutes

max_headway = np.max(headway_matrix)
print(f"Max Headway in Data: {max_headway}")
max_schedule = np.max(schedule_matrix)
print(f"Max Schedule Value: {max_schedule}")

# Each matrix is checked on its own. Previously the schedule was only inspected
# when the headways needed scaling, so an unscaled schedule paired with
# already-normalized headways slipped through in raw units.
# A maximum above 1 is taken as "still in raw units"; dividing by SCALER lands
# in [0, 1] only if no raw value exceeds SCALER.
if max_headway > 1.0:
    print(f"Normalizing headway data using SCALER={SCALER}...")
    headway_matrix = headway_matrix / SCALER
else:
    print("Headway data appears to be already normalized.")

if max_schedule > 1.0:
    print(f"Normalizing schedule data using SCALER={SCALER}...")
    schedule_matrix = schedule_matrix / SCALER
else:
    print("Schedule data appears to be already normalized.")

print("Data Loading & Normalization Complete.")

In [None]:
    # 3. Create Datasets
    def create_dataset(headway_data, schedule_data, start_index, end_index, batch_size,
                       lookback=None, forecast=None):
        """Build a batched ``((X, T), Y)`` dataset of time-aligned sliding windows.

        For every forecast origin ``i`` in the requested range:

        * ``X`` is ``headway_data[i - lookback : i]``   (history ending at ``i``),
        * ``T`` is ``schedule_data[i : i + forecast]``  (future schedule),
        * ``Y`` is ``headway_data[i : i + forecast]``   (future headways, the target).

        Args:
            headway_data: Array whose first axis is time.
            schedule_data: Array whose first axis is time, same length as
                ``headway_data`` along that axis.
            start_index: First forecast origin ``i``. Must be at least ``lookback``
                so that a full history window exists.
            end_index: Exclusive upper bound on the rows that ``T``/``Y`` windows
                may touch. Keras requires it to be lower than ``len(data)``.
            batch_size: Number of windows per batch.
            lookback: History window length; defaults to ``LOOKBACK_MINS``.
            forecast: Forecast window length; defaults to ``FORECAST_MINS``.

        Returns:
            A ``tf.data.Dataset`` yielding ``((x_batch, t_batch), y_batch)``.

        Raises:
            ValueError: If there is not enough history before ``start_index`` or
                the range is too short to hold a single forecast window.
        """
        lookback = LOOKBACK_MINS if lookback is None else lookback
        forecast = FORECAST_MINS if forecast is None else forecast

        history_start = start_index - lookback
        if history_start < 0:
            raise ValueError(
                f"start_index={start_index} leaves no room for {lookback} steps of history"
            )

        # With stride 1 each stream yields (end - start - length + 1) windows.
        # The X stream is shifted back by `lookback` and cut `forecast` short, so
        # all three streams produce this same count and stay aligned under zip().
        num_windows = end_index - start_index - forecast + 1
        if num_windows <= 0:
            raise ValueError(
                f"Range [{start_index}, {end_index}) is too short for a "
                f"{forecast}-step forecast window"
            )

        def _windows(data, length, first, last):
            # One stride-1 window stream over data[first:last], no targets attached.
            return keras.utils.timeseries_dataset_from_array(
                data=data,
                targets=None,
                sequence_length=length,
                sequence_stride=1,
                sampling_rate=1,
                batch_size=batch_size,
                start_index=first,
                end_index=last,
            )

        ds_x = _windows(headway_data, lookback, history_start, end_index - forecast)
        ds_t = _windows(schedule_data, forecast, start_index, end_index)
        ds_y = _windows(headway_data, forecast, start_index, end_index)

        # Inputs: (X, T); Target: Y
        return tf.data.Dataset.zip(((ds_x, ds_t), ds_y))

    # Split Data
    # Chronological 80/20 split: the validation range comes strictly after training.
    total_samples = len(headway_matrix)
    train_split_idx = int(total_samples * 0.8)

    # The first forecast origin needs LOOKBACK_MINS rows of history before it.
    start_idx = LOOKBACK_MINS

    # Keras rejects end_index >= len(data), so the validation range stops one row short.
    val_end_idx = total_samples - 1

    # Fail early with a clear message instead of training on an empty dataset.
    for _split_name, _first, _last in (
        ("training", start_idx, train_split_idx),
        ("validation", train_split_idx, val_end_idx),
    ):
        if _last - _first < FORECAST_MINS:
            raise ValueError(
                f"Not enough rows for the {_split_name} split: range [{_first}, {_last}) "
                f"cannot hold a {FORECAST_MINS}-step forecast window."
            )

    print(f"Creating Training Dataset ({start_idx} to {train_split_idx})...")
    train_ds = create_dataset(
        headway_matrix,
        schedule_matrix,
        start_index=start_idx,
        end_index=train_split_idx,
        batch_size=BATCH_SIZE
    )

    print(f"Creating Validation Dataset ({train_split_idx} to {val_end_idx})...")
    val_ds = create_dataset(
        headway_matrix,
        schedule_matrix,
        start_index=train_split_idx,
        end_index=val_end_idx,
        batch_size=BATCH_SIZE
    )

    # Cache the generated windows after the first pass and prefetch for performance.
    train_ds = train_ds.cache().prefetch(tf.data.AUTOTUNE)
    val_ds = val_ds.cache().prefetch(tf.data.AUTOTUNE)

    print("Datasets created.")
    # Peek at one batch to confirm the tensor shapes the model will receive.
    for (x, t), y in train_ds.take(1):
        print(f"Input X shape: {x.shape}")
        print(f"Input T shape: {t.shape}")
        print(f"Target Y shape: {y.shape}")

In [None]:
# 4. Build Model
def build_model(input_shape_x, input_shape_t, output_shape):
    """Build the ConvLSTM encoder / schedule-broadcast / 1x1-conv decoder model.

    Args:
        input_shape_x: Per-sample shape of the headway history,
            ``(lookback, stations, directions, channels)``, e.g. ``(30, 156, 2, 1)``.
        input_shape_t: Per-sample shape of the future terminal schedule,
            e.g. ``(15, 2, 1)``. Any rank is accepted; it is flattened.
        output_shape: Per-sample shape of the forecast,
            ``(forecast, stations, directions, channels)``, e.g. ``(15, 156, 2, 1)``.
            Dimensions after the third are treated as output channels.

    Returns:
        An uncompiled ``keras.Model`` taking ``[history_input, terminal_input]``
        and producing a tensor of shape ``(batch, *output_shape)`` in [0, 1]
        (sigmoid output).
    """
    out_steps, n_stations, n_directions = output_shape[0], output_shape[1], output_shape[2]
    # np.prod of an empty tuple is 1, so a 3-D output_shape behaves as one channel.
    out_channels = int(np.prod(output_shape[3:]))

    # Input 1: History (Batch, 30, 156, 2, 1)
    input_x = layers.Input(shape=input_shape_x, name='history_input')

    # Input 2: Terminal Schedule (Batch, 15, 2, 1)
    input_t = layers.Input(shape=input_shape_t, name='terminal_input')

    # --- Encoder (ConvLSTM Branch) ---
    # Kernel (3, 1) convolves along the station axis only; the two directions
    # are never mixed spatially.
    x = layers.ConvLSTM2D(
        filters=16,
        kernel_size=(3, 1),
        padding='same',
        return_sequences=True,
        activation='relu'
    )(input_x)

    x = layers.ConvLSTM2D(
        filters=32,
        kernel_size=(3, 1),
        padding='same',
        return_sequences=False, # Compress time dimension
        activation='relu'
    )(x)
    # x shape: (Batch, stations, directions, 32)

    # --- Fusion ---
    # Combine the global schedule with the local spatial features by broadcasting
    # the flattened schedule vector to every (station, direction) cell instead of
    # flattening everything into one large dense layer.

    # 1. Flatten the schedule. The size is computed explicitly (for any rank)
    #    to avoid Flatten() shape-inference issues.
    flat_dim_t = int(np.prod(input_shape_t))
    t_flat = layers.Reshape((flat_dim_t,))(input_t)

    # 2. Repeat once per grid cell: (Batch, stations * directions, flat_dim_t)
    num_locations = n_stations * n_directions
    t_rep = layers.RepeatVector(num_locations)(t_flat)

    # 3. Fold back onto the spatial grid: (Batch, stations, directions, flat_dim_t)
    t_grid = layers.Reshape((n_stations, n_directions, flat_dim_t))(t_rep)

    # 4. Concatenate with the ConvLSTM output along the feature axis.
    combined = layers.Concatenate(axis=-1)([x, t_grid])

    # --- Decoder (Convolutional Projection) ---
    # A 1x1 Conv acts as a per-cell dense layer with weights shared across
    # stations. It emits one feature per (forecast step, output channel); the
    # original emitted only `out_steps` features, which made the final reshape
    # fail for multi-channel targets.
    z = layers.Conv2D(
        filters=out_steps * out_channels,
        kernel_size=(1, 1),
        activation='sigmoid'
    )(combined)
    # z shape: (Batch, stations, directions, out_steps * out_channels)

    # Split the feature axis into (step, channel), then move time to the front:
    # (Batch, stations, directions, steps, channels) -> (Batch, steps, stations, directions, channels)
    z = layers.Reshape((n_stations, n_directions, out_steps, out_channels))(z)
    z = layers.Permute((3, 1, 2, 4))(z)

    # Final reshape pins the exact requested output shape (a no-op for 4-D shapes).
    output = layers.Reshape(output_shape)(z)

    model = keras.Model(inputs=[input_x, input_t], outputs=output)
    return model

# Define shapes (per sample, batch axis omitted)
# The spatial grid (stations, directions, channels) is shared by the history
# input and the forecast target; only the leading time length differs.
grid_dims = (headway_matrix.shape[1], headway_matrix.shape[2], headway_matrix.shape[3])
schedule_dims = (schedule_matrix.shape[1], schedule_matrix.shape[2])

input_shape_x = (LOOKBACK_MINS,) + grid_dims       # X: (30, 156, 2, 1)
input_shape_t = (FORECAST_MINS,) + schedule_dims   # T: (15, 2, 1)
output_shape = (FORECAST_MINS,) + grid_dims        # Y: (15, 156, 2, 1)

for _label, _shape in (
    ("Input X", input_shape_x),
    ("Input T", input_shape_t),
    ("Output Y", output_shape),
):
    print(f"{_label} Shape: {_shape}")

model = build_model(input_shape_x, input_shape_t, output_shape)
model.summary()

# Compile: Adam on mean-squared error, with RMSE reported alongside the loss.
optimizer = keras.optimizers.Adam(learning_rate=LEARNING_RATE)
rmse_metric = keras.metrics.RootMeanSquaredError()
model.compile(optimizer=optimizer, loss='mse', metrics=[rmse_metric])

In [None]:
# 5. Train
# Stop once validation loss has not improved for 10 epochs (restoring the best
# weights), and keep the best model seen so far on disk.
early_stopping = keras.callbacks.EarlyStopping(
    monitor='val_loss',
    patience=10,
    restore_best_weights=True,
)
checkpoint = keras.callbacks.ModelCheckpoint(
    "best_model.keras",
    monitor='val_loss',
    save_best_only=True,
)
callbacks = [early_stopping, checkpoint]

print("Starting training...")
history = model.fit(
    train_ds,
    validation_data=val_ds,
    epochs=EPOCHS,
    callbacks=callbacks,
)

# Plot History: one panel per tracked quantity, train vs. validation curves.
fig, axes = plt.subplots(1, 2, figsize=(12, 4))
panels = (
    ("Loss (MSE)", "loss", "Loss"),
    ("RMSE", "root_mean_squared_error", "RMSE"),
)
for ax, (panel_title, key, short_name) in zip(axes, panels):
    ax.plot(history.history[key], label=f"Train {short_name}")
    ax.plot(history.history[f"val_{key}"], label=f"Val {short_name}")
    ax.set_title(panel_title)
    ax.legend()
plt.show()

In [None]:
# 6. Evaluate Predictions
# Take a single batch from the validation set and run the model on it.
(x_val, t_val), y_val = next(iter(val_ds.take(1)))
predictions = model.predict([x_val, t_val])

# Select a sample index within the batch
sample_idx = 0

# Extract Ground Truth and Prediction for this sample
# Shape: (FORECAST_MINS, Stations, 2, 1)
# y_val is a TensorFlow tensor; convert it to NumPy so `.T` and Matplotlib
# behave the same as for `predictions` (already a NumPy array).
y_true_sample = np.asarray(y_val)[sample_idx]
y_pred_sample = predictions[sample_idx]

# Select Direction 0 (Northbound) and remove channel dim
# Shape: (FORECAST_MINS, Stations)
y_true_d0 = y_true_sample[:, :, 0, 0]
y_pred_d0 = y_pred_sample[:, :, 0, 0]

# Denormalize back to minutes with the scaler defined in the data-loading cell.
scale_factor = SCALER

y_true_d0_min = y_true_d0 * scale_factor
y_pred_d0_min = y_pred_d0 * scale_factor

# Plot Heatmaps (stations on the y-axis, future time steps on the x-axis).
# Matplotlib's imshow is used because seaborn is not imported by this notebook.
fig, axes = plt.subplots(1, 2, figsize=(15, 6))
heatmaps = (
    ("Ground Truth", y_true_d0_min),
    ("Prediction", y_pred_d0_min),
)
for ax, (panel_name, grid) in zip(axes, heatmaps):
    image = ax.imshow(
        grid.T,
        cmap='viridis',
        vmin=0,
        vmax=scale_factor,      # shared colour scale so both panels are comparable
        aspect='auto',          # stretch cells; the grid is far taller than wide
        interpolation='nearest',
    )
    fig.colorbar(image, ax=ax, label="Headway (Minutes)")
    ax.set_title(f"{panel_name} (Next {FORECAST_MINS} Mins)\nSample {sample_idx}, Dir 0")
    ax.set_xlabel("Time Step (Future)")
    ax.set_ylabel("Station Index")

fig.tight_layout()
plt.show()

# Plot Time Series for a specific station (e.g., middle of the line)
station_idx = y_true_d0.shape[1] // 2

fig, ax = plt.subplots(figsize=(10, 4))
ax.plot(y_true_d0_min[:, station_idx], label='Ground Truth', marker='o')
ax.plot(y_pred_d0_min[:, station_idx], label='Prediction', marker='x')
ax.set_title(f"Headway Forecast at Station {station_idx} (Dir 0)")
ax.set_ylabel("Headway (Minutes)")
ax.set_xlabel("Minutes into Future")
ax.legend()
ax.grid(True)
plt.show()