In [None]:
# 1. Imports & Setup
import os
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
import matplotlib.pyplot as plt

# Configuration
LOOKBACK_MINS = 30   # length of the history window (time steps; the name suggests minutes)
FORECAST_MINS = 15   # length of the forecast window used for targets and the schedule input
BATCH_SIZE = 128
EPOCHS = 20
LEARNING_RATE = 0.001
SEED = 42

# Seed Python's `random`, NumPy and TensorFlow in one call so that weight
# initialisation is repeatable across Restart Kernel -> Run All.
keras.utils.set_random_seed(SEED)

# File Paths
DATA_DIR = "../data"
HEADWAY_MATRIX_FILE = os.path.join(DATA_DIR, "headway_matrix_full.npy")
SCHEDULE_MATRIX_FILE = os.path.join(DATA_DIR, "schedule_matrix_full.npy")

print("Imports complete.")
print(f"TensorFlow Version: {tf.__version__}")
print(f"GPU Available: {len(tf.config.list_physical_devices('GPU')) > 0}")

In [None]:
# 2. Load Data
print(f"Loading Headway Matrix from {HEADWAY_MATRIX_FILE}...")
headway_matrix = np.load(HEADWAY_MATRIX_FILE)
print(f"Headway Matrix Shape: {headway_matrix.shape}") # (Time, Stations, Directions, Channels)

print(f"Loading Schedule Matrix from {SCHEDULE_MATRIX_FILE}...")
schedule_matrix = np.load(SCHEDULE_MATRIX_FILE)
print(f"Schedule Matrix Shape: {schedule_matrix.shape}") # (Time, Directions, Channels)

# Verify shapes match in time dimension
assert headway_matrix.shape[0] == schedule_matrix.shape[0], "Time dimensions do not match!"

# NaNs would propagate into the loss, so surface them right after loading.
for matrix_name, matrix in (("Headway", headway_matrix), ("Schedule", schedule_matrix)):
    nan_count = int(np.isnan(matrix).sum())
    if nan_count:
        print(f"Warning: {matrix_name} matrix contains {nan_count} NaN values.")

# Normalize Data (if not already normalized)
# Master plan says: "Scale all values to range [0, 1]"
# Fixed divisor (3600 s = 60 min), kept as a constant so predictions can be
# mapped back to seconds later. Defined before the checks because both the
# headway and the schedule branch need it.
# NOTE(review): if the data was normalized upstream, confirm that the upstream
# scale matches this value before using it for an inverse transform.
SCALER = 3600.0

# nanmax instead of max: with np.max a single NaN makes the maximum NaN, the
# `> 1.0` test below is then False and the data would be reported as
# "already normalized" without being scaled.
max_headway = np.nanmax(headway_matrix)
print(f"Max Headway in Data: {max_headway}")
max_schedule = np.nanmax(schedule_matrix)
print(f"Max Schedule Value: {max_schedule}")

# If max > 1, we need to normalize.
if max_headway > 1.0:
    print("Normalizing data to [0, 1] range...")
    if max_headway > SCALER:
        # Dividing by SCALER leaves these values above 1, which a sigmoid
        # output layer cannot reproduce.
        print(f"Warning: headways above {SCALER} remain > 1 after scaling.")
    headway_matrix = headway_matrix / SCALER
else:
    print("Data appears to be already normalized.")

# The schedule is checked on its own: it may still be unscaled even when the
# headway matrix already is.
if max_schedule > 1.0:
    schedule_matrix = schedule_matrix / SCALER

print("Data Loading & Normalization Complete.")

In [None]:
# 3. Create Datasets
def create_dataset(headway_data, schedule_data, start_index, end_index, batch_size,
                   lookback=None, forecast=None):
    """Build a batched ``((X, T), Y)`` dataset of aligned sliding windows.

    For the k-th sample, with ``i = start_index + k``:

    * ``X`` = ``headway_data[i - lookback : i]``   (history)
    * ``T`` = ``schedule_data[i : i + forecast]``  (schedule over the forecast window)
    * ``Y`` = ``headway_data[i : i + forecast]``   (target)

    Args:
        headway_data: Array indexed by time on axis 0.
        schedule_data: Array indexed by time on axis 0, same length as
            ``headway_data``.
        start_index: Time index of the first target step. Must be at least
            ``lookback`` so the first sample has a full history window.
        end_index: Exclusive upper bound for target steps. May equal
            ``len(headway_data)`` to use the data up to its last row.
        batch_size: Number of windows per batch.
        lookback: History window length; defaults to ``LOOKBACK_MINS``.
        forecast: Forecast window length; defaults to ``FORECAST_MINS``.

    Returns:
        A ``tf.data.Dataset`` yielding ``((X, T), Y)`` batches.

    Raises:
        ValueError: If the two arrays differ in length or the index range
            cannot hold at least one complete sample.
    """
    if lookback is None:
        lookback = LOOKBACK_MINS
    if forecast is None:
        forecast = FORECAST_MINS

    n_rows = len(headway_data)
    if len(schedule_data) != n_rows:
        raise ValueError(
            f"headway_data and schedule_data differ in length: {n_rows} vs {len(schedule_data)}"
        )
    if start_index < lookback:
        raise ValueError(
            f"start_index={start_index} leaves no room for a {lookback}-step history window"
        )
    if end_index > n_rows:
        raise ValueError(f"end_index={end_index} exceeds the data length {n_rows}")
    if end_index - start_index < forecast:
        raise ValueError(
            f"range [{start_index}, {end_index}) is shorter than one {forecast}-step target window"
        )

    def _windows(data, length, first, last):
        # Keras validates `end_index < len(data)`; "up to the last row" has to
        # be expressed as None, which it treats as len(data).
        if last >= len(data):
            last = None
        return keras.utils.timeseries_dataset_from_array(
            data=data,
            targets=None,
            sequence_length=length,
            sequence_stride=1,
            sampling_rate=1,
            batch_size=batch_size,
            start_index=first,
            end_index=last,
        )

    # The history range is shifted back by `lookback` and cut `forecast` steps
    # short, so all three datasets contain end_index - start_index - forecast + 1
    # windows and stay aligned sample-for-sample when zipped.
    ds_x = _windows(headway_data, lookback, start_index - lookback, end_index - forecast)
    ds_t = _windows(schedule_data, forecast, start_index, end_index)
    ds_y = _windows(headway_data, forecast, start_index, end_index)

    # Inputs: (X, T)   Target: Y
    return tf.data.Dataset.zip(((ds_x, ds_t), ds_y))

# Split Data
# Chronological 80/20 split: the first 80% of time steps train, the rest validate.
total_samples = len(headway_matrix)
train_split_idx = int(total_samples * 0.8)

# Ensure we have enough history for the first sample
start_idx = LOOKBACK_MINS 

# Report the range actually used: targets start at start_idx, not at 0.
print(f"Creating Training Dataset ({start_idx} to {train_split_idx})...")
train_ds = create_dataset(
    headway_matrix, 
    schedule_matrix, 
    start_index=start_idx, 
    end_index=train_split_idx, 
    batch_size=BATCH_SIZE
)

print(f"Creating Validation Dataset ({train_split_idx} to {total_samples})...")
val_ds = create_dataset(
    headway_matrix, 
    schedule_matrix, 
    start_index=train_split_idx, 
    end_index=total_samples, 
    batch_size=BATCH_SIZE
)

print("Datasets created.")
# Peek at one batch BEFORE adding cache(): reading only part of a cached
# dataset makes TensorFlow discard the partial cache and log a warning.
for (x, t), y in train_ds.take(1):
    print(f"Input X shape: {x.shape}")
    print(f"Input T shape: {t.shape}")
    print(f"Target Y shape: {y.shape}")

# Cache + prefetch for performance.
# Note: cache() keeps every window in memory; the windows overlap (stride 1),
# so the cached size is a multiple of the raw array size.
train_ds = train_ds.cache().prefetch(tf.data.AUTOTUNE)
val_ds = val_ds.cache().prefetch(tf.data.AUTOTUNE)

In [None]:
# 4. Build Model
def build_model(input_shape_x, input_shape_t, output_shape):
    """Assemble the two-input forecaster: ConvLSTM encoder + dense projector.

    Args:
        input_shape_x: Per-sample shape of the history input, passed to
            ``ConvLSTM2D`` (which expects time, rows, cols, channels).
        input_shape_t: Per-sample shape of the schedule input; it is flattened
            before being fused with the encoder output.
        output_shape: Per-sample shape of the prediction. Its first four
            entries determine the width of the final dense layer.

    Returns:
        An uncompiled ``keras.Model`` taking ``[history, schedule]`` and
        emitting a tensor of ``output_shape`` with values in (0, 1).
    """
    history_in = layers.Input(shape=input_shape_x, name='history_input')
    schedule_in = layers.Input(shape=input_shape_t, name='terminal_input')

    # --- Encoder ---
    # Two stacked ConvLSTM layers. The first keeps the time axis so the second
    # can consume a sequence; the second returns only its final state.
    encoded = history_in
    for keep_time_axis in (True, False):
        encoded = layers.ConvLSTM2D(
            filters=32,
            kernel_size=(3, 1),
            padding='same',
            return_sequences=keep_time_axis,
            activation='relu',
        )(encoded)

    # --- Fusion ---
    # Both branches are flattened to vectors and joined end to end.
    encoded_flat = layers.Flatten()(encoded)
    schedule_flat = layers.Flatten()(schedule_in)
    fused = layers.Concatenate()([encoded_flat, schedule_flat])

    # --- Decoder (Projector) ---
    # One dense unit per output element: the product of the first four
    # output axes.
    flat_output_size = 1
    for axis in range(4):
        flat_output_size *= output_shape[axis]

    # Sigmoid keeps predictions inside the normalized range.
    projected = layers.Dense(flat_output_size, activation='sigmoid')(fused)
    prediction = layers.Reshape(output_shape)(projected)

    return keras.Model(inputs=[history_in, schedule_in], outputs=prediction)

# Define shapes
# The model hard-wires the array ranks, so fail early with a readable message
# instead of a shape mismatch deep inside Keras.
assert headway_matrix.ndim == 4, (
    f"Expected headway matrix of rank 4 (Time, Stations, Directions, Channels), "
    f"got shape {headway_matrix.shape}"
)
assert schedule_matrix.ndim == 3, (
    f"Expected schedule matrix of rank 3 (Time, Directions, Channels), "
    f"got shape {schedule_matrix.shape}"
)

# Everything after the time axis is taken from the loaded arrays, so the
# shapes follow the data rather than a hard-coded station count.
headway_frame_shape = tuple(headway_matrix.shape[1:])
schedule_frame_shape = tuple(schedule_matrix.shape[1:])

# X: (LOOKBACK_MINS, Stations, Directions, Channels)
input_shape_x = (LOOKBACK_MINS, *headway_frame_shape)
# T: (FORECAST_MINS, Directions, Channels)
input_shape_t = (FORECAST_MINS, *schedule_frame_shape)
# Y: (FORECAST_MINS, Stations, Directions, Channels)
output_shape = (FORECAST_MINS, *headway_frame_shape)

print(f"Input X Shape: {input_shape_x}")
print(f"Input T Shape: {input_shape_t}")
print(f"Output Y Shape: {output_shape}")

model = build_model(input_shape_x, input_shape_t, output_shape)
model.summary()

# Compile
# MSE loss and the RMSE metric are both measured on the values as fed to the
# model (i.e. after any normalization applied at load time).
model.compile(
    optimizer=keras.optimizers.Adam(learning_rate=LEARNING_RATE),
    loss='mse',
    metrics=[keras.metrics.RootMeanSquaredError()]
)

In [None]:
# 5. Train
# Stop once validation loss has not improved for 10 epochs (restoring the best
# weights), and keep the best model seen so far on disk.
early_stopping = keras.callbacks.EarlyStopping(
    monitor='val_loss',
    patience=10,
    restore_best_weights=True,
)
checkpoint = keras.callbacks.ModelCheckpoint(
    "best_model.keras",
    monitor='val_loss',
    save_best_only=True,
)
callbacks = [early_stopping, checkpoint]

print("Starting training...")
history = model.fit(
    train_ds,
    validation_data=val_ds,
    epochs=EPOCHS,
    callbacks=callbacks,
)

# Plot History
# One panel per quantity; each entry is (title, [(history key, legend label), ...]).
panel_specs = [
    ('Loss (MSE)', [('loss', 'Train Loss'), ('val_loss', 'Val Loss')]),
    ('RMSE', [('root_mean_squared_error', 'Train RMSE'),
              ('val_root_mean_squared_error', 'Val RMSE')]),
]

fig, axes = plt.subplots(1, 2, figsize=(12, 4))
for ax, (panel_title, series) in zip(axes, panel_specs):
    for history_key, legend_label in series:
        ax.plot(history.history[history_key], label=legend_label)
    ax.set_title(panel_title)
    ax.legend()
plt.show()