In [None]:
# standard imports
import os
import sys
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
import joblib
from sklearn.preprocessing import RobustScaler

# === Mixed precision (float16 compute, float32 variables) ===
# Intended for GPUs with fast float16 support (A100/V100/T4), where the original
# note reports a 2-3x speedup. On a CPU-only host (e.g. the local-environment
# path of this notebook) float16 compute is slower, so the policy is only
# switched when TensorFlow can see at least one GPU.
from tensorflow.keras import mixed_precision

if tf.config.list_physical_devices('GPU'):
    policy = mixed_precision.Policy('mixed_float16')
    mixed_precision.set_global_policy(policy)
    print(f"‚úÖ Mixed Precision enabled: {policy.name}")
else:
    # Keep whatever policy is currently active and report it, so the dtype
    # lines below always describe the policy that is actually in effect.
    policy = mixed_precision.global_policy()
    print(f"Mixed Precision skipped (no GPU detected); using policy: {policy.name}")
print(f"   Compute dtype: {policy.compute_dtype}")
print(f"   Variable dtype: {policy.variable_dtype}")

In [None]:
# Environment configuration: resolve PROJECT_ROOT for either Colab or a local
# checkout, then make the project importable.

if "google.colab" not in sys.modules:
    print("üíª Running in Local Environment")
    # The notebook is assumed to live in <root>/notebooks, so the project
    # root is the parent of the current working directory.
    PROJECT_ROOT = os.path.abspath(os.path.join(os.getcwd(), ".."))
else:
    from google.colab import drive
    print("Running in Colab")
    drive.mount('/content/drive')

    # EDIT THIS: exact project folder path inside Google Drive
    PROJECT_ROOT = "/content/drive/MyDrive/Colab Notebooks/headway-prediction"

# Register the project root on sys.path once, so `src.*` imports resolve
_root_already_on_path = PROJECT_ROOT in sys.path
if not _root_already_on_path:
    sys.path.append(PROJECT_ROOT)
    print(f"added to sys.path: {PROJECT_ROOT}")

print(f"Project Root: {PROJECT_ROOT}")

Running in Colab
Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
added to sys.path: /content/drive/MyDrive/Colab Notebooks/headway-prediction
Project Root: /content/drive/MyDrive/Colab Notebooks/headway-prediction


In [None]:
# Validate imports: fail fast if the project's `src` package cannot be imported.
try:
    from src.config import Config
    from src.data.dataset import SubwayDataGenerator
    from src.models.st_convnet import HeadwayConvLSTM  # V1 architecture
    from src.training.trainer import Trainer  # used below to compile and fit the model
    from src.evaluator import Evaluator
    print("‚úÖ Success: All custom 'src' modules imported.")
except ImportError as e:
    print(f"‚ùå IMPORT ERROR: {e}")
    # Re-raise so execution stops at this cell. Swallowing the error would let
    # "Run All" continue and fail later with a NameError on Config, Trainer,
    # etc., which hides the real cause.
    raise

‚úÖ Success: All custom 'src' modules imported.


# Data Loading

In [None]:
# Data loading and scaling.
# Per the original notes, the paper normalizes headways to [0, 1] with min-max
# scaling (Section 3.1), so the generator's own normalization is turned off
# and a MinMaxScaler is applied here instead.
from sklearn.preprocessing import MinMaxScaler

config = Config()
config.DATA_DIR = os.path.join(PROJECT_ROOT, "data")
print(f"Loading data from {config.DATA_DIR}")

# Steps 1-2: build the generator and load the data un-normalized
data_gen = SubwayDataGenerator(config)
data_gen.load_data(normalize=False)
print(f"Raw max headway values: {data_gen.headway_data.max():.2f} min")

# Step 3: fit the scaler on the first 60% of timesteps only, so statistics
# from the later portion of the series do not influence the scaling
total_timesteps = len(data_gen.headway_data)
train_limit = int(total_timesteps * 0.6)

print(f"\nüìÑ Paper: 'headway values normalized to the interval [0, 1] using min-max scaling'")
print(f"Fitting MinMaxScaler on first {train_limit} steps")
flat_train = data_gen.headway_data[:train_limit].reshape(-1, 1)
scaler = MinMaxScaler(feature_range=(0, 1)).fit(flat_train)

# Step 4: apply the headway-fitted scaler to both arrays; each is flattened to
# a single column for the transform and then restored to its original shape
print("Transforming Headway and Schedule Data")
for _array_name in ("headway_data", "schedule_data"):
    _unscaled = getattr(data_gen, _array_name)
    _scaled = scaler.transform(_unscaled.reshape(-1, 1)).reshape(_unscaled.shape)
    setattr(data_gen, _array_name, _scaled)

# Step 5: persist the fitted scaler so it can be reused at inference time
_models_dir = os.path.join(PROJECT_ROOT, "models")
scaler_path = os.path.join(_models_dir, "minmax_scaler.pkl")
os.makedirs(_models_dir, exist_ok=True)
joblib.dump(scaler, scaler_path)
print(f"‚úÖ Scaler saved to {scaler_path}")

# Values outside [0, 1] here would come from timesteps beyond the fit window
print(f"\nScaled data range: [{data_gen.headway_data.min():.4f}, {data_gen.headway_data.max():.4f}]")

Loading data from /content/drive/MyDrive/Colab Notebooks/headway-prediction/data
Loading data from /content/drive/MyDrive/Colab Notebooks/headway-prediction/data...
Headway Shape: (264222, 66, 2, 1)
Schedule Shape: (264222, 2, 1)
Raw max headway values: 30.0 min (should be ~30.0)

Fitting RobustScaler on first 158533 steps
Transforming Headway and Schedule Data
Scaler saved to /content/drive/MyDrive/Colab Notebooks/headway-prediction/models/robust_scaler.pkl


# Baseline Experiment Configuration

In [None]:
# Experiment configuration.
# Batch size 128 matches notebook 7 (reported there at ~1 min/epoch); the paper
# used 32, which means 4x as many batches per epoch.

config.LOOKBACK_MINS = 30
config.FORECAST_MINS = 15
config.BATCH_SIZE = 128
config.EPOCHS = 100
config.LEARNING_RATE = 0.001

# Echo the effective settings so they are recorded with the run output
for _summary_line in (
    f'--- Configuration ---',
    f'Lookback: {config.LOOKBACK_MINS} minutes',
    f'Forecast: {config.FORECAST_MINS} minutes',
    f'Batch Size: {config.BATCH_SIZE}',
    f'Epochs: {config.EPOCHS}',
    f'Learning Rate: {config.LEARNING_RATE}',
):
    print(_summary_line)

# Chronological split boundaries: 60% train, 20% validation, 20% test
train_end = int(0.6 * total_timesteps)
val_end = int(0.8 * total_timesteps)

print(f"\nCreating datasets...")
# (start, end, shuffle) per split; only the training split is shuffled.
# The generator below is consumed in order: train, then val, then test.
_split_specs = (
    (0, train_end, True),
    (train_end, val_end, False),
    (val_end, None, False),
)
train_ds, val_ds, test_ds = (
    data_gen.make_dataset(start_index=_lo, end_index=_hi, shuffle=_shuffle)
    for _lo, _hi, _shuffle in _split_specs
)

# Sanity check: print the tensor shapes of a single training batch
for inputs, targets in train_ds.take(1):
    print(f"Input headway shape: {inputs['headway_input'].shape}")
    print(f"Input schedule shape: {inputs['schedule_input'].shape}")
    print(f"Target shape: {targets.shape}")

--- Baseline Run Config --- 
lookback: 30
batch: 64
epochs: 20
filters: 64

creating datasets...
Creating dataset from index 0 to 158533
Creating dataset from index 158533 to 211377
Creating dataset from index 211377 to 264177
Input headway shape: (64, 30, 66, 2, 1)
Target shape: (64, 15, 66, 2, 1)


# Model Build and Training

In [None]:
# Build the model, then compile and fit it through the Trainer wrapper.
print(f"\nüèóÔ∏è Building Model...")

builder = HeadwayConvLSTM(config)
model = builder.build_model()
model.summary()

# Trainer receives the directory under the project root named "models"
checkpoint_dir = os.path.join(PROJECT_ROOT, "models")
trainer = Trainer(model, config, checkpoint_dir=checkpoint_dir)
trainer.compile_model()

# Callback settings forwarded to Trainer.fit
_fit_options = {
    "patience": 10,           # early-stopping patience (epochs)
    "reduce_lr_patience": 5,  # epochs without improvement before lowering the LR
}

print("\nüöÄ Starting Training...")
history = trainer.fit(train_ds, val_ds, **_fit_options)

print(f"‚úÖ Training complete")


Building V2 Architecture
Starting Training...
Epoch 1/20
[1m2477/2477[0m [32m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m[37m[0m [1m499s[0m 197ms/step - loss: 0.1695 - mae: 0.4089 - mse: 0.3959 - val_loss: 0.1649 - val_mae: 0.3905 - val_mse: 0.3955 - learning_rate: 0.0010
Epoch 2/20
[1m 145/2477[0m [32m‚îÅ[0m[37m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m [1m6:46[0m 175ms/step - loss: 0.1515 - mae: 0.3731 - mse: 0.3572

# Model Evaluation

In [None]:
# Full model evaluation through the Evaluator module.
# Per the original notes, the pipeline is expected to produce:
#   1. a metrics summary (MAE/RMSE in seconds, production readiness)
#   2. training curves with a dual y-axis (normalized + seconds)
#   3. paper-style heatmap visualizations (Figure 7 style)

# The fitted scaler is handed over so the Evaluator can convert normalized
# values back to physical units (presumably seconds; confirm in src.evaluator).
evaluator = Evaluator(config, scaler=scaler)

# Output directory passed to the evaluation as `save_dir`
save_dir = os.path.join(PROJECT_ROOT, "images")
os.makedirs(save_dir, exist_ok=True)

_evaluation_inputs = {
    "model": trainer.model,
    "history": history,
    "test_dataset": test_ds,
    "save_dir": save_dir,
}
metrics = evaluator.full_evaluation(**_evaluation_inputs)

In [None]:
# Smoke test: verify that the paper architecture builds and trains quickly on
# synthetic data held in RAM.
import time
# Imported under an alias so the V1 `HeadwayConvLSTM` from src.models.st_convnet
# (used by the build/train cell) is not silently replaced in the notebook
# namespace. Without the alias, re-running that cell after this one would call
# the paper class with `config` as its first argument.
from src.models.st_convnet_paper import HeadwayConvLSTM as PaperHeadwayConvLSTM

# Paper parameters (Table 1, per the original note)
BATCH_SIZE = 32
N_STATIONS = 64  # Paper uses 64 distance bins
LOOKBACK = 30
FORECAST = 15

# Generate random data in RAM: 4 batches' worth of samples, uniform in [0, 1)
print("Generating synthetic data (matching paper dimensions)...")
X_headway = np.random.rand(BATCH_SIZE * 4, LOOKBACK, N_STATIONS, 2, 1).astype('float32')
X_schedule = np.random.rand(BATCH_SIZE * 4, FORECAST, 2, 1).astype('float32')
Y_target = np.random.rand(BATCH_SIZE * 4, FORECAST, N_STATIONS, 2, 1).astype('float32')

# Build & compile. A dedicated builder name keeps the `builder` of the main
# training run intact.
print("Building paper-faithful model...")
paper_builder = PaperHeadwayConvLSTM(n_stations=N_STATIONS, lookback=LOOKBACK, forecast=FORECAST)
test_model = paper_builder.build_model()
test_model.compile(optimizer='adam', loss='mse')
print(f"Model parameters: {test_model.count_params():,}")

# Time a single epoch over the synthetic data
print("\n‚è±Ô∏è Starting Smoke Test (4 batches, 1 epoch)...")
start = time.time()
test_model.fit([X_headway, X_schedule], Y_target, epochs=1, batch_size=BATCH_SIZE, verbose=1)
end = time.time()

print(f"\n‚úÖ Total Time: {end - start:.2f} seconds")
print(f"   Paper model has ~187K params (vs V1's 371K, V2's 389K)")
print(f"   Expected: ~5-15s on GPU with mixed precision")

In [None]:
# Quick training-speed check on a small slice of the real data.
# NOTE(review): this fits the same `model` object that was trained and
# evaluated above, so its weights keep changing here. Confirm that is intended
# before re-running the evaluation cell afterwards.
import time

_SPEED_TEST_EPOCHS = 5  # epochs to run; also the divisor for the per-epoch average

print("Creating MINI dataset for speed test...")
train_ds_mini = data_gen.make_dataset(start_index=0, end_index=2000, shuffle=True)
val_ds_mini = data_gen.make_dataset(start_index=2000, end_index=2500, shuffle=False)

print("Running 5 epochs to measure per-epoch time...")
start = time.time()
model.fit(
    train_ds_mini,
    validation_data=val_ds_mini,
    epochs=_SPEED_TEST_EPOCHS,
    verbose=1,
)
elapsed = time.time() - start

print(f"\nüìä Average time per epoch: {elapsed / _SPEED_TEST_EPOCHS:.1f}s")
print(f"   Paper target: ~45-60s/epoch on A100 with full dataset")