In [1]:
import os
import sys
import tensorflow as tf
import numpy as np

# Add project root to path so we can import src
def find_project_root(start_dir, marker='src'):
    """Locate the project root by searching upward for a `marker` directory.

    The notebook imports the `src` package, so the project root is taken to be
    the nearest directory (starting at `start_dir` and walking towards the
    filesystem root) that contains a `src/` folder. This keeps the notebook
    working no matter which folder the kernel was started in.

    Args:
        start_dir: Directory to start searching from (typically os.getcwd()).
        marker: Name of the directory that identifies the project root.

    Returns:
        Absolute path of the first ancestor (or `start_dir` itself) containing
        `marker`. If no such directory exists, the parent of `start_dir` is
        returned, which matches the previous hard-coded '..' behaviour.
    """
    start_dir = os.path.abspath(start_dir)
    candidate = start_dir
    while True:
        if os.path.isdir(os.path.join(candidate, marker)):
            return candidate
        parent = os.path.dirname(candidate)
        if parent == candidate:
            # Reached the filesystem root without finding the marker.
            break
        candidate = parent
    # Fallback: assume the notebook lives exactly one level below the root.
    return os.path.abspath(os.path.join(start_dir, '..'))


project_root = find_project_root(os.getcwd())
if project_root not in sys.path:
    sys.path.append(project_root)

from src.config import Config
from src.data.dataset import SubwayDataGenerator

# Report the runtime environment: TensorFlow build and the resolved project root
print(
    f"TensorFlow Version: {tf.__version__}",
    f"Project Root: {project_root}",
    sep="\n",
)

2026-01-05 16:32:55.089599: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: SSE4.1 SSE4.2, in other operations, rebuild TensorFlow with the appropriate compiler flags.


TensorFlow Version: 2.17.0
Project Root: /Users/danherman/Desktop/headway-prediction


In [6]:
# 1. Build the configuration object and the data generator
config = Config()

# Resolve the data folder against the project root so the paths are valid
# when the notebook is executed from 'notebooks/' (data lives in '../data/').
# Only DATA_DIR is assigned; headway_path / schedule_path are read back from
# the config below and are expected to follow it.
config.DATA_DIR = os.path.join(project_root, 'data')

# IMPORTANT: use a small batch size for this smoke test.
# fast_dataset batches with drop_remainder=True, so requesting 100 samples
# with the default batch size of 128 would produce 0 batches and an
# OUT_OF_RANGE error.
config.BATCH_SIZE = 32

# Echo the effective settings so they can be checked by eye
print(
    f"Headway Path: {config.headway_path}",
    f"Schedule Path: {config.schedule_path}",
    f"Test Batch Size: {config.BATCH_SIZE}",
    sep="\n",
)

# Create the generator and load the matrices it needs
gen = SubwayDataGenerator(config)
gen.load_data()

Headway Path: /Users/danherman/Desktop/headway-prediction/data/headway_matrix_full.npy
Schedule Path: /Users/danherman/Desktop/headway-prediction/data/schedule_matrix_full.npy
Test Batch Size: 32
Loading data from /Users/danherman/Desktop/headway-prediction/data...
Headway Shape: (264222, 66, 2, 1)
Schedule Shape: (264222, 2, 1)


In [7]:
# 2. Build a small dataset for verification
# Indices 0..100 are enough to exercise the windowing/batching logic
ds_test = gen.make_dataset(
    start_index=0,
    end_index=100,
)

print("Dataset created successfully.")

Creating dataset from index 0 to 100
Dataset created successfully.


In [8]:
# 3. Inspect one batch
print("\n--- Verifying Batch Shapes ---")

# Fetch exactly one batch. A `for` loop over `ds_test.take(1)` would silently
# skip its body if the dataset produced no batches, making this check look
# like it passed; fetching explicitly lets us fail loudly instead.
first_batch = next(iter(ds_test.take(1)), None)
if first_batch is None:
    raise RuntimeError(
        "ds_test yielded no batches. One possible cause is a BATCH_SIZE larger "
        "than the number of samples in the requested index range."
    )

inputs, targets = first_batch
headway_in = inputs['headway_input']
schedule_in = inputs['schedule_input']

# Expected per-sample shapes (every axis after the batch axis), from config
expected_headway_shape = (config.LOOKBACK_MINS, config.NUM_STATIONS, 2, 1)
expected_schedule_shape = (config.FORECAST_MINS, 2, 1)
expected_target_shape = (config.FORECAST_MINS, config.NUM_STATIONS, 2, 1)

print(f"Headway Input Shape:  {headway_in.shape}")
print(f"   Expected: (Batch, {config.LOOKBACK_MINS}, {config.NUM_STATIONS}, 2, 1)")

print(f"Schedule Input Shape: {schedule_in.shape}")
print(f"   Expected: (Batch, {config.FORECAST_MINS}, 2, 1)")

print(f"Target Output Shape:  {targets.shape}")
print(f"   Expected: (Batch, {config.FORECAST_MINS}, {config.NUM_STATIONS}, 2, 1)")

# 4. Sanity Check: Ensure inputs are not all zeros
print(f"\nHeadway Mean: {tf.reduce_mean(headway_in):.4f}")
print(f"Target Mean:  {tf.reduce_mean(targets):.4f}")

# Check for NaNs
print(f"Headway has NaNs: {np.isnan(headway_in.numpy()).any()}")
print(f"Target has NaNs:  {np.isnan(targets.numpy()).any()}")

# Enforce the expected shapes rather than relying on eyeballing the printout.
# Done after the prints so the diagnostics above are visible on failure.
assert tuple(headway_in.shape[1:]) == expected_headway_shape, (
    f"Unexpected headway input shape: {headway_in.shape}"
)
assert tuple(schedule_in.shape[1:]) == expected_schedule_shape, (
    f"Unexpected schedule input shape: {schedule_in.shape}"
)
assert tuple(targets.shape[1:]) == expected_target_shape, (
    f"Unexpected target shape: {targets.shape}"
)


--- Verifying Batch Shapes ---
Headway Input Shape:  (32, 30, 66, 2, 1)
   Expected: (Batch, 30, 66, 2, 1)
Schedule Input Shape: (32, 15, 2, 1)
   Expected: (Batch, 15, 2, 1)
Target Output Shape:  (32, 15, 66, 2, 1)
   Expected: (Batch, 15, 66, 2, 1)

Headway Mean: 0.0993
Target Mean:  0.1146
Headway has NaNs: False
Target has NaNs:  False


2026-01-05 16:36:54.415795: I tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence
