In [1]:
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
import os
import time
import math
from google.colab import drive

# --- 1. Configuration ---
# Input files
FILE_PATH = "/content/drive/MyDrive/Youbike_Master_Project/YouBike_Demand_Forecast/data/gongguan_model_ready_features.parquet.gz"
DISTANCE_CSV_PATH = "/content/drive/MyDrive/Youbike_Master_Project/YouBike_Demand_Forecast/data/youbike_distances.csv"

# Directory for processed GMAN data
GMAN_DATA_DIR = "/content/drive/MyDrive/Youbike_Master_Project/GMAN_data/"

# Output files that will be created
TRAFFIC_FILE = os.path.join(GMAN_DATA_DIR, "youbike_traffic_full.h5")
SE_FILE = os.path.join(GMAN_DATA_DIR, "SE_gman_node2vec.txt") # This is the final SE file

# Model saving directories
MODEL_DIR = "/content/drive/MyDrive/Youbike_Master_Project/GMAN_Checkpoints_TF2/"
LOG_DIR = "/content/drive/MyDrive/Youbike_Master_Project/GMAN_logs_TF2/"

# --- 2. Mount Drive and Create Directories ---
drive.mount('/content/drive')
os.makedirs(GMAN_DATA_DIR, exist_ok=True)
os.makedirs(MODEL_DIR, exist_ok=True)
os.makedirs(LOG_DIR, exist_ok=True)
print("✅ Google Drive mounted and directories are ready.")
print(f"TensorFlow Version: {tf.__version__}")

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
✅ Google Drive mounted and directories are ready.
TensorFlow Version: 2.19.0


In [2]:
# ==============================================================================
# CELL 2 (FINAL, FULLY UPDATED): DATA PREPARATION PIPELINE
# ==============================================================================
!pip install -q node2vec
import pandas as pd
import numpy as np
import networkx as nx
from node2vec import Node2Vec

# --- Step 1: Create the Traffic File (Optimized to load only 3 columns) ---
print("--- Step 1: Creating the traffic matrix from the full dataset... ---")
traffic_columns = ['time', 'sno', 'occupancy_ratio']
data = pd.read_parquet(FILE_PATH, columns=traffic_columns)
data['time'] = pd.to_datetime(data['time'])
traffic_df = data.pivot(index='time', columns='sno', values='occupancy_ratio')
traffic_df = traffic_df.resample('10T').mean().ffill()
traffic_df.to_hdf(TRAFFIC_FILE, key='df', mode='w')
print(f"✅ Traffic data shape: {traffic_df.shape}. Saved to: {TRAFFIC_FILE}")

# --- Step 2: Create the Normalized Adjacency Matrix from the distance CSV ---
print("\n--- Step 2: Creating the normalized adjacency matrix... ---")
# This creates an intermediate file that node2vec will use
normalized_adj_path = os.path.join(GMAN_DATA_DIR, 'SE_gman_normalized.npy')

try:
    distance_df = pd.read_csv(DISTANCE_CSV_PATH)
    distance_matrix_df = distance_df.pivot_table(values='distance', index='from_station_id', columns='to_station_id')
    dist_mx = distance_matrix_df.to_numpy()

    distances = dist_mx[~np.isclose(dist_mx, 0)]
    std = distances.std()
    adj_mx = np.exp(-np.square(dist_mx / std))

    np.save(normalized_adj_path, adj_mx)
    print(f"✅ Normalized adjacency matrix created at: {normalized_adj_path}")
except FileNotFoundError:
    print(f"❌ ERROR: 'youbike_distances.csv' not found. Please ensure it is at the correct path in the DISTANCE_CSV_PATH variable.")
    raise

# --- Step 3: Run Node2Vec on the Adjacency Matrix with Paper's Hyperparameters ---
print("\n--- Step 3: Running Node2Vec to learn final spatial embeddings... ---")
graph = nx.from_numpy_array(adj_mx)

# Use the hyperparameters from the original generateSE.py
node2vec = Node2Vec(
    graph,
    dimensions=64,
    walk_length=80,    # Updated
    num_walks=100,     # Updated
    p=2,               # Updated
    q=1,               # Updated
    workers=4
)

# Fit the model with the specified number of training iterations (epochs)
model = node2vec.fit(window=10, min_count=1, batch_words=4, epochs=1000) # Updated

# Save the final embeddings to the path specified in Cell 1
model.wv.save_word2vec_format(SE_FILE)
print(f"✅ Node2Vec spatial embeddings saved to: {SE_FILE}")

--- Step 1: Creating the traffic matrix from the full dataset... ---


  traffic_df = traffic_df.resample('10T').mean().ffill()


✅ Traffic data shape: (59760, 105). Saved to: /content/drive/MyDrive/Youbike_Master_Project/GMAN_data/youbike_traffic_full.h5

--- Step 2: Creating the normalized adjacency matrix... ---
✅ Normalized adjacency matrix created at: /content/drive/MyDrive/Youbike_Master_Project/GMAN_data/SE_gman_normalized.npy

--- Step 3: Running Node2Vec to learn final spatial embeddings... ---


Computing transition probabilities:   0%|          | 0/105 [00:00<?, ?it/s]

✅ Node2Vec spatial embeddings saved to: /content/drive/MyDrive/Youbike_Master_Project/GMAN_data/SE_gman_node2vec.txt


In [3]:
# --- Utility Functions ---
def log_string(log, string):
    log.write(string + '\n'); log.flush(); print(string)

def metric(pred, label):
    mask = np.not_equal(label, 0).astype(np.float32)
    mask /= np.mean(mask)
    mae = np.mean(np.abs(pred - label) * mask)
    rmse = np.sqrt(np.mean((pred - label)**2 * mask))
    return mae, rmse

def seq2instance(data, P, Q):
    num_step, dims = data.shape
    num_sample = num_step - P - Q + 1
    x = np.zeros((num_sample, P, dims)); y = np.zeros((num_sample, Q, dims))
    for i in range(num_sample):
        x[i] = data[i: i + P]; y[i] = data[i + P: i + P + Q]
    return x, y

def load_data(args):
    df = pd.read_hdf(args.traffic_file)

    # --- THIS BLOCK IS MODIFIED ---
    # Define the cutoff dates for training, validation, and testing
    train_cutoff = '2025-03-04 00:00:00'
    val_cutoff = '2025-05-04 00:00:00'

    # Split the main traffic dataframe by date
    train_df = df[df.index < train_cutoff]
    val_df = df[(df.index >= train_cutoff) & (df.index < val_cutoff)]
    test_df = df[df.index >= val_cutoff]

    # Convert the sliced dataframes to numpy arrays
    train = train_df.values
    val = val_df.values
    test = test_df.values
    # --- END OF MODIFICATION ---

    # The rest of the traffic data processing remains the same
    trainX, trainY = seq2instance(train, args.P, args.Q)
    valX, valY = seq2instance(val, args.P, args.Q)
    testX, testY = seq2instance(test, args.P, args.Q)
    mean, std = np.mean(trainX), np.std(trainX)
    trainX = (trainX - mean) / std; valX = (valX - mean) / std; testX = (testX - mean) / std

    # Loading the SE file remains the same
    with open(args.SE_file, 'r') as f:
        lines = f.readlines()
        temp = lines[0].split(' ')
        N, dims = int(temp[0]), int(temp[1])
        SE = np.zeros((N, dims), dtype=np.float32)
        for line in lines[1:]:
            temp = line.strip().split(' ')
            index = int(temp[0])
            SE[index] = [float(val) for val in temp[1:]]

    # --- THIS BLOCK FOR TEMPORAL FEATURES IS ALSO MODIFIED ---
    # Create temporal features for the entire dataset first
    Time = df.index
    dayofweek = np.reshape(Time.weekday, (-1, 1))
    timeofday = (Time.hour * 60 + Time.minute) // 10
    timeofday = np.reshape(timeofday, (-1, 1))
    Time = np.concatenate((dayofweek, timeofday), axis=-1)

    # Now, split the temporal features using the same date cutoffs
    train_time = Time[:len(train)]
    val_time = Time[len(train) : len(train) + len(val)]
    test_time = Time[len(train) + len(val):]

    # The rest of the temporal feature processing remains the same
    trainTE = np.concatenate(seq2instance(train_time, args.P, args.Q), axis=1).astype(np.int32)
    valTE = np.concatenate(seq2instance(val_time, args.P, args.Q), axis=1).astype(np.int32)
    testTE = np.concatenate(seq2instance(test_time, args.P, args.Q), axis=1).astype(np.int32)
    # --- END OF MODIFICATION ---

    return trainX, trainTE, trainY, valX, valTE, valY, testX, testTE, testY, SE, mean, std

# --- Keras Layers for GMAN ---
# --- THIS CLASS IS UPDATED TO INCLUDE BATCH NORMALIZATION ---

class FullyConnected(layers.Layer):
    def __init__(self, units, activations, use_bias=True):
        super(FullyConnected, self).__init__()
        if isinstance(units, int):
            units = [units]
            activations = [activations]

        self.convs = []
        self.batch_norms = []
        self.activations = activations # Store the activations list

        for num_unit in units:
            self.convs.append(layers.Conv2D(num_unit, kernel_size=1, use_bias=use_bias))
            self.batch_norms.append(layers.BatchNormalization())

    def call(self, x, training=None):
        # Iterate through the layers, applying activations as specified
        for i in range(len(self.convs)):
            x = self.convs[i](x)
            x = self.batch_norms[i](x, training=training)

            # --- THIS IS THE FIX ---
            # Only apply an activation if it's not None
            if self.activations[i] is not None:
                x = self.activations[i](x)
            # --- END OF FIX ---

        return x

class STEmbedding(layers.Layer):
    def __init__(self, D):
        super(STEmbedding, self).__init__()
        self.FC_se = FullyConnected([D, D], [tf.nn.relu, None])
        self.day_embed = layers.Embedding(7, D)
        self.time_embed = layers.Embedding(24 * 6, D) # 144 10-minute intervals in a day

    def call(self, SE, TE):
        # 1. Process the Spatial Embedding (SE)
        # Shape: (num_stations, 2) -> (1, 1, num_stations, D)
        SE = self.FC_se(tf.expand_dims(tf.expand_dims(SE, 0), 0))

        # 2. Process the Temporal Embeddings (TE)
        # Shape: (batch_size, seq_len, D)
        day_TE = self.day_embed(TE[..., 0])
        time_TE = self.time_embed(TE[..., 1])

        # --- THIS IS THE FIX ---
        # 3. Reshape TE to make it broadcastable with SE
        # We add a dimension for the number of stations.
        # Shape: (batch_size, seq_len, D) -> (batch_size, seq_len, 1, D)
        day_TE = tf.expand_dims(day_TE, axis=2)
        time_TE = tf.expand_dims(time_TE, axis=2)
        # --- END OF FIX ---

        # 4. Add the embeddings together. Broadcasting handles the rest.
        # (1, 1, num_stations, D) + (batch, seq, 1, D) -> (batch, seq, num_stations, D)
        return SE + day_TE + time_TE


class SpatialAttention(layers.Layer):
    def __init__(self, K, d):
        super(SpatialAttention, self).__init__()
        self.K, self.d = K, d
        self.D = K * d
        self.FC_q = FullyConnected(self.D, tf.nn.relu)
        self.FC_k = FullyConnected(self.D, tf.nn.relu)
        self.FC_v = FullyConnected(self.D, tf.nn.relu) # For the value tensor
        self.FC_o = FullyConnected(self.D, tf.nn.relu)

    def call(self, X, STE):
        # Concatenate X and STE first
        X_with_STE = tf.concat([X, STE], axis=-1)

        query = self.FC_q(X_with_STE)
        key = self.FC_k(X_with_STE)

        # --- THIS IS THE FINAL CORRECTION ---
        # The 'value' must also be derived from the concatenated tensor
        value = self.FC_v(X_with_STE)
        # --- END OF CORRECTION ---

        query = tf.concat(tf.split(query, self.K, axis=-1), axis=0)
        key = tf.concat(tf.split(key, self.K, axis=-1), axis=0)
        value = tf.concat(tf.split(value, self.K, axis=-1), axis=0)

        attention = tf.matmul(query, key, transpose_b=True) / (self.d ** 0.5)
        attention = tf.nn.softmax(attention)

        X_ = tf.matmul(attention, value)
        X_ = tf.concat(tf.split(X_, self.K, axis=0), axis=-1)

        return self.FC_o(X_)

class TemporalAttention(layers.Layer):
    def __init__(self, K, d):
        super(TemporalAttention, self).__init__()
        self.K, self.d = K, d
        self.D = K * d
        self.FC_q = FullyConnected(self.D, tf.nn.relu)
        self.FC_k = FullyConnected(self.D, tf.nn.relu)
        self.FC_v = FullyConnected(self.D, tf.nn.relu) # For the value tensor
        self.FC_o = FullyConnected(self.D, tf.nn.relu)

    def call(self, X, STE):
        # Concatenate X and STE first
        X_with_STE = tf.concat([X, STE], axis=-1)

        query = self.FC_q(X_with_STE)
        key = self.FC_k(X_with_STE)

        # --- THIS IS THE FINAL CORRECTION ---
        # The 'value' must also be derived from the concatenated tensor
        value = self.FC_v(X_with_STE)
        # --- END OF CORRECTION ---

        query = tf.concat(tf.split(query, self.K, axis=-1), axis=0)
        key = tf.concat(tf.split(key, self.K, axis=-1), axis=0)
        value = tf.concat(tf.split(value, self.K, axis=-1), axis=0)

        query = tf.transpose(query, (0, 2, 1, 3))
        key = tf.transpose(key, (0, 2, 3, 1))
        value = tf.transpose(value, (0, 2, 1, 3))

        attention = tf.matmul(query, key) / (self.d ** 0.5)
        attention = tf.nn.softmax(attention)

        X_ = tf.matmul(attention, value)
        X_ = tf.transpose(X_, (0, 2, 1, 3))
        X_ = tf.concat(tf.split(X_, self.K, axis=0), axis=-1)

        return self.FC_o(X_)

class GatedFusion(layers.Layer):
    def __init__(self, D):
        super(GatedFusion, self).__init__()
        # --- THIS BLOCK IS MODIFIED ---
        # Create two separate FullyConnected layers, one for spatial and one for temporal
        self.FC_xs = FullyConnected(D, activations=None, use_bias=False)
        self.FC_xt = FullyConnected(D, activations=None, use_bias=True)
        # --- END OF MODIFICATION ---

    def call(self, X_s, X_t):
        # --- THIS BLOCK IS MODIFIED TO MATCH THE ORIGINAL CODE ---
        # Project spatial and temporal outputs separately
        XS = self.FC_xs(X_s)
        XT = self.FC_xt(X_t)

        # Add the projected outputs and apply the sigmoid gate
        z = tf.nn.sigmoid(tf.add(XS, XT))

        # Fuse the original (non-projected) attention outputs
        H = z * X_s + (1 - z) * X_t

        # The final projection layer from the original code's GatedFusion function
        # is now implicitly handled by the subsequent layers in the STAttBlock's residual connection
        return H

# ==============================================================================
# CORRECTED ST-ATTENTION BLOCK (Replace the old class with this)
# ==============================================================================

class STAttBlock(layers.Layer):
    def __init__(self, K, d):
        super(STAttBlock, self).__init__()
        self.spatial_attention = SpatialAttention(K, d)
        self.temporal_attention = TemporalAttention(K, d)
        self.gated_fusion = GatedFusion(K * d)

    def call(self, X, STE):
        X_s = self.spatial_attention(X, STE)
        X_t = self.temporal_attention(X, STE)

        # --- THIS IS THE FINAL FIX ---
        # The output of the gated fusion is computed
        H = self.gated_fusion(X_s, X_t)
        # The residual connection is added: input + output
        return X + H
        # --- END OF FIX ---

class TransformAttention(layers.Layer):
    def __init__(self, K, d):
        super(TransformAttention, self).__init__()
        self.K, self.d = K, d
        self.D = K * d
        self.FC_q = FullyConnected(self.D, tf.nn.relu)
        self.FC_k = FullyConnected(self.D, tf.nn.relu)
        self.FC_v = FullyConnected(self.D, tf.nn.relu)
        self.FC_o = FullyConnected(self.D, tf.nn.relu)
    def call(self, X, STE_P, STE_Q):
        query = self.FC_q(STE_Q); key = self.FC_k(STE_P); value = self.FC_v(X)
        query = tf.concat(tf.split(query, self.K, axis=-1), axis=0)
        key = tf.concat(tf.split(key, self.K, axis=-1), axis=0)
        value = tf.concat(tf.split(value, self.K, axis=-1), axis=0)
        query = tf.transpose(query, (0, 2, 1, 3))
        key = tf.transpose(key, (0, 2, 3, 1))
        value = tf.transpose(value, (0, 2, 1, 3))
        attention = tf.matmul(query, key) / (self.d ** 0.5)
        attention = tf.nn.softmax(attention)
        X_ = tf.matmul(attention, value)
        X_ = tf.transpose(X_, (0, 2, 1, 3))
        X_ = tf.concat(tf.split(X_, self.K, axis=0), axis=-1)
        return self.FC_o(X_)

class GMAN(keras.Model):
    def __init__(self, L, K, d, P, Q):
        super(GMAN, self).__init__()
        self.L, self.K, self.d, self.P, self.Q = L, K, d, P, Q
        self.D = K * d
        self.FC_input = FullyConnected([self.D, self.D], [tf.nn.relu, None])
        self.STEmbedding = STEmbedding(self.D)
        self.encoder = [STAttBlock(K, d) for _ in range(L)]
        self.transform_attention = TransformAttention(K, d)
        self.decoder = [STAttBlock(K, d) for _ in range(L)]
        self.FC_output = FullyConnected([self.D, 1], [tf.nn.relu, None])
    def call(self, X, SE, TE):
        X = self.FC_input(tf.expand_dims(X, -1))
        STE = self.STEmbedding(SE, TE)
        STE_P, STE_Q = STE[:, :self.P], STE[:, self.P:]
        for block in self.encoder: X = block(X, STE_P)
        X = self.transform_attention(X, STE_P, STE_Q)
        for block in self.decoder: X = block(X, STE_Q)
        return tf.squeeze(self.FC_output(X), -1)

print("✅ GMAN model and utility functions defined for TensorFlow 2.x.")

✅ GMAN model and utility functions defined for TensorFlow 2.x.


In [None]:
# ==============================================================================
# Final Cell (UPDATED): GMAN Training with Original Hyperparameters & Scheduler
# ==============================================================================
from tensorflow.keras import mixed_precision
mixed_precision.set_global_policy('mixed_float16')

import matplotlib.pyplot as plt
import time
import math
import numpy as np
import pandas as pd
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

# --- 1. Set Hyperparameters (Aligned with original train.py) ---
class Args:
    P = 6; Q = 6; L = 3; K = 8; d = 8
    # Using date-based split, so ratios are not needed here
    batch_size = 32          # UPDATED to match original paper's default
    max_epoch = 50
    patience = 10
    learning_rate = 0.001    # UPDATED to your requested value
    decay_epoch = 5          # From original train.py
    decay_rate = 0.7         # From original train.py
    traffic_file = TRAFFIC_FILE
    SE_file = SE_FILE
    model_file = os.path.join(MODEL_DIR, "gman_model_tf2.weights.h5")

args = Args()

# --- 2. Load Data ---
trainX, trainTE, trainY, valX, valTE, valY, testX, testTE, testY, SE, mean, std = load_data(args)
print(f'Number of training samples: {trainX.shape[0]}')

# --- 3. Build Model, Optimizer, and ExponentialDecay Scheduler ---
model = GMAN(args.L, args.K, args.d, args.P, args.Q)

# --- THIS BLOCK IS UPDATED ---
# Create an ExponentialDecay learning rate scheduler
num_train_steps_per_epoch = math.ceil(trainX.shape[0] / args.batch_size)
decay_steps = args.decay_epoch * num_train_steps_per_epoch

lr_schedule = tf.keras.optimizers.schedules.ExponentialDecay(
    args.learning_rate,
    decay_steps=decay_steps,
    decay_rate=args.decay_rate,
    staircase=True)

optimizer = tf.keras.optimizers.Adam(learning_rate=lr_schedule, clipnorm=1.0)
# --- END OF UPDATE ---

loss_fn = tf.keras.losses.MeanAbsoluteError()

# --- 4. Training Loop ---
print("\n--- Starting Model Training with Exponential Decay Scheduler ---")
wait = 0
val_loss_min = np.inf

# Add shuffling to the training dataset
buffer_size = trainX.shape[0]

train_dataset = tf.data.Dataset.from_tensor_slices((trainX, trainTE, trainY)).shuffle(buffer_size).batch(args.batch_size).cache().prefetch(tf.data.AUTOTUNE)
val_dataset = tf.data.Dataset.from_tensor_slices((valX, valTE, valY)).batch(args.batch_size).cache().prefetch(tf.data.AUTOTUNE)

for epoch in range(args.max_epoch):
    start_time = time.time()

    # Training step (no changes needed here)
    for step, (x_batch, te_batch, y_batch) in enumerate(train_dataset):
        with tf.GradientTape() as tape:
            pred = model(x_batch, SE, te_batch) * std + mean
            loss = loss_fn(y_batch, pred)
        grads = tape.gradient(loss, model.trainable_weights)
        optimizer.apply_gradients(zip(grads, model.trainable_weights))

    # Validation loop (no changes needed here)
    val_loss = 0
    num_val_batches = 0
    for x_batch, te_batch, y_batch in val_dataset:
        pred = model(x_batch, SE, te_batch, training=False) * std + mean
        val_loss += loss_fn(y_batch, pred)
        num_val_batches += 1
    val_loss /= num_val_batches

    # REPLACE WITH THIS LINE:
    print(f'Epoch {epoch+1:2d}, Val MAE: {val_loss:.4f}, Time: {time.time() - start_time:.2f}s, LR: {lr_schedule(optimizer.iterations).numpy():.6f}')

    # Early stopping (no changes needed here)
    if val_loss < val_loss_min:
        wait = 0
        val_loss_min = val_loss
        model.save_weights(args.model_file)
        print(f'  > Val MAE improved to {val_loss_min:.4f}, saving model.')
    else:
        wait += 1

    if wait >= args.patience:
        print(f'  > Early stop at epoch {epoch+1}')
        break

Number of training samples: 43765

--- Starting Model Training with Exponential Decay Scheduler ---
Epoch  1, Val MAE: 0.0859, Time: 2648.69s, LR: 0.001000
  > Val MAE improved to 0.0859, saving model.
Epoch  2, Val MAE: 0.0811, Time: 2620.64s, LR: 0.001000
  > Val MAE improved to 0.0811, saving model.
Epoch  3, Val MAE: 0.0799, Time: 2612.75s, LR: 0.001000
  > Val MAE improved to 0.0799, saving model.
Epoch  4, Val MAE: 0.0796, Time: 2614.92s, LR: 0.001000
  > Val MAE improved to 0.0796, saving model.
Epoch  5, Val MAE: 0.0780, Time: 2610.21s, LR: 0.000700
  > Val MAE improved to 0.0780, saving model.
Epoch  6, Val MAE: 0.0764, Time: 2615.30s, LR: 0.000700
  > Val MAE improved to 0.0764, saving model.
Epoch  7, Val MAE: 0.0754, Time: 2612.87s, LR: 0.000700
  > Val MAE improved to 0.0754, saving model.
Epoch  8, Val MAE: 0.0751, Time: 2612.34s, LR: 0.000700
  > Val MAE improved to 0.0751, saving model.
Epoch  9, Val MAE: 0.0749, Time: 2611.39s, LR: 0.000700
  > Val MAE improved to 0.07

In [None]:
# --- 5. Final Evaluation on TEST SET ---
import time # Ganz am Anfang deines Scripts importieren
print("\n\n--- Final Detailed Evaluation on Test Set ---")
model.load_weights(args.model_file)

# Generate predictions using a manual loop
print("\nGenerating predictions on the test set...")
# --- ZEITMESSUNG START ---
start_time = time.time()

test_preds = []
test_dataset = tf.data.Dataset.from_tensor_slices((testX, testTE)).batch(args.batch_size)
for x_batch, te_batch in test_dataset:
    pred_batch = model(x_batch, SE, te_batch, training=False) * std + mean
    test_preds.append(pred_batch.numpy())
test_preds = np.concatenate(test_preds, axis=0)
# --- ZEITMESSUNG ENDE ---
end_time = time.time()
prediction_duration = end_time - start_time
print("✅ Test set predictions generated.")

print("✅ Test set predictions generated.")
# Gib die gemessene Zeit aus
print(f"   -> Prediction took {prediction_duration:.4f} seconds.")

# --- 5.1: Per-Horizon and Overall Metrics ---
print('\n--- Test Set Metrics for Each Horizon ---')
for q in range(args.Q):
    y_true_step = testY[:, q, :]
    y_pred_step = test_preds[:, q, :]
    mae = mean_absolute_error(y_true_step, y_pred_step)
    mse = mean_squared_error(y_true_step, y_pred_step)
    rmse = np.sqrt(mse)
    r2 = r2_score(y_true_step, y_pred_step, multioutput='uniform_average')
    print(f"  Horizon t+{q+1}: MAE={mae:.4f}, MSE={mse:.4f}, RMSE={rmse:.4f}, R²={r2:.4f}")

print("\n--- Overall Average Test Set Metrics (All Stations) ---")
y_true_flat = testY.ravel(); y_pred_flat = test_preds.ravel()
print(f"  Overall MAE:  {mean_absolute_error(y_true_flat, y_pred_flat):.4f}")
print(f"  Overall MSE:  {mean_squared_error(y_true_flat, y_pred_flat):.4f}")
print(f"  Overall RMSE: {np.sqrt(mean_squared_error(y_true_flat, y_pred_flat)):.4f}")
print(f"  Overall R²:   {r2_score(y_true_flat, y_pred_flat):.4f}")

# --- 5.2: Identify and Analyze Best/Worst Stations ---
print("\n\n--- Best vs. Worst Station Analysis ---")
num_stations = testY.shape[2]
station_errors = [{'station_idx': i, 'mae': np.mean(np.abs(testY[:, :, i] - test_preds[:, :, i]))} for i in range(num_stations)]
errors_df = pd.DataFrame(station_errors).sort_values('mae')
best_stations_indices = errors_df.head(3)['station_idx'].tolist()
worst_stations_indices = errors_df.tail(3)['station_idx'].tolist()

def calculate_subset_metrics(indices):
    y_true_subset = testY[:, :, indices].ravel()
    y_pred_subset = test_preds[:, :, indices].ravel()
    mae = mean_absolute_error(y_true_subset, y_pred_subset)
    rmse = np.sqrt(mean_squared_error(y_true_subset, y_pred_subset))
    r2 = r2_score(y_true_subset, y_pred_subset)
    return mae, rmse, r2

metrics_best_3 = calculate_subset_metrics(best_stations_indices)
metrics_worst_3 = calculate_subset_metrics(worst_stations_indices)
print(f"\n--- Metrics for the 3 BEST Stations (Indices: {best_stations_indices}) ---")
print(f"  Best 3 MAE: {metrics_best_3[0]:.4f}, RMSE: {metrics_best_3[1]:.4f}, R²: {metrics_best_3[2]:.4f}")
print(f"\n--- Metrics for the 3 WORST Stations (Indices: {worst_stations_indices}) ---")
print(f"  Worst 3 MAE: {metrics_worst_3[0]:.4f}, RMSE: {metrics_worst_3[1]:.4f}, R²: {metrics_worst_3[2]:.4f}")

# --- 5.3: Plot Forecasts for Best and Worst Stations ---
horizon_to_plot = 5 # Corresponds to t+6 (0-indexed)

def plot_station_forecast(station_idx, performance_type, color):
    steps_to_plot = 288 # 2 days
    start_index = np.random.randint(0, testY.shape[0] - steps_to_plot)
    y_actual_slice = testY[start_index : start_index + steps_to_plot, horizon_to_plot, station_idx]
    y_pred_slice = test_preds[start_index : start_index + steps_to_plot, horizon_to_plot, station_idx]
    mae = errors_df[errors_df['station_idx'] == station_idx]['mae'].iloc[0]

    # Filter out invalid typhoon data for plotting
    valid_indices = y_actual_slice >= 0

    plt.figure(figsize=(15, 6))
    plt.plot(y_actual_slice[valid_indices], label='Actual Values', color='blue')
    plt.plot(y_pred_slice[valid_indices], label='Predicted Values', color=color, linestyle='--')
    plt.title(f'{performance_type} Station (Index: {station_idx}) | Horizon t+{horizon_to_plot+1} | Avg MAE: {mae:.4f}', fontsize=16)
    plt.xlabel('Time Steps (10-minute intervals)'); plt.ylabel('Occupancy Ratio')
    plt.legend(); plt.grid(True);
    plt.show()

print(f"\n--- Visualizing t+6 Forecasts for BEST Stations ---")
for station_idx in best_stations_indices:
    plot_station_forecast(station_idx, "BEST", "green")

print(f"\n--- Visualizing t+6 Forecasts for WORST Stations ---")
for station_idx in worst_stations_indices:
    plot_station_forecast(station_idx, "WORST", "red")

In [None]:
# ==============================================================================
# Final Cell: Comprehensive Evaluation on the TEST SET
# ==============================================================================
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# --- 1. Generate Predictions on the TEST SET ---
print("\n--- Final Evaluation on Test Set ---")
model.load_weights(args.model_file)

# Generate predictions using a manual loop
print("\nGenerating predictions on the test set...")
test_preds = []
test_dataset = tf.data.Dataset.from_tensor_slices((testX, testTE)).batch(args.batch_size)
for x_batch, te_batch in test_dataset:
    pred_batch = model(x_batch, SE, te_batch, training=False) * std + mean
    test_preds.append(pred_batch.numpy())
test_preds = np.concatenate(test_preds, axis=0)
print("✅ Test set predictions generated.")


# --- 2. Per-Horizon and Overall Metrics ---
print('\n--- Test Set Metrics for Each Horizon ---')
for q in range(args.Q):
    y_true_step = testY[:, q, :]
    y_pred_step = test_preds[:, q, :]

    mae = mean_absolute_error(y_true_step, y_pred_step)
    mse = mean_squared_error(y_true_step, y_pred_step)
    rmse = np.sqrt(mse)
    r2 = r2_score(y_true_step, y_pred_step, multioutput='uniform_average')

    print(f"\n--- Horizon t+{q+1} (+{(q+1)*10} min) ---")
    print(f"MAE:  {mae:.4f}, MSE:  {mse:.4f}, RMSE: {rmse:.4f}, R²:   {r2:.4f}")

# Calculate overall average metrics by flattening the arrays
print("\n--- Overall Average Test Set Metrics (All Stations) ---")
y_true_flat = testY.ravel()
y_pred_flat = test_preds.ravel()
overall_mae = mean_absolute_error(y_true_flat, y_pred_flat)
overall_mse = mean_squared_error(y_true_flat, y_pred_flat)
overall_rmse = np.sqrt(overall_mse)
overall_r2 = r2_score(y_true_flat, y_pred_flat)

print(f"Overall MAE:  {overall_mae:.4f}")
print(f"Overall MSE:  {overall_mse:.4f}")
print(f"Overall RMSE: {overall_rmse:.4f}")
print(f"Overall R²:   {overall_r2:.4f}")


# --- 3. Identify and Analyze Best/Worst Performing Stations ---
print("\n\n--- Best vs. Worst Station Analysis ---")
print("\nCalculating Mean Absolute Error for each station...")
num_stations = testY.shape[2]
station_errors = [{'station_idx': i, 'mae': np.mean(np.abs(testY[:, :, i] - test_preds[:, :, i]))} for i in range(num_stations)]

# Sort stations by error to find the best and worst
errors_df = pd.DataFrame(station_errors).sort_values('mae')
best_stations_indices = errors_df.head(3)['station_idx'].tolist()
worst_stations_indices = errors_df.tail(3)['station_idx'].tolist()
print("✅ Best and worst stations identified.")

# Helper function to calculate metrics for a subset of stations
def calculate_subset_metrics(indices):
    y_true_subset = testY[:, :, indices].ravel()
    y_pred_subset = test_preds[:, :, indices].ravel()
    mae = mean_absolute_error(y_true_subset, y_pred_subset)
    rmse = np.sqrt(mean_squared_error(y_true_subset, y_pred_subset))
    r2 = r2_score(y_true_subset, y_pred_subset)
    return mae, rmse, r2

# Calculate and display metrics for the subsets
metrics_best_3 = calculate_subset_metrics(best_stations_indices)
metrics_worst_3 = calculate_subset_metrics(worst_stations_indices)

print("\n--- Average Metrics for the 3 BEST Performing Stations ---")
print(f"(Station Indices: {best_stations_indices})")
print(f"Best 3 MAE:   {metrics_best_3[0]:.4f}, RMSE: {metrics_best_3[1]:.4f}, R²: {metrics_best_3[2]:.4f}")

print("\n--- Average Metrics for the 3 WORST Performing Stations ---")
print(f"(Station Indices: {worst_stations_indices})")
print(f"Worst 3 MAE:  {metrics_worst_3[0]:.4f}, RMSE: {metrics_worst_3[1]:.4f}, R²: {metrics_worst_3[2]:.4f}")


# --- 4. Plot Forecasts for Best and Worst Stations ---
horizon_to_plot = 5 # Corresponds to t+6 (0-indexed)

def plot_station_forecast(station_idx, performance_type, color):
    # Select a random 2-day slice of the test data for plotting
    steps_to_plot = 288
    start_index = np.random.randint(0, testY.shape[0] - steps_to_plot)
    y_actual_slice = testY[start_index : start_index + steps_to_plot, horizon_to_plot, station_idx]
    y_pred_slice = test_preds[start_index : start_index + steps_to_plot, horizon_to_plot, station_idx]
    mae = errors_df[errors_df['station_idx'] == station_idx]['mae'].iloc[0]

    plt.figure(figsize=(15, 6))
    plt.plot(y_actual_slice, label='Actual Values', color='blue')
    plt.plot(y_pred_slice, label='Predicted Values', color=color, linestyle='--')
    plt.title(f'{performance_type} Performing Station (Index: {station_idx}) | Avg MAE: {mae:.4f}', fontsize=16)
    plt.xlabel(f'Time Steps (10-minute intervals, Horizon t+{horizon_to_plot+1})'); plt.ylabel('Occupancy Ratio')
    plt.legend(); plt.grid(True); plt.ylim(0, 1)
    plt.show()

print(f"\n--- Visualizing t+6 Forecasts for BEST Stations ---")
for station_idx in best_stations_indices:
    plot_station_forecast(station_idx, "BEST", "green")

print(f"\n--- Visualizing t+6 Forecasts for WORST Stations ---")
for station_idx in worst_stations_indices:
    plot_station_forecast(station_idx, "WORST", "red")

In [None]:
import pandas as pd

# Define the file path and cutoff date from your script
FILE_PATH = "/content/drive/MyDrive/Youbike_Master_Project/YouBike_Demand_Forecast/data/gongguan_model_ready_features.parquet.gz"
VALIDATION_CUTOFF_DATE = '2024-09-20'

# Load the data to find the date range
data = pd.read_parquet(FILE_PATH)
data['time'] = pd.to_datetime(data['time'])

# Determine the start and end dates of the test set
test_set_start = data[data['time'] > VALIDATION_CUTOFF_DATE]['time'].min()
test_set_end = data[data['time'] > VALIDATION_CUTOFF_DATE]['time'].max()

print(f"Test set starts on: {test_set_start}")
print(f"Test set ends on:   {test_set_end}")