In [1]:
import os
import sys
sys.path.append(os.path.join(os.path.dirname(os.path.realpath("__file__")), "pytorch_geometric_temporal"))
# https://stackoverflow.com/questions/35569042/ssl-certificate-verify-failed-with-python3
import ssl
ssl._create_default_https_context = ssl._create_unverified_context

In [2]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [3]:
import torch
import os

!pip install torch==2.4.0

torch_version = str(torch.__version__)
scatter_src = f"https://pytorch-geometric.com/whl/torch-{torch_version}.html"
sparse_src = f"https://pytorch-geometric.com/whl/torch-{torch_version}.html"
!pip install torch-scatter -f $scatter_src
!pip install torch-sparse -f $sparse_src
!pip install torch-geometric
!pip install ogb

Looking in links: https://pytorch-geometric.com/whl/torch-2.4.0+cu121.html
Looking in links: https://pytorch-geometric.com/whl/torch-2.4.0+cu121.html


In [4]:
import torch_geometric

In [5]:
%cd /content/drive/MyDrive/

from pytorch_geometric_temporal.torch_geometric_temporal.dataset import PemsBayDatasetLoader, METRLADatasetLoader

/content/drive/MyDrive


In [6]:
# Load in the data
import os
import numpy as np
import pandas as pd
import torch
#from pytorch_geometric_temporal.torch_geometric_temporal.dataset import PemsBayDatasetLoader
%cd /content/drive/MyDrive/
from src.utils.data_utils import *

/content/drive/MyDrive


In [7]:
import numpy as np
import torch

class Scaler:
  def __init__(self, shift: np.ndarray, scale: np.ndarray):
      """
      shift: [F,] -> [1, F, 1]
      scale: [F,] -> [1, F, 1]
      """
      self.shift = shift.reshape(1, -1, 1)
      self.scale = scale.reshape(1, -1, 1)
      print(self.shift.shape)
      print(self.scale.shape)

  def normalize(self, data: torch.Tensor) -> torch.Tensor:
      """
      data: [V, F, W + H]
      ---
      data_norm: [V, F, W + H]. data_norm = (data - shift) / scale
      """
      return (data - self.shift) / self.scale

  def unnormalize(self, data: torch.Tensor) -> torch.Tensor:
      """
      data: [V, F, W + H]
      ---
      data_unnorm: [V, F, W + H]. data_unnorm = shift + scale * data
      """
      return self.shift + self.scale * data

  def unnormalize_y(self, data: torch.Tensor) -> torch.Tensor:
      """
      Unnormalizes data of shape [batch_size, num_nodes] to match the scaler's shift and scale.
      """

      # Add the feature dimension to match the scaler (reshape to [batch_size, 1, num_nodes])
      data = data.unsqueeze(1)  # Shape becomes [batch_size, 1, num_nodes]

      # Expand shift and scale to match data shape [batch_size, 1, num_nodes]
      # Convert shift and scale to torch tensors if they are numpy arrays
      shift = torch.tensor(self.shift, device=data.device)
      scale = torch.tensor(self.scale, device=data.device)
      shift = shift.expand_as(data)
      scale = scale.expand_as(data)

      # Perform unnormalization
      unnormalized_data = shift + scale * data
      #print(f"Unnormalized data shape: {unnormalized_data.shape}")
      return unnormalized_data.squeeze(1)  # Remove the feature dimension






class TimeSeriesDataset:
    # TODO: This only works on PemsBayDatasetLoader & METRLADatasetLoader since their dataloaders are
    # modified to have indices and not be normalized using the entire dataset (preventing data leakage).
    def __init__(
        self,
        dataloader,
        window: int = 12,
        horizon: int = 12,
        train: float = 0.7,
        test: float = 0.2,
    ):
        """
        dataloader: One of the data loader objects defined under torch_geometric_temporal/dataset
        window (W): Number of timesteps the model is allowed to look-back
        horizon (H): Number of timesteps to predict ahead
        train: Ratio of data assigned to training. See split_data()
        test: Ratio of data assigned to testing. See split_data()
        ---
        indices: List of (sample_start, sample_end) tuples that specifies the start (t - W + 1)
            & end (t + H) indices of each data slice (t - W + 1, ..., t, t + 1, ..., t + H)
        X: np.array[V, F, T], V=num_nodes, F=num_features, T=num_timesteps. Raw dataset
        data: Graph signal object defined under torch_geometric_temporal/signal
        """
        self.data = dataloader.get_dataset(num_timesteps_in=window, num_timesteps_out=horizon)
        self.indices = dataloader.indices
        self.X = dataloader.X.detach().cpu().numpy().astype(np.float32)
        #print(self.X.shape, "hello")
        if len(self.X.shape) == 2: # [V, T]
            self.X = self.X.reshape(self.X.shape[0], 1, self.X.shape[1]) # [V, F=1, T]
        assert len(self.X.shape) == 3, "Missing dimension(s) in the raw dataset"
        self.split_data(train=train, test=test)

    def split_data(self, train: float = 0.7, test: float = 0.2):
        """
        train: Ratio of data assigned to training. num_train = round(train * len_of_data)
        test: Ratio of data assigned to testing. num_train_test = num_train + round(test * len_of_data)
        ---
        data_splits: Dict of graph signal objects defined under torch_geometric_temporal/signal
            - train: data[:num_train]
            - test: data[num_train:num_train_test]
            - valid: data[num_train_test:]
        scaler: Scaler object with shift & scale set to the mean & std of the training samples
            (across all nodes and training timesteps)
        """
        assert train > 0, "Train ratio must be positive"
        num_train = round(train * self.data.snapshot_count)
        num_train_test = num_train + round(test * self.data.snapshot_count)
        # Create data splits
        self.data_splits = {
            "train": self.data[:num_train],
            "test": (
                self.data[num_train:num_train_test]
                if num_train_test <= self.data.snapshot_count else None
            ),
            "valid": (
                self.data[num_train_test:]
                if num_train_test < self.data.snapshot_count else None
            )
        }
        # Create Scaler based on the mean & std of training data
        num_train_samp = self.indices[num_train - 1][-1] # Index of last sample in train split
        X_train = self.X[:, 0:1, :num_train_samp] # TAKE ONLY THe FIRST FEATURE
        #print(X_train.shape, "x train shape")
        self.scaler = Scaler(
            shift=np.mean(X_train, axis=(0, 2)),
            scale=np.std(X_train, axis=(0, 2))
        )

In [36]:
import numpy as np
import tensorflow as tf

def masked_mae_tf(preds, labels, null_val=np.nan):
    """
    Accuracy with masking.
    :param preds:
    :param labels:
    :param null_val:
    :return:
    """
    if np.isnan(null_val):
        mask = ~tf.math.is_nan(labels)
    else:
        mask = tf.not_equal(labels, null_val)

    # Ensure mask has the same shape as labels
    mask = tf.expand_dims(tf.cast(mask, tf.float32), axis=-1)
    mask_shape = tf.shape(mask)
    labels_shape = tf.shape(labels)

    #print("labels shape", labels_shape)
    mask = tf.squeeze(mask, axis=-1)
    #print("mask shape", mask.shape)

    # Replace NaN values in labels with the mean of non-NaN values
    labels = tf.where(tf.math.is_nan(labels), tf.reduce_mean(labels * mask, axis=-1, keepdims=True), labels)

    loss = tf.abs(tf.subtract(preds, labels))
    loss = loss * mask
    loss = tf.where(tf.math.is_nan(loss), tf.zeros_like(loss), loss)

    loss = tf.reduce_mean(loss)
    return torch.tensor(loss.numpy(), device='cuda:0', requires_grad=True)

In [43]:
import torch
import torch.nn.functional as F
from pytorch_geometric_temporal.torch_geometric_temporal.nn.recurrent import DCRNN
from pytorch_geometric_temporal.torch_geometric_temporal.signal import temporal_signal_split

try:
    from tqdm import tqdm
except ImportError:
    def tqdm(iterable):
        return iterable

# Load and examine data
loader = PemsBayDatasetLoader()
df = TimeSeriesDataset(PemsBayDatasetLoader())
train_dataset, test_dataset = df.data_splits["train"], df.data_splits["test"]

class RecurrentGCN(torch.nn.Module):
    def __init__(self, node_features):
        super(RecurrentGCN, self).__init__()
        self.dcrnn1 = DCRNN(in_channels=node_features, out_channels=64, K=2)
        self.dcrnn2 = DCRNN(in_channels=64, out_channels=64, K=2)
        self.linear = torch.nn.Linear(64, 12)

    def forward(self, x, edge_index, edge_weight):
        """
        x: [batch_size, num_nodes, features, timesteps]
        """
        batch_size, num_nodes, features, timesteps = x.size()
        #print(f"Input shapes: batch_size={batch_size}, num_nodes={num_nodes}, features={features}, timesteps={timesteps}")

        # Initialize hidden states
        h1 = None
        h2 = None

        # Process each timestep while maintaining batch efficiency
        for t in range(timesteps):
            # Get current timestep data [batch_size, num_nodes, features]
            current_x = x[:, :, :, t]
            #print(f"Timestep {t} - current_x shape before reshape: {current_x.shape}")

            # Reshape for DCRNN [batch_size * num_nodes, features]
            current_x = current_x.reshape(batch_size * num_nodes, features)
            #print(f"Timestep {t} - current_x shape after reshape: {current_x.shape}")

            # First DCRNN layer
            #print(f"Timestep {t} - Before DCRNN1 - input shape: {current_x.shape}, h1 shape: {None if h1 is None else h1.shape}")
            h1 = self.dcrnn1(current_x, edge_index, edge_weight, h1)
            #print(f"Timestep {t} - After DCRNN1 - h1 shape: {h1.shape}")
            h1 = F.relu(h1)

            # Second DCRNN layer
            #print(f"Timestep {t} - Before DCRNN2 - input shape: {h1.shape}, h2 shape: {None if h2 is None else h2.shape}")
            h2 = self.dcrnn2(h1, edge_index, edge_weight, h2)
            #print(f"Timestep {t} - After DCRNN2 - h2 shape: {h2.shape}")
            h2 = F.relu(h2)

        #print(f"\nFinal shapes before linear:")
        #print(f"h2 shape: {h2.shape}")
        out = self.linear(h2)
        #print(f"Output shape after linear: {out.shape}")
        #print(f"Final reshape target: [batch_size={batch_size}, num_nodes={num_nodes}, -1]")

        # Return predictions reshaped to [batch_size, num_nodes, output_timesteps]
        return out.reshape(batch_size, num_nodes, -1)

# Update training loop
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = RecurrentGCN(node_features = 2).to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=0.01, eps=1e-3)  # Set epsilon
scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer,
                                               milestones=[20,30,40,50],
                                               gamma=0.1)
scaler = torch.cuda.amp.GradScaler()
model.train()
batch_size = 256

print(f"Scaler is enabled: {scaler.is_enabled()}")
for epoch in tqdm(range(100)):
    cost = 0
    optimizer.zero_grad()

    train_snapshots = list(train_dataset)

    for i in range(0, len(train_snapshots), batch_size):
        batch_snapshots = train_snapshots[i:i+batch_size]

        with torch.amp.autocast('cuda'):
            # Keep original shape when stacking
            batch_x = torch.stack([df.scaler.normalize(snapshot.x) for snapshot in batch_snapshots]).to(device)
            batch_edge_index = batch_snapshots[0].edge_index.to(device)
            batch_edge_attr = batch_snapshots[0].edge_attr.to(device)
            batch_y = torch.stack([snapshot.y[:, 0, :] for snapshot in batch_snapshots]).to(device)
            # Normalize targets (y) if they are not already normalized
            #batch_y = torch.stack([snapshot.y[:, 0, :] for snapshot in batch_snapshots]).to(device)
            #batch_y = torch.stack([df.scaler.normalize(snapshot.y[:, 0, :]) for snapshot in batch_snapshots]).to(device)
            # for snapshot in batch_snapshots:
            #   print("snapshot y shape", snapshot.y.shape)
            #   print("snapshot x shape", snapshot.x.shape)
            # Get predictions for all horizons
            batch_y_15 = torch.stack([snapshot.y[:, 0, 2] for snapshot in batch_snapshots]).to(device)  # 15min (3rd step)
            batch_y_30 = torch.stack([snapshot.y[:, 0, 5] for snapshot in batch_snapshots]).to(device)  # 30min (6th step)
            batch_y_60 = torch.stack([snapshot.y[:, 0, 11] for snapshot in batch_snapshots]).to(device) # 60min (12th step)

            y_hat = model(batch_x, batch_edge_index, batch_edge_attr)
            #print("y hat shape", y_hat.shape)
            y_hat_15 = y_hat[:, :, 2]  # 15min prediction
            y_hat_30 = y_hat[:, :, 5]  # 30min prediction
            y_hat_60 = y_hat[:, :, 11] # 60min prediction
            #print("\ny hat 15", type(y_hat_15), y_hat_15.shape)
            #print("y hat 30", type(y_hat_30), y_hat_30.shape)
            #print("y hat 60", type(y_hat_60), y_hat_60.shape)
            # Denormalize predictions and targets
            y_hat = df.scaler.unnormalize_y(y_hat)
            y_hat_15 = df.scaler.unnormalize_y(y_hat_15)
            y_hat_30 = df.scaler.unnormalize_y(y_hat_30)
            y_hat_60 = df.scaler.unnormalize_y(y_hat_60)
            #y_true_15 = df.scaler.unnormalize_y(batch_y_15)
            #y_true_30 = df.scaler.unnormalize_y(batch_y_30)
            #y_true_60 = df.scaler.unnormalize_y(batch_y_60)

            #print("y hat 15", y_hat_15)
            #print("y true 15", y_true_15)
            #print("batch true 15", batch_y_15)
            # Calculate MSE for each horizon
            rmse_15 = torch.sqrt(torch.mean((y_hat_15 - batch_y_15)**2))
            rmse_30 = torch.sqrt(torch.mean((y_hat_30 - batch_y_30)**2))
            rmse_60 = torch.sqrt(torch.mean((y_hat_60 - batch_y_60)**2))

            null_val = 0.
            #print("rmse 15 shape", rmse_15)
            #print("y hat", y_hat)
            #print("batch y", batch_y)
            print(f"y_hat device: {y_hat.device}")
            print(f"batch_y device: {batch_y.device}")
            print(f"y_hat dtype: {y_hat.dtype}")
            print(f"batch_y dtype: {batch_y.dtype}")
            batch_cost = rmse_15#masked_mae_tf(y_hat.detach().cpu().numpy(), batch_y.detach().cpu().numpy(), null_val)
            print("Masked mse: ", batch_cost)
            #batch_cost = rmse_15
            #null_val = 0.
            #batch_cost = masked_mae_loss(scaler, null_val)(y_hat.detach().cpu().numpy(), batch_y.detach().cpu().numpy())
            #batch_cost = torch.mean((y_hat - batch_y)**2)

        del batch_x, batch_edge_index, batch_edge_attr, y_hat, y_hat_15, y_hat_30, y_hat_60, batch_y_15, batch_y_30, batch_y_60
        torch.cuda.empty_cache()

        scaler.scale(batch_cost).backward()
        cost += batch_cost.item()

    print(f"Epoch {epoch} - Gradient norm before unscaling: {torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=5.0).item()}")
    scaler.unscale_(optimizer)
    print(f"Epoch {epoch} - Gradient norm after unscaling: {torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=5.0).item()}")
    torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=5.0)
    scaler.step(optimizer)
    scaler.update()
    optimizer.zero_grad()
    scheduler.step()

    num_train_batches = (len(train_snapshots) + batch_size - 1) // batch_size
    loss = cost/num_train_batches

    print(f'Epoch {epoch}, LR: {scheduler.get_last_lr()[0]:.6f}, Loss: {loss:.4f}, '
          f'RMSE@15min: {rmse_15.item():.4f}, RMSE@30min: {rmse_30.item():.4f}, RMSE@60min: {rmse_60.item():.4f}')

  scaler = torch.cuda.amp.GradScaler()


(1, 1, 1)
(1, 1, 1)
Scaler is enabled: True


  0%|          | 0/100 [00:00<?, ?it/s]

y_hat device: cuda:0
batch_y device: cuda:0
y_hat dtype: torch.float32
batch_y dtype: torch.float32
Masked mse:  tensor(5.2884, device='cuda:0', grad_fn=<SqrtBackward0>)
y_hat device: cuda:0
batch_y device: cuda:0
y_hat dtype: torch.float32
batch_y dtype: torch.float32
Masked mse:  tensor(8.8107, device='cuda:0', grad_fn=<SqrtBackward0>)
y_hat device: cuda:0
batch_y device: cuda:0
y_hat dtype: torch.float32
batch_y dtype: torch.float32
Masked mse:  tensor(9.2473, device='cuda:0', grad_fn=<SqrtBackward0>)
y_hat device: cuda:0
batch_y device: cuda:0
y_hat dtype: torch.float32
batch_y dtype: torch.float32
Masked mse:  tensor(7.5270, device='cuda:0', grad_fn=<SqrtBackward0>)
y_hat device: cuda:0
batch_y device: cuda:0
y_hat dtype: torch.float32
batch_y dtype: torch.float32
Masked mse:  tensor(9.9927, device='cuda:0', grad_fn=<SqrtBackward0>)
y_hat device: cuda:0
batch_y device: cuda:0
y_hat dtype: torch.float32
batch_y dtype: torch.float32
Masked mse:  tensor(9.8592, device='cuda:0', grad_

  1%|          | 1/100 [00:45<1:14:42, 45.28s/it]

y_hat device: cuda:0
batch_y device: cuda:0
y_hat dtype: torch.float32
batch_y dtype: torch.float32
Masked mse:  tensor(5.1653, device='cuda:0', grad_fn=<SqrtBackward0>)
Epoch 0 - Gradient norm before unscaling: nan
Epoch 0 - Gradient norm after unscaling: nan
Epoch 0, LR: 0.010000, Loss: 9.0178, RMSE@15min: 5.1653, RMSE@30min: 5.4994, RMSE@60min: 5.3649
y_hat device: cuda:0
batch_y device: cuda:0
y_hat dtype: torch.float32
batch_y dtype: torch.float32
Masked mse:  tensor(5.2884, device='cuda:0', grad_fn=<SqrtBackward0>)
y_hat device: cuda:0
batch_y device: cuda:0
y_hat dtype: torch.float32
batch_y dtype: torch.float32
Masked mse:  tensor(8.8107, device='cuda:0', grad_fn=<SqrtBackward0>)
y_hat device: cuda:0
batch_y device: cuda:0
y_hat dtype: torch.float32
batch_y dtype: torch.float32
Masked mse:  tensor(9.2473, device='cuda:0', grad_fn=<SqrtBackward0>)
y_hat device: cuda:0
batch_y device: cuda:0
y_hat dtype: torch.float32
batch_y dtype: torch.float32
Masked mse:  tensor(7.5270, devic

  2%|▏         | 2/100 [01:29<1:13:02, 44.72s/it]

y_hat device: cuda:0
batch_y device: cuda:0
y_hat dtype: torch.float32
batch_y dtype: torch.float32
Masked mse:  tensor(5.1653, device='cuda:0', grad_fn=<SqrtBackward0>)
Epoch 1 - Gradient norm before unscaling: nan
Epoch 1 - Gradient norm after unscaling: nan
Epoch 1, LR: 0.010000, Loss: 9.0178, RMSE@15min: 5.1653, RMSE@30min: 5.4994, RMSE@60min: 5.3649
y_hat device: cuda:0
batch_y device: cuda:0
y_hat dtype: torch.float32
batch_y dtype: torch.float32
Masked mse:  tensor(5.2884, device='cuda:0', grad_fn=<SqrtBackward0>)
y_hat device: cuda:0
batch_y device: cuda:0
y_hat dtype: torch.float32
batch_y dtype: torch.float32
Masked mse:  tensor(8.8107, device='cuda:0', grad_fn=<SqrtBackward0>)
y_hat device: cuda:0
batch_y device: cuda:0
y_hat dtype: torch.float32
batch_y dtype: torch.float32
Masked mse:  tensor(9.2473, device='cuda:0', grad_fn=<SqrtBackward0>)
y_hat device: cuda:0
batch_y device: cuda:0
y_hat dtype: torch.float32
batch_y dtype: torch.float32
Masked mse:  tensor(7.5270, devic

  3%|▎         | 3/100 [02:13<1:11:54, 44.48s/it]

y_hat device: cuda:0
batch_y device: cuda:0
y_hat dtype: torch.float32
batch_y dtype: torch.float32
Masked mse:  tensor(5.1653, device='cuda:0', grad_fn=<SqrtBackward0>)
Epoch 2 - Gradient norm before unscaling: inf
Epoch 2 - Gradient norm after unscaling: nan
Epoch 2, LR: 0.010000, Loss: 9.0178, RMSE@15min: 5.1653, RMSE@30min: 5.4994, RMSE@60min: 5.3649
y_hat device: cuda:0
batch_y device: cuda:0
y_hat dtype: torch.float32
batch_y dtype: torch.float32
Masked mse:  tensor(5.2884, device='cuda:0', grad_fn=<SqrtBackward0>)
y_hat device: cuda:0
batch_y device: cuda:0
y_hat dtype: torch.float32
batch_y dtype: torch.float32
Masked mse:  tensor(8.8107, device='cuda:0', grad_fn=<SqrtBackward0>)
y_hat device: cuda:0
batch_y device: cuda:0
y_hat dtype: torch.float32
batch_y dtype: torch.float32
Masked mse:  tensor(9.2473, device='cuda:0', grad_fn=<SqrtBackward0>)
y_hat device: cuda:0
batch_y device: cuda:0
y_hat dtype: torch.float32
batch_y dtype: torch.float32
Masked mse:  tensor(7.5270, devic

  3%|▎         | 3/100 [02:32<1:21:57, 50.70s/it]


KeyboardInterrupt: 

In [41]:
batch_cost

tensor(3.9042, device='cuda:0', requires_grad=True)

In [None]:
model.eval()
cost = 0
rmse_15_total = 0
rmse_30_total = 0
rmse_60_total = 0
mae_15_total = 0
mae_30_total = 0
mae_60_total = 0
mape_15_total = 0
mape_30_total = 0
mape_60_total = 0

test_snapshots = list(test_dataset)
batch_size = 512  # Can use larger batch size for evaluation since we don't store gradients

with torch.no_grad(), torch.cuda.amp.autocast():
    for i in range(0, len(test_snapshots), batch_size):
        batch_snapshots = test_snapshots[i:i+batch_size]

        # Process entire batch at once
        batch_x = torch.stack([df.scaler.normalize(snapshot.x) for snapshot in batch_snapshots]).to(device)
        batch_edge_index = batch_snapshots[0].edge_index.to(device)
        batch_edge_attr = batch_snapshots[0].edge_attr.to(device)
        batch_y = torch.stack([snapshot.y[:, 0, :] for snapshot in batch_snapshots]).to(device)

        # Get predictions for all horizons
        y_hat = model(batch_x, batch_edge_index, batch_edge_attr)
        y_hat_15 = y_hat[:, :, 2]  # 15min prediction
        y_hat_30 = y_hat[:, :, 5]  # 30min prediction
        y_hat_60 = y_hat[:, :, 11] # 60min prediction

        # Denormalize predictions and targets
        y_hat_15 = df.scaler.unnormalize_y(y_hat_15)
        y_hat_30 = df.scaler.unnormalize_y(y_hat_30)
        y_hat_60 = df.scaler.unnormalize_y(y_hat_60)

        # Calculate RMSE, MAE, and MAPE for each horizon
        rmse_15 = torch.sqrt(torch.mean((y_hat_15 - batch_y_15)**2))
        rmse_30 = torch.sqrt(torch.mean((y_hat_30 - batch_y_30)**2))
        rmse_60 = torch.sqrt(torch.mean((y_hat_60 - batch_y_60)**2))

        mae_15 = torch.mean(torch.abs(y_hat_15 - batch_y_15))
        mae_30 = torch.mean(torch.abs(y_hat_30 - batch_y_30))
        mae_60 = torch.mean(torch.abs(y_hat_60 - batch_y_60))

        mape_15 = torch.mean(torch.abs((y_hat_15 - batch_y_15) / batch_y_15)) * 100
        mape_30 = torch.mean(torch.abs((y_hat_30 - batch_y_30) / batch_y_30)) * 100
        mape_60 = torch.mean(torch.abs((y_hat_60 - batch_y_60) / batch_y_60)) * 100

        # Accumulate metrics for logging
        rmse_15_total += rmse_15.item()
        rmse_30_total += rmse_30.item()
        rmse_60_total += rmse_60.item()
        mae_15_total += mae_15.item()
        mae_30_total += mae_30.item()
        mae_60_total += mae_60.item()
        mape_15_total += mape_15.item()
        mape_30_total += mape_30.item()
        mape_60_total += mape_60.item()

        # Calculate batch cost (could use MSE as a proxy here)
        batch_cost = torch.mean((y_hat - batch_y)**2)
        cost += batch_cost.item()

        del batch_x, batch_edge_index, batch_edge_attr, y_hat, batch_y
        torch.cuda.empty_cache()

# Calculate average metrics
num_batches = (len(test_snapshots) + batch_size - 1) // batch_size
cost /= num_batches

# Print the evaluation metrics
print(f"Evaluation Loss: {cost:.4f}")
print(f"RMSE@15min: {rmse_15_total / num_batches:.4f}")
print(f"RMSE@30min: {rmse_30_total / num_batches:.4f}")
print(f"RMSE@60min: {rmse_60_total / num_batches:.4f}")
print(f"MAE@15min: {mae_15_total / num_batches:.4f}")
print(f"MAE@30min: {mae_30_total / num_batches:.4f}")
print(f"MAE@60min: {mae_60_total / num_batches:.4f}")
print(f"MAPE@15min: {mape_15_total / num_batches:.4f}%")
print(f"MAPE@30min: {mape_30_total / num_batches:.4f}%")
print(f"MAPE@60min: {mape_60_total / num_batches:.4f}%")

In [None]:
import math

model.eval()
cost = 0
test_snapshots = list(test_dataset)
batch_size = 512  # Can use larger batch size for evaluation since we don't store gradients

predictions = []
truths = []
with torch.no_grad(), torch.cuda.amp.autocast():
    for i in range(0, len(test_snapshots), batch_size):
        batch_snapshots = test_snapshots[i:i+batch_size]

        # Process entire batch at once
        batch_x = torch.stack([df.scaler.transform(snapshot.x) for snapshot in batch_snapshots]).to(device)
        batch_edge_index = batch_snapshots[0].edge_index.to(device)
        batch_edge_attr = batch_snapshots[0].edge_attr.to(device)
        batch_y = torch.stack([snapshot.y[:, 0, :] for snapshot in batch_snapshots]).to(device)

        # Forward pass on entire batch
        y_hat = model(batch_x, batch_edge_index, batch_edge_attr)

        # Denormalize predictions and ground truth
        y_hat_denorm = df.scaler.inverse_transform(y_hat)
        y_true_denorm = df.scaler.inverse_transform(batch_y)

        # Calculate MSE on denormalized values
        batch_cost = torch.mean((y_hat_denorm - y_true_denorm)**2)
        cost += batch_cost.item()

        del batch_x, batch_edge_index, batch_edge_attr, y_hat, batch_y
        torch.cuda.empty_cache()

num_batches = (len(test_snapshots) + batch_size - 1) // batch_size
mse = cost / num_batches
rmse = math.sqrt(mse)
print("MSE: {:.4f}".format(mse))
print("RMSE: {:.4f}".format(rmse))

In [None]:
len(list(train_dataset))

36457

In [None]:
print(y_hat.shape,snapshot.y.shape)

torch.Size([325, 12]) torch.Size([325, 2, 12])


In [None]:
print(snapshot.x.shape, snapshot.edge_index.shape, snapshot.edge_attr.shape)

torch.Size([207, 2, 12]) torch.Size([2, 1722]) torch.Size([1722])
