In [1]:
import torch
import torch.nn.functional as F
from torch_geometric.nn import GCNConv, GATv2Conv, GraphConv
from torch_geometric.nn.norm import BatchNorm
import torch.nn as nn
import os
import pandas as pd
import numpy as np
import itertools
from math import radians, sin, cos, sqrt, atan2
import numpy as np
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt


  from .autonotebook import tqdm as notebook_tqdm


# <div align="center">**ZE EPIC DATA**</div>

In [2]:
# Directory containing the CSV files
directory = '../processed-final-data-2'

# Dictionary to store the dataframes
dataframes = {}

# Iterate over all files in the directory
for filename in os.listdir(directory):
    if filename.endswith('.csv'):
        # Extract the file name without extension and convert it to int
        key = int(os.path.splitext(filename)[0])
        
        # Read the CSV file into a dataframe
        df = pd.read_csv(os.path.join(directory, filename))
        dataframes[key] = df


# Print the dictionary keys to verify
print(dataframes.keys())

dict_keys([72790024141, 72785524114, 72789094197, 72793024233, 72785794129, 72788594266, 72797624217, 72785024157, 72797094240, 72798594276, 72792424223, 72792894263, 72781024243, 72781524237, 72788324220, 72698824219, 72793894274, 74206024207, 72782724110, 72793724222, 72792594227, 72782594239, 72794504205, 72792394225, 72784524163, 72792024227, 72785694176])


In [3]:
# Dictionaries to store the training and testing dataframes
train_dataframes = {}
test_dataframes = {}

# Split each dataframe into training and testing sets
for key, df in dataframes.items():
    train_df, test_df = train_test_split(df, test_size=0.2, shuffle=False)
    train_dataframes[key] = train_df
    test_dataframes[key] = test_df
    # Check if the maximum index of the training set is less than the minimum index of the testing set
    if train_df.index.max() < test_df.index.min():
        print(f"{key}: Sequential split verified.")
    else:
        print(f"{key}: Sequential split NOT verified.")

# Print the keys of the training and testing dictionaries to verify
print(train_dataframes.keys())
print(test_dataframes.keys())

72790024141: Sequential split verified.
72785524114: Sequential split verified.
72789094197: Sequential split verified.
72793024233: Sequential split verified.
72785794129: Sequential split verified.
72788594266: Sequential split verified.
72797624217: Sequential split verified.
72785024157: Sequential split verified.
72797094240: Sequential split verified.
72798594276: Sequential split verified.
72792424223: Sequential split verified.
72792894263: Sequential split verified.
72781024243: Sequential split verified.
72781524237: Sequential split verified.
72788324220: Sequential split verified.
72698824219: Sequential split verified.
72793894274: Sequential split verified.
74206024207: Sequential split verified.
72782724110: Sequential split verified.
72793724222: Sequential split verified.
72792594227: Sequential split verified.
72782594239: Sequential split verified.
72794504205: Sequential split verified.
72792394225: Sequential split verified.
72784524163: Sequential split verified.


In [4]:
def create_node_features_sequences(dataframes):
    # Create a list to store the node features for each time step for input and desired output
    node_features_sequence_input = []
    node_features_sequence_output = []

    # Iterate over the rows of the dataframes (assuming all dataframes have the same number of rows)
    for i in range(len(next(iter(dataframes.values())))):
        if i == len(next(iter(dataframes.values()))) - 1:
            break
        # Create a list to store the features of all nodes at the current time step for input
        node_features_input = []
        # Create a list to store the features of all nodes at the next time step for output
        node_features_output = []

        # Iterate over each dataframe and extract the features at the current row for input
        # and the next row for output
        for key, df in dataframes.items():
            node_features_input.append((df.iloc[i].values - df.iloc[i].mean()) / df.iloc[i].std())
            node_features_output.append(df.iloc[i + 1].values)

        # Stack the features of all nodes to create a 2D array (num_nodes, num_features)
        node_features_sequence_input.append(np.stack(node_features_input))
        node_features_sequence_output.append(np.stack(node_features_output))

    # Convert the lists to numpy arrays (time_steps, num_nodes, num_features)
    node_features_sequence_input = np.array(node_features_sequence_input)
    node_features_sequence_output = np.array(node_features_sequence_output)

    # Convert the numpy arrays to torch tensors
    node_features_sequence_input = torch.tensor(node_features_sequence_input, dtype=torch.float)
    node_features_sequence_output = torch.tensor(node_features_sequence_output, dtype=torch.float)

    return node_features_sequence_input, node_features_sequence_output

# Call the function and print the shapes of the resulting tensors
node_features_sequence_input_train, node_features_sequence_output_train = create_node_features_sequences(train_dataframes)
node_features_sequence_input_test, node_features_sequence_output_test = create_node_features_sequences(test_dataframes)
print(node_features_sequence_input_train.shape)
print(node_features_sequence_output_test.shape)

torch.Size([1408, 27, 69])
torch.Size([352, 27, 69])


In [5]:
# Calculate mean and standard deviation of the training data
mean = node_features_sequence_input_train.mean(dim=(0, 1), keepdim=True)
std = node_features_sequence_input_train.std(dim=(0, 1), keepdim=True)

# Normalize the training and testing data
node_features_sequence_input_train = (node_features_sequence_input_train - mean) / std
node_features_sequence_input_test = (node_features_sequence_input_test - mean) / std

# Print the mean and standard deviation to verify
print("Mean:", mean)
print("Standard Deviation:", std)

Mean: tensor([[[-0.3190, -0.3226,  2.4857,  3.3285,  4.7139,  1.7727, -0.2059,
           1.8469, -0.3219, -0.0128,  0.1560,  2.9096, -0.3223,  1.8477,
          -0.3223, -0.3223, -0.3222, -0.3221, -0.3212, -0.3223, -0.3223,
          -0.3223, -0.3223, -0.3223, -0.3223, -0.2858, -0.3211, -0.3214,
          -0.3211, -0.3221, -0.3222, -0.3209, -0.3223, -0.3222, -0.3222,
          -0.3222, -0.3154, -0.3219, -0.3220, -0.3223, -0.3219, -0.3219,
          -0.3222, -0.3223, -0.3152, -0.2861, -0.3221, -0.3217, -0.3223,
          -0.3219, -0.3222, -0.3222, -0.3223, -0.2875, -0.3219, -0.3218,
          -0.3155, -0.3223, -0.3052, -0.3222, -0.3223, -0.3166, -0.3223,
          -0.3223, -0.3202, -0.3166, -0.3223, -0.3222, -0.3221]]])
Standard Deviation: tensor([[[0.0520, 0.0565, 0.5482, 0.9377, 1.4352, 0.2523, 0.1534, 0.2769,
          0.0145, 0.1039, 0.3005, 0.6277, 0.0144, 0.2778, 0.0143, 0.0143,
          0.0146, 0.0148, 0.0165, 0.0143, 0.0145, 0.0143, 0.0143, 0.0144,
          0.0144, 0.0332, 0.

# <div align="center">**ZE EPIC EDGE DATA**</div>

In [6]:
# Import the location-datamap.csv file as a dataframe
location_datamap_df = pd.read_csv('../location-datamap.csv')

# Print the first few rows of the dataframe to verify
print(location_datamap_df.head())

        STATION  LONGITUDE  LATITUDE  ELEVATION
0  7.279002e+10 -119.51551  47.30777      382.1
1  7.278552e+10 -117.65000  47.63333      750.1
2  7.278909e+10 -119.52091  48.46113      397.4
3  7.279302e+10 -122.31442  47.44467      112.5
4  7.278579e+10 -117.11581  46.74376      775.7


In [7]:
def haversine_distance(lat1, lon1, lat2, lon2, el1=0, el2=0):
    # Radius of the Earth in kilometers
    R = 6371.0

    # Convert latitude and longitude from degrees to radians
    lat1, lon1, lat2, lon2 = map(radians, [lat1, lon1, lat2, lon2])

    # Difference in coordinates
    dlat = lat2 - lat1
    dlon = lon2 - lon1

    # Haversine formula
    a = sin(dlat / 2)**2 + cos(lat1) * cos(lat2) * sin(dlon / 2)**2
    c = 2 * atan2(sqrt(a), sqrt(1 - a))

    # Distance in kilometers
    distance = R * c

    # Elevation difference
    height = el2 - el1

    # Calculate the total distance considering elevation
    total_distance = sqrt(distance**2 + height**2)

    return total_distance

In [8]:
def create_edge_index(dataframes):
    edges = []
    edges_verifications = []
    keys = list(dataframes.keys())
    for i in range(len(keys)):
        for j in range(i + 1, len(keys)):
            if i != j:
                edges.append(([i, j]))
                edges.append(([j, i]))
                edges_verifications.append(([keys[i],keys[j],i, j]))
                edges_verifications.append(([keys[j],keys[i],j, i]))
    display(edges_verifications)
    return edges

edge_index = create_edge_index(dataframes)
display(edge_index)        

[[72790024141, 72785524114, 0, 1],
 [72785524114, 72790024141, 1, 0],
 [72790024141, 72789094197, 0, 2],
 [72789094197, 72790024141, 2, 0],
 [72790024141, 72793024233, 0, 3],
 [72793024233, 72790024141, 3, 0],
 [72790024141, 72785794129, 0, 4],
 [72785794129, 72790024141, 4, 0],
 [72790024141, 72788594266, 0, 5],
 [72788594266, 72790024141, 5, 0],
 [72790024141, 72797624217, 0, 6],
 [72797624217, 72790024141, 6, 0],
 [72790024141, 72785024157, 0, 7],
 [72785024157, 72790024141, 7, 0],
 [72790024141, 72797094240, 0, 8],
 [72797094240, 72790024141, 8, 0],
 [72790024141, 72798594276, 0, 9],
 [72798594276, 72790024141, 9, 0],
 [72790024141, 72792424223, 0, 10],
 [72792424223, 72790024141, 10, 0],
 [72790024141, 72792894263, 0, 11],
 [72792894263, 72790024141, 11, 0],
 [72790024141, 72781024243, 0, 12],
 [72781024243, 72790024141, 12, 0],
 [72790024141, 72781524237, 0, 13],
 [72781524237, 72790024141, 13, 0],
 [72790024141, 72788324220, 0, 14],
 [72788324220, 72790024141, 14, 0],
 [72790024

[[0, 1],
 [1, 0],
 [0, 2],
 [2, 0],
 [0, 3],
 [3, 0],
 [0, 4],
 [4, 0],
 [0, 5],
 [5, 0],
 [0, 6],
 [6, 0],
 [0, 7],
 [7, 0],
 [0, 8],
 [8, 0],
 [0, 9],
 [9, 0],
 [0, 10],
 [10, 0],
 [0, 11],
 [11, 0],
 [0, 12],
 [12, 0],
 [0, 13],
 [13, 0],
 [0, 14],
 [14, 0],
 [0, 15],
 [15, 0],
 [0, 16],
 [16, 0],
 [0, 17],
 [17, 0],
 [0, 18],
 [18, 0],
 [0, 19],
 [19, 0],
 [0, 20],
 [20, 0],
 [0, 21],
 [21, 0],
 [0, 22],
 [22, 0],
 [0, 23],
 [23, 0],
 [0, 24],
 [24, 0],
 [0, 25],
 [25, 0],
 [0, 26],
 [26, 0],
 [1, 2],
 [2, 1],
 [1, 3],
 [3, 1],
 [1, 4],
 [4, 1],
 [1, 5],
 [5, 1],
 [1, 6],
 [6, 1],
 [1, 7],
 [7, 1],
 [1, 8],
 [8, 1],
 [1, 9],
 [9, 1],
 [1, 10],
 [10, 1],
 [1, 11],
 [11, 1],
 [1, 12],
 [12, 1],
 [1, 13],
 [13, 1],
 [1, 14],
 [14, 1],
 [1, 15],
 [15, 1],
 [1, 16],
 [16, 1],
 [1, 17],
 [17, 1],
 [1, 18],
 [18, 1],
 [1, 19],
 [19, 1],
 [1, 20],
 [20, 1],
 [1, 21],
 [21, 1],
 [1, 22],
 [22, 1],
 [1, 23],
 [23, 1],
 [1, 24],
 [24, 1],
 [1, 25],
 [25, 1],
 [1, 26],
 [26, 1],
 [2, 3],
 [3, 

In [9]:
def create_edge_attributes(dataframes, location_datamap_df, edge_index):
    edge_attr = []
    edge_attr_verifications = []
    keys = list(dataframes.keys())
    for i, j in edge_index:
        station_i = location_datamap_df[location_datamap_df['STATION'] == keys[i]]
        station_j = location_datamap_df[location_datamap_df['STATION'] == keys[j]]
        lati, loni, eli = station_i['LATITUDE'].values[0], station_i['LONGITUDE'].values[0], station_i['ELEVATION'].values[0]
        latj, lonj, elj = station_j['LATITUDE'].values[0], station_j['LONGITUDE'].values[0], station_j['ELEVATION'].values[0]
        edge_attr_verifications.append(([keys[i], keys[j], i, j, haversine_distance(lati, loni, latj, lonj, eli, elj)]))
        edge_attr.append(haversine_distance(lati, loni, latj, lonj, eli, elj))
    display(edge_attr_verifications)
    return edge_attr

edge_attr = create_edge_attributes(dataframes, location_datamap_df, edge_index)
display(edge_attr)



[[72790024141, 72785524114, 0, 1, 395.4679457314314],
 [72785524114, 72790024141, 1, 0, 395.4679457314314],
 [72790024141, 72789094197, 0, 2, 129.15783117310457],
 [72789094197, 72790024141, 2, 0, 129.15783117310457],
 [72790024141, 72793024233, 0, 3, 342.5330553210191],
 [72793024233, 72790024141, 3, 0, 342.5330553210191],
 [72790024141, 72785794129, 0, 4, 438.10430559873856],
 [72785794129, 72790024141, 4, 0, 438.10430559873856],
 [72790024141, 72788594266, 0, 5, 432.0504921333856],
 [72788594266, 72790024141, 5, 0, 432.0504921333856],
 [72790024141, 72797624217, 0, 6, 437.11321302131137],
 [72797624217, 72790024141, 6, 0, 437.11321302131137],
 [72790024141, 72785024157, 0, 7, 369.0087247919059],
 [72785024157, 72790024141, 7, 0, 369.0087247919059],
 [72790024141, 72797094240, 0, 8, 503.7031148557305],
 [72797094240, 72790024141, 8, 0, 503.7031148557305],
 [72790024141, 72798594276, 0, 9, 455.6811328123654],
 [72798594276, 72790024141, 9, 0, 455.6811328123654],
 [72790024141, 7279242

[395.4679457314314,
 395.4679457314314,
 129.15783117310457,
 129.15783117310457,
 342.5330553210191,
 342.5330553210191,
 438.10430559873856,
 438.10430559873856,
 432.0504921333856,
 432.0504921333856,
 437.11321302131137,
 437.11321302131137,
 369.0087247919059,
 369.0087247919059,
 503.7031148557305,
 503.7031148557305,
 455.6811328123654,
 455.6811328123654,
 474.59654599988056,
 474.59654599988056,
 348.0950737502581,
 348.0950737502581,
 128.6980383337319,
 128.6980383337319,
 835.9738346074198,
 835.9738346074198,
 171.59537540199554,
 171.59537540199554,
 384.56540064951065,
 384.56540064951065,
 373.58764051749137,
 373.58764051749137,
 362.1843030623904,
 362.1843030623904,
 32.53657446575969,
 32.53657446575969,
 306.4908073667999,
 306.4908073667999,
 404.50991220772727,
 404.50991220772727,
 52.81240627430448,
 52.81240627430448,
 404.8085057270694,
 404.8085057270694,
 505.4288197147213,
 505.4288197147213,
 286.0836462058196,
 286.0836462058196,
 412.6227567328569,
 412

In [10]:
# Convert edge_index to a torch tensor
edge_index = torch.tensor(edge_index, dtype=torch.long).t().contiguous()

# Print the shape to verify
print(edge_index.shape)
print(edge_index)

torch.Size([2, 702])
tensor([[ 0,  1,  0,  ..., 26, 25, 26],
        [ 1,  0,  2,  ..., 24, 26, 25]])


In [11]:
# Convert edge_attr to a torch tensor
edge_attr_tensor = torch.tensor(edge_attr, dtype=torch.float)

# Min-Max normalization to scale the values to the range [0, 1]
edge_attr_min = edge_attr_tensor.min()
edge_attr_max = edge_attr_tensor.max()
edge_attr = (edge_attr_tensor - edge_attr_min) / (edge_attr_max - edge_attr_min)

# Print the normalized edge_attr tensor to verify
print(edge_attr.shape)
print(edge_attr)


torch.Size([702])
tensor([0.3146, 0.3146, 0.0928, 0.0928, 0.2705, 0.2705, 0.3501, 0.3501, 0.3450,
        0.3450, 0.3492, 0.3492, 0.2925, 0.2925, 0.4047, 0.4047, 0.3647, 0.3647,
        0.3805, 0.3805, 0.2751, 0.2751, 0.0924, 0.0924, 0.6814, 0.6814, 0.1281,
        0.1281, 0.3055, 0.3055, 0.2963, 0.2963, 0.2869, 0.2869, 0.0123, 0.0123,
        0.2405, 0.2405, 0.3221, 0.3221, 0.0292, 0.0292, 0.3223, 0.3223, 0.4061,
        0.4061, 0.2235, 0.2235, 0.3289, 0.3289, 0.2119, 0.2119, 0.3101, 0.3101,
        0.5912, 0.5912, 0.0767, 0.0767, 0.6506, 0.6506, 0.6535, 0.6535, 0.0133,
        0.0133, 0.7057, 0.7057, 0.6728, 0.6728, 0.7020, 0.7020, 0.5890, 0.5890,
        0.3982, 0.3982, 0.4313, 0.4313, 0.2472, 0.2472, 0.6211, 0.6211, 0.6176,
        0.6176, 0.6086, 0.6086, 0.3318, 0.3318, 0.5505, 0.5505, 0.6388, 0.6388,
        0.3348, 0.3348, 0.6399, 0.6399, 0.7233, 0.7233, 0.5312, 0.5312, 0.6500,
        0.6500, 0.1164, 0.1164, 0.2937, 0.2937, 0.3687, 0.3687, 0.3454, 0.3454,
        0.3328, 0.3328

# <div align="center">**ZE EPIC GPU IMPORT**</div>

In [12]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [13]:
# Move data to device
edge_index = edge_index.to(device)
edge_attr = edge_attr.to(device)

# <div align="center">**ZE EPIC BATCHING**</div>

In [14]:
batch_size = node_features_sequence_input_train.shape[0] //2 # Adjust this value based on your GPU memory capacity
# Calculate the number of full batches
num_full_batches_train = (node_features_sequence_input_train.size(0) // batch_size) * batch_size

# Trim the input and output tensors to have a size divisible by the batch size
trimmed_input_train = node_features_sequence_input_train[:num_full_batches_train]
trimmed_output_train = node_features_sequence_output_train[:num_full_batches_train]

# Create batches of data
batched_input_train = trimmed_input_train.view(-1, batch_size, trimmed_input_train.size(1), trimmed_input_train.size(2))
batched_output_train = trimmed_output_train.view(-1, batch_size, trimmed_output_train.size(1), trimmed_output_train.size(2))

# Adjust the number of batches
num_batches_train = batched_input_train.size(0)

# Print the batch size and number of full batches to verify
print("Batch size:", batch_size)
print("Number of full batches:", num_full_batches_train)

print("Batched input train dimensionality:", batched_input_train.shape)
print("Batched output train dimensionality:", batched_output_train.shape)
print("Batched input train dimensionality:", batched_input_train[0].shape)
print("Batched output train dimensionality:", batched_output_train[0].shape)

# Check for NaN or Inf in input data
print(torch.isnan(batched_input_train).any())
print(torch.isinf(batched_input_train).any())

# Check for NaN or Inf in output data
print(torch.isnan(batched_output_train).any())
print(torch.isinf(batched_output_train).any())

Batch size: 704
Number of full batches: 1408
Batched input train dimensionality: torch.Size([2, 704, 27, 69])
Batched output train dimensionality: torch.Size([2, 704, 27, 69])
Batched input train dimensionality: torch.Size([704, 27, 69])
Batched output train dimensionality: torch.Size([704, 27, 69])
tensor(False)
tensor(False)
tensor(False)
tensor(False)


In [15]:
batch_size = node_features_sequence_input_test.shape[0] // 4# Adjust this value based on your GPU memory capacity
# Calculate the number of full batches
num_full_batches_test = (node_features_sequence_input_test.size(0) // batch_size) * batch_size

# Trim the input and output tensors to have a size divisible by the batch size
trimmed_input_test = node_features_sequence_input_test[:num_full_batches_test]
trimmed_output_test = node_features_sequence_output_test[:num_full_batches_test]

# Create batches of data
batched_input_test = trimmed_input_test.view(-1, batch_size, trimmed_input_test.size(1), trimmed_input_test.size(2))
batched_output_test = trimmed_output_test.view(-1, batch_size, trimmed_output_test.size(1), trimmed_output_test.size(2))

# Adjust the number of batches
num_batches_test = batched_input_test.size(0)
print("Batch size:", batch_size)
print("Number of full batches:", num_full_batches_train)

print("Batched input test dimensionality:", batched_input_test.shape)
print("Batched output test dimensionality:", batched_output_test.shape)


Batch size: 88
Number of full batches: 1408
Batched input test dimensionality: torch.Size([4, 88, 27, 69])
Batched output test dimensionality: torch.Size([4, 88, 27, 69])


# <div align="center">**ZE EPIC MODEL**</div>

In [16]:
# Transformer model for the hybrid model
# Input Dimension: [batch_length ,num_nodes, num_features]
# Output Dimension: [batch_length, num_nodes, hidden_channels]
class TransformerModule(nn.Module):
    def __init__(self, features_channels, out_channels, transformer_layers):
        super(TransformerModule, self).__init__()
        self.transformer_encoder = nn.TransformerEncoder(
            nn.TransformerEncoderLayer(d_model=features_channels, nhead=features_channels, batch_first=True),
            num_layers=transformer_layers
        )
        self.output_linear = nn.Linear(features_channels, out_channels)

    def forward(self, x):
        temporal_outputs = self.transformer_encoder(x)  # Shape: [num_timesteps, num_nodes, hidden_channels]
        x = self.output_linear(temporal_outputs)  # Shape: [num_timesteps, num_nodes, out_channels]
        return x

In [17]:
# GNN module for the hybrid model
# Input Dimension: [batch_length ,num_nodes, num_features]
# Output Dimension: [batch_length, num_nodes, hidden_channels]
class GNNModule(torch.nn.Module):
    def __init__(self, features_channels, hidden_channels, edge_in_channels):
        super(GNNModule, self).__init__()
        self.conv1 = GATv2Conv(features_channels, hidden_channels, edge_dim=edge_in_channels)
        self.conv2 = GCNConv(hidden_channels, hidden_channels)
        self.conv3 = GraphConv(hidden_channels, hidden_channels)
        self.batch_norm1 = BatchNorm(hidden_channels)
        self.batch_norm2 = BatchNorm(hidden_channels)
        self.batch_norm3 = BatchNorm(hidden_channels)
        self.dropout = torch.nn.Dropout(p=0.5)

    def gnn_forward(self, x, edge_index, edge_attr):
        edge_weight = 1.0 / (edge_attr + 1e-6)
        x1 = self.conv1(x, edge_index, edge_attr=edge_weight)
        x1 = self.batch_norm1(x1)
        x1 = F.relu(x1)
        x1 = self.dropout(x1)

        # print("Gat",x1.shape)
        # print(torch.isnan(x1).any())
        # print(torch.isinf(x1).any())

        x2 = self.conv2(x1, edge_index, edge_weight=edge_attr)
        x2 = self.batch_norm2(x2)
        x2 = F.relu(x2)        
        x2 = self.dropout(x2)

        # print("gnc",x2.shape)
        # print(torch.isnan(x2).any())
        # print(torch.isinf(x2).any())

        x3 = self.conv3(x2, edge_index)
        x3 = self.batch_norm3(x3)
        x3 = F.relu(x3)
        x3 = self.dropout(x3)

        # print("graph",x3.shape)
        # print(torch.isnan(x3).any())
        # print(torch.isinf(x3).any())

        x = x1 + x2 + x3  # Residual connection

        # print("residual",x.shape)
        # print(torch.isnan(x).any())
        # print(torch.isinf(x).any())

        return x  # Shape: [num_nodes, hidden_channels]

    def forward(self, x, edge_index, edge_attr):
        spatial_outputs = []
        for t in range(x.size(0)):
            x_t = self.gnn_forward(x[t], edge_index, edge_attr)
            spatial_outputs.append(x_t)

        x = torch.stack(spatial_outputs, dim=0)  # Shape: [num_timesteps, num_nodes, hidden_channels]
        
        return x

In [18]:
# Hybrid model combining the Transformer and GNN modules
# Input Dimension: [batch_length ,num_nodes, num_features]
# Output Dimension: [batch_length, num_nodes, out_channels]
class HybridModel_Transformer(torch.nn.Module):
    def __init__(self, in_channels, out_channels, features_channels, edge_in_channels, hidden_channels, transformer_layers):
        super(HybridModel_Transformer, self).__init__()
        self.transformer = TransformerModule(features_channels, hidden_channels, transformer_layers)
        self.gnn = GNNModule(hidden_channels, hidden_channels, edge_in_channels)
        self.output_layer = torch.nn.Linear(hidden_channels, features_channels)

    def forward(self, x, edge_index, edge_attr):
        # Project the input features to the hidden dimension
        # Transform the input features using the transformer
        x = self.transformer(x)

        # print("transformer forward",x.shape)
        # print(torch.isnan(x).any())
        # print(torch.isinf(x).any())

        # Pass the transformed features to the GNN
        x = self.gnn(x, edge_index, edge_attr)

        # print("gnn",x.shape)
        # print(torch.isnan(x).any())
        # print(torch.isinf(x).any())

        # Pass the GNN outputs through a linear layer
        x = self.output_layer(x)
        return x

In [19]:
class HybridModel_GNN(torch.nn.Module):
    def __init__(self, in_channels, out_channels, features_channels, edge_in_channels, hidden_channels, transformer_layers):
        super(HybridModel_GNN, self).__init__()
        self.gnn = GNNModule(features_channels, hidden_channels, edge_in_channels)
        self.transformer = TransformerModule(hidden_channels, features_channels, transformer_layers)

    def forward(self, x, edge_index, edge_attr):
        # Apply GNN first
        x = self.gnn(x, edge_index, edge_attr)

        # Apply transformer
        x = self.transformer(x)
        return x 

In [20]:
class HybridModel_Parallel(torch.nn.Module):
    def __init__(self, in_channels, out_channels, features_channels, edge_in_channels, hidden_channels, transformer_layers):
        super(HybridModel_Parallel, self).__init__()
        self.gnn = GNNModule(features_channels, hidden_channels, edge_in_channels)
        self.transformer = TransformerModule(features_channels, hidden_channels, transformer_layers)
        self.output_layer = torch.nn.Linear(hidden_channels, features_channels)

    def forward(self, x, edge_index, edge_attr):
        # Project the input features to the hidden dimension
        # Transform the input features using the transformer
        x_gnn = self.gnn(x, edge_index, edge_attr)
        x_transformer = self.transformer(x)

        # Pass the transformed features to the GNN
        x = x_gnn + x_transformer

        # Pass the GNN outputs through a linear layer
        x = self.output_layer(x)

        
        return x
        

In [21]:
class SingleModel_Transformer(torch.nn.Module):
    def __init__(self, in_channels, out_channels, features_channels, edge_in_channels, hidden_channels, transformer_layers):
        super(SingleModel_Transformer, self).__init__()
        self.transformer = TransformerModule(features_channels, hidden_channels, transformer_layers)
        self.output_layer = torch.nn.Linear(hidden_channels, features_channels)

    def forward(self, x, edge_index, edge_attr):
        # Project the input features to the hidden dimension
        # Transform the input features using the transformer
        x = self.transformer(x)

        # Pass the GNN outputs through a linear layer
        x = self.output_layer(x)
        return x

In [22]:
class SingleModel_GNN(torch.nn.Module):
    def __init__(self, in_channels, out_channels, features_channels, edge_in_channels, hidden_channels, transformer_layers):
        super(SingleModel_GNN, self).__init__()
        self.gnn = GNNModule(features_channels, hidden_channels, edge_in_channels)
        self.output_layer = torch.nn.Linear(hidden_channels, features_channels)

    def forward(self, x, edge_index, edge_attr):
        # Apply GNN first
        x = self.gnn(x, edge_index, edge_attr)

        # Apply transformer
        x = self.output_layer(x)
        return x

# <div align="center">**ZE EPIC MODEL SETUP**</div>

In [None]:
# Define the parameters
num_epochs = 500  # Adjust the number of epochs as needed
learning_rate = 0.01
scheduler_mode = 'min'
scheduler_factor = 0.8
scheduler_patience = 5

#Print all parameter 
print("Number of epochs:", num_epochs)
print("Learning rate:", learning_rate)
print("Scheduler mode:", scheduler_mode)
print("Scheduler factor:", scheduler_factor)
print("Scheduler patience:", scheduler_patience)

# Define the loss function
loss_fn = torch.nn.MSELoss()

Number of epochs: 500
Learning rate: 0.01
Scheduler mode: min
Scheduler factor: 0.5
Scheduler patience: 5


# <div align="center">**ZE EPIC MODEL TRAIN**</div>

In [24]:
def train_model(model, optimizer, scheduler, loss_fn, num_epochs, num_batches_train, batched_input_train, batched_output_train, device):
    epoch_losses = []
    epoch_maes = []

    # Set the model to training mode
    model.train()

    for epoch in range(num_epochs):
        total_loss = 0
        total_mae = 0

        optimizer.zero_grad()

        for b in range(num_batches_train):
            # Get the batched node features and desired output
            node_features_batch = batched_input_train[b].to(device)
            desired_output_batch = batched_output_train[b].to(device)

            # print("Node features batch: ", node_features_batch.shape)
            # print(torch.isnan(node_features_batch).any())
            # print(torch.isinf(node_features_batch).any())
            # Forward pass with batch_size parameter
            model_output_batch = model(node_features_batch, edge_index, edge_attr)
            
            # print("Model be outputing: ", model_output_batch.shape)
            # print(torch.isnan(model_output_batch).any())
            # print(torch.isinf(model_output_batch).any())

            # print("Desired output: ", desired_output_batch.shape)
            # print(torch.isnan(desired_output_batch).any())
            # print(torch.isinf(desired_output_batch).any())

            # Compute loss
            loss = loss_fn(model_output_batch, desired_output_batch)

            # Backward pass
            loss.backward()
            # Update the optimizer
            optimizer.step()
            optimizer.zero_grad()

            # Compute MAE for debugging
            mae = torch.mean(torch.abs(model_output_batch - desired_output_batch))

            total_loss += loss.item()
            total_mae += mae.item()

        average_loss = total_loss / num_batches_train
        average_mae = total_mae / num_batches_train
        epoch_losses.append(average_loss)
        epoch_maes.append(average_mae)

        scheduler.step(average_loss)

        current_lr = optimizer.param_groups[0]['lr']
        current_patience = scheduler.num_bad_epochs
        print(f"Epoch {epoch+1}, Average Loss: {average_loss}, Average MAE: {average_mae}, Learning Rate: {current_lr}, Current Patience: {current_patience}")
        torch.cuda.empty_cache()

    return epoch_losses, epoch_maes


In [25]:
# Model Train Toggle
run_model_parallel = True
run_model_transformer_gnn = True
run_model_gnn_transformer = True
run_model_single_transformer = True
run_model_single_gnn = True

In [26]:
from datetime import datetime

def save_model_data(model, optimizer, scheduler, epoch_losses, epoch_maes, model_name):
    # Create the directory if it doesn't exist
    current_time = datetime.now().strftime('%Y%m%d_%H%M%S')
    folder_path = f'saved_models/{model_name}_{current_time}'
    if not os.path.exists(folder_path):
        os.makedirs(folder_path)

    # Save the model, optimizer, and scheduler states
    torch.save({
        'model_state_dict': model.state_dict(),
        'optimizer_state_dict': optimizer.state_dict(),
        'scheduler_state_dict': scheduler.state_dict(),
    }, os.path.join(folder_path, f'{model_name}.pt'))

    # Save epoch losses and MAEs to a CSV file
    df = pd.DataFrame({
        'epoch': range(1, len(epoch_losses) + 1),
        'loss': epoch_losses,
        'mae': epoch_maes
    })
    df.to_csv(os.path.join(folder_path, f'{model_name}_metrics.csv'), index=False)
    print(f"Model data saved in {folder_path}")

In [27]:

if run_model_parallel:
    # Call the function
    # Define the parameters
    in_channels = batched_input_train.shape[2]
    features_channels = batched_input_train.shape[3]
    out_channels = in_channels  
    edge_in_channels = 1
    hidden_channels = 256
    transformer_layers = 3

    # Print the parameters to verify
    print("Input Channels:", in_channels)
    print("Features Channels:", features_channels)
    print("Output Channels:", out_channels)
    print("Edge Input Channels:", edge_in_channels)
    print("Hidden Channels:", hidden_channels)
    print("Transformer Layers:", transformer_layers)

    model_parallel = HybridModel_Parallel(in_channels, out_channels, features_channels, edge_in_channels, hidden_channels, transformer_layers).to(device)

    # Define the optimizer and loss function for model_parallel
    optimizer_parallel = torch.optim.Adam(model_parallel.parameters(), lr=learning_rate, weight_decay=1e-5)
    scheduler_parallel = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer_parallel, mode=scheduler_mode, factor=scheduler_factor, patience=scheduler_patience, verbose=True, min_lr=1e-5)

    parallel_epoch_losses, parallel_epoch_maes = train_model(model_parallel, optimizer_parallel, scheduler_parallel, loss_fn, num_epochs, num_batches_train, batched_input_train, batched_output_train, device)
    save_model_data(model_parallel, optimizer_parallel, scheduler_parallel, parallel_epoch_losses, parallel_epoch_maes, 'model_parallel')
    del model_parallel

    torch.cuda.empty_cache()

Input Channels: 27
Features Channels: 69
Output Channels: 27
Edge Input Channels: 1
Hidden Channels: 256
Transformer Layers: 3




Epoch 1, Average Loss: 201.84803009033203, Average MAE: 5.592649936676025, Learning Rate: 0.01, Current Patience: 0
Epoch 2, Average Loss: 117.43497848510742, Average MAE: 4.225418329238892, Learning Rate: 0.01, Current Patience: 0
Epoch 3, Average Loss: 43.20058250427246, Average MAE: 2.8258520364761353, Learning Rate: 0.01, Current Patience: 0
Epoch 4, Average Loss: 39.11910629272461, Average MAE: 3.169965624809265, Learning Rate: 0.01, Current Patience: 0
Epoch 5, Average Loss: 28.9498291015625, Average MAE: 2.519943118095398, Learning Rate: 0.01, Current Patience: 0
Epoch 6, Average Loss: 19.09143304824829, Average MAE: 2.029530644416809, Learning Rate: 0.01, Current Patience: 0
Epoch 7, Average Loss: 24.623035430908203, Average MAE: 2.258751153945923, Learning Rate: 0.01, Current Patience: 1
Epoch 8, Average Loss: 21.450279235839844, Average MAE: 1.9795401692390442, Learning Rate: 0.01, Current Patience: 2
Epoch 9, Average Loss: 17.896961212158203, Average MAE: 1.7529076933860779,

In [28]:
# Toggle to run the code
if run_model_transformer_gnn:
    # Call the function
    # Define the parameters
    in_channels = batched_input_train.shape[2]
    features_channels = batched_input_train.shape[3]
    out_channels = in_channels  
    edge_in_channels = 1
    hidden_channels = 256
    transformer_layers = 3

    # Print the parameters to verify
    print("Input Channels:", in_channels)
    print("Features Channels:", features_channels)
    print("Output Channels:", out_channels)
    print("Edge Input Channels:", edge_in_channels)
    print("Hidden Channels:", hidden_channels)
    print("Transformer Layers:", transformer_layers)

    model_transformer_gnn = HybridModel_Transformer(in_channels, out_channels, features_channels, edge_in_channels, hidden_channels, transformer_layers).to(device)

    # Define the optimizer and loss function for model_transformer_gnn
    optimizer_transformer_gnn = torch.optim.Adam(model_transformer_gnn.parameters(), lr=learning_rate, weight_decay=1e-5)
    scheduler_transformer_gnn = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer_transformer_gnn, mode=scheduler_mode, factor=scheduler_factor, patience=scheduler_patience, verbose=True, min_lr=1e-5)

    transformer_gnn_epoch_losses, transformer_gnn_epoch_maes = train_model(model_transformer_gnn, optimizer_transformer_gnn, scheduler_transformer_gnn, loss_fn, num_epochs, num_batches_train, batched_input_train, batched_output_train, device)
    save_model_data(model_transformer_gnn, optimizer_transformer_gnn, scheduler_transformer_gnn, transformer_gnn_epoch_losses, transformer_gnn_epoch_maes, 'model_transformer_gnn')
    del model_transformer_gnn
    torch.cuda.empty_cache()

Input Channels: 27
Features Channels: 69
Output Channels: 27
Edge Input Channels: 1
Hidden Channels: 256
Transformer Layers: 3
Epoch 1, Average Loss: 208.77224731445312, Average MAE: 5.650066137313843, Learning Rate: 0.01, Current Patience: 0
Epoch 2, Average Loss: 156.1156005859375, Average MAE: 4.569018602371216, Learning Rate: 0.01, Current Patience: 0
Epoch 3, Average Loss: 112.39171600341797, Average MAE: 4.315544128417969, Learning Rate: 0.01, Current Patience: 0
Epoch 4, Average Loss: 75.79342651367188, Average MAE: 2.9287009239196777, Learning Rate: 0.01, Current Patience: 0
Epoch 5, Average Loss: 53.12350273132324, Average MAE: 2.828753352165222, Learning Rate: 0.01, Current Patience: 0
Epoch 6, Average Loss: 36.7504768371582, Average MAE: 2.154445171356201, Learning Rate: 0.01, Current Patience: 0
Epoch 7, Average Loss: 29.10667896270752, Average MAE: 2.307546854019165, Learning Rate: 0.01, Current Patience: 0
Epoch 8, Average Loss: 27.631205558776855, Average MAE: 2.21701824

In [None]:
# Toggle to run the code


if run_model_gnn_transformer:
    # Define the parameters
    in_channels = batched_input_train.shape[2]
    features_channels = batched_input_train.shape[3]
    out_channels = in_channels  
    edge_in_channels = 1
    hidden_channels = 256
    transformer_layers = 2

    # Print the parameters to verify
    print("Input Channels:", in_channels)
    print("Features Channels:", features_channels)
    print("Output Channels:", out_channels)
    print("Edge Input Channels:", edge_in_channels)
    print("Hidden Channels:", hidden_channels)
    print("Transformer Layers:", transformer_layers)

    model_gnn_transformer = HybridModel_GNN(in_channels, out_channels, features_channels, edge_in_channels, hidden_channels, transformer_layers).to(device)

    # Define the optimizer and loss function for model_gnn_transformer
    optimizer_gnn_transformer = torch.optim.Adam(model_gnn_transformer.parameters(), lr=learning_rate, weight_decay=1e-5)
    scheduler_gnn_transformer = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer_gnn_transformer, mode=scheduler_mode, factor=scheduler_factor, patience=scheduler_patience, verbose=True, min_lr=1e-5)

    gnn_transformer_epoch_losses, gnn_transformer_epoch_maes = train_model(model_gnn_transformer, optimizer_gnn_transformer, scheduler_gnn_transformer, loss_fn, num_epochs, num_batches_train, batched_input_train, batched_output_train, device)
    save_model_data(model_gnn_transformer, optimizer_gnn_transformer, scheduler_gnn_transformer, gnn_transformer_epoch_losses, gnn_transformer_epoch_maes, 'model_gnn_transformer')
    del model_gnn_transformer
    torch.cuda.empty_cache()


Input Channels: 27
Features Channels: 69
Output Channels: 27
Edge Input Channels: 1
Hidden Channels: 256
Transformer Layers: 2
Epoch 1, Average Loss: 204.65093231201172, Average MAE: 4.737619876861572, Learning Rate: 0.01, Current Patience: 0
Epoch 2, Average Loss: 165.34148406982422, Average MAE: 4.6039323806762695, Learning Rate: 0.01, Current Patience: 0
Epoch 3, Average Loss: 132.86632537841797, Average MAE: 3.9697035551071167, Learning Rate: 0.01, Current Patience: 0
Epoch 4, Average Loss: 103.73302459716797, Average MAE: 3.5436246395111084, Learning Rate: 0.01, Current Patience: 0
Epoch 5, Average Loss: 77.69018936157227, Average MAE: 2.849448323249817, Learning Rate: 0.01, Current Patience: 0
Epoch 6, Average Loss: 56.07606506347656, Average MAE: 2.299950122833252, Learning Rate: 0.01, Current Patience: 0
Epoch 7, Average Loss: 39.94814872741699, Average MAE: 1.7724019289016724, Learning Rate: 0.01, Current Patience: 0
Epoch 8, Average Loss: 29.551669120788574, Average MAE: 1.67

In [None]:
if run_model_single_transformer:
    # Define the parameters
    in_channels = batched_input_train.shape[2]
    features_channels = batched_input_train.shape[3]
    out_channels = in_channels  
    edge_in_channels = 1
    hidden_channels = 512
    transformer_layers = 8

    # Print the parameters to verify
    print("Input Channels:", in_channels)
    print("Features Channels:", features_channels)
    print("Output Channels:", out_channels)
    print("Edge Input Channels:", edge_in_channels)
    print("Hidden Channels:", hidden_channels)
    print("Transformer Layers:", transformer_layers)

    model_single_transformer = SingleModel_Transformer(in_channels, out_channels, features_channels, edge_in_channels, hidden_channels, transformer_layers).to(device)

    # Define the optimizer and loss function for model_transformer
    optimizer_single_transformer = torch.optim.Adam(model_single_transformer.parameters(), lr=learning_rate, weight_decay=1e-5)
    scheduler_single_transformer = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer_single_transformer, mode=scheduler_mode, factor=scheduler_factor, patience=scheduler_patience, verbose=True, min_lr=1e-5)

    single_transformer_epoch_losses, single_transformer_epoch_maes = train_model(model_single_transformer, optimizer_single_transformer, scheduler_single_transformer, loss_fn, num_epochs, num_batches_train, batched_input_train, batched_output_train, device)
    save_model_data(model_single_transformer, optimizer_single_transformer, scheduler_single_transformer, single_transformer_epoch_losses, single_transformer_epoch_maes, 'model_single_transformer')
    del model_single_transformer
    torch.cuda.empty_cache()

In [None]:
if run_model_single_gnn:
    torch.cuda.empty_cache()
    # Define the parameters
    in_channels = batched_input_train.shape[2]
    features_channels = batched_input_train.shape[3]
    out_channels = in_channels  
    edge_in_channels = 1
    hidden_channels = 512
    transformer_layers = 3

    # Print the parameters to verify
    print("Input Channels:", in_channels)
    print("Features Channels:", features_channels)
    print("Output Channels:", out_channels)
    print("Edge Input Channels:", edge_in_channels)
    print("Hidden Channels:", hidden_channels)
    print("Transformer Layers:", transformer_layers)

    model_single_gnn = SingleModel_GNN(in_channels, out_channels, features_channels, edge_in_channels, hidden_channels, transformer_layers).to(device)
    
    # Define the optimizer and loss function for model_gnn
    optimizer_single_gnn = torch.optim.Adam(model_single_gnn.parameters(), lr=learning_rate, weight_decay=1e-5)
    scheduler_single_gnn = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer_single_gnn, mode=scheduler_mode, factor=scheduler_factor, patience=scheduler_patience, verbose=True, min_lr=1e-5)

    single_gnn_epoch_losses, single_gnn_epoch_maes = train_model(model_single_gnn, optimizer_single_gnn, scheduler_single_gnn, loss_fn, num_epochs, num_batches_train, batched_input_train, batched_output_train, device)
    save_model_data(model_single_gnn, optimizer_single_gnn, scheduler_single_gnn, single_gnn_epoch_losses, single_gnn_epoch_maes, 'model_single_gnn')
    del model_single_gnn
    torch.cuda.empty_cache()


# <div align="center">**ZE EPIC GRAPHS**</div>

In [None]:
# Print the shape of each variable
print("Shape of gnn_transformer_epoch_losses:", np.shape(gnn_transformer_epoch_losses))
print("Shape of gnn_transformer_epoch_maes:", np.shape(gnn_transformer_epoch_maes))
print("Shape of transformer_gnn_epoch_losses:", np.shape(transformer_gnn_epoch_losses))
print("Shape of transformer_gnn_epoch_maes:", np.shape(transformer_gnn_epoch_maes))
print("Shape of single_gnn_epoch_losses:", np.shape(single_gnn_epoch_losses))
print("Shape of single_gnn_epoch_maes:", np.shape(single_gnn_epoch_maes))
print("Shape of single_transformer_epoch_losses:", np.shape(single_transformer_epoch_losses))
print("Shape of single_transformer_epoch_maes:", np.shape(single_transformer_epoch_maes))
print("Shape of parallel_epoch_losses:", np.shape(parallel_epoch_losses))
print("Shape of parallel_epoch_maes:", np.shape(parallel_epoch_maes))

# Flatten all arrays
gnn_transformer_epoch_losses = np.array(gnn_transformer_epoch_losses).flatten()
gnn_transformer_epoch_maes = np.array(gnn_transformer_epoch_maes).flatten()
transformer_gnn_epoch_losses = np.array(transformer_gnn_epoch_losses).flatten()
transformer_gnn_epoch_maes = np.array(transformer_gnn_epoch_maes).flatten()
single_gnn_epoch_losses = np.array(single_gnn_epoch_losses).flatten()
single_gnn_epoch_maes = np.array(single_gnn_epoch_maes).flatten()
single_transformer_epoch_losses = np.array(single_transformer_epoch_losses).flatten()
single_transformer_epoch_maes = np.array(single_transformer_epoch_maes).flatten()
parallel_epoch_losses = np.array(parallel_epoch_losses).flatten()
parallel_epoch_maes = np.array(parallel_epoch_maes).flatten()


# Plotting the epoch losses
plt.figure(figsize=(12, 6))

plt.subplot(1, 2, 1)
plt.plot(gnn_transformer_epoch_losses, label='GNN Transformer Loss')
plt.plot(transformer_gnn_epoch_losses, label='Transformer GNN Loss')
plt.plot(single_gnn_epoch_losses, label='GNN Loss')
plt.plot(single_transformer_epoch_losses, label='Transformer Loss')
plt.plot(parallel_epoch_losses, label='Parallel Loss')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.title('Epoch Losses')
plt.legend()

# Plotting the epoch MAE
plt.subplot(1, 2, 2)
plt.plot(gnn_transformer_epoch_maes, label='GNN Transformer MAE')
plt.plot(transformer_gnn_epoch_maes, label='Transformer GNN MAE')
plt.plot(single_gnn_epoch_maes, label='GNN MAE')
plt.plot(single_transformer_epoch_maes, label='Transformer MAE')
plt.plot(parallel_epoch_maes, label='Parallel MAE')
plt.xlabel('Epoch')
plt.ylabel('MAE')
plt.title('Epoch MAE')
plt.legend()

plt.tight_layout()
plt.show()

In [None]:
def evaluate_model(model, loss_fn, num_batches_test, batched_input_test, batched_output_test, device):
    model.eval()
    # Initialize lists to store losses and MAEs
    test_losses = []
    test_maes = []

    test_losses_per_matrix = []
    test_maes_per_matrix = []

    # Disable gradient calculation for evaluation
    with torch.no_grad():
        for b in range(num_batches_test):
            # Get the batched node features and desired output
            node_features_batch = batched_input_test[b].to(device)
            desired_output_batch = batched_output_test[b].to(device)
            
            # Reshape node_features_batch to match the expected input shape for the model
            node_features_batch = node_features_batch.view(-1, node_features_batch.size(2))
            
            # Get the model output
            model_output_batch = model(node_features_batch, edge_index, edge_attr)
            
            # Reshape model_output_batch back to the original shape
            model_output_batch = model_output_batch.view(batched_input_test.size(1), -1, model_output_batch.size(1))
            
            # Compute loss and MAE
            loss = loss_fn(model_output_batch, desired_output_batch)
            mae = torch.mean(torch.abs(model_output_batch - desired_output_batch))
            
            # Store the loss and MAE
            test_losses.append(loss.item())
            test_maes.append(mae.item())

            print(f"Batch {b+1} - Model Output and Desired Output Matrices:")
            model_output_np = model_output_batch.cpu().numpy()
            desired_output_np = desired_output_batch.cpu().numpy()

            for i in range(model_output_np.shape[0]):
                print()
                print(f"Model Output Matrix {i+1}:")
                np.set_printoptions(formatter={'float_kind':lambda x: "%.4f" % x})
                print(model_output_np[i])
                print(f"Desired Output Matrix {i+1}:")
                print(desired_output_np[i])
                
                # Calculate loss and MAE for each matrix
                matrix_loss = loss_fn(torch.tensor(model_output_np[i], device=device), torch.tensor(desired_output_np[i], device=device))
                matrix_mae = torch.mean(torch.abs(torch.tensor(model_output_np[i], device=device) - torch.tensor(desired_output_np[i], device=device)))
                print(f"Loss for Matrix {i+1}: {matrix_loss.item()}")
                print(f"MAE for Matrix {i+1}: {matrix_mae.item()}")

                test_losses_per_matrix.append(matrix_loss.item())
                test_maes_per_matrix.append(matrix_mae.item())

    # Print the average loss and MAE for the test set
    average_test_loss = sum(test_losses) / len(test_losses)
    average_test_mae = sum(test_maes) / len(test_maes)
    print(f"Average Test Loss: {average_test_loss}, Average Test MAE: {average_test_mae}")

    return test_losses, test_maes, test_losses_per_matrix, test_maes_per_matrix, average_test_loss, average_test_mae

In [None]:

evaluate_model(model_gnn_transformer, loss_fn, num_batches_test, batched_input_test, batched_output_test, device)
evaluate_model(model_transformer_gnn, loss_fn, num_batches_test, batched_input_test, batched_output_test, device)
evaluate_model(model_parallel, loss_fn, num_batches_test, batched_input_test, batched_output_test, device)
evaluate_model(model_single_transformer, loss_fn, num_batches_test, batched_input_test, batched_output_test, device)
evaluate_model(model_single_gnn, loss_fn, num_batches_test, batched_input_test, batched_output_test, device)