In [None]:
# Mount Google Drive at /content/drive so the data and checkpoint paths used
# by the later cells resolve. force_remount=True is passed on every run.
from google.colab import drive
drive.mount('/content/drive', force_remount=True)

Mounted at /content/drive


In [None]:
!pip install torch_geometric



In [None]:
import os
import math
import datetime
import numpy as np
import pandas as pd
from tqdm import tqdm
import matplotlib.pyplot as plt

import torch
import torch.nn as nn
from torch.utils.tensorboard import SummaryWriter
from torch.utils.data import Dataset, DataLoader, random_split

from torch_geometric.data import Data
from torch_geometric.utils import dense_to_sparse

In [None]:
# Project root on the mounted Drive, plus the sub-folders for inputs and checkpoints.
directory = '/content/drive/MyDrive/Thesis'
data_dir = f"{directory}/Data"
models_dir = f"{directory}/models"

In [None]:
# Read the archive of per-hour demand matrices (entries are keyed 'arr_<i>').
loaded = np.load(f'{data_dir}/demand_graphs.pkl.npz')
# NOTE(review): graphs are taken from indices 8757 .. len-2, whereas the
# timestamp cells further down slice 8758 .. len-1. If both archives share the
# same indexing this is shifted by one entry -- confirm the offset is intended.
demand_graphs = [loaded[f'arr_{i}'] for i in range(8757, len(loaded)-1)]
# Stack into a single ndarray before handing it to torch: torch.tensor() on a
# list of ndarrays converts element by element and emits a UserWarning.
# from_numpy wraps the stacked array without a further copy.
adj_matrices = torch.from_numpy(np.stack(demand_graphs))
del demand_graphs

  adj_matrices = torch.tensor(demand_graphs)


In [None]:
# Sanity check: display the dimensions of the stacked tensor.
adj_matrices.shape

torch.Size([10942, 183, 183])

In [None]:
# Number of stations = size of the second axis of the stacked matrices.
num_stations = adj_matrices.size(1)

In [None]:
from torch_geometric.data import Data
from torch_geometric.utils import dense_to_sparse

# Convert each adjacency matrix into a PyTorch Geometric `Data` graph.
data_list = []

for i in range(adj_matrices.shape[0]):  # one graph per leading-axis entry
    adj_matrix = adj_matrices[i]

    # Sparse representation of the dense matrix: edge indices and their values.
    edge_index, edge_attr = dense_to_sparse(adj_matrix)
    # Node features are the rows of the adjacency matrix itself (not an
    # identity matrix). Everything is cast to float here, so no second pass
    # over data_list is needed afterwards.
    data = Data(
        x=adj_matrix.float(),
        edge_index=edge_index,
        edge_attr=edge_attr.float(),
    )
    data_list.append(data)

print(data_list[0])

Data(x=[183, 183], edge_index=[2, 77], edge_attr=[77])


In [None]:
# Drop the name for the dense tensor; the graphs now live in data_list.
del adj_matrices

In [None]:
from torch_geometric.nn import GAT
import torch.nn.functional as F

class GATForDemandPrediction(torch.nn.Module):
    """Edge-level demand regressor built from two stacked GAT models.

    Node features pass through ``conv1`` -> leaky ReLU -> (dropout) -> ``conv2``
    to give one embedding per node. The embedding of an edge is the mean of
    its two endpoint embeddings, and a linear layer maps it to one scalar.

    Args:
        in_channels: Size of each input node feature vector.
        out_channels: Size of the node embedding produced by ``conv2``.
        h1: Width passed to ``conv1`` (and the input width of ``conv2``).
        dropout_prob: Dropout probability used between the two GAT models
            in ``forward`` while the module is in training mode.
    """

    def __init__(self, in_channels, out_channels=50, h1=100, dropout_prob=0.2):
        super().__init__()
        # The attribute names below are the state-dict key prefixes of saved
        # checkpoints; renaming them would break load_state_dict.
        # The third positional argument (5) is forwarded to GAT unchanged.
        self.conv1 = GAT(in_channels, h1, 5)
        self.conv2 = GAT(h1, out_channels, 5)
        self.fc = torch.nn.Linear(out_channels, 1)  # one regression value per edge
        self.dropout_prob = dropout_prob

    def _encode(self, x, edge_index, use_dropout):
        """Run both GAT models and return the node embeddings."""
        x = self.conv1(x, edge_index)
        x = F.leaky_relu(x)
        if use_dropout:
            # Only active in training mode; a no-op after .eval().
            x = F.dropout(x, p=self.dropout_prob, training=self.training)
        return self.conv2(x, edge_index)

    def forward(self, data):
        """Predict one demand value per edge of ``data``.

        Args:
            data: Object exposing ``x`` (node features) and ``edge_index``
                (2 x num_edges index tensor).

        Returns:
            Tensor of shape ``[num_edges]``.
        """
        edge_index = data.edge_index
        node_emb = self._encode(data.x, edge_index, use_dropout=True)

        # Edge embedding = mean of the embeddings of the edge's two endpoints.
        edge_embeddings = (node_emb[edge_index[0]] + node_emb[edge_index[1]]) / 2

        # Squeeze only the trailing size-1 axis so that a graph with a single
        # edge still yields shape [1] instead of a 0-dim scalar.
        return self.fc(edge_embeddings).squeeze(-1)

    def get_node_embeddings(self, data):
        """Return the per-node embeddings produced by the two GAT models.

        Unlike ``forward``, the dropout between the models is never applied
        here, and the edge-level linear head is skipped.
        """
        return self._encode(data.x, data.edge_index, use_dropout=False)

In [None]:
# Rebuild the architecture with the sizes the checkpoint expects, then restore
# its weights.
model = GATForDemandPrediction(
    in_channels=num_stations,
    h1=100,
    out_channels=50,
)
# map_location='cpu' keeps the load working on runtimes without the device the
# checkpoint was saved from (the model is only used on CPU in this notebook).
# weights_only=True restricts unpickling to tensors/containers and avoids the
# FutureWarning raised by the bare torch.load call.
model.load_state_dict(
    torch.load(
        f'{models_dir}/gnn_gat_2024-11-15 18:41:35.830762.pth',
        map_location='cpu',
        weights_only=True,
    )
)

  model.load_state_dict(torch.load(f'{models_dir}/gnn_gat_2024-11-15 18:41:35.830762.pth'))


<All keys matched successfully>

In [None]:
# Inference only: switch to eval mode and disable autograd so the stored
# embeddings are plain tensors and do not each keep a computation graph alive.
model.eval()
with torch.no_grad():
    output_list = [model.get_node_embeddings(graph) for graph in data_list]

In [None]:
# The input graphs are no longer needed once the embeddings are in output_list.
del data_list

In [None]:
# Persist the list of per-graph node embeddings next to the input data; the
# file name reuses the checkpoint's name so the two can be matched up.
torch.save(output_list, f'{data_dir}/gnn_gat_2024-11-15 18:41:35.830762_output_embedding.pt')

In [None]:
# Round-trip check of the saved file: keep the result in a name and show a
# compact summary instead of dumping the repr of every tensor in the list.
saved_embeddings = torch.load(f'{data_dir}/gnn_gat_2024-11-15 18:41:35.830762_output_embedding.pt')
len(saved_embeddings), tuple(saved_embeddings[0].shape)

In [None]:
# Timestamp archive; keep the first row of every entry from index 8758 onward.
timestamps = np.load(f'{data_dir}/demand_graph_timestamps.pkl.npz')
stamps = [
    timestamps[f'arr_{entry}'][0]
    for entry in range(8758, len(timestamps))
]

In [None]:
# Number of selected timestamps.
len(stamps)

10942

In [None]:
# First selected timestamp.
# NOTE(review): fields look like (year, month, day, hour, ...) -- confirm the layout.
stamps[0]

array([2.023e+03, 1.000e+00, 1.000e+00, 0.000e+00, 1.000e+00])

In [None]:
# Reload the timestamp archive and rebind `stamps` to the first 8758 entries
# (this replaces the list built from index 8758 onward).
timestamps = np.load(f'{data_dir}/demand_graph_timestamps.pkl.npz')
stamps = [
    timestamps[f'arr_{entry}'][0]
    for entry in range(8758)
]

In [None]:
# Last timestamp of the current `stamps` list.
stamps[-1]

array([2022.,   12.,   31.,   23.,    0.])

In [None]:
# `data_list` has been deleted by this point and `i` is no longer an integer
# index, so recomputing here fails on a fresh run. Inspect the embeddings
# already stored for the first graph instead.
output_list[0]

tensor([[ 2.4265e-03, -2.5289e-02,  2.0832e-02,  ...,  7.7369e-02,
          7.1916e-06,  1.3067e-02],
        [ 2.4265e-03, -2.5289e-02,  2.0832e-02,  ...,  7.7369e-02,
          7.1916e-06,  1.3067e-02],
        [ 2.4265e-03, -2.5289e-02,  2.0832e-02,  ...,  7.7369e-02,
          7.1916e-06,  1.3067e-02],
        ...,
        [ 2.4265e-03, -2.5289e-02,  2.0832e-02,  ...,  7.7369e-02,
          7.1916e-06,  1.3067e-02],
        [ 2.4265e-03, -2.5289e-02,  2.0832e-02,  ...,  7.7369e-02,
          7.1916e-06,  1.3067e-02],
        [ 2.4265e-03, -2.5289e-02,  2.0832e-02,  ...,  7.7369e-02,
          7.1916e-06,  1.3067e-02]], grad_fn=<AddBackward0>)