In [1]:
from google.colab import drive
drive.mount('/content/drive', force_remount=True)

Mounted at /content/drive


In [2]:
!pip install torch_geometric

Collecting torch_geometric
  Downloading torch_geometric-2.6.1-py3-none-any.whl.metadata (63 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/63.1 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m63.1/63.1 kB[0m [31m5.2 MB/s[0m eta [36m0:00:00[0m
Downloading torch_geometric-2.6.1-py3-none-any.whl (1.1 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.1/1.1 MB[0m [31m46.8 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: torch_geometric
Successfully installed torch_geometric-2.6.1


In [3]:
!unzip /content/drive/MyDrive/Thesis/Data/compressed_data.zip
!unzip /content/drive/MyDrive/Thesis/Data/demand_graphs.pkl-005.zip


Archive:  /content/drive/MyDrive/Thesis/Data/compressed_data.zip
  inflating: demand_graphs.pkl.npz   
  inflating: final_model_input_partial_scale_4.csv  
Archive:  /content/drive/MyDrive/Thesis/Data/demand_graphs.pkl-005.zip
  inflating: demand_graphs.pkl-005.npz  


In [10]:
import os
import math
import datetime
import numpy as np
import pandas as pd
from tqdm import tqdm
import matplotlib.pyplot as plt

import torch
import torch.nn as nn
from torch.utils.tensorboard import SummaryWriter
from torch.utils.data import Dataset, DataLoader, random_split

from torch_geometric.nn import GAT, global_mean_pool
from torch_geometric.data import Data
from torch_geometric.utils import dense_to_sparse

In [5]:
directory = '/content/drive/MyDrive/Thesis'
data_dir = directory + "/Data"
models_dir = directory + "/models"

In [6]:
loaded = np.load(f'demand_graphs.pkl-005.npz')
demand_graphs = [loaded[f'arr_{i}'] for i in range(8757, len(loaded)-1)]
adj_matrices = torch.tensor(demand_graphs)
del demand_graphs

  adj_matrices = torch.tensor(demand_graphs)


In [18]:
weather_len = len(['dwpt', 'rhum', 'prcp', 'wdir', 'wspd', 'pres'])

In [16]:
len(loaded)

19700

In [17]:
timestamps = np.load(f'{data_dir}/demand_graph_timestamps.pkl.npz')
stamps = [timestamps[f'arr_{i}'] for i in range(8757, len(loaded)-1)] #17516
datetimes = [datetime.datetime(int(arr[0][0]), int(arr[0][1]), int(arr[0][2]), int(arr[0][3])) for arr in stamps]

In [20]:
len(datetimes)

10942

In [7]:
adj_matrices.shape

torch.Size([10942, 183, 183])

In [8]:
num_stations = adj_matrices.shape[1]

In [9]:
from torch_geometric.data import Data
from torch_geometric.utils import dense_to_sparse

# Convert adjacency matrices to PyTorch Geometric format for each hour
data_list = []

for i in range(adj_matrices.shape[0]):  # Iterate over each hour
    adj_matrix = adj_matrices[i]

    # Convert dense adjacency matrix to edge_index (sparse representation)
    edge_index, edge_attr = dense_to_sparse(adj_matrix)
    # Use an identity matrix for node features (can be replaced with other features)
    x = torch.eye(num_stations)
    # Create a Data object
    data = Data(x=adj_matrix.float(), edge_index=edge_index, edge_attr=edge_attr)
    data_list.append(data)

print(data_list[0])

for data in data_list:
    data.edge_attr = data.edge_attr.float()

Data(x=[183, 183], edge_index=[2, 77], edge_attr=[77])


In [11]:
del adj_matrices

In [13]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

Using device: cpu


In [12]:

import torch.nn.functional as F

class GNNLayer(torch.nn.Module):
    def __init__(self, in_channels, h1, out_channels, datetime_feats_len, weather_feats_len, num_layers, dropout_prob):
        super(GNNLayer, self).__init__()
        self.convs = nn.ModuleList()
        self.convs.append(GAT(in_channels, h1, 3))

        for _ in range(num_layers - 2):
            self.convs.append(GAT(h1, h1, 3))
        self.leaky_relu = nn.LeakyReLU()
        self.dropout_prob = dropout_prob
        self.datetime_feats_len = datetime_feats_len
        self.weather_feats_len = weather_feats_len

        self.convs.append(GAT(h1, out_channels, 3))

    def forward(self, x, edge_index, batch, datetime_features, weather_features):
      for conv in self.convs[:-1]:
        x = self.leaky_relu(conv(x, edge_index))
        x = F.dropout(x, p=self.dropout_prob, training=self.training)
      x = self.convs[-1](x, edge_index)

      x = global_mean_pool(x, batch)
      datetime_feats = datetime_features[0:self.datetime_feats_len]
      datetime_feats = datetime_feats.unsqueeze(0).expand(x.shape[0], -1)

      weather_feats = weather_features[0:self.weather_feats_len]
      weather_feats = weather_feats.unsqueeze(0).expand(x.shape[0], -1)

      combined_features = torch.cat([x, datetime_feats], dim=1)
      combined_features = torch.cat([combined_features, weather_feats], dim=1)
      return combined_features

In [15]:


class GNNForDemandPrediction(torch.nn.Module):
    def __init__(
          self, in_channels, out_channels=50, datetime_feats_len=5, weather_feats_len=6, lag=3, h1=100, num_layers=2, num_layers_lstm=1, fc_hidden_dim = 256, LSTM_hidden_dim=128, dropout_prob=0.2
        ):
        super(GNNForDemandPrediction, self).__init__()

        self.gnn = GNNLayer(in_channels, h1, out_channels, datetime_feats_len, weather_feats_len, num_layers, dropout_prob)

        self.lstm = nn.LSTM(
            (out_channels+datetime_feats_len+weather_feats_len)*lag,
            LSTM_hidden_dim,
            num_layers_lstm,
            batch_first=True,
            bidirectional=True,
            dropout=dropout_prob
        )
        self.fc1 = torch.nn.Linear(LSTM_hidden_dim, fc_hidden_dim)
        self.fc2 = torch.nn.Linear(fc_hidden_dim, in_channels)
        self.dropout_prob = dropout_prob
        self.leaky_relu = nn.LeakyReLU()

    #x, edge_index, batch, datetime_features, weather_features
    def forward(self, batch):
        current_graphs, lag_graphs = batch

        current_graphs.to(device)
        current_x = torch.cat([
            self.gnn(g.x, g.edge_index, g.batch, g.datetime_features, g.weather_features) for g in [current_graphs]
        ], dim=0)
        lag_x_list = []
        for i in lag_graphs:
          #print(i)
          i.to(device)
          x, edge_index, batch_indices, datetime_features, weather_features =\
              i.x, i.edge_index, i.batch, i.datetime_features, i.weather_features
          lag_x = torch.cat([self.gnn(x, edge_index, batch_indices, datetime_features, weather_features)], dim=1)
          #gnn_embedding = gnn_embedding.unsqueeze(1)
          lag_x_list.append(lag_x)
        lag_x = torch.stack(lag_x_list)
        #gnn_embedding = lag_x.mean(dim=0)
        gnn_embedding = current_x
        for i in lag_x:
          gnn_embedding = torch.cat([gnn_embedding, i], dim=1)
        gnn_embedding = gnn_embedding.unsqueeze(1)
        lstm_out, (hidden, _) = self.lstm(gnn_embedding)

        final_hidden_state = lstm_out[:, -1, :]

        x = self.leaky_relu(self.fc1(hidden[-1]))

        x = F.dropout(x, p=self.dropout_prob, training=self.training)

        predicted_demand = self.fc2(x)

        return predicted_demand



In [22]:
model = GNNForDemandPrediction(
    in_channels=num_stations,
    h1=100,
    datetime_feats_len = stamps[0][0].shape[0],
    weather_feats_len = weather_len,
    num_layers_lstm=20,
    num_layers=7
  ).to(device)

model.load_state_dict(torch.load(f'{models_dir}/gnn_gat_weather_datetime_2023_2025-01-01 20_52_39.974310.pth'))

RuntimeError: Attempting to deserialize object on a CUDA device but torch.cuda.is_available() is False. If you are running on a CPU-only machine, please use torch.load with map_location=torch.device('cpu') to map your storages to the CPU.

In [None]:
output_list = []
for i in data_list:
  output_list.append(model.get_node_embeddings(i))

In [None]:
del data_list

In [None]:
torch.save(output_list, f'{data_dir}/gnn_gat_2024-11-15 18:41:35.830762_output_embedding.pt')

In [None]:
torch.load(f'{data_dir}/gnn_gat_2024-11-15 18:41:35.830762_output_embedding.pt')

In [None]:
timestamps = np.load(f'{data_dir}/demand_graph_timestamps.pkl.npz')
stamps = [timestamps[f'arr_{i}'][0] for i in range(8758, len(timestamps))]

In [None]:
len(stamps)

10942

In [None]:
stamps[0]

array([2.023e+03, 1.000e+00, 1.000e+00, 0.000e+00, 1.000e+00])

In [None]:
timestamps = np.load(f'{data_dir}/demand_graph_timestamps.pkl.npz')
stamps = [timestamps[f'arr_{i}'][0] for i in range(0, 8758)]

In [None]:
stamps[-1]

array([2022.,   12.,   31.,   23.,    0.])

In [None]:
model.get_node_embeddings(data_list[i])

tensor([[ 2.4265e-03, -2.5289e-02,  2.0832e-02,  ...,  7.7369e-02,
          7.1916e-06,  1.3067e-02],
        [ 2.4265e-03, -2.5289e-02,  2.0832e-02,  ...,  7.7369e-02,
          7.1916e-06,  1.3067e-02],
        [ 2.4265e-03, -2.5289e-02,  2.0832e-02,  ...,  7.7369e-02,
          7.1916e-06,  1.3067e-02],
        ...,
        [ 2.4265e-03, -2.5289e-02,  2.0832e-02,  ...,  7.7369e-02,
          7.1916e-06,  1.3067e-02],
        [ 2.4265e-03, -2.5289e-02,  2.0832e-02,  ...,  7.7369e-02,
          7.1916e-06,  1.3067e-02],
        [ 2.4265e-03, -2.5289e-02,  2.0832e-02,  ...,  7.7369e-02,
          7.1916e-06,  1.3067e-02]], grad_fn=<AddBackward0>)