In [3]:
import pandas as pd
import numpy as np
from tqdm import tqdm
import math
import torch
from torch import nn, Tensor
import torch.nn.functional as F
from torch_geometric.nn import GATv2Conv
from torch.nn import BatchNorm1d
from torch.utils.data import Dataset, DataLoader
from entsoe import load_data

In [4]:
data = load_data()

Loading cached data


In [5]:
for d in data:
    print(d)
    break

Data(x=[10, 24], edge_index=[2, 32], edge_attr=[32, 1], y=[32, 1])


In [6]:
# https://pytorch-geometric.readthedocs.io/en/latest/generated/torch_geometric.nn.conv.GATv2Conv.html
class GNNEncoder(nn.Module):
    def __init__(
        self, hidden_channels, num_heads_GAT, dropout_p_GAT, edge_dim_GAT, momentum_GAT
    ):
        super().__init__()
        self.gat = GATv2Conv(
            (-1, -1),
            hidden_channels,
            add_self_loops=False,
            heads=num_heads_GAT,
            edge_dim=edge_dim_GAT,
        )
        self.norm = BatchNorm1d(
            hidden_channels,
            momentum=momentum_GAT,
            affine=False,
            track_running_stats=False,
        )
        self.dropout = nn.Dropout(dropout_p_GAT)

    def forward(self, x, edge_indices, edge_attrs):
        x = self.dropout(x)
        x = self.norm(x)
        nodes_embedds = self.gat(x, edge_indices, edge_attrs)
        nodes_embedds = F.leaky_relu(nodes_embedds, negative_slope=0.1)
        return nodes_embedds

In [7]:
class PositionalEncoding(nn.Module):
    def __init__(self, d_model: int, dropout: float = 0.1, max_len: int = 5000):
        super().__init__()
        self.dropout = nn.Dropout(p=dropout)

        position = torch.arange(max_len).unsqueeze(1)
        div_term = torch.exp(
            torch.arange(0, d_model, 2) * (-math.log(10000.0) / d_model)
        )
        pe = torch.zeros(max_len, 1, d_model)
        pe[:, 0, 0::2] = torch.sin(position * div_term)
        pe[:, 0, 1::2] = torch.cos(position * div_term)
        self.register_buffer("pe", pe)

    def forward(self, x: Tensor) -> Tensor:
        x = x + self.pe[: x.size(0)]
        return self.dropout(x)

In [8]:
class Transformer(nn.Module):
    def __init__(
        self,
        dim_model,
        num_heads_TR,
        num_encoder_layers_TR,
        num_decoder_layers_TR,
        dropout_p_TR,
    ):
        super().__init__()
        self.pos_encoder = PositionalEncoding(dim_model)
        self.transformer = nn.Transformer(
            d_model=dim_model,
            nhead=num_heads_TR,
            num_decoder_layers=num_encoder_layers_TR,
            num_encoder_layers=num_decoder_layers_TR,
            dropout=dropout_p_TR,
        )

    def forward(self, src, trg):
        src = self.pos_encoder(src)
        trg = self.pos_encoder(trg)
        temporal_node_embeddings = self.transformer(src, trg)
        return temporal_node_embeddings

In [9]:
class EdgeDecoder(nn.Module):
    def __init__(self, hidden_channels, num_heads_GAT, num_edges, num_nodes):
        super().__init__()
        self.lin1 = nn.Linear(
            num_nodes * hidden_channels * num_heads_GAT, hidden_channels
        )
        self.lin2 = nn.Linear(hidden_channels, num_edges)

    def forward(self, x):
        # Flatten the tensor
        x = torch.flatten(x)
        x = self.lin1(x)
        x = F.leaky_relu(x, negative_slope=0.1)
        x = self.lin2(x)
        return x.view(-1)

In [10]:
class Model(nn.Module):
    def __init__(
        self,
        window_size,
        hidden_channels,
        num_heads_GAT,
        dropout_p_GAT,
        edge_dim_GAT,
        momentum_GAT,
        dim_model,
        num_heads_TR,
        num_encoder_layers_TR,
        num_decoder_layers_TR,
        dropout_p_TR,
        n_edges,
        n_nodes,
    ):
        super().__init__()
        self.encoder = GNNEncoder(
            hidden_channels, num_heads_GAT, dropout_p_GAT, edge_dim_GAT, momentum_GAT
        )  # node embedding with GAT
        self.transformer = Transformer(
            dim_model,
            num_heads_TR,
            num_encoder_layers_TR,
            num_decoder_layers_TR,
            dropout_p_TR,
        )
        self.decoder = EdgeDecoder(hidden_channels, num_heads_GAT, n_edges, n_nodes)
        self.window_size = window_size

    def forward(self, x, edge_indices, edge_attrs):
        src_embedds = []
        for i in range(self.window_size):
            src_embedds.append(self.encoder(x[i], edge_indices[i], edge_attrs[i]))
        src_embedds = torch.stack(src_embedds)
        trg_embedds = src_embedds[-1].unsqueeze(0)
        temporal_node_embedds = self.transformer(src_embedds, trg_embedds)
        temporal_node_embedds = temporal_node_embedds.squeeze(0)
        edge_predictions = self.decoder(temporal_node_embedds)
        return edge_predictions

In [43]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")


def train(model, data, window_size, num_epochs, lr):
    model = model.to(device)
    model.train()
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)
    criterion = nn.L1Loss()
    for epoch in range(num_epochs):
        epoch_loss = 0
        for i in tqdm(range(0, len(data) - window_size)):
            optimizer.zero_grad()
            windowed_data = data[i : i + window_size]
            x = []
            edge_indices = []
            edge_attrs = []
            for d in windowed_data:
                x.append(d["x"])
                edge_indices.append(d["edge_index"])
                edge_attrs.append(d["edge_attr"])
            x = torch.stack(x).to(device)
            edge_indices = torch.stack(edge_indices).to(device)
            edge_attrs = torch.stack(edge_attrs).to(device)
            y_hat = model(x, edge_indices, edge_attrs)

            y = data[i + window_size]["y"].to(device)
            # edge_mask = 6
            # print(y_hat[edge_mask].item(), y[edge_mask].item())
            loss = criterion(y_hat, y)
            # loss = criterion(y_hat, y[6])
            # loss = criterion(y_hat[edge_mask], y[edge_mask])
            # print(y_hat.item(), y[6].item())
            # print(y_hat)
            loss.backward()
            optimizer.step()
            epoch_loss += loss.item()

        print(f"Epoch {epoch} Loss {epoch_loss / len(data)}")

    return model


model = Model(
    window_size=4,
    hidden_channels=32,
    num_heads_GAT=3,
    dropout_p_GAT=0.1,
    momentum_GAT=0.1,
    dim_model=32 * 3,  # hidden_channels * num_heads_GAT
    num_heads_TR=2,
    num_encoder_layers_TR=6,
    num_decoder_layers_TR=6,
    dropout_p_TR=0.1,
    n_nodes=10,
    n_edges=32,
    edge_dim_GAT=1,  # edge attributes
)
trained_model = train(model, data[:73], window_size=4, num_epochs=100, lr=0.01)

100%|██████████| 69/69 [00:01<00:00, 38.02it/s]


Epoch 0 Loss 0.5361646140274936


100%|██████████| 69/69 [00:01<00:00, 37.96it/s]


Epoch 1 Loss 0.4352165901497619


100%|██████████| 69/69 [00:01<00:00, 38.32it/s]


Epoch 2 Loss 0.4332777849615437


100%|██████████| 69/69 [00:01<00:00, 37.78it/s]


Epoch 3 Loss 0.422519764671587


100%|██████████| 69/69 [00:01<00:00, 37.06it/s]


Epoch 4 Loss 0.4217639977801336


100%|██████████| 69/69 [00:01<00:00, 36.44it/s]


Epoch 5 Loss 0.42105655882456533


100%|██████████| 69/69 [00:01<00:00, 37.30it/s]


Epoch 6 Loss 0.42225495470713265


100%|██████████| 69/69 [00:01<00:00, 37.83it/s]


Epoch 7 Loss 0.41838559100072675


100%|██████████| 69/69 [00:01<00:00, 37.01it/s]


Epoch 8 Loss 0.418458053102232


100%|██████████| 69/69 [00:01<00:00, 37.76it/s]


Epoch 9 Loss 0.4171317129918974


100%|██████████| 69/69 [00:01<00:00, 38.03it/s]


Epoch 10 Loss 0.4162512082759648


100%|██████████| 69/69 [00:01<00:00, 37.97it/s]


Epoch 11 Loss 0.41565895080566406


100%|██████████| 69/69 [00:01<00:00, 36.42it/s]


Epoch 12 Loss 0.4159796025655041


100%|██████████| 69/69 [00:01<00:00, 36.48it/s]


Epoch 13 Loss 0.4155739035508404


100%|██████████| 69/69 [00:01<00:00, 37.35it/s]


Epoch 14 Loss 0.4128619016033329


100%|██████████| 69/69 [00:01<00:00, 37.32it/s]


Epoch 15 Loss 0.412623160505948


100%|██████████| 69/69 [00:01<00:00, 37.61it/s]


Epoch 16 Loss 0.4121340178463557


100%|██████████| 69/69 [00:01<00:00, 38.22it/s]


Epoch 17 Loss 0.4113813577449485


100%|██████████| 69/69 [00:01<00:00, 37.38it/s]


Epoch 18 Loss 0.41171949856901824


 94%|█████████▍| 65/69 [00:01<00:00, 37.15it/s]


KeyboardInterrupt: 

In [13]:
from torchviz import make_dot

In [16]:
print(data[0])

Data(x=[10, 24], edge_index=[2, 32], edge_attr=[32, 1], y=[32, 1])


In [25]:
xs = []
edge_indices = []
edge_attrs = []
for d in data[0:4]:
    xs.append(d["x"])
    edge_indices.append(d["edge_index"])
    edge_attrs.append(d["edge_attr"])
xs = torch.stack(xs).to(device)
edge_indices = torch.stack(edge_indices).to(device)
edge_attrs = torch.stack(edge_attrs).to(device)

example_output = trained_model(xs, edge_indices, edge_attrs)
dot = make_dot(example_output, params=dict(trained_model.named_parameters()))
dot.format = "png"
dot.render("example_output")

dot: graph is too large for cairo-renderer bitmaps. Scaling by 0.672627 to fit


'example_output.png'

In [29]:
print(xs.shape)

torch.Size([4, 10, 24])


In [26]:
print(trained_model)

Model(
  (encoder): GNNEncoder(
    (gat): GATv2Conv((-1, -1), 128, heads=4)
    (norm): BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=False, track_running_stats=False)
    (dropout): Dropout(p=0.1, inplace=False)
  )
  (transformer): Transformer(
    (pos_encoder): PositionalEncoding(
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (transformer): Transformer(
      (encoder): TransformerEncoder(
        (layers): ModuleList(
          (0-7): 8 x TransformerEncoderLayer(
            (self_attn): MultiheadAttention(
              (out_proj): NonDynamicallyQuantizableLinear(in_features=512, out_features=512, bias=True)
            )
            (linear1): Linear(in_features=512, out_features=2048, bias=True)
            (dropout): Dropout(p=0.1, inplace=False)
            (linear2): Linear(in_features=2048, out_features=512, bias=True)
            (norm1): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
            (norm2): LayerNorm((512,), eps=1e-05, elementwise_affin