In [1]:
import pandas as pd
import numpy as np
import sys
sys.path.append("../src/utils")
from utils import SimpleGraphVoltDatasetLoader, read_and_prepare_data

In [2]:
from torch_geometric_temporal.signal import StaticGraphTemporalSignal

In [3]:
trafo_id = "T1330"
data = read_and_prepare_data(trafo_id)

In [4]:
df_edges = data["edges_static_data"]
df_measurments = data["measurements"]

In [None]:
df_measurments.isna().sum()

date_time                   0
node_id                     0
voltage                     0
temperature_2m              0
snow_depth                  0
cloud_cover                 0
is_day                      0
shortwave_radiation         0
direct_radiation            0
diffuse_radiation           0
direct_normal_irradiance    0
active_power                0
reactive_power              0
year                        0
month                       0
day                         0
hour                        0
minute                      0
dtype: int64

In [None]:
df_measurments[df_measurments["node_id"]==0].isna().sum()

date_time                   0
node_id                     0
voltage                     8
temperature_2m              0
snow_depth                  0
cloud_cover                 0
is_day                      0
shortwave_radiation         0
direct_radiation            0
diffuse_radiation           0
direct_normal_irradiance    0
active_power                0
reactive_power              0
year                        0
month                       0
day                         0
hour                        0
minute                      0
dtype: int64

In [38]:
def get_array_of_timestemps(df_measurments):
    """
    Returns list of dfs ordered by date_time.
    """
    df_grouped = df_measurments.groupby("date_time")
    dfs = [(date, df) for date, df in df_grouped]
    dfs = sorted(dfs, key=lambda x: x[0])
    dfs = [df.sort_values(by="node_id").drop(columns=["date_time", "node_id"]) for _, df in dfs]
    #get index of voltage column

    #column_names = dfs[0].columns

    dfs = np.stack([df.values for df in dfs])#, axis=-1)
    return dfs#, column_names

In [39]:
neki = get_array_of_timestemps(df_measurments)

In [40]:
neki.shape

(113, 16, 70080)

In [41]:
class SimpleGraphVoltDatasetLoader(object):
    """
    Check this https://pytorch-geometric-temporal.readthedocs.io/en/latest/_modules/torch_geometric_temporal/dataset/wikimath.html#WikiMathsDatasetLoader
    for an example of how to implement a dataset loader

    And here are the docs https://pytorch-geometric-temporal.readthedocs.io/en/latest/modules/signal.html
    """
    def __init__(self, trafo_id):
        self._trafo_id = trafo_id
        self._read_data()

    def _read_data(self):
        dataset = read_and_prepare_data(self._trafo_id)
        self._df_edges = dataset["edges_static_data"]
        self._df_measurments = dataset["measurements"]
        self._periods = len(self._df_measurments["date_time"].unique())
        self._node_counts = len(self._df_measurments["node_id"].unique())

    def _get_edges_and_edge_weights(self):
        self._edges = self._df_edges[["from_node_id", "to_node_id"]].to_numpy().T
        self._edge_weights = self._df_edges.drop(["from_node_id", "to_node_id"], axis=1).to_numpy()

    def _get_targets_and_features(self):
        #voltage is the 0th column
        #columns names: ['voltage', 'temperature_2m', 'snow_depth', 'cloud_cover', 'is_day',
        #'shortwave_radiation', 'direct_radiation', 'diffuse_radiation',
        #'direct_normal_irradiance', 'active_power', 'reactive_power', 'year',
        #'month', 'day', 'hour', 'minute']
        voltage_index = 0

        dfs = get_array_of_timestemps(self._df_measurments)
        targets = []
        features = []
        for i in range(self._periods-self.num_timesteps_in-self.num_timesteps_out+1):
            features.append(dfs[i:i+self.num_timesteps_in, :, :])
            # features.append(dfs[:,:,i:i+self.num_timesteps_in])
            targets.append(dfs[i+self.num_timesteps_in:i+self.num_timesteps_in+self.num_timesteps_out, :, voltage_index:voltage_index+1])
            # targets.append(dfs[:, voltage_index, i+self.num_timesteps_in:i+self.num_timesteps_in+self.num_timesteps_out])
        self.features = np.stack(features)
        self.targets = np.stack(targets)
        print(self.features.shape)
        print(self.targets.shape)


    def get_dataset(self, num_timesteps_in: int = 12, num_timesteps_out: int = 4) -> StaticGraphTemporalSignal:
        self.num_timesteps_in = num_timesteps_in
        self.num_timesteps_out = num_timesteps_out
        self._get_edges_and_edge_weights()
        self._get_targets_and_features()
        dataset = StaticGraphTemporalSignal(
            self._edges, 
            self._edge_weights, 
            self.features, 
            self.targets
            )
        return dataset
        

In [43]:
trafo_id = "T1330"
loader = SimpleGraphVoltDatasetLoader(trafo_id)

In [44]:
loader_data = loader.get_dataset(num_timesteps_out=4)

(70065, 113, 16, 12)
(70065, 113, 4)


In [45]:
loader.features.shape

(70065, 113, 16, 12)

In [46]:
from torch_geometric_temporal.signal import temporal_signal_split

In [47]:
train_dataset, test_dataset = temporal_signal_split(loader_data, train_ratio=0.2)

In [51]:
import torch
import torch.nn.functional as F
from torch_geometric_temporal.nn.recurrent import A3TGCN

class TemporalGNN(torch.nn.Module):
    def __init__(self, node_features, periods):
        super(TemporalGNN, self).__init__()
        # Attention Temporal Graph Convolutional Cell
        self.tgnn = A3TGCN(in_channels=node_features, 
                           out_channels=32, 
                           periods=periods)
        # Equals single-shot prediction
        self.linear = torch.nn.Linear(32, periods)

    def forward(self, x, edge_index):
        """
        x = Node features for T time steps
        edge_index = Graph edge indices
        """
        h = self.tgnn(x, edge_index)
        h = F.relu(h)
        h = self.linear(h)
        return h

TemporalGNN(node_features=16, periods=4)

TemporalGNN(
  (tgnn): A3TGCN(
    (_base_tgcn): TGCN(
      (conv_z): GCNConv(16, 32)
      (linear_z): Linear(in_features=64, out_features=32, bias=True)
      (conv_r): GCNConv(16, 32)
      (linear_r): Linear(in_features=64, out_features=32, bias=True)
      (conv_h): GCNConv(16, 32)
      (linear_h): Linear(in_features=64, out_features=32, bias=True)
    )
  )
  (linear): Linear(in_features=32, out_features=4, bias=True)
)

In [52]:
# GPU support
device = torch.device('cpu') # cuda
subset = 2000

# Create model and optimizers
model = TemporalGNN(node_features=2, periods=12).to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)
model.train()

print("Running training...")
for epoch in range(10): 
    loss = 0
    step = 0
    for snapshot in train_dataset:
        snapshot = snapshot.to(device)
        # Get model predictions
        y_hat = model(snapshot.x, snapshot.edge_index)
        # Mean squared error
        loss = loss + torch.mean((y_hat-snapshot.y)**2) 
        step += 1
        if step > subset:
          break

    loss = loss / (step + 1)
    loss.backward()
    optimizer.step()
    optimizer.zero_grad()
    print("Epoch {} train MSE: {:.4f}".format(epoch, loss.item()))

Running training...


RuntimeError: mat1 and mat2 shapes cannot be multiplied (113x16 and 2x32)

In [25]:
from torch_geometric_temporal.dataset.metr_la import METRLADatasetLoader

In [28]:
#get working directory
import os
os.getcwd()

'c:\\Users\\Karel\\Documents\\eg\\GraphVolt\\notebooks'

In [29]:
loader = METRLADatasetLoader()

In [30]:
dataset = loader.get_dataset()

In [37]:
print(len(loader.features))
print(loader.features[0].shape)
print(len(loader.targets))
print(loader.targets[0].shape)
print(loader.edges.shape)
print(loader.edge_weights.shape)

34249
(207, 2, 12)
34249
(207, 12)
(2, 1722)
(1722,)
