In [1]:
import pandas as pd
import numpy as np
import sys
sys.path.append("../src/utils")
from utils import SimpleGraphVoltDatasetLoader, read_and_prepare_data
from torch_geometric_temporal.signal import temporal_signal_split
import torch
import torch.nn.functional as F
from torch_geometric_temporal.nn.recurrent import A3TGCN
from tqdm import tqdm


  from .autonotebook import tqdm as notebook_tqdm


In [2]:
if torch.backends.mps.is_available():
    mps_device = torch.device("mps")
    x = torch.ones(1, device=mps_device)
    print (x)
else:
    print ("MPS device not found.")

tensor([1.], device='mps:0')


  nonzero_finite_vals = torch.masked_select(


In [3]:
trafo_id = "T1330"
epochs = 25
num_timesteps_in = 12
num_timesteps_out = 4
train_ratio = 0.7
test_ratio_vs_eval_ratio = 0.5
learning_rate = 0.01
device_str = 'cpu'

#----------------------
if device_str == 'cpu':
    torch.cuda.empty_cache()

#get dateime string of now
now = pd.Timestamp.now().strftime("%Y%m%d%H%M%S")

In [4]:
print("Loading data...")
loader = SimpleGraphVoltDatasetLoader(trafo_id)
loader_data = loader.get_dataset(num_timesteps_in=num_timesteps_in, num_timesteps_out=num_timesteps_out)

train_dataset, test_eval_dataset = temporal_signal_split(loader_data, train_ratio=train_ratio)
test_dataset, eval_dataset = temporal_signal_split(test_eval_dataset, train_ratio=test_ratio_vs_eval_ratio)

Loading data...
Voltage index: 5
Voltage index: 5


In [16]:
class TemporalGNN(torch.nn.Module):
    def __init__(self, node_features, periods, num_of_nodes, num_timesteps_in):
        super(TemporalGNN, self).__init__()
        # Attention Temporal Graph Convolutional Cell
        out_channels = 32
        self.tgnn = A3TGCN(in_channels=node_features, 
                           out_channels=out_channels, 
                           periods=periods)
        
        self.feature_mlp = torch.nn.Linear(
            num_of_nodes*num_of_features*num_timesteps_in, 
            num_of_nodes*num_of_features*num_timesteps_in, 
            bias=False)

        # Equals single-shot prediction
        self.linear = torch.nn.Linear(out_channels,periods)

    def forward(self, x, edge_index, edge_weights):
        """
        x = Node features for T time steps
        edge_index = Graph edge indices
        edge_weights = Graph edge weights
        """

        x = x.reshape(1, -1)
        x = self.feature_mlp(x)
        x = x.reshape(-1,num_of_features,num_timesteps_in)

        h = self.tgnn(x, edge_index, edge_weights)
        h = F.relu(h)
        h = self.linear(h)
        return h


In [14]:
def train_test(model,device, train_dataset, test_dataset, optimizer, loss_fn, epochs, now):
    """
    Definition of the training loop.
    """
    epoch_losses_train = []
    epoch_losses_test = []
    
    for epoch in range(epochs):
        model.train()
        epoch_loss_train = 0

        subset = 3
        step=0

        for snapshot in tqdm(train_dataset, desc="Training epoch {}".format(epoch)):
            snapshot.to(device)

            optimizer.zero_grad()
            out = model(snapshot.x, snapshot.edge_index,snapshot.edge_weight)
            loss = loss_fn()(out, snapshot.y)
            loss.backward()
            optimizer.step()
            epoch_loss_train += loss.detach().cpu().numpy()

            step+=1

            if step > subset:
                break


        epoch_losses_train.append(epoch_loss_train)

        print(f"Epoch {epoch + 1}, FeatureMLP Weights: {model.state_dict()['feature_mlp.weight']}")

        model.eval()
        epoch_loss_test = 0
        with torch.no_grad():

            subset = 100
            step=0

            for snapshot in tqdm(test_dataset, desc="Testing epoch {}".format(epoch)):
                snapshot.to(device)

                out = model(snapshot.x, snapshot.edge_index,snapshot.edge_weight)
                loss = loss_fn()(out, snapshot.y).cpu().numpy()
                epoch_loss_test += loss

                step+=1
                if step > subset:
                    break

            epoch_losses_test.append(epoch_loss_test)
            if min(epoch_losses_test) == epoch_loss_test:
                torch.save(model.state_dict(), f"../models/A3TGCN_{now}_{trafo_id}_epochs-{epochs}_in-{num_timesteps_in}_out-{num_timesteps_out}_train-ratio-{train_ratio}_lr-{learning_rate}.pt")
            print("Epoch: {}, Train Loss: {:.7f}, Test Loss: {:.7f}".format(epoch, epoch_loss_train, epoch_loss_test))
        
        
    return epoch_losses_train, epoch_losses_test
            

In [9]:
def eval(model, feature_mlp,eval_dataset, device, loss_fn, std):
    with torch.no_grad():
        model.eval()
        loss_all = 0
        loss_elementwise = 0
        
        steps = 0
        for snapshot in tqdm(eval_dataset, desc="Evaluating"):
            steps += 1
            snapshot.to(device)

            out = model(snapshot.x, snapshot.edge_index,snapshot.edge_weight)
            loss_all += loss_fn()(out, snapshot.y).cpu().numpy()
            loss_elementwise += loss_fn(reduction="none")(out, snapshot.y).cpu().numpy()

            if steps > 1000:
                break

        loss_all *= std/steps
        loss_elementwise *= std/steps
    return loss_all, loss_elementwise

In [17]:
print("Running training...")
num_of_nodes = train_dataset[0].x.shape[0]
num_of_features = train_dataset[0].x.shape[1]
num_of_timesteps = train_dataset[0].x.shape[2]

device = torch.device(device_str)
#feature_mlp = FeatureMLP(num_of_nodes,num_of_features,num_timesteps_in).to(device)
model = TemporalGNN(node_features=num_of_features, periods=train_dataset[0].y.shape[1],num_timesteps_in=num_timesteps_in,num_of_nodes=num_of_nodes).to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
loss_fn = torch.nn.L1Loss
losses = train_test(model, device, train_dataset, test_dataset, optimizer, loss_fn, epochs=epochs, now=now)

Running training...


Training epoch 0: 3it [01:09, 23.30s/it]


Epoch 1, FeatureMLP Weights: tensor([[ 0.0213,  0.0297,  0.0212,  ..., -0.0258, -0.0225, -0.0217],
        [ 0.0268,  0.0260,  0.0257,  ..., -0.0305, -0.0292, -0.0207],
        [ 0.0277,  0.0277,  0.0208,  ..., -0.0227, -0.0210, -0.0248],
        ...,
        [ 0.0029, -0.0032, -0.0005,  ..., -0.0012,  0.0016,  0.0023],
        [ 0.0028,  0.0040,  0.0017,  ..., -0.0029,  0.0012,  0.0007],
        [ 0.0040, -0.0035,  0.0007,  ..., -0.0008, -0.0029, -0.0036]])


Testing epoch 0: 100it [00:30,  3.29it/s]


Epoch: 0, Train Loss: 0.9691823, Test Loss: 70.6295935


Training epoch 1: 3it [00:37, 12.61s/it]


Epoch 2, FeatureMLP Weights: tensor([[ 0.0326,  0.0410,  0.0326,  ..., -0.0372, -0.0339, -0.0330],
        [ 0.0383,  0.0375,  0.0372,  ..., -0.0421, -0.0407, -0.0321],
        [ 0.0390,  0.0391,  0.0321,  ..., -0.0342, -0.0325, -0.0360],
        ...,
        [ 0.0029, -0.0032, -0.0005,  ..., -0.0012,  0.0016,  0.0023],
        [ 0.0028,  0.0040,  0.0017,  ..., -0.0029,  0.0012,  0.0007],
        [ 0.0040, -0.0035,  0.0007,  ..., -0.0008, -0.0029, -0.0036]])


Testing epoch 1: 100it [00:28,  3.55it/s]


Epoch: 1, Train Loss: 0.7610249, Test Loss: 63.0642041


Training epoch 2: 3it [00:39, 13.05s/it]


Epoch 3, FeatureMLP Weights: tensor([[ 0.0396,  0.0480,  0.0395,  ..., -0.0442, -0.0409, -0.0398],
        [ 0.0453,  0.0445,  0.0442,  ..., -0.0491, -0.0477, -0.0391],
        [ 0.0459,  0.0460,  0.0390,  ..., -0.0412, -0.0394, -0.0429],
        ...,
        [ 0.0029, -0.0032, -0.0005,  ..., -0.0012,  0.0016,  0.0023],
        [ 0.0028,  0.0040,  0.0017,  ..., -0.0029,  0.0012,  0.0007],
        [ 0.0040, -0.0035,  0.0007,  ..., -0.0008, -0.0029, -0.0036]])


Testing epoch 2: 100it [00:29,  3.40it/s]


Epoch: 2, Train Loss: 0.6422945, Test Loss: 63.1826697


Training epoch 3: 3it [00:45, 15.26s/it]


Epoch 4, FeatureMLP Weights: tensor([[ 0.0441,  0.0525,  0.0440,  ..., -0.0488, -0.0454, -0.0443],
        [ 0.0500,  0.0492,  0.0489,  ..., -0.0538, -0.0525, -0.0438],
        [ 0.0504,  0.0504,  0.0435,  ..., -0.0458, -0.0439, -0.0473],
        ...,
        [ 0.0029, -0.0032, -0.0005,  ..., -0.0012,  0.0016,  0.0023],
        [ 0.0028,  0.0040,  0.0017,  ..., -0.0029,  0.0012,  0.0007],
        [ 0.0040, -0.0035,  0.0007,  ..., -0.0008, -0.0029, -0.0036]])


Testing epoch 3: 100it [00:31,  3.20it/s]


Epoch: 3, Train Loss: 0.6437714, Test Loss: 62.7352140


Training epoch 4: 3it [00:44, 14.73s/it]


Epoch 5, FeatureMLP Weights: tensor([[ 0.0471,  0.0555,  0.0470,  ..., -0.0519, -0.0485, -0.0473],
        [ 0.0533,  0.0524,  0.0521,  ..., -0.0570, -0.0558, -0.0472],
        [ 0.0534,  0.0535,  0.0465,  ..., -0.0488, -0.0470, -0.0503],
        ...,
        [ 0.0029, -0.0032, -0.0005,  ..., -0.0012,  0.0016,  0.0023],
        [ 0.0028,  0.0040,  0.0017,  ..., -0.0029,  0.0012,  0.0007],
        [ 0.0040, -0.0035,  0.0007,  ..., -0.0008, -0.0029, -0.0036]])


Testing epoch 4: 100it [00:30,  3.31it/s]


Epoch: 4, Train Loss: 0.6290912, Test Loss: 63.7379737


Training epoch 5: 1it [00:13, 13.82s/it]


KeyboardInterrupt: 

In [None]:
print(losses)

std = loader.mean_and_std["measurements"][1]["voltage"]

#read saved model
model.load_state_dict(torch.load(f"../models/A3TGCN_{now}_{trafo_id}_epochs-{epochs}_in-{num_timesteps_in}_out-{num_timesteps_out}_train-ratio-{train_ratio}_lr-{learning_rate}.pt"))

loss_all, loss_elementwise = eval(model, eval_dataset, device, loss_fn, std)

print("Loss all: {:.7f}".format(loss_all))
print("Loss elementwise: {}".format(loss_elementwise))

NameError: name 'losses' is not defined