In [1]:
import torch
import torch
import torch.nn as nn
import torch.optim as optim
import pytorch_lightning as pl
# import scipy
# import umap
import random
# import itertools
import tsl
import numpy as np
# import pandas as pd
import networkx as nx
import torch_geometric
import lightning as L
import matplotlib.pyplot as plt

from tsl.datasets import PvUS
from tsl.data.datamodule import SpatioTemporalDataModule, TemporalSplitter
from tsl.data.preprocessing import StandardScaler
from tsl.metrics.torch import MaskedMAE
from tsl.engines import Predictor
from pytorch_lightning.loggers import TensorBoardLogger
from pytorch_lightning.callbacks import ModelCheckpoint
from einops import rearrange

# from dataset.NCI1_dataset import NCI1
# from tqdm import trange
# from sklearn.model_selection import train_test_split

from models.DynGraphESN import DynGESNModel

from DMD.dmd import KANN

# Seed every random number generator the notebook relies on (Python's
# `random`, PyTorch and NumPy) with the same value for reproducible runs.
seed = 42
for seed_fn in (random.seed, torch.manual_seed, np.random.seed):
    seed_fn(seed)

# Run on the GPU when one is visible, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

  from .autonotebook import tqdm as notebook_tqdm


For more datasets, see:

https://github.com/dtortorella/dyngraphesn/tree/master

In [2]:
# Load the PvUS dataset restricted to the 'west' zone and show its summary.
dataset = PvUS(root="/dataset", zones=['west'])
print(dataset)

# Report which similarity score is the default and which ones are available.
print(f"Default similarity: {dataset.similarity_score}")
print(f"Available similarity options: {dataset.similarity_options}")

# Distance-based similarity (alternatively: dataset.compute_similarity()).
sim = dataset.get_similarity("distance")

# Options used to turn the similarity scores into a graph, returned in
# "edge_index" layout with self-loops excluded.
connectivity_options = dict(
    threshold=0.1,
    include_self=False,
    normalize_axis=1,
    layout="edge_index",
)
connectivity = dataset.get_connectivity(**connectivity_options)

PvUS-west(length=52560, n_nodes=1082, n_channels=1)
Default similarity: distance
Available similarity options: {'correntropy', 'distance'}


In [3]:
# Number of future time steps each sample has to predict.
horizon = 24

# Sliding-window settings: every sample pairs `window` input steps with
# `horizon` target steps, and consecutive samples are `stride` steps apart.
windowing = dict(horizon=horizon, window=64, stride=1)

torch_dataset = tsl.data.SpatioTemporalDataset(
    target=dataset.dataframe(),
    connectivity=connectivity,
    mask=dataset.mask,
    **windowing,
)
print(torch_dataset)

SpatioTemporalDataset(n_samples=52473, n_nodes=1082, n_channels=1)


In [4]:
# Take the first window of the dataset and move it to the compute device.
first_item = torch_dataset[0]
sample = first_item.to(device)

# Show the sample's contents and the axis pattern of each of its tensors.
for description in (sample, sample.pattern):
    print(description)

Data(
  input=(x=[t=64, n=1082, f=1], edge_index=[2, e=124778], edge_weight=[e=124778]),
  target=(y=[t=24, n=1082, f=1]),
  has_mask=False
)
{'x': 't n f', 'edge_index': '2 e', 'edge_weight': 'e', 'y': 't n f'}


In [5]:
# Sequential (chronological) split of the samples:
#   |------------ dataset -----------|
#   |--- train ---|- val -|-- test --|
# val_len / test_len are given as fractions of the dataset.
splitter = TemporalSplitter(val_len=0.1, test_len=0.2)

# Standardise the target signal.
# NOTE(review): axis=(0, 1) presumably spans the time and node axes - confirm.
target_scaler = StandardScaler(axis=(0, 1))
scalers = dict(target=target_scaler)

datamodule_config = dict(
    dataset=torch_dataset,
    scalers=scalers,
    splitter=splitter,
    batch_size=1,
)
dm = SpatioTemporalDataModule(**datamodule_config)

# Apply the split and fit the scalers, then show the resulting partition sizes.
dm.setup()
print(dm)

SpatioTemporalDataModule(train_len=37718, val_len=4133, test_len=10494, scalers=[target], batch_size=1)


In [6]:
# Dimensions of one input window, read from the sample's `x` tensor
# (pattern 't n f': time steps, nodes, features).
time_interval, num_nodes, feat_size = sample.input.x.shape

# Hyperparameters of the DynGESN encoder, gathered in one mapping so they
# are easy to scan and tweak.
reservoir_config = dict(
    input_size=feat_size,
    reservoir_size=100,
    input_scaling=1.,
    reservoir_layers=1,
    leaking_rate=0.9,
    spectral_radius=0.9,
    density=0.5,
    reservoir_activation='tanh',
    alpha_decay=False,
)

model = DynGESNModel(**reservoir_config)
model = model.to(device)

In [7]:
# Display the input part of the sample (x, edge_index, edge_weight).
sample.input

StorageView(x=[64, 1082, 1], edge_index=[2, 124778], edge_weight=[124778])

In [7]:
# Shape check of the encoder: `unsqueeze(dim=0)` prepends a size-1 leading
# axis to the single sample's `x` before it is passed to the model
# (presumably the batch axis the model expects - confirm).
model(sample.input.x.unsqueeze(dim=0), sample.input.edge_index, sample.input.edge_weight).shape

torch.Size([1, 64, 1082, 100])

In [11]:
class LinearRegression(pl.LightningModule):
    """Linear readout on top of an encoder for multi-step forecasting.

    The encoder output of shape ``(b, t, n, f)`` is flattened per node into
    ``(b, n, t * f)``, mapped by one linear layer to
    ``horizon * output_size`` values per node, and reshaped to
    ``(b, horizon, n, output_size)``.

    Args:
        encoder: Module called as ``encoder(x, edge_index, edge_weight)``
            that returns a 4-D tensor ``(b, t, n, f)``.
        input_size: Size of the flattened per-node encoder output, i.e.
            ``t * f`` of the encoder's output.
        output_size: Number of features predicted per node and time step.
        horizon: Number of future time steps to predict.
    """

    def __init__(self, encoder, input_size, output_size, horizon):
        super().__init__()
        self.output_size = output_size
        # Keep the horizon on the module so that forward() does not depend
        # on a notebook-level global variable of the same name.
        self.horizon = horizon
        self.encoder = encoder
        self.linear = nn.Linear(input_size, output_size * horizon)

    def forward(self, x, edge_index, edge_weight):
        """Encode the input window and predict the next ``horizon`` steps.

        Args:
            x: Input tensor forwarded unchanged to the encoder.
            edge_index: Graph connectivity forwarded to the encoder.
            edge_weight: Edge weights forwarded to the encoder.

        Returns:
            Tensor of shape ``(b, horizon, n, output_size)``.
        """
        z = self.encoder(x, edge_index, edge_weight)
        # Flatten the time and feature axes so each node gets a single vector.
        z = rearrange(z, 'b t n f -> b n (t f)')
        new_x = self.linear(z)
        # Split the linear output back into (horizon, output_size) per node.
        new_x = rearrange(new_x, 'b n (t f) -> b t n f',
                          t=self.horizon, f=self.output_size)

        return new_x

In [12]:
# The readout sees, for every node, the encoder features of all input steps
# flattened together: 100 features (the encoder's reservoir size) for each
# of the `time_interval` steps.
forecaster = LinearRegression(
    model,
    input_size=100 * time_interval,
    output_size=feat_size,
    horizon=horizon,
)
forecaster = forecaster.to(device)

In [13]:
# Training loss, plus the metric logged under the name 'mae'.
loss_fn = MaskedMAE()
metrics = {'mae': MaskedMAE()}

# Wrap the forecaster in the tsl Predictor, which holds the optimiser
# configuration, the loss and the metrics logged during train/val/test.
predictor_config = dict(
    model=forecaster,
    optim_class=torch.optim.Adam,
    optim_kwargs={'lr': 0.001},
    loss_fn=loss_fn,
    metrics=metrics,
)
predictor = Predictor(**predictor_config)

# Optional TensorBoard logging (disabled):
# logger = TensorBoardLogger(save_dir="logs", name="dynGESN", version=0)
# %load_ext tensorboard
# %tensorboard --logdir logs

# Keep only the checkpoint with the lowest validation MAE, stored in 'logs'.
checkpoint_callback = ModelCheckpoint(
    dirpath='logs',
    save_top_k=1,
    monitor='val_mae',
    mode='min',
)

# Train on a single GPU when one is available, otherwise on the CPU.
accelerator = "gpu" if torch.cuda.is_available() else "cpu"
trainer = pl.Trainer(
    max_epochs=100,
    # logger=logger,
    devices=1,
    accelerator=accelerator,
    limit_train_batches=100,  # end an epoch after 100 updates
    callbacks=[checkpoint_callback],
)

trainer.fit(predictor, datamodule=dm)

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name          | Type             | Params
---------------------------------------------------
0 | loss_fn       | MaskedMAE        | 0     
1 | train_metrics | MetricCollection | 0     
2 | val_metrics   | MetricCollection | 0     
3 | test_metrics  | MetricCollection | 0     
4 | model         | LinearRegression | 163 K 
---------------------------------------------------
153 K     Trainable params
10.2 K    Non-trainable params
163 K     Total params
0.655     Total estimated model params size (MB)


Sanity Checking DataLoader 0:   0%|          | 0/2 [00:00<?, ?it/s]

c:\Users\mgphy\anaconda3\envs\koopman\lib\site-packages\pytorch_lightning\trainer\connectors\data_connector.py:441: The 'val_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=15` in the `DataLoader` to improve performance.
Only args ['x', 'edge_weight', 'edge_index'] are forwarded to the model (LinearRegression). 


                                                                           

c:\Users\mgphy\anaconda3\envs\koopman\lib\site-packages\pytorch_lightning\trainer\connectors\data_connector.py:441: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=15` in the `DataLoader` to improve performance.


Epoch 1: 100%|██████████| 100/100 [00:03<00:00, 30.82it/s, v_num=17, val_mae=27.90, train_mae=27.80]

c:\Users\mgphy\anaconda3\envs\koopman\lib\site-packages\pytorch_lightning\trainer\call.py:54: Detected KeyboardInterrupt, attempting graceful shutdown...


TODO: fix the batch handling.