In [2]:
import torch
# import scipy
# import umap
import random
# import itertools
import tsl
import numpy as np
import pandas as pd
import networkx as nx
import torch_geometric
import matplotlib.pyplot as plt

from tsl.datasets import PvUS
from tsl.data.datamodule import SpatioTemporalDataModule, TemporalSplitter
from tsl.data.preprocessing import StandardScaler

# from dataset.NCI1_dataset import NCI1
# from tqdm import trange
# from sklearn.model_selection import train_test_split

from models.DynGraphESN import DynGraphESN

from DMD.dmd import KANN

# Seed Python's `random`, PyTorch and NumPy (in that order) with one shared
# value so every random number generator used below starts from a known state.
seed = 42
for seed_fn in (random.seed, torch.manual_seed, np.random.seed):
    seed_fn(seed)

# Use the CUDA device when PyTorch reports one as available, else the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

  from .autonotebook import tqdm as notebook_tqdm


For the datasets, see:

https://github.com/dtortorella/dyngraphesn/tree/master

In [3]:
# Build the PvUS dataset object, using "/dataset" as its root directory.
# NOTE(review): this is an absolute path at the filesystem root — confirm it
# exists (and is writable, if the dataset stores files there) on this machine.
pvus_root = "/dataset"
dataset = PvUS(root=pvus_root)

In [4]:
# Print the dataset's text summary (length, number of nodes, channels).
print(str(dataset))

PvUS(length=52560, n_nodes=5016, n_channels=1)


In [5]:
# Report the similarity score the dataset uses by default and the scores it
# can provide, followed by a separator line.
print(
    f"Default similarity: {dataset.similarity_score}",
    f"Available similarity options: {dataset.similarity_options}",
    "==========================================",
    sep="\n",
)

# Ask the dataset for its "distance" similarity.
sim = dataset.get_similarity("distance")  # or dataset.compute_similarity()

# The DataFrame is left as the cell's last expression so the notebook
# renders it as a table right below the heading.
print("Similarity matrix W:")
pd.DataFrame(sim)

Default similarity: distance
Available similarity options: {'correntropy', 'distance'}
Similarity matrix W:


Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,5006,5007,5008,5009,5010,5011,5012,5013,5014,5015
0,1.000000,0.095009,0.107265,0.610564,0.074490,0.039714,0.236621,0.392356,0.884068,0.092333,...,8.324627e-35,1.010345e-37,5.468361e-29,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
1,0.095009,1.000000,0.000261,0.013574,0.962472,0.000048,0.001269,0.002001,0.138510,0.978488,...,1.401298e-45,0.000000e+00,1.183978e-38,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
2,0.107265,0.000261,1.000000,0.079925,0.000125,0.912439,0.915830,0.305130,0.036763,0.000194,...,2.745709e-24,9.773699e-27,1.726642e-19,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
3,0.610564,0.013574,0.079925,1.000000,0.012090,0.029830,0.160928,0.700789,0.638735,0.015341,...,2.682818e-34,9.525920e-38,1.080937e-28,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
4,0.074490,0.962472,0.000125,0.012090,1.000000,0.000021,0.000666,0.001468,0.124429,0.994520,...,0.000000e+00,0.000000e+00,6.678883e-40,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5011,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.000000e+00,5.605194e-45,0.000000e+00,0.002191,0.755789,1.000000,0.098440,0.029806,0.755789,0.001384
5012,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,3.082857e-44,3.333066e-36,4.063766e-44,0.000221,0.101507,0.098440,1.000000,0.529991,0.101507,0.000170
5013,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,2.547916e-38,6.308124e-31,5.672802e-38,0.003113,0.014262,0.029806,0.529991,1.000000,0.014262,0.002832
5014,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.000000e+00,0.000000e+00,0.000000e+00,0.000127,1.000000,0.755789,0.101507,0.014262,1.000000,0.000074


In [6]:
# Derive the graph connectivity from the dataset.
# NOTE(review): presumably `threshold` drops weights below 0.1 and
# `normalize_axis=1` normalises the weights along axis 1 — confirm against the
# tsl documentation. Self-connections are explicitly not requested.
# With layout="edge_index" the result is unpacked in the next cell as an
# (edge_index, edge_weight) pair.
connectivity = dataset.get_connectivity(
    layout="edge_index",
    normalize_axis=1,
    include_self=False,
    threshold=0.1,
)

In [7]:
# Split the connectivity into its two components and show the shape and
# contents of each one.
edge_index, edge_weight = connectivity

for label, values in (("edge_index", edge_index), ("edge_weight", edge_weight)):
    print(f'{label} {values.shape}:\n', values)

edge_index (2, 1090660):
 [[   0    0    0 ... 5015 5015 5015]
 [   2    3    6 ... 3025 3032 5009]]
edge_weight (1090660,):
 [0.00104493 0.00537338 0.00241709 ... 0.02544574 0.02086223 0.05187358]


In [8]:
# Wrap the raw data in a SpatioTemporalDataset that yields (input, target)
# samples; each sample carries the graph connectivity alongside the series.
window_steps = 12   # length of the input window `x` (t=12 in the sample)
horizon_steps = 12  # length of the target `y` (t=12 in the sample)

target_frame = dataset.dataframe()
torch_dataset = tsl.data.SpatioTemporalDataset(
    target=target_frame,
    connectivity=connectivity,
    mask=dataset.mask,
    horizon=horizon_steps,
    window=window_steps,
    stride=1,
)
print(torch_dataset)

SpatioTemporalDataset(n_samples=52537, n_nodes=5016, n_channels=1)


In [9]:
# Fetch the first sample of the dataset and print its structure.
first_idx = 0
sample = torch_dataset[first_idx]
print(sample)

Data(
  input=(x=[t=12, n=5016, f=1], edge_index=[2, e=1090660], edge_weight=[e=1090660]),
  target=(y=[t=12, n=5016, f=1]),
  has_mask=False
)


In [10]:
# Show the sample's `pattern` attribute: a mapping from each field name to a
# string describing its dimensions.
pattern = sample.pattern
print(pattern)

{'x': 't n f', 'edge_index': '2 e', 'edge_weight': 'e', 'y': 't n f'}


In [14]:
# Scaler applied to the 'target' entry; it is configured with axis=(0, 1).
# NOTE(review): presumably axes 0 and 1 are time and nodes, i.e. one set of
# statistics per channel — confirm against the tsl documentation.
target_scaler = StandardScaler(axis=(0, 1))
scalers = {'target': target_scaler}

# Sequential split of the samples, with the validation and test lengths
# given as fractions:
#   |------------ dataset -----------|
#   |--- train ---|- val -|-- test --|
splitter = TemporalSplitter(test_len=0.2, val_len=0.1)

# Bundle dataset, scalers and splitter into a data module using batches of 64.
dm = SpatioTemporalDataModule(dataset=torch_dataset,
                              scalers=scalers,
                              splitter=splitter,
                              batch_size=64)

# Before `setup()` is called the split lengths are still reported as None.
print(dm)

SpatioTemporalDataModule(train_len=None, val_len=None, test_len=None, scalers=[target], batch_size=64)


In [15]:
# Run the data module's setup step, then print it again: the train/val/test
# lengths, reported as None before this call, are now filled in.
dm.setup()
print(str(dm))

SpatioTemporalDataModule(train_len=37815, val_len=4191, test_len=10507, scalers=[target], batch_size=64)
