In [1]:
import torch
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
device, torch.cuda.is_available() 

(device(type='cuda', index=0), True)

In [2]:
torch.version.cuda,  torch.cuda.get_arch_list(), torch.__version__

('12.1',
 ['sm_50', 'sm_60', 'sm_61', 'sm_70', 'sm_75', 'sm_80', 'sm_86', 'sm_90'],
 '2.1.0')

In [3]:
import pickle
from torch_geometric.utils.convert import from_networkx
sphere_graph = pickle.load(open('./sphere_graph.pickle', 'rb'))
graph_data = from_networkx(sphere_graph)

In [4]:
graph_data.distance

tensor([    0.0000, 30815.5723, 30815.5723,  ..., 61557.0859, 30815.5723,
            0.0000])

In [5]:
from tsl.datasets import MetrLA
import os
import torch
os.environ['TORCH'] = torch.__version__

dataset = MetrLA(root='./test-data')

  df = df.replace(to_replace=0., method='ffill')


In [6]:
edge_index, edge_weight = dataset.get_connectivity()
edge_index.shape

(2, 11753)

In [7]:
edge_index, edge_weight

(array([[  0,   0,   0, ..., 206, 206, 206],
        [  0,  10,  13, ..., 198, 199, 206]]),
 array([1.0000000e+00, 1.0204708e-05, 2.6093593e-01, ..., 6.4949799e-01,
        6.2726212e-07, 1.0000000e+00], dtype=float32))

In [7]:
from tsl.data import SpatioTemporalDataset

torch_dataset = SpatioTemporalDataset(target=dataset.dataframe(),
                                      connectivity=(edge_index, edge_weight),
                                      mask=dataset.mask,
                                      horizon=12,
                                      window=12,
                                      stride=1)

In [9]:
sample = torch_dataset[0]
sample

Data(
  input=(x=[t=12, n=207, f=1], edge_index=[2, e=11753], edge_weight=[e=11753]),
  target=(y=[t=12, n=207, f=1]),
  has_mask=True
)

In [12]:
sample.input.x, sample.target.y

(tensor([[[64.3750],
          [67.6250],
          [67.1250],
          ...,
          [59.2500],
          [69.0000],
          [61.8750]],
 
         [[62.6667],
          [68.5556],
          [65.4444],
          ...,
          [55.8889],
          [68.4444],
          [62.8750]],
 
         [[64.0000],
          [63.7500],
          [60.0000],
          ...,
          [61.3750],
          [69.8571],
          [62.0000]],
 
         ...,
 
         [[63.5000],
          [61.5000],
          [62.5000],
          ...,
          [50.7500],
          [66.2500],
          [62.2500]],
 
         [[65.2222],
          [63.6667],
          [65.1111],
          ...,
          [60.5556],
          [67.2222],
          [60.0000]],
 
         [[62.2500],
          [67.7500],
          [66.8750],
          ...,
          [57.0000],
          [66.5000],
          [59.4286]]]),
 tensor([[[61.1250],
          [67.0000],
          [58.5000],
          ...,
          [53.1250],
          [70.0000],


In [1]:
from weatherbench2.metrics import RMSE

In [33]:
sample = torch_dataset[0]
sample

Data(
  input=(x=[t=12, n=207, f=1], edge_index=[2, e=11753], edge_weight=[e=11753]),
  target=(y=[t=12, n=207, f=1]),
  has_mask=True
)

In [34]:
torch_dataset

SpatioTemporalDataset(n_samples=34249, n_nodes=207, n_channels=1)

In [8]:
from tsl.data.datamodule import (SpatioTemporalDataModule,
                                 TemporalSplitter)
from tsl.data.preprocessing import StandardScaler

# Normalize data using mean and std computed over time and node dimensions
scalers = {'target': StandardScaler(axis=(0, 1))}

# Split data sequentially:
#   |------------ dataset -----------|
#   |--- train ---|- val -|-- test --|
splitter = TemporalSplitter(val_len=0.1, test_len=0.2)

dm = SpatioTemporalDataModule(
    dataset=torch_dataset,
    scalers=scalers,
    splitter=splitter,
    batch_size=64,
)
dm.setup()
dm

SpatioTemporalDataModule(train_len=24648, val_len=2728, test_len=6849, scalers=[target], batch_size=64)

In [14]:
next(iter(dm.get_dataloader()))

StaticBatch(
  input=(x=[b=64, t=12, n=207, f=1], edge_index=[2, e=11753], edge_weight=[e=11753]),
  target=(y=[b=64, t=12, n=207, f=1]),
  has_mask=True,
  transform=[x, y]
)

In [9]:
import torch.nn as nn

from tsl.nn.blocks.encoders import RNN
from tsl.nn.layers import NodeEmbedding, DiffConv
from einops.layers.torch import Rearrange  # reshape data with Einstein notation


class TimeThenSpaceModel(nn.Module):
    def __init__(self, input_size: int, n_nodes: int, horizon: int,
                 hidden_size: int = 32,
                 rnn_layers: int = 1,
                 gnn_kernel: int = 2):
        super(TimeThenSpaceModel, self).__init__()

        self.encoder = nn.Linear(input_size, hidden_size)

        self.node_embeddings = NodeEmbedding(n_nodes, hidden_size)

        self.time_nn = RNN(input_size=hidden_size,
                           hidden_size=hidden_size,
                           n_layers=rnn_layers,
                           cell='gru',
                           return_only_last_state=True)
        
        self.space_nn = DiffConv(in_channels=hidden_size,
                                 out_channels=hidden_size,
                                 k=gnn_kernel)

        self.decoder = nn.Linear(hidden_size, input_size * horizon)
        self.rearrange = Rearrange('b n (t f) -> b t n f', t=horizon)

    def forward(self, x, edge_index, edge_weight):
        print(x)
        # x: [batch time nodes features]
        x_enc = self.encoder(x)  # linear encoder: x_enc = xΘ + b
        x_emb = x_enc + self.node_embeddings()  # add node-identifier embeddings
        h = self.time_nn(x_emb)  # temporal processing: x=[b t n f] -> h=[b n f]
        z = self.space_nn(h, edge_index, edge_weight)  # spatial processing
        x_out = self.decoder(z)  # linear decoder: z=[b n f] -> x_out=[b n t⋅f]
        x_horizon = self.rearrange(x_out)
        return x_horizon

In [10]:
hidden_size = 32   #@param
rnn_layers = 1     #@param
gnn_kernel = 2     #@param

input_size = torch_dataset.n_channels   # 1 channel
n_nodes = torch_dataset.n_nodes         # 207 nodes
horizon = torch_dataset.horizon         # 12 time steps

stgnn = TimeThenSpaceModel(input_size=input_size,
                           n_nodes=n_nodes,
                           horizon=horizon,
                           hidden_size=hidden_size,
                           rnn_layers=rnn_layers,
                           gnn_kernel=gnn_kernel)
stgnn

TimeThenSpaceModel(
  (encoder): Linear(in_features=1, out_features=32, bias=True)
  (node_embeddings): NodeEmbedding(n_nodes=207, embedding_size=32)
  (time_nn): RNN(
    (rnn): GRU(32, 32)
  )
  (space_nn): DiffConv(32, 32)
  (decoder): Linear(in_features=32, out_features=12, bias=True)
  (rearrange): Rearrange('b n (t f) -> b t n f', t=12)
)

In [11]:
from tsl.metrics.torch import MaskedMAE, MaskedMAPE
from tsl.engines import Predictor

loss_fn = MaskedMAE()

metrics = {'mae': MaskedMAE(),
           'mape': MaskedMAPE(),
           'mae_at_15': MaskedMAE(at=2),  # '2' indicates the third time step,
                                          # which correspond to 15 minutes ahead
           'mae_at_30': MaskedMAE(at=5),
           'mae_at_60': MaskedMAE(at=11)}

# setup predictor
predictor = Predictor(
    model=stgnn,                   # our initialized model
    optim_class=torch.optim.Adam,  # specify optimizer to be used...
    optim_kwargs={'lr': 0.001},    # ...and parameters for its initialization
    loss_fn=loss_fn,               # which loss function to be used
    metrics=metrics                # metrics to be logged during train/val/test
)

In [12]:
from pytorch_lightning.loggers import TensorBoardLogger

logger = TensorBoardLogger(save_dir="logs", name="tsl_intro", version=0)

In [13]:
import pytorch_lightning as pl
from pytorch_lightning.callbacks import ModelCheckpoint

checkpoint_callback = ModelCheckpoint(
    dirpath='logs',
    save_top_k=1,
    monitor='val_mae',
    mode='min',
)

trainer = pl.Trainer(max_epochs=1,
                     logger=logger,
                    #  gpus=1 if torch.cuda.is_available() else None,
                    # devices=[],
                    accelerator='gpu',
                     limit_train_batches=100,  # end an epoch after 100 updates
                     callbacks=[checkpoint_callback])

trainer.fit(predictor, datamodule=dm)

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
You are using a CUDA device ('NVIDIA GeForce RTX 4090') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
/home/arturas/anaconda3/envs/weather/lib/python3.11/site-packages/pytorch_lightning/callbacks/model_checkpoint.py:630: Checkpoint directory logs exists and is not empty.
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name          | Type               | Params
-----------------------------------------------------
0 | loss_fn       | MaskedMAE          | 0     
1 | train_metrics | MetricCollection   | 0     
2 | val_metrics   | MetricCollection   | 0     
3 | test_metrics  | Metri

Sanity Checking: |          | 0/? [00:00<?, ?it/s]

/home/arturas/anaconda3/envs/weather/lib/python3.11/site-packages/pytorch_lightning/trainer/connectors/data_connector.py:441: The 'val_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=15` in the `DataLoader` to improve performance.


Sanity Checking DataLoader 0:   0%|          | 0/2 [00:00<?, ?it/s]

Only args ['edge_weight', 'edge_index', 'x'] are forwarded to the model (TimeThenSpaceModel). 


tensor([[[[-3.0269],
          [ 0.4952],
          [ 0.6113],
          ...,
          [-0.1531],
          [-3.0850],
          [-3.6559]],

         [[-2.6861],
          [ 0.5135],
          [ 0.5221],
          ...,
          [ 0.5049],
          [-2.6001],
          [-3.9161]],

         [[-3.0172],
          [ 0.4371],
          [ 0.5533],
          ...,
          [ 0.2726],
          [-2.8334],
          [-3.6946]],

         ...,

         [[-3.0753],
          [ 0.5920],
          [ 0.6113],
          ...,
          [-0.0080],
          [-3.5978],
          [-3.9849]],

         [[-3.1334],
          [ 0.5737],
          [ 0.5049],
          ...,
          [-0.2864],
          [-2.6001],
          [-4.0279]],

         [[-3.2979],
          [ 0.5629],
          [ 0.6694],
          ...,
          [-0.8221],
          [-3.4140],
          [-3.7913]]],


        [[[-2.6861],
          [ 0.5135],
          [ 0.5221],
          ...,
          [ 0.5049],
          [-2.6001],
     

/home/arturas/anaconda3/envs/weather/lib/python3.11/site-packages/pytorch_lightning/trainer/connectors/data_connector.py:441: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=15` in the `DataLoader` to improve performance.


Epoch 0:   0%|          | 0/100 [00:00<?, ?it/s] tensor([[[[ 7.8011e-01],
          [ 6.0809e-01],
          [ 7.2850e-01],
          ...,
          [ 1.6083e-01],
          [-2.9097e+00],
          [-1.7658e+00]],

         [[ 7.3710e-01],
          [ 6.5002e-01],
          [ 7.8548e-01],
          ...,
          [ 1.1390e-02],
          [-1.9432e+00],
          [-1.6529e+00]],

         [[ 7.4678e-01],
          [ 6.4034e-01],
          [ 7.6613e-01],
          ...,
          [ 1.4686e-01],
          [-2.7173e+00],
          [-9.3687e-01]],

         ...,

         [[ 6.9840e-01],
          [ 4.4682e-01],
          [ 6.6937e-01],
          ...,
          [ 3.0167e-01],
          [-1.9916e+00],
          [ 3.2103e-01]],

         [[ 7.1990e-01],
          [ 6.1669e-01],
          [ 8.4031e-01],
          ...,
          [ 2.0384e-01],
          [-1.8862e+00],
          [ 2.8985e-01]],

         [[ 6.4034e-01],
          [ 5.9196e-01],
          [ 7.1775e-01],
          ...,
          [

`Trainer.fit` stopped: `max_epochs=1` reached.


Epoch 0: 100%|██████████| 100/100 [00:02<00:00, 35.56it/s, v_num=0, val_mae=3.230, val_mae_at_15=2.730, val_mae_at_30=3.200, val_mae_at_60=4.010, val_mape=0.0909, train_mae=4.900, train_mae_at_15=4.390, train_mae_at_30=4.660, train_mae_at_60=5.790, train_mape=0.151]


In [14]:
predictor.load_model(checkpoint_callback.best_model_path)
predictor.freeze()

trainer.test(predictor, datamodule=dm)

Predictor with already instantiated model is loading a state_dict from logs/epoch=0-step=100-v2.ckpt. Cannot  check if model hyperparameters are the same.
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
/home/arturas/anaconda3/envs/weather/lib/python3.11/site-packages/pytorch_lightning/trainer/connectors/data_connector.py:441: The 'test_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=15` in the `DataLoader` to improve performance.


Testing DataLoader 0:   0%|          | 0/108 [00:00<?, ?it/s]tensor([[[[ 0.3597],
          [ 0.7758],
          [ 0.7952],
          ...,
          [ 0.6403],
          [ 0.0888],
          [ 0.4081]],

         [[ 0.2382],
          [ 0.3931],
          [ 0.6683],
          ...,
          [ 0.3501],
          [ 0.4275],
          [ 0.1006]],

         [[ 0.7855],
          [ 0.6887],
          [ 0.4758],
          ...,
          [ 0.5049],
          [ 0.2726],
          [ 0.1952]],

         ...,

         [[ 0.3017],
          [ 0.3501],
          [ 0.6597],
          ...,
          [ 0.3307],
          [ 0.5339],
          [ 0.3597]],

         [[ 0.2533],
          [ 0.3114],
          [ 0.6307],
          ...,
          [ 0.6597],
          [ 0.3017],
          [ 0.2920]],

         [[ 0.5242],
          [ 0.1469],
          [ 0.2823],
          ...,
          [ 0.3888],
          [ 0.6790],
          [ 0.2630]]],


        [[[ 0.2382],
          [ 0.3931],
          [ 0.6683],
 

[{'test_mae': 3.935426950454712,
  'test_mae_at_15': 3.2437334060668945,
  'test_mae_at_30': 3.8980026245117188,
  'test_mae_at_60': 5.005066394805908,
  'test_mape': 0.11395630985498428,
  'test_loss': 3.7823309898376465}]