In [1]:
!git clone https://github.com/TorchSpatiotemporal/tsl.git
!pip install torch==1.10.1+cu111 torchvision==0.11.2+cu111 torchaudio==0.10.1 -f https://download.pytorch.org/whl/torch_stable.html
!pip install torch-scatter torch-sparse torch-geometric -f https://data.pyg.org/whl/torch-1.10.1+cu113.html
!pip install ./tsl

fatal: destination path 'tsl' already exists and is not an empty directory.
Looking in links: https://download.pytorch.org/whl/torch_stable.html
Looking in links: https://data.pyg.org/whl/torch-1.10.1+cu113.html
Processing ./tsl
[33m  DEPRECATION: A future pip version will change local packages to be built in-place without first copying to a temporary directory. We recommend you use --use-feature=in-tree-build to test your packages with this new behavior before it becomes the default.
   pip 21.3 will remove support for this functionality. You can find discussion regarding this at https://github.com/pypa/pip/issues/7555.[0m
Building wheels for collected packages: torch-spatiotemporal
  Building wheel for torch-spatiotemporal (setup.py) ... [?25l[?25hdone
  Created wheel for torch-spatiotemporal: filename=torch_spatiotemporal-0.1.0-py3-none-any.whl size=157010 sha256=a8ab737e8e539d73fdda9ab186a5e6156b878308e68d4b52878321b5c13d4f1f
  Stored in directory: /tmp/pip-ephem-wheel-cache-lp

In [2]:
import numpy as np
import pandas as pd
import pytorch_lightning as pl
import torch
import tsl

from datetime import datetime
from pytorch_lightning.loggers import TensorBoardLogger
from pytorch_lightning.callbacks import ModelCheckpoint
from tsl.data import SpatioTemporalDataset, SpatioTemporalDataModule
from tsl.data.preprocessing import StandardScaler
from tsl.nn.blocks.encoders import RNN
from tsl.nn.blocks.decoders import GCNDecoder
from tsl.nn.metrics.metrics import MaskedMAE, MaskedMAPE
from tsl.predictors import Predictor
from tsl.datasets.prototypes import PandasDataset

np.set_printoptions(suppress=True)
tsl.logger.disabled = True

print(f"tsl version  : {tsl.__version__}")
print(f"torch version: {torch.__version__}")

tsl version  : 0.1.0
torch version: 1.10.1+cu111


In [3]:
# TO REMOVE, ONLY FOR TRY
from tsl.datasets import MetrLA

met = MetrLA()

adj_met = met.get_connectivity(threshold=0.1,
                               include_self=False,
                               normalize_axis=1,
                               layout="edge_index")

In [5]:
# Import needed CSV
final_2020 = pd.read_csv("/content/final_df_2020.csv")
# final_2021 = pd.read_csv("/content/final_df_2021.csv")
# final_2022 = pd.read_csv("/content/final_df_2022.csv")

# Extrate information from dataframe and create a new dataframe with
# column = FIPS and row = Cases per Date
db = {}
for index, row in final_2020.iterrows():
  date = datetime.strptime(row['Date'], '%Y-%m-%d')
  db[(date, (row['FIPS']))] = row['Cases']
# for index, row in final_2021.iterrows():
#   date = datetime.strptime(row['Date'], '%Y-%m-%d')
#   db[(date, (row['FIPS']))] = row['Cases']
# for index, row in final_2022.iterrows():
#   date = datetime.strptime(row['Date'], '%Y-%m-%d')
#   db[(date, (row['FIPS']))] = row['Cases']

df_cases = pd.DataFrame(db.values(), index=pd.MultiIndex.from_tuples(db.keys())).unstack(1).fillna(0)[0]

In [6]:
# Convert the dataframe to a PandasDataset
dataset = PandasDataset(dataframe=df_cases,
              similarity_score="correntropy",
              temporal_aggregation="nearest")
dataset

PandasDataset(length=346, n_nodes=3218, n_channels=1)

In [7]:
# Import the adjacency matrix and set the FIPS as index
adj_df = pd.read_csv('adjacency_matrix.csv')
adj_df.set_index('--', inplace=True)

In [23]:
# Create a tuple for the adjacency as the get_conncetivity of tsl
cols = adj_df.columns
bt = adj_df.apply(lambda x: x > 0)
adj_series = bt.apply(lambda x: list(cols[x.values]), axis=1)

adj_dict = adj_series.to_dict()
edge_index = [[], []]
edge_weight = []
for key in adj_dict:
  adj_fips = adj_dict[key]
  length = len(adj_fips)
  for i in adj_fips:
    edge_index[0].append(int(key))
    edge_index[1].append(int(i))
    edge_weight.append(1/length)

edge_index = np.array(edge_index)
edge_weight = np.array(edge_weight, dtype=np.float32)
adj = (edge_index, edge_weight)

In [24]:
# Create the torch dataset with the Spatio-Temporal dataset
torch_dataset = SpatioTemporalDataset(*dataset.numpy(return_idx=True),
                                      connectivity=adj,
                                      mask=dataset.mask,
                                      horizon=12,
                                      window=12)
print(torch_dataset)

SpatioTemporalDataset(n_samples=323, n_nodes=3218, n_channels=1)


In [25]:
# Create teh Data Module
scalers = {'data': StandardScaler(axis=(0, 1))}

splitter = dataset.get_splitter(val_len=0.1, test_len=0.2)

dm = SpatioTemporalDataModule(
    dataset=torch_dataset,
    scalers=scalers,
    splitter=splitter,
    batch_size=64,
)

dm.setup()
print(dm)

SpatioTemporalDataModule(train_len=222, val_len=13, test_len=64, scalers=[data], batch_size=64)


In [26]:
# Create the class for the model
class TimeThenSpaceModel(torch.nn.Module):
    def __init__(self,
                 input_size,
                 hidden_size,
                 rnn_layers,
                 gcn_layers,
                 horizon):
        super(TimeThenSpaceModel, self).__init__()

        self.input_encoder = torch.nn.Linear(input_size, hidden_size)

        self.encoder = RNN(input_size=hidden_size,
                           hidden_size=hidden_size,
                           n_layers=rnn_layers)

        self.decoder = GCNDecoder(
            input_size=hidden_size,
            hidden_size=hidden_size,
            output_size=input_size,
            horizon=horizon,
            n_layers=gcn_layers
        )

    def forward(self, x, edge_index, edge_weight):
        # x: [batches steps nodes channels]
        x = self.input_encoder(x)

        x = self.encoder(x, return_last_state=True)

        return self.decoder(x, edge_index, edge_weight)

In [27]:
# Create the predictor
loss_fn = MaskedMAE(compute_on_step=True)

metrics = {'mae': MaskedMAE(compute_on_step=False),
           'mape': MaskedMAPE(compute_on_step=False),
           'mae_at_15': MaskedMAE(compute_on_step=False, at=2),  # `2` indicated the third time step,
                                                                 # which correspond to 15 minutes ahead
           'mae_at_30': MaskedMAE(compute_on_step=False, at=5),
           'mae_at_60': MaskedMAE(compute_on_step=False, at=11), }

model_kwargs = {
    'input_size': dm.n_channels,  # 1 channel
    'horizon': dm.horizon,  # 12, the number of steps ahead to forecast
    'hidden_size': 32,
    'rnn_layers': 1,
    'gcn_layers': 2
}

# setup predictor
predictor = Predictor(
    model_class=TimeThenSpaceModel,
    model_kwargs=model_kwargs,
    optim_class=torch.optim.Adam,
    optim_kwargs={'lr': 0.001},
    loss_fn=loss_fn,
    metrics=metrics
)

  f"Attribute {k!r} is an instance of `nn.Module` and is already saved during checkpointing."


In [41]:
# TRAIN THE DATA
logger = TensorBoardLogger(save_dir="logs", name="tsl_intro", version=0)

checkpoint_callback = ModelCheckpoint(
    dirpath='logs',
    save_top_k=1,
    monitor='val_mae',
    mode='min',
)

trainer = pl.Trainer(max_epochs=100,
                     logger=logger,
                     gpus=1 if torch.cuda.is_available() else None,
                     limit_train_batches=100,
                     callbacks=[checkpoint_callback])

trainer.fit(predictor, datamodule=dm)

GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name          | Type               | Params
-----------------------------------------------------
0 | loss_fn       | MaskedMAE          | 0     
1 | train_metrics | MetricCollection   | 0     
2 | val_metrics   | MetricCollection   | 0     
3 | test_metrics  | MetricCollection   | 0     
4 | model         | TimeThenSpaceModel | 12.0 K
-----------------------------------------------------
12.0 K    Trainable params
0         Non-trainable params
12.0 K    Total params
0.048     Total estimated model params size (MB)


Sanity Checking: 0it [00:00, ?it/s]

RuntimeError: ignored

In [None]:
# MAKE PREDICTION
predictor.load_model(checkpoint_callback.best_model_path)
predictor.freeze()

performance = trainer.test(predictor, datamodule=dm)