In [39]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import pytorch_lightning as pl
import torchmetrics

import numpy as np
import pandas as pd
import scipy.sparse as sp

from torch.nn.functional import normalize
from torch.utils.data.dataloader import DataLoader

In [82]:
def accuracy(pred, y):
    return 1 - torch.linalg.norm(y - pred, 'fro') / torch.linalg.norm(y, 'fro')

In [81]:
class GCN(nn.Module):
    def __init__(self, adj, input_dim, output_dim, **kwargs):
        super(GCN, self).__init__()
        
        self.register_buffer(
            "laplacian", calculate_laplacian_with_self_loop(torch.FloatTensor(adj))
        )
        
        self._num_nodes = adj.shape[0]
        self._input_dim = input_dim
        self._output_dim = output_dim
        self.weights = nn.Parameter(
            torch.FloatTensor(self._input_dim, self._output_dim)
        )
        self.reset_parameters()
        
    def reset_parameters(self):
        nn.init.xavier_uniform_(self.weights, gain = nn.init.calculate_gain('tanh'))
        
    def forward(self, inputs):
        batch_size = inputs.shape[0]
        inputs = inputs.transpose(0, 2).transpose(1, 2)
        inputs = inputs.reshape((self._num_nodes, batch_size * self._input_dim))
        
        ax = self.laplacian @ inputs
        ax = ax.reshape((self._num_nodes, batch_size, self._input_dim))
        ax = ax.reshape((self._num_nodes * batch_size, self._input_dim))
        
        outputs = torch.tanh(ax @ self.weights)
        outputs = outputs.reshape((self._num_nodes, batch_size, self._output_dim))
        outputs = outputs.transpose(0, 1)
        
        return outputs
    
    @property
    def hyperparameters(self):
        return {
            "num_nodes": self._num_nodes,
            "input_dim": self._input_dim,
            "output_dim": self._output_dim
        }

In [83]:
def calculate_laplacian_with_self_loop(matrix):
    matrix = matrix + torch.eye(matrix.size(0))
    row_sum = matrix.sum(1)
    d_inv_sqrt = torch.pow(row_sum, -0.5).flatten()
    d_inv_sqrt[torch.isinf(d_inv_sqrt)] = 0.0
    d_mat_inv_sqrt = torch.diag(d_inv_sqrt)
    normalized_laplacian = (
        matrix.matmul(d_mat_inv_sqrt).transpose(0,1).matmul(d_mat_inv_sqrt)
    )
    return normalized_laplacian

In [84]:
class SupervisedForecastTask(pl.LightningModule):
    def __init__(self, model, regressor='linear', loss='mse', pre_len=3, learning_rate=1e-3,
                weight_decay=1.5e-3, feat_max_val=1.0, **kwargs):
        super(SupervisedForecastTask, self).__init__()
        self.save_hyperparameters()
        self.model = model
        self.regressor = (
            nn.Linear(
                self.model.hyperparameters.get('output_dim'),
                self.hparams.pre_len,
            )
            if regressor == 'linear'
            else regressor
        )
        self._loss = loss
        self.feat_max_val = feat_max_val
        
    def forward(self, x):
        batch_size, _, num_nodes = x.size()
        hidden = self.model(x)
        hidden = hidden.reshape((-1, hidden.size(2)))
        
        if self.regressor is not None:
            predictions = self.regressor(hidden)
        else:
            predictions = hidden
            
        predictions = predictions.reshape((batch_size, num_nodes, -1))
        return predictions
    
    def shared_step(self, batch, batch_idx):
        x, y = batch
        num_nodes = x.size(2)
        predictions = self(x)
        predicctions = predictions.transpose(1, 2).reshape((-1, num_nodes))
        y = y.reshape((-1, y.size(2)))
        return predictions, y
    
    def loss(self, inputs, targets):
        return F.mse_loss(inputs, targets)
    
    def training_step(self, batch, batch_idx):
        predictions, y = self.shared_step(batch, batch_idx)
        loss = self.loss(predictions, y)
        return loss
    
    def validation_step(self, batch, batch_idx):
        predictions, y = self.shared_step(batch, batch_idx)
        predictions = predictions * self.feat_max_val
        y = y * self.feat_max_val
        loss = self.loss(predictions, y)
        
        rmse = torch.sqrt(torchmetrics.functional.mean_squared_error(predictions, y))
        mae = torchmetrics.functional.mean_absolute_error(predictions, y)
        accuracy = utils.metrics.accuracy(predictions, y)
        mape = torchmetrics.functional.mean_absolute_percentage_error(predictions, y)
        metrics = {
            'val_loss':loss,
            'RMSE':rmse,
            'MAE':mae,
            'accuracy':accuracy,
            'MAPE':mape
        }
        self.log_dict(metrics)
        return predictions.reshape(batch[1].size()), y.reshape(batch[1].size())
    
    def configure_optimizers(self):
        return torch.optim.Adam(
            self.parameters(),
            lr = self.hparams.learning_rate,
            weight_decay = self.hparams.weight_decay,
        )

In [85]:
def load_features(feat_path, dtype=np.float32):
    feat_df = pd.read_csv(feat_path)
    feat_df = feat_df.drop('Unnamed: 0', axis=1)
    feat = np.array(feat_df, dtype=dtype)
    return feat

def load_adjacency_matrix(adj_path, dtype=np.float32):
    adj_df = pd.read_csv(adj_path, header=None)
    adj = np.array(adj_df, dtype = dtype)
    return adj

In [87]:
class SpatioTemporalCSVDataModule(pl.LightningDataModule):
    def __init__(self, feat_path, adj_path, batch_size=64,
                seq_len = 12, pre_len = 3, split_ratio = 0.8, normalize=True):
        super(SpatioTemporalCSVDataModule, self).__init__()
        
        self._feat_path = feat_path
        self._adj_path = adj_path
        self.batch_size = batch_size
        self.seq_len = seq_len
        self.pre_len = pre_len
        self.split_ratio = split_ratio
        self.normalize = normalize
        
        self._feat = load_features(self._feat_path)
        self._feat_max_val = np.max(self._feat)
        self._adj = load_adjacency_matrix(self._adj_path)
        
    def setup(self, stage=None):
        (self.train_dataset, self.val_dataset) = generate_torch_datasets(
            self._feat, self.seq_len, self.pre_len, 
            split_ratio=self.split_ratio, normalize=self.normalize
        )
        
    def train_dataloader(self):
        return DataLoader(self.train_dataset, batch_size=self.batch_size)

    def val_dataloader(self):
        return DataLoader(self.val_dataset, batch_size=len(self.val_dataset))
    
    @property
    def feat_max_val(self):
        return self._feat_max_val

    @property
    def adj(self):
        return self._adj

In [88]:
def generate_dataset(
    data, seq_len, pre_len, time_len=None, split_ratio=0.8, normalize=True
):
    if time_len is None:
        time_len = data.shape[0]
    if normalize:
        max_val = np.max(data)
        data = data / max_val
    train_size = int(time_len * split_ratio)
    train_data = data[:train_size]
    test_data = data[train_size:time_len]
    train_X, train_Y, test_X, test_Y = list(), list(), list(), list()
    for i in range(len(train_data) - seq_len - pre_len):
        train_X.append(np.array(train_data[i : i + seq_len]))
        train_Y.append(np.array(train_data[i + seq_len : i + seq_len + pre_len]))
    for i in range(len(test_data) - seq_len - pre_len):
        test_X.append(np.array(test_data[i : i + seq_len]))
        test_Y.append(np.array(test_data[i + seq_len : i + seq_len + pre_len]))
    return np.array(train_X), np.array(train_Y), np.array(test_X), np.array(test_Y)


def generate_torch_datasets(
    data, seq_len, pre_len, time_len=None, split_ratio=0.8, normalize=True
):
    train_X, train_Y, test_X, test_Y = generate_dataset(
        data,
        seq_len,
        pre_len,
        time_len=time_len,
        split_ratio=split_ratio,
        normalize=normalize,
    )
    train_dataset = torch.utils.data.TensorDataset(
        torch.FloatTensor(train_X), torch.FloatTensor(train_Y)
    )
    test_dataset = torch.utils.data.TensorDataset(
        torch.FloatTensor(test_X), torch.FloatTensor(test_Y)
    )
    return train_dataset, test_dataset

In [89]:
feat_path = '../Data/METR-LA/speed_la_0.csv'
adj_path = '../Data/METR-LA/adj_mx_la.csv'
seq_len = 12
pre_len = 3

In [91]:
dm = SpatioTemporalCSVDataModule(feat_path = feat_path, adj_path = adj_path)
model = GCN(adj=dm.adj, input_dim=seq_len, output_dim = 64)
task = SupervisedForecastTask(model=model, feat_max_val = dm.feat_max_val)
trainer = pl.Trainer()
trainer.fit(task, dm)
results = trainer.validate(datamodule=dm)
results

  rank_zero_warn(
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  rank_zero_warn(

  | Name      | Type   | Params
-------------------------------------
0 | model     | GCN    | 768   
1 | regressor | Linear | 195   
-------------------------------------
963       Trainable params
0         Non-trainable params
963       Total params
0.004     Total estimated model params size (MB)


Sanity Checking: 0it [00:00, ?it/s]

  rank_zero_warn(
  return F.mse_loss(inputs, targets)


RuntimeError: The size of tensor a (3) must match the size of tensor b (207) at non-singleton dimension 2

In [49]:
load_features(feat_path).shape

(34272, 208)

In [26]:
np.max(load_features(feat_path))

34271.0

In [53]:
207 * 82080

16990560

In [51]:
207 * 34272

7094304

In [68]:
feat_df = pd.read_csv(feat_path)
feat_df = feat_df.drop('Unnamed: 0', axis=1)
feat_df

Unnamed: 0,773869,767541,767542,717447,717446,717445,773062,767620,737529,717816,...,772167,769372,774204,769806,717590,717592,717595,772168,718141,769373
0,64.375000,67.625000,67.125000,61.500000,66.875000,68.750000,65.125000,67.125000,59.625000,62.750000,...,45.625000,65.500000,64.500000,66.428571,66.875000,59.375000,69.000000,59.250000,69.000000,61.875000
1,62.666667,68.555556,65.444444,62.444444,64.444444,68.111111,65.000000,65.000000,57.444444,63.333333,...,50.666667,69.875000,66.666667,58.555556,62.000000,61.111111,64.444444,55.888889,68.444444,62.875000
2,64.000000,63.750000,60.000000,59.000000,66.500000,66.250000,64.500000,64.250000,63.875000,65.375000,...,44.125000,69.000000,56.500000,59.250000,68.125000,62.500000,65.625000,61.375000,69.857143,62.000000
3,64.000000,63.750000,60.000000,59.000000,66.500000,66.250000,64.500000,64.250000,63.875000,65.375000,...,44.125000,69.000000,56.500000,59.250000,68.125000,62.500000,65.625000,61.375000,69.857143,62.000000
4,64.000000,63.750000,60.000000,59.000000,66.500000,66.250000,64.500000,64.250000,63.875000,65.375000,...,44.125000,69.000000,56.500000,59.250000,68.125000,62.500000,65.625000,61.375000,69.857143,62.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
34267,65.000000,65.888889,68.555556,61.666667,32.833333,54.555556,62.444444,63.333333,59.222222,65.333333,...,52.888889,69.000000,65.111111,55.666667,66.333333,62.444444,66.777778,64.888889,69.666667,62.333333
34268,61.375000,65.625000,66.500000,62.750000,32.833333,50.500000,62.000000,67.000000,65.250000,67.125000,...,54.000000,69.250000,60.125000,60.500000,67.250000,59.375000,66.000000,61.250000,69.000000,62.000000
34269,67.000000,59.666667,69.555556,61.000000,32.833333,44.777778,64.222222,63.777778,59.777778,57.666667,...,51.333333,67.888889,64.333333,57.000000,66.000000,62.666667,68.666667,63.333333,67.444444,61.222222
34270,66.750000,62.250000,66.000000,59.625000,32.833333,53.000000,64.285714,64.125000,60.875000,66.250000,...,51.125000,69.375000,61.625000,60.500000,65.625000,66.375000,69.500000,63.000000,67.875000,63.500000
