In [1]:
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, Dataset
import numpy as np
import pandas as pd
import json

from ASTGCN.astgcn import ASTGCN, params
from utility.utils import compute_chebyshev_polynomials, compute_scaled_laplacian, generate_adj_matrix, normalise_data

In [2]:
with open('config.json', 'r') as f:
    config_file = json.load(f)
    f.close()

In [3]:
n_verticies = int(config_file['num_of_vertices'])
points_per_hour = int(config_file['points_per_hour'])

num_for_predict = int(config_file['num_for_predict'])
n_features = int(config_file['n_features'])
n_history = int(config_file['n_history'])

distance_matrix_path = str(config_file['distance_filename'])
graph_signal_path = str(config_file['graph_signal_path'])

epochs = int(config_file['epochs'])
cheb_polynomial = int(config_file['cheb_polynomials'])
lr = float(config_file['lr'])

In [4]:
distance_matrix = pd.read_csv(distance_matrix_path).to_numpy()
adj_matrix = generate_adj_matrix(distance_matrix, n_verticies)
adj_matrix.shape

(307, 307)

In [5]:
graph_signal_matrix = np.load(graph_signal_path)['data']
graph_signal_matrix.shape

(16992, 307, 3)

In [6]:
normalised_graph_signal_matrix = normalise_data(graph_signal_matrix)

In [7]:
# 16,992 = 59 days 

In [8]:
def generate_simple_dataset(data, num_hist=12, num_pred=12, train_ratio=0.7):
    samples_per_day = 288  # 24 * 12
    samples_per_week = 2016  # 7 * 288
    
    start_idx = samples_per_week + num_hist
    
    recent, daily, weekly, targets = [], [], [], []
    
    for i in range(start_idx, len(data) - num_pred):
        recent.append(data[i-num_hist:i])
        daily.append(data[i-samples_per_day-num_hist : i-samples_per_day])
        weekly.append(data[i-samples_per_week-num_hist : i-samples_per_week])
        targets.append(data[i:i+num_pred])
    
    n = len(recent)
    split_idx = int(n * train_ratio)
    
    return {
            'x_train_recent': np.array(recent[:split_idx]),
            'x_train_daily': np.array(daily[:split_idx]),
            'x_train_weekly': np.array(weekly[:split_idx]),
            'y_train': np.array(targets[:split_idx]),
            'x_test_recent': np.array(recent[split_idx:]),
            'x_test_daily': np.array(daily[split_idx:]),
            'x_test_weekly': np.array(weekly[split_idx:]),
            'y_test': np.array(targets[split_idx:])
        }

In [9]:
dataset = generate_simple_dataset(normalised_graph_signal_matrix)

In [10]:
dataset['x_train_recent'].shape

(10466, 12, 307, 3)

In [11]:
dataset['x_train_daily'].shape

(10466, 12, 307, 3)

In [12]:
dataset['x_train_weekly'].shape

(10466, 12, 307, 3)

In [13]:
dataset['y_train'].shape

(10466, 12, 307, 3)

In [14]:
class CustomDataset(Dataset):

    def __init__(self, x_recent, x_daily, x_weekly, output):

        self.x_recent = torch.tensor(x_recent, dtype=torch.float32).permute(0, 2, 3, 1) # x, T, N, F - > x, N, F, T
        self.x_daily = torch.tensor(x_daily, dtype=torch.float32).permute(0, 2, 3, 1)
        self.x_weekly = torch.tensor(x_weekly, dtype=torch.float32).permute(0, 2, 3, 1)
        self.output = torch.tensor(output, dtype=torch.float32).permute(0, 2, 3, 1)

    def __len__(self):

        return self.x_recent.shape[0]

    def __getitem__(self, idx):

        return self.x_recent[idx], self.x_daily[idx], self.x_weekly[idx], self.output[idx]

In [28]:
train_dataset = CustomDataset(
    dataset['x_train_recent'][0:40*16], # only doing the first 40 batches
    dataset['x_train_daily'][0:40*16], 
    dataset['x_train_weekly'][0:40*16],
    dataset['y_train'][0:40*16]
)

test_dataset = CustomDataset(
    dataset['x_test_recent'][0:40*16],
    dataset['x_test_daily'][0:40*16],
    dataset['x_test_weekly'][0:40*16],
    dataset['y_test'][0:40*16]
)

In [29]:
torch.manual_seed(42)

<torch._C.Generator at 0x108d95ef0>

In [35]:
train_dataloader = DataLoader(train_dataset, batch_size=16, shuffle=True)
test_dataloader = DataLoader(test_dataset, batch_size=16, shuffle=True)

In [36]:
L_scaled = compute_scaled_laplacian(adj_matrix)
cheb_polynomials = compute_chebyshev_polynomials(L_scaled, cheb_polynomial)

In [43]:
ModelParams = params (
    n_timesteps = n_history,
    cheb_polynomials = cheb_polynomials,
    n_features = n_features,
    n_verticies = n_verticies,
    n_layers = 1,
    lr = 4e-2
)

In [44]:
model = ASTGCN(ModelParams)

In [45]:
for epoch in range(epochs):

    train_loss = 0.0
    test_loss = 0.0
    
    for x_recent, x_daily, x_weekly, output in train_dataloader:

        model.optimizer.zero_grad()
        pred_out = model(x_recent, x_daily, x_weekly)
        loss = model.compute_loss(pred_out, output)
        loss.backward()
        model.optimizer.step()
        train_loss += loss.item()
    
    for x_recent, x_daily, x_weekly, output in test_dataloader:
        
        with torch.no_grad():

            pred_out = model(x_recent, x_daily, x_weekly)
            loss = model.compute_loss(pred_out, output)
            test_loss += loss.item()
            
    print("="*50)
    print(f"Epoch: {epoch+1}")
    print(f"Train loss: {train_loss}")
    print(f"Test loss: {test_loss}")
    print("="*50)

Epoch: 1
Train loss: 49.40409529209137
Test loss: 37.22625911235809
Epoch: 2
Train loss: 48.67265683412552
Test loss: 36.482948780059814
Epoch: 3
Train loss: 47.15277272462845
Test loss: 35.73092520236969
Epoch: 4
Train loss: 45.90956288576126
Test loss: 37.406772911548615
Epoch: 5
Train loss: 45.60867887735367
Test loss: 37.71971535682678
Epoch: 6
Train loss: 44.39013624191284
Test loss: 37.76668471097946
Epoch: 7
Train loss: 44.00029045343399
Test loss: 37.91232639551163
Epoch: 8
Train loss: 44.11412167549133
Test loss: 36.265748262405396
Epoch: 9
Train loss: 43.92029392719269
Test loss: 37.29431837797165
Epoch: 10
Train loss: 43.38738417625427
Test loss: 37.79525923728943
Epoch: 11
Train loss: 43.28458559513092
Test loss: 37.12257742881775
Epoch: 12
Train loss: 43.210021674633026
Test loss: 37.235741436481476
Epoch: 13
Train loss: 43.18858712911606
Test loss: 37.14897072315216
Epoch: 14
Train loss: 43.1192507147789
Test loss: 37.10729646682739
Epoch: 15
Train loss: 43.25756078958511

KeyboardInterrupt: 

In [46]:
model.eval()

ASTGCN(
  (blocks): ModuleDict(
    (recent): ModuleList(
      (0): STBlock(
        (spatial_attn): SpatialAttention(
          (sigmoid): Sigmoid()
          (softmax): Softmax(dim=None)
        )
        (temporal_attn): TemporalAttention(
          (sigmoid): Sigmoid()
          (softmax): Softmax(dim=None)
        )
        (spatial_conv): SpatialConv(
          (relu): ReLU()
        )
        (temporal_conv): TemporalConv(
          (conv): Conv2d(3, 3, kernel_size=(1, 3), stride=(1, 1), padding=(0, 1))
          (relu): ReLU()
        )
        (ln): LayerNorm((3,), eps=1e-05, elementwise_affine=True)
        (dropout): Dropout(p=0.2, inplace=False)
        (gelu): GELU(approximate='none')
      )
      (1): Linear(in_features=12, out_features=12, bias=False)
    )
    (daily): ModuleList(
      (0): STBlock(
        (spatial_attn): SpatialAttention(
          (sigmoid): Sigmoid()
          (softmax): Softmax(dim=None)
        )
        (temporal_attn): TemporalAttention(
    