# Example of using our proposed Spatio-Temporal Graph Neural Network (STGNN)

## Import libraries

In [1]:
from stgraph_trainer.datasets import load_province_temporal_data
from stgraph_trainer.datasets import load_province_coordinates
from stgraph_trainer.datasets import preprocess_data_for_stgnn
from stgraph_trainer.utils import PairDataset
from stgraph_trainer.utils import compute_metrics
from stgraph_trainer.utils import get_distance_in_km_between_earth_coordinates
from stgraph_trainer.utils import get_adjacency_matrix
from stgraph_trainer.utils import get_normalized_adj
from torch.utils.data import DataLoader
from stgraph_trainer.models import ProposedSTGNN
from stgraph_trainer.trainers import ProposedSTGNNTrainer
import torch
import numpy as np
import pandas as pd

Using TensorFlow backend.


## Load and process dataset
### Setup parameters

In [2]:
# Experiment configuration: every value a reader might want to tune lives here.

# Seed PyTorch's global RNG so that weight initialisation, dropout and the
# shuffled DataLoader order are repeatable under "Restart Kernel -> Run All".
# NOTE(review): some CUDA kernels may still be non-deterministic.
SEED = 42
torch.manual_seed(SEED)

# Date handed to preprocess_data_for_stgnn to split train/test.
SPLIT_DATE = '2020-10-20'
# Length of the input window (time steps) fed to the model per sample.
TIME_STEPS = 7
# Provinces to load; also used as column labels for the predictions table.
PROVINCES = [
  "Seoul",
  "Busan",
  "Daegu",
  "Incheon",
  "Gwangju",
  "Daejeon",
  "Ulsan",
  "Sejong",
  "Gyeonggi",
  "Gangwon",
  "Chungbuk",
  "Chungnam",
  "Jeonbuk",
  "Jeonnam",
  "Gyeongbuk",
  "Gyeongnam",
  "Jeju"]
# Value of the `status` argument passed to load_province_temporal_data.
STATUS = 'New'
# Mini-batch size of the training DataLoader.
BATCH_SIZE = 16
# Number of epochs passed to trainer.train().
EPOCHS = 10
# Use the first CUDA device when one is available, otherwise the CPU.
device = torch.device('cuda', 0) if torch.cuda.is_available() else torch.device('cpu')

### Temporal data

In [3]:
# Load the per-province time series selected by PROVINCES / STATUS.
df = load_province_temporal_data(provinces=PROVINCES, status=STATUS)

# Split and window the series; the 5th and 6th return values are not needed here.
(train_inputs, train_targets,
 test_inputs, test_targets,
 _, _, scaler) = preprocess_data_for_stgnn(df, SPLIT_DATE, TIME_STEPS)

# Convert each array to a tensor and append a trailing singleton axis
# (used as the channel axis: the model's in_channels is X_train.shape[3]).
X_train = torch.tensor(train_inputs).unsqueeze(-1)
y_train = torch.tensor(train_targets).unsqueeze(-1)
X_test = torch.tensor(test_inputs).unsqueeze(-1)
y_test = torch.tensor(test_targets).unsqueeze(-1)

# Number of test samples, reused later to slice the matching rows of df.
n_test_samples = len(y_test)

In [4]:
# Display the shapes of the four train/test tensors as a tuple.
tuple(t.shape for t in (X_train, y_train, X_test, y_test))

(torch.Size([194, 17, 7, 1]),
 torch.Size([194, 17, 1]),
 torch.Size([85, 17, 7, 1]),
 torch.Size([85, 17, 1]))

In [5]:
# Pair inputs with targets, then serve them in shuffled mini-batches.
train_ds = PairDataset(X_train, y_train)
train_dl = DataLoader(train_ds, batch_size=BATCH_SIZE, shuffle=True)

### Spatial data

In [6]:
# Province coordinates; the first column of the loaded table is dropped
# (presumably a name/identifier column -- TODO confirm).
province_coords = load_province_coordinates().values[:, 1:]

# Pairwise distance matrix (km) between every pair of provinces.
dist_mx = np.array([
  [get_distance_in_km_between_earth_coordinates(c1, c2) for c2 in province_coords]
  for c1 in province_coords
])

adj_mx = get_adjacency_matrix(dist_mx)
# Cast to float32 before normalising (presumably to match the dtype of the
# model parameters -- TODO confirm).
adj_mx = adj_mx.astype(np.float32)

adj_mx = get_normalized_adj(adj_mx)
adj = torch.tensor(adj_mx)

# The graph must have one node per province series in the temporal data
# (axis 1 of X_train); fail early with a clear message if they disagree.
assert adj.shape == (X_train.shape[1], X_train.shape[1]), (
  f"Adjacency matrix shape {tuple(adj.shape)} does not match the "
  f"{X_train.shape[1]} nodes in the temporal data"
)
adj.shape

torch.Size([17, 17])

## Train the model

In [7]:
# Hyper-parameters of the proposed STGNN, gathered in one place.
model_kwargs = dict(
  n_nodes=adj.shape[0],           # one graph node per row of the adjacency matrix
  time_steps=TIME_STEPS,
  predicted_time_steps=1,
  in_channels=X_train.shape[3],   # size of the trailing axis of the input tensor
  spatial_channels=32,
  spatial_hidden_channels=16,
  spatial_out_channels=16,
  out_channels=16,
  temporal_kernel=3,
  drop_rate=0.2,
)

# Build the network and move it to the selected device.
model = ProposedSTGNN(**model_kwargs).to(device=device)

In [8]:
# Mean-squared-error objective.
loss_func = torch.nn.MSELoss()

# Adam over all model parameters with a learning rate of 0.001.
optimizer = torch.optim.Adam(params=model.parameters(), lr=0.001)

In [9]:
# Raw values of the last (n_test_samples + 1) rows of df, i.e. the test
# window plus the single row immediately before it.
raw_test = df.iloc[-(n_test_samples + 1):].values

# Bundle model, data, graph, scaler, loss, optimizer and device into the trainer.
trainer = ProposedSTGNNTrainer(
  model,
  train_dl,
  X_test,
  adj,
  scaler,
  loss_func,
  optimizer,
  device,
  callbacks=None,
  raw_test=raw_test,
)

In [10]:
# Train for EPOCHS epochs; the returned history is shown as the cell output
# (per-epoch 'epoch', 'train_loss', 'test_loss' and 'elapsed_time' lists).
history = trainer.train(EPOCHS)
history

Epoch: 1; Elapsed time: 0.04953336715698242; Train loss: 0.969324; Test MSE: 405.618561; Test loss RMSE: 20.139974
Epoch: 2; Elapsed time: 0.05053210258483887; Train loss: 0.965432; Test MSE: 396.617920; Test loss RMSE: 19.915269
Epoch: 3; Elapsed time: 0.046793222427368164; Train loss: 0.912030; Test MSE: 387.234161; Test loss RMSE: 19.678266
Epoch: 4; Elapsed time: 0.04871797561645508; Train loss: 0.897380; Test MSE: 378.395599; Test loss RMSE: 19.452393
Epoch: 5; Elapsed time: 0.046854257583618164; Train loss: 0.911294; Test MSE: 367.558472; Test loss RMSE: 19.171815
Epoch: 6; Elapsed time: 0.04424715042114258; Train loss: 0.920928; Test MSE: 360.120514; Test loss RMSE: 18.976842
Epoch: 7; Elapsed time: 0.04700469970703125; Train loss: 0.782902; Test MSE: 347.656372; Test loss RMSE: 18.645546
Epoch: 8; Elapsed time: 0.03743457794189453; Train loss: 0.837908; Test MSE: 343.262146; Test loss RMSE: 18.527335
Epoch: 9; Elapsed time: 0.03305935859680176; Train loss: 0.770195; Test MSE: 3

{'epoch': [1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
 'train_loss': [0.9693244076692141,
  0.9654320902549304,
  0.9120301294785279,
  0.8973803657751817,
  0.9112942195855654,
  0.9209284346837264,
  0.7829024496559913,
  0.8379080983308645,
  0.7701948285102844,
  0.7630832974727337],
 'test_loss': [405.6185607910156,
  396.617919921875,
  387.2341613769531,
  378.3955993652344,
  367.5584716796875,
  360.1205139160156,
  347.6563720703125,
  343.26214599609375,
  339.10693359375,
  333.94097900390625],
 'elapsed_time': [0.04953336715698242,
  0.05053210258483887,
  0.046793222427368164,
  0.04871797561645508,
  0.046854257583618164,
  0.04424715042114258,
  0.04700469970703125,
  0.03743457794189453,
  0.03305935859680176,
  0.0323178768157959]}

## Prediction

In [11]:
# Run the trainer's prediction step (presumably on the X_test given at
# construction -- TODO confirm); the result has one column per province.
predictions = trainer.predict()

In [12]:
# Sanity check: expect (number of test samples, number of provinces).
predictions.shape

(85, 17)

In [13]:
# Label the predictions with province names and the dates of the test window,
# then preview the first rows.
test_index = df.iloc[-n_test_samples:].index
predictions_df = pd.DataFrame(predictions, columns=PROVINCES, index=test_index)
predictions_df.head()

Unnamed: 0,Seoul,Busan,Daegu,Incheon,Gwangju,Daejeon,Ulsan,Sejong,Gyeonggi,Gangwon,Chungbuk,Chungnam,Jeonbuk,Jeonnam,Gyeongbuk,Gyeongnam,Jeju
2020-10-20,16.434998,11.846681,0.377825,4.163242,1.670218,1.803768,0.168991,0.068562,20.012554,1.249839,0.67064,1.644362,0.73995,0.236018,1.074349,0.749913,0.121054
2020-10-21,14.778962,4.063126,0.471197,3.794794,0.788469,0.833758,0.23753,0.063441,27.574669,2.103812,1.267345,1.764697,0.235352,0.246873,1.405837,1.110403,0.122224
2020-10-22,16.539724,9.555019,0.224903,5.15815,1.186094,2.749295,0.524874,0.074417,25.867645,2.22938,0.963057,0.399974,0.372635,0.566936,1.66766,0.328303,0.119438
2020-10-23,15.653477,4.867497,1.289272,3.717338,0.871321,0.594761,0.173816,0.040523,46.749451,1.958138,0.394497,6.471957,1.178643,0.281737,1.36599,0.443597,0.12234
2020-10-24,17.662086,2.868346,3.139606,4.416123,3.090621,0.420742,0.239853,0.053558,77.659363,1.519273,0.440844,3.489955,1.287953,0.712951,0.65634,0.388869,0.119428


In [14]:
# RMSE on the test window: compare the last n_test_samples rows of df
# against the predictions, then display the second returned value.
y_true_test = df.iloc[-n_test_samples:]
m, m_avg = compute_metrics(y_true_test, predictions, metric='rmse')
m_avg

18.27405273769909