# Traffic prediction modeling with GNN

In [1]:
# Only colab stuff
try:
  from google.colab import drive
  drive.mount('/content/gdrive')
  import torch
  import os
  print("PyTorch has version {}".format(torch.__version__))
  if 'IS_GRADESCOPE_ENV' not in os.environ:
    !pip install torch-scatter -f https://pytorch-geometric.com/whl/torch-1.13.1+cu116.html
    !pip install torch-sparse -f https://pytorch-geometric.com/whl/torch-1.13.1+cu116.html
    !pip install torch-geometric
    !pip install ogb

    os.chdir("/content/gdrive/MyDrive/MLG_cloned_repo/src/scripts")
except:
  print("Not in colab!")
  os.chdir("./src/scripts")

Mounted at /content/gdrive
PyTorch has version 1.13.1+cu116
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Looking in links: https://pytorch-geometric.com/whl/torch-1.13.1+cu116.html
Collecting torch-scatter
  Downloading https://data.pyg.org/whl/torch-1.13.0%2Bcu116/torch_scatter-2.1.1%2Bpt113cu116-cp39-cp39-linux_x86_64.whl (9.4 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m9.4/9.4 MB[0m [31m37.5 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: torch-scatter
Successfully installed torch-scatter-2.1.1+pt113cu116
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Looking in links: https://pytorch-geometric.com/whl/torch-1.13.1+cu116.html
Collecting torch-sparse
  Downloading https://data.pyg.org/whl/torch-1.13.0%2Bcu116/torch_sparse-0.6.17%2Bpt113cu116-cp39-cp39-linux_x86_64.whl (4.5 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m 

In [None]:
#You should be in src/scripts!!!
!pwd

/content/gdrive/MyDrive/MLG_cloned_repo/src/scripts


### Load libraries

In [None]:
import json
import logging
import numpy as np
np.random.seed(0)
import os
from tqdm import tqdm
import time
import matplotlib.pyplot as plt
import datetime

import random
random.seed(0)

#Custom scripts
import modeling_utils as modeling_utils 
import data_preparation as data_preparation

#Pytorch and PyG
import torch
torch.manual_seed(0)
import torch.optim as optim
from torch_geometric.data import Data
from torch_geometric.loader import DataLoader
import torch.nn.functional as F
from torch.optim.lr_scheduler import StepLR 
from torch_geometric.nn import GATConv, GCNConv, GATv2Conv
from torch.utils.tensorboard import SummaryWriter

print(torch.__version__)
device = 'cuda' if torch.cuda.is_available() else 'cpu'
print(f"Using {device}")

#Lets start at src location
if os.path.exists("./src"):
  os.chdir("./src")
elif 'scripts' in os.getcwd():
  os.chdir("../")

1.13.1+cu116
Using cuda


In [None]:
#%cd ..

### Constants and setting-up

In [None]:
# Constant config to use throughout
config = {
    'TRAIN_TEST_PROPORTION'             : (0.7, 0.1, 0.2),
    'BATCH_SIZE'                        : 128,
    'EPOCHS'                            : 200,
    'WEIGHT_DECAY'                      : 0,
    'INITIAL_LR'                        : 0.1,
    'LR_DECAY'                          : 0.5,
    'LR_DECAY_STEP'                     : 50,
    'DROPOUT'                           : 0.0,
    'ATTENTION_HEADS'                   : 8,
    'RESULTS_DIR'                       : './runs/'+time.strftime("%m-%dT%H-%M-%S")+'/',
    'data_with_already_filled_gaps'     : True,
    'counter_files_path'                : '../data/counters_interpolated_gaps/',                         # '../data/counters_temporal_data_2023-03-03T09-24-06/'
    'counters_nontemporal_aggregated'   : '../data/counters_non_temporal_aggregated_data.csv',
    'holidays_path'                     : '../data/holidays.csv',
    'USE_YEAR_PERIODIC_DATA'            : False,
    'USE_HOLIDAY_FEATURES'              : False,
    'USE_WEEKDAY_FEATURES'              : True,
    'USE_MONTH_FEATURES'                : False,
    'N_GRAPHS'                          : 380*24,
    'F_IN'                              : 7*24,
    'F_OUT'                             : 7*24,
    'N_NODE'                            : 165,
    'target_col'                        : 'Fast',
    'use_tensorboard'                   : True,
    'USE_GAT'                           : True, # if True use GAT, else use GCN
    'USE_LSTM'                          : True, # if True use LSTM, else use GRU
    'LSTM_LAYER_SIZES'                  : [500, 500],  
    'GRU_LAYER_SIZES'                   : [500, 500],  
    'LINEAR_HIDDEN_SIZE'                : 600,     
    'USE_EARLY_STOPPING'                : True,
    "MIN_ITERATIONS_EARLY_STOPPING"     : 50,
    "EARLY_STOPPING_TOLERANCE"          : 15,
    "LOG_BASELINE"                      : True, # if true outputs average rmse on computed on each batch,
    "DATA_DATE_SPLIT"                   : '05/07/22 00:00:00',
    "SCALE_DATA"                        : False,
    "USE_ONEHOT_FEATURES"               : False
}

# Set logging level
logging.getLogger().setLevel(logging.INFO)

# Make a tensorboard writer
if config["use_tensorboard"]:
    writer = SummaryWriter()

### Model

In [None]:
class ST_GNN(torch.nn.Module):
    """
    Spatio-Temporal Graph Neural Network which has options of using:
    1) Normal neighbor aggregation OR attention mechanism
    2) GRU temporal layer or LSTM
    """ 
    def __init__(self, device, in_channels, out_channels, n_nodes, heads=None, dropout=0.0):
        """
        Initialize the ST-GNN model
        :param in_channels Number of input channels
        :param out_channels Number of output channels
        :param n_nodes Number of nodes in the graph
        :param heads Number of attention heads to use in graph
        :param dropout Dropout probability on output of Graph Attention Network
        """
        # Set up params
        super(ST_GNN, self).__init__()
        self.device = device
        self.n_pred = out_channels
        self.dropout = dropout
        self.n_nodes = n_nodes
        #self.n_preds = 9 TODO is this needed?
        
        # Init spatial part
        if config['USE_GAT']:
            self.heads = heads
            self.gat = GATv2Conv(in_channels=in_channels, out_channels=in_channels,
                    heads=heads, dropout=0, concat=False)
        else:
            self.gcn = GCNConv(in_channels=in_channels, out_channels=in_channels, dropout=0, concat=False)

        # Init temporal part
        if config['USE_LSTM']:
            self.lstms = []
            for layer_index, layer_size in enumerate(config["LSTM_LAYER_SIZES"]):
                if layer_index == 0: input_size = self.n_nodes
                else: input_size = config["LSTM_LAYER_SIZES"][layer_index - 1]

                lstm = torch.nn.LSTM(input_size=input_size, hidden_size=layer_size, num_layers=1, device = self.device)
                for name, param in lstm.named_parameters():
                    if 'bias' in name:
                        torch.nn.init.constant_(param, 0.0)
                    elif 'weight' in name:
                        torch.nn.init.xavier_uniform_(param)
                self.lstms.append(lstm)

            # fully-connected neural network
            self.linear1 = torch.nn.Linear(config["LSTM_LAYER_SIZES"][-1], config["LINEAR_HIDDEN_SIZE"])
            self.linear2 = torch.nn.Linear(config["LINEAR_HIDDEN_SIZE"], self.n_nodes*self.n_pred)
        else:
            self.grus = []
            for layer_index, layer_size in enumerate(config["GRU_LAYER_SIZES"]):
                if layer_index == 0: input_size = self.n_nodes
                else: input_size = config["GRU_LAYER_SIZES"][layer_index - 1]

                gru = torch.nn.GRU(input_size=input_size, hidden_size=layer_size, num_layers=1, device = self.device)
                self.grus.append(gru)

            # fully-connected neural network
            self.linear1 = torch.nn.Linear(config["GRU_LAYER_SIZES"][-1], config["LINEAR_HIDDEN_SIZE"])
            self.linear2 = torch.nn.Linear(config["LINEAR_HIDDEN_SIZE"], self.n_nodes*self.n_pred)

        torch.nn.init.xavier_uniform_(self.linear1.weight)
        torch.nn.init.xavier_uniform_(self.linear2.weight)
        self.relu = torch.nn.ReLU()

    def forward(self, data, device):
        """
        Forward pass of the ST-GNN model
        :param data Data to make a pass on
        :param device Device to operate on
        """
        x, edge_index = data.x, data.edge_index
        if self.device == 'cpu':
            x = torch.FloatTensor(x)
        else:
            x = torch.cuda.FloatTensor(x)

        if config['USE_GAT']:
            x = self.gat(x, edge_index)
        else:
            x = self.gcn(x, edge_index)
        x = F.dropout(x, self.dropout, training=self.training)


        batch_size = data.num_graphs
        n_node = int(data.num_nodes/batch_size)
        x = torch.reshape(x, (batch_size, n_node, data.num_features))
        x = torch.movedim(x, 2, 0)
        if config["USE_LSTM"]:
            for lstm in self.lstms:
                x, _ = lstm(x)
        else:
            for gru in self.grus:
                x, _ = gru(x)


        x = torch.squeeze(x[-1, :, :])
        x = self.linear1(x)
        x = self.relu(x)
        x = self.linear2(x)
        x = self.relu(x)

        s = x.shape
        x = torch.reshape(x, (s[0], self.n_nodes, self.n_pred))
        x = torch.reshape(x, (s[0]*self.n_nodes, self.n_pred))
        return x

In [None]:
@torch.no_grad()
def eval(model, device, dataloader, type='', dim_vars=None, save_predictions=False):
    model.eval()
    model.to(device)

    mae = 0
    rmse = 0
    baseline_rmse = 0
    mape = 0
    n = 0

    # Evaluate model on all data
    for i, batch in enumerate(dataloader):
        batch = batch.to(device)
        if batch.x.shape[0] == 1:
            pass
        else:
            with torch.no_grad():
                pred = model(batch, device)
            truth = batch.y.view(pred.shape)
            if i == 0:
                y_pred = torch.zeros(len(dataloader), pred.shape[0], pred.shape[1])
                y_truth = torch.zeros(len(dataloader), pred.shape[0], pred.shape[1])
            #truth = un_z_score(truth, dataloader.dataset.mean, dataloader.dataset.std_dev)
            #pred = un_z_score(pred, dataloader.dataset.mean, dataloader.dataset.std_dev)
            
            # reshape predictions
            y_pred[i, :pred.shape[0], :] = pred
            y_truth[i, :pred.shape[0], :] = truth

            # save y_prediction & true values for later analysis
            if save_predictions:
                modeling_utils.save_all_predictions(y_pred, y_truth, dim_vars, config['RESULTS_DIR'])

            # calculate batch average (take info only from x and take mean)
            pred_avg = torch.mean(batch.x[:,:config['F_IN']], axis=1, keepdim=True).repeat(1,config['F_OUT'])

            # calculate loss
            rmse += modeling_utils.RMSE(truth, pred)
            baseline_rmse += modeling_utils.RMSE(truth, pred_avg)
            mae += modeling_utils.MAE(truth, pred)
            mape += modeling_utils.MAPE(truth, pred)

            n += 1
    rmse, mae, mape, baseline_rmse = rmse / n, mae / n, mape / n, baseline_rmse / n

    logging.info(f'{type}, MAE: {round(int(mae),2)}, RMSE: {round(int(rmse),2)}, MAPE: {round(int(mape),2)}')

    #get the average score for each metric in each batch
    return rmse, mae, mape, baseline_rmse, y_pred, y_truth


def epoch_train(model, device, dataloader, optimizer, loss_fn, epoch):    
    scheduler = StepLR(optimizer, step_size = config['LR_DECAY_STEP'], gamma = config['LR_DECAY'])

    model.train()
    for _, batch in enumerate(tqdm(dataloader, desc=f"Epoch {epoch}")):
        batch = batch.to(device)
        optimizer.zero_grad()
        y_pred = torch.squeeze(model(batch, device))
        loss = loss_fn()(y_pred.float(), torch.squeeze(batch.y).float())
        if config["use_tensorboard"]:
            writer.add_scalar("Loss/train", loss, epoch)
        loss.backward()
        optimizer.step()
        scheduler.step()

    return loss

## Train the model

In [None]:
def model_train(train_dataloader, val_dataloader, config, device, save_test_results = False, test_dataloader = None, dim_vars = None):
    """
    Train the ST-GAT model. Evaluate on validation dataset as you go.
    :param train_dataloader Data loader of training dataset
    :param val_dataloader Dataloader of val dataset
    :param config configuration to use
    :param device Device to evaluate on
    """

    # Make the model. Each datapoint in the graph is 228x12: N x F (N = # nodes, F = time window)
    in_channels=config['F_IN']
    if config["USE_YEAR_PERIODIC_DATA"]:
        in_channels += 2 * 2 + 1
    if config["USE_HOLIDAY_FEATURES"]: 
        in_channels += 7 * data_preparation.number_of_countries_in_holiday_dataset(config)
    if config["USE_WEEKDAY_FEATURES"]:
        in_channels += 1
    if config["USE_MONTH_FEATURES"]:
        in_channels += 1
    if config["USE_ONEHOT_FEATURES"]:
        in_channels += config["N_NODE"]
    
    model = ST_GNN(
        device = device,
        in_channels=in_channels, 
        out_channels=config['F_OUT'], 
        n_nodes=config['N_NODE'], 
        heads=config['ATTENTION_HEADS'], 
        dropout=config['DROPOUT']
    )
    
    logging.info("Model initialized")
    optimizer = optim.Adam(model.parameters(), lr=config['INITIAL_LR']) #weight_decay=config['WEIGHT_DECAY'])
    # optimizer = optim.SGD(model.parameters(), lr=config['INITIAL_LR'], weight_decay=config['WEIGHT_DECAY'])
    loss_fn = torch.nn.MSELoss
    model.to(device)

    # Early stopping variables
    n_iteration_since_loss_improvment = 0
    best_train_loss = 999999999999999999

    # For every epoch, train the model on training dataset. Evaluate model on validation dataset
    for epoch in range(config['EPOCHS']):
        loss = epoch_train(model, device, train_dataloader, optimizer, loss_fn, epoch)
        logging.info(f"Loss: {loss:.3f}")
        if epoch % 5 == 0:
            train_rmse, train_mae, train_mape, _, _, _ = eval(model, device, train_dataloader, 'Train')
            val_rmse, val_mae, val_mape, _, _, _ = eval(model, device, val_dataloader, 'Valid')
            _, _, _, _, _, _ = eval(model, device, test_dataloader, 'Test')
            if config["use_tensorboard"]:
                writer.add_scalar(f"MAE/train", train_mae, epoch)
                writer.add_scalar(f"RMSE/train", train_rmse, epoch)
                writer.add_scalar(f"MAPE/train", train_mape, epoch)
                writer.add_scalar(f"MAE/val", val_mae, epoch)
                writer.add_scalar(f"RMSE/val", val_rmse, epoch)
                writer.add_scalar(f"MAPE/val", val_mape, epoch)
        
        if config["USE_EARLY_STOPPING"]:
          if loss < best_train_loss:
            best_train_loss = loss
            n_iteration_since_loss_improvment = 0
          else: n_iteration_since_loss_improvment += 1

          if epoch >= config["MIN_ITERATIONS_EARLY_STOPPING"] and \
                n_iteration_since_loss_improvment >= config["EARLY_STOPPING_TOLERANCE"]:
            break
    logging.info("All epochs done, finished training")

    if config["use_tensorboard"]:
        writer.flush()
    # Save the model
    os.mkdir(config["RESULTS_DIR"])
    torch.save({
            "epoch": epoch,
            "model_state_dict": model.state_dict(),
            "optimizer_state_dict": optimizer.state_dict(),
            "loss": loss},
            os.path.join(config["RESULTS_DIR"], "model.pt")
    )
    
    with open(os.path.join(config["RESULTS_DIR"], "config.json"), "w") as fp:
        json.dump(config, fp)

    if save_test_results:
        test_rmse, test_mae, test_mape, baseline_rmse, _, _ = eval(model, device, test_dataloader, 'Test', dim_vars, save_predictions=True)
        logging.info(f"Test RMSE:{test_rmse}")
        if config['LOG_BASELINE']:
          logging.info(f"Test BASELINE RMSE:{baseline_rmse}")
        results = {'MAE': test_mae.item(),
                    'RMSE': test_rmse.item(),
                    'MAPE': test_mape.item(),
                   'BASELINE_RMSE': baseline_rmse}
        with open(os.path.join(config["RESULTS_DIR"], "results.json"), "w") as fp:
            json.dump(results, fp)
    return model

### Start training

In [None]:
#import importlib
#importlib.reload(data_preparation)
#importlib.reload(modeling_utils)
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [None]:
# Make runs directory if it does not exist
if not os.path.exists(config['RESULTS_DIR'].rsplit('/', 2)[0]):
    os.mkdir(config['RESULTS_DIR'].rsplit('/', 2)[0])

dataset, dim_vars = data_preparation.prepare_pyg_dataset(config)
train_g, val_g, test_g, train_vars, val_vars, test_vars = data_preparation.split_dataset(dataset, config, dim_vars = dim_vars)


INFO:root:Preparing data...
INFO:root:Historical counter data successfully read
INFO:root:Edge index constructed
INFO:root:Final dataset constructed
INFO:root:Dataset splitted to train,val,test


Size of train data: 6384
Size of validation data: 912
Size of test data: 1824


In [None]:
# Split the Data instances in 
train_dataloader = DataLoader(train_g, batch_size=config['BATCH_SIZE'], shuffle=False, drop_last = True)
val_dataloader = DataLoader(val_g, batch_size=config['BATCH_SIZE'], shuffle=False, drop_last = True)
test_dataloader = DataLoader(test_g, batch_size=config['BATCH_SIZE'], shuffle=False, drop_last = True)

In [None]:
# Configure and train model
model = model_train(train_dataloader, val_dataloader, config, device, True, test_dataloader, val_vars)

INFO:root:Model initialized
Epoch 0: 100%|██████████| 49/49 [00:11<00:00,  4.18it/s]
INFO:root:Loss: 234993.406
INFO:root:Train, MAE: 406, RMSE: 567, MAPE: 160385572165124096
INFO:root:Valid, MAE: 315, RMSE: 447, MAPE: 41769544796078080
INFO:root:Test, MAE: 359, RMSE: 506, MAPE: 220252674627469312
Epoch 1: 100%|██████████| 49/49 [00:11<00:00,  4.15it/s]
INFO:root:Loss: 130093.719
Epoch 2: 100%|██████████| 49/49 [00:11<00:00,  4.25it/s]
INFO:root:Loss: 129379.125
Epoch 3: 100%|██████████| 49/49 [00:11<00:00,  4.33it/s]
INFO:root:Loss: 122129.359
Epoch 4: 100%|██████████| 49/49 [00:11<00:00,  4.34it/s]
INFO:root:Loss: 123710.391
Epoch 5: 100%|██████████| 49/49 [00:11<00:00,  4.32it/s]
INFO:root:Loss: 98174.359
INFO:root:Train, MAE: 280, RMSE: 413, MAPE: 162803862091071488
INFO:root:Valid, MAE: 206, RMSE: 317, MAPE: 170371749085446144
INFO:root:Test, MAE: 240, RMSE: 365, MAPE: 384525295274688512
Epoch 6: 100%|██████████| 49/49 [00:11<00:00,  4.26it/s]
INFO:root:Loss: 101023.227
Epoch 7: 1

In [None]:
print(config['RESULTS_DIR'])

In [None]:
%load_ext tensorboard
#%reload_ext tensorboard
#%tensorboard --logdir ./runs/03-17T17-41-07/ --load_fast=true

In [None]:
modeling_utils.plot_predictions_vs_gt(pickle_path=config['RESULTS_DIR']+'ygt_ypred.pkl')

In [None]:
test_rmse, test_mae, test_mape, baseline_rmse, _, _ = eval(model, device, test_dataloader, 'Test', dim_vars, save_predictions=True)
print(test_rmse, test_mae, test_mape, baseline_rmse)