# Traffic prediction modeling with GNN

### Load libraries

In [19]:
import json
import logging
import numpy as np
import os
from tqdm import tqdm
import time
import matplotlib.pyplot as plt

#Custom scripts
from modeling_utils import *
import data_preparation as data_preparation

#Pytorch and PyG
import torch
import torch.optim as optim
from torch_geometric.data import Data
from torch_geometric.loader import DataLoader
import torch
import torch.nn.functional as F
from torch.optim.lr_scheduler import StepLR 
from torch_geometric.nn import GATConv, GCNConv
from torch.utils.tensorboard import SummaryWriter

print(torch.__version__)
device = 'cuda' if torch.cuda.is_available() else 'cpu'
print(f"Using {device}")

#Lets start at src location
if os.path.exists("./src"):
    os.chdir("./src")

1.13.1+cpu
Using cpu


### Constants and setting-up

In [20]:
# Constant config to use throughout
config = {
    'TRAIN_TEST_PROPORTION'             : (0.6, 0.1, 0.3),
    'BATCH_SIZE'                        : 64,
    'EPOCHS'                            : 100,
    'WEIGHT_DECAY'                      : 5e-5,
    'INITIAL_LR'                        : 1e-1,
    'DROPOUT'                           : 0.2,
    'ATTENTION_HEADS'                   : 8,
    'CHECKPOINT_DIR'                    : '../runs',
    'counter_files_path'                : '../data/counters_temporal_data_2023-03-03T09-24-06/',
    'counters_nontemporal_aggregated'   : '../data/counters_non_temporal_aggregated_data.csv',
    'holidays_path'                     : '../data/holidays.csv',
    'USE_HOLIDAY_FEATURES'              : True,
    'N_GRAPHS'                          : 30*24,
    'F_IN'                              : 7*24,
    'F_OUT'                             : 7*24,
    'N_NODE'                            : 165,
    'target_col'                        : 'Sum',
    'use_tensorboard'                   : False,
    'USE_GAT'                           : True, # if True use GAT, else use GCN
    'USE_LSTM'                          : False, # if True use LSTM, else use GRU
    'LSTM_LAYER_SIZES'                  : [128, 32],  
    'GRU_LAYER_SIZES'                   : [128, 32],    
}

# Set logging level
logging.getLogger().setLevel(logging.INFO)

# Make a tensorboard writer
if config["use_tensorboard"]:
    writer = SummaryWriter()

### Model

In [21]:
class ST_GNN(torch.nn.Module):
    """
    Spatio-Temporal Graph Neural Network which has options of using:
    1) Normal neighbor aggregation OR attention mechanism
    2) GRU temporal layer or LSTM
    """ 
    def __init__(self, in_channels, out_channels, n_nodes, heads=None, dropout=0.0):
        """
        Initialize the ST-GNN model
        :param in_channels Number of input channels
        :param out_channels Number of output channels
        :param n_nodes Number of nodes in the graph
        :param heads Number of attention heads to use in graph
        :param dropout Dropout probability on output of Graph Attention Network
        """
        # Set up params
        super(ST_GNN, self).__init__()
        self.n_pred = out_channels
        self.dropout = dropout
        self.n_nodes = n_nodes
        #self.n_preds = 9 TODO is this needed?
        
        # Init spatial part
        if config['USE_GAT']:
            self.heads = heads
            self.gat = GATConv(in_channels=in_channels, out_channels=in_channels,
                    heads=heads, dropout=0, concat=False)
        else:
            self.gcn = GCNConv(in_channels=in_channels, out_channels=in_channels, dropout=0, concat=False)

        # Init temporal part
        if config['USE_LSTM']:
            self.lstms = []
            for layer_index, layer_size in enumerate(config["LSTM_LAYER_SIZES"]):
                if layer_index == 0: input_size = self.n_nodes
                else: input_size = config["LSTM_LAYER_SIZES"][layer_index - 1]

                lstm = torch.nn.LSTM(input_size=input_size, hidden_size=layer_size, num_layers=1)
                for name, param in lstm.named_parameters():
                    if 'bias' in name:
                        torch.nn.init.constant_(param, 0.0)
                    elif 'weight' in name:
                        torch.nn.init.xavier_uniform_(param)
                self.lstms.append(lstm)

            # fully-connected neural network
            self.linear = torch.nn.Linear(config["LSTM_LAYER_SIZES"][-1], self.n_nodes*self.n_pred)
        else:
            self.grus = []
            for layer_index, layer_size in enumerate(config["GRU_LAYER_SIZES"]):
                if layer_index == 0: input_size = self.n_nodes
                else: input_size = config["GRU_LAYER_SIZES"][layer_index - 1]

                lstm = torch.nn.GRU(input_size=input_size, hidden_size=layer_size, num_layers=1)
                self.grus.append(lstm)

            # fully-connected neural network
            self.linear = torch.nn.Linear(config["GRU_LAYER_SIZES"][-1], self.n_nodes*self.n_pred)
        torch.nn.init.xavier_uniform_(self.linear.weight)

    def forward(self, data, device):
        """
        Forward pass of the ST-GNN model
        :param data Data to make a pass on
        :param device Device to operate on
        """
        x, edge_index = data.x, data.edge_index
        if device == 'cpu':
            x = torch.FloatTensor(x)
        else:
            x = torch.cuda.FloatTensor(x)

        if config['USE_GAT']:
            x = self.gat(x, edge_index)
        else:
            x = self.gcn(x, edge_index)
        x = F.dropout(x, self.dropout, training=self.training)


        batch_size = data.num_graphs
        n_node = int(data.num_nodes/batch_size)
        x = torch.reshape(x, (batch_size, n_node, data.num_features))
        x = torch.movedim(x, 2, 0)
        if config["USE_LSTM"]:
            for lstm in self.lstms:
                x, _ = lstm(x)
        else:
            for gru in self.grus:
                x, _ = gru(x)


        x = torch.squeeze(x[-1, :, :])
        x = self.linear(x)

        s = x.shape
        x = torch.reshape(x, (s[0], self.n_nodes, self.n_pred))
        x = torch.reshape(x, (s[0]*self.n_nodes, self.n_pred))
        return x

In [22]:
@torch.no_grad()
def eval(model, device, dataloader, type=''):
    model.eval()
    model.to(device)

    mae = 0
    rmse = 0
    mape = 0
    n = 0

    # Evaluate model on all data
    for i, batch in enumerate(dataloader):
        batch = batch.to(device)
        if batch.x.shape[0] == 1:
            pass
        else:
            with torch.no_grad():
                pred = model(batch, device)
            truth = batch.y.view(pred.shape)
            if i == 0:
                y_pred = torch.zeros(len(dataloader), pred.shape[0], pred.shape[1])
                y_truth = torch.zeros(len(dataloader), pred.shape[0], pred.shape[1])
            #truth = un_z_score(truth, dataloader.dataset.mean, dataloader.dataset.std_dev)
            #pred = un_z_score(pred, dataloader.dataset.mean, dataloader.dataset.std_dev)
            y_pred[i, :pred.shape[0], :] = pred
            y_truth[i, :pred.shape[0], :] = truth
            rmse += RMSE(truth, pred)
            mae += MAE(truth, pred)
            mape += MAPE(truth, pred)
            n += 1
    rmse, mae, mape = rmse / n, mae / n, mape / n

    logging.info(f'{type}, MAE: {round(int(mae),2)}, RMSE: {round(int(rmse),2)}, MAPE: {round(int(mape),2)}')

    #get the average score for each metric in each batch
    return rmse, mae, mape, y_pred, y_truth


def epoch_train(model, device, dataloader, optimizer, loss_fn, epoch):    
    scheduler = StepLR(optimizer, step_size=30, gamma=0.1)

    model.train()
    for _, batch in enumerate(tqdm(dataloader, desc=f"Epoch {epoch}")):
        batch = batch.to(device)
        optimizer.zero_grad()
        y_pred = torch.squeeze(model(batch, device))
        loss = loss_fn()(y_pred.float(), torch.squeeze(batch.y).float())
        if config["use_tensorboard"]:
            writer.add_scalar("Loss/train", loss, epoch)
        loss.backward()
        optimizer.step()

        # multiplicative decay
        scheduler.step()

    return loss

## Train the model

In [23]:
def model_train(train_dataloader, val_dataloader, config, device, save_test_results = False, test_dataloader = None):
    """
    Train the ST-GAT model. Evaluate on validation dataset as you go.
    :param train_dataloader Data loader of training dataset
    :param val_dataloader Dataloader of val dataset
    :param config configuration to use
    :param device Device to evaluate on
    """

    # Make the model. Each datapoint in the graph is 228x12: N x F (N = # nodes, F = time window)
    in_channels=config['F_IN']
    if config["USE_HOLIDAY_FEATURES"]: in_channels += 1
    
    model = ST_GNN(in_channels=in_channels, out_channels=config['F_OUT'], n_nodes=config['N_NODE'], heads=config['ATTENTION_HEADS'], dropout=config['DROPOUT'])
    logging.info("Model initialized")
    optimizer = optim.Adam(model.parameters(), lr=config['INITIAL_LR'], weight_decay=config['WEIGHT_DECAY'])
    loss_fn = torch.nn.MSELoss
    model.to(device)

    # For every epoch, train the model on training dataset. Evaluate model on validation dataset
    for epoch in range(config['EPOCHS']):
        loss = epoch_train(model, device, train_dataloader, optimizer, loss_fn, epoch)
        logging.info(f"Loss: {loss:.3f}")
        if epoch % 5 == 0:
            train_mae, train_rmse, train_mape, _, _ = eval(model, device, train_dataloader, 'Train')
            val_mae, val_rmse, val_mape, _, _ = eval(model, device, val_dataloader, 'Valid')
            if config["use_tensorboard"]:
                writer.add_scalar(f"MAE/train", train_mae, epoch)
                writer.add_scalar(f"RMSE/train", train_rmse, epoch)
                writer.add_scalar(f"MAPE/train", train_mape, epoch)
                writer.add_scalar(f"MAE/val", val_mae, epoch)
                writer.add_scalar(f"RMSE/val", val_rmse, epoch)
                writer.add_scalar(f"MAPE/val", val_mape, epoch)
    logging.info("All epochs done, finished training")

    if config["use_tensorboard"]:
        writer.flush()
    # Save the model
    timestr = time.strftime("%m-%d-%H%M%S")
    os.mkdir(os.path.join(config["CHECKPOINT_DIR"], f"run_{timestr}"))
    torch.save({
            "epoch": epoch,
            "model_state_dict": model.state_dict(),
            "optimizer_state_dict": optimizer.state_dict(),
            "loss": loss,
            }, os.path.join(config["CHECKPOINT_DIR"], f"run_{timestr}/model.pt"))
    
    with open(os.path.join(config["CHECKPOINT_DIR"], f"run_{timestr}/config.json"), "w") as fp:
        json.dump(config, fp)

    if save_test_results:
        test_mae, test_rmse, test_mape, y_pred, y_truth = eval(model, device, test_dataloader, 'Test')
        logging.info(f"Test RMSE:{test_rmse}")
        results = {'MAE': test_mae.item(),
                    'RMSE': test_rmse.item(),
                    'MAPE': test_mape.item()}
        with open(os.path.join(config["CHECKPOINT_DIR"], f"run_{timestr}/results.json"), "w") as fp:
            json.dump(results, fp)
    return model

### Start training

In [24]:
import importlib
importlib.reload(data_preparation)

# Make runs directory if it does not exist
if not os.path.exists(config["CHECKPOINT_DIR"]):
    os.mkdir(config["CHECKPOINT_DIR"])

dataset = data_preparation.prepare_pyg_dataset(config)
train_g, val_g, test_g = data_preparation.split_dataset(dataset, config)

# Split the Data instances in 
train_dataloader = DataLoader(train_g, batch_size=config['BATCH_SIZE'], shuffle=False)
val_dataloader = DataLoader(val_g, batch_size=config['BATCH_SIZE'], shuffle=False)
test_dataloader = DataLoader(test_g, batch_size=config['BATCH_SIZE'], shuffle=False)

# Get gpu if you can
device = 'cuda' if torch.cuda.is_available() else 'cpu'
print(f"Using {device}")

# Configure and train model
model = model_train(train_dataloader, val_dataloader, config, device, True, test_dataloader)

INFO:root:Preparing data...
INFO:root:Holiday features successfully prepared
INFO:root:Historical counter data successfully read
INFO:root:Edge index constructed
INFO:root:Final dataset constructed
INFO:root:Dataset splitted to train,val,test
INFO:root:Model initialized


Size of train data: 432
Size of validation data: 72
Size of test data: 216
Using cpu


Epoch 0: 100%|██████████| 7/7 [00:13<00:00,  1.91s/it]
INFO:root:Loss: 637193.250
INFO:root:Train, MAE: 581, RMSE: 824, MAPE: 5678434780971008
INFO:root:Valid, MAE: 584, RMSE: 814, MAPE: 8540944047538176
Epoch 1: 100%|██████████| 7/7 [00:13<00:00,  1.86s/it]
INFO:root:Loss: 627443.875
Epoch 2: 100%|██████████| 7/7 [00:13<00:00,  1.88s/it]
INFO:root:Loss: 618400.938
Epoch 3: 100%|██████████| 7/7 [00:12<00:00,  1.78s/it]
INFO:root:Loss: 610485.000
Epoch 4: 100%|██████████| 7/7 [00:12<00:00,  1.74s/it]
INFO:root:Loss: 599341.875
Epoch 5: 100%|██████████| 7/7 [00:12<00:00,  1.74s/it]
INFO:root:Loss: 590876.250
INFO:root:Train, MAE: 529, RMSE: 783, MAPE: 31611975058325504
INFO:root:Valid, MAE: 529, RMSE: 771, MAPE: 48298805619064832
Epoch 6: 100%|██████████| 7/7 [00:12<00:00,  1.73s/it]
INFO:root:Loss: 583875.125
Epoch 7: 100%|██████████| 7/7 [00:12<00:00,  1.73s/it]
INFO:root:Loss: 576506.750
Epoch 8: 100%|██████████| 7/7 [00:12<00:00,  1.74s/it]
INFO:root:Loss: 565794.250
Epoch 9: 100%|██

KeyboardInterrupt: 