## Load libraries

In [1]:
import pandas as pd
import numpy as np
import os
from tqdm import tqdm
import glob
import preprocessing as preprocessing
import torch
device = 'cuda' if torch.cuda.is_available() else 'cpu'
print(f"Using {device}")
print(torch.__version__)
from torch_geometric.data import Data

from torch.utils.tensorboard import SummaryWriter

# Make a tensorboard writer
writer = SummaryWriter()

#Lets start at src location
if os.path.exists("./src"):
    os.chdir("./src")

config = {
    "counter_files_path"                : "../data/counters_temporal_data_2023-03-03T09-24-06/",
    "counters_nontemporal_aggregated"   : "../data/counters_non_temporal_aggregated_data.csv",
    "N_GRAPHS"                          : 30*24,
    "F_IN"                              : 7*24,
    "F_OUT"                             : 7*24,
    "target_col"                        : "Sum"
}

  from .autonotebook import tqdm as notebook_tqdm


Using cuda
1.13.1+cu117


## Data preparation

In [2]:
import importlib
importlib.reload(preprocessing)

counters_df = pd.DataFrame()
for fname in glob.glob(config["counter_files_path"] + "*.csv"):
    counter_data = pd.read_csv(fname)
    counter_data = preprocessing.fill_gaps(counter_data)
    counter_data['Date'] = pd.to_datetime(counter_data['Date']) 
    counter_data.index = counter_data['Date']
    counter_data = counter_data.sort_index(ascending=False)
    # We don't need to work with all past data.
    # Select enough data points to extract N_GRAPHS with F_IN and F_OUT timepoints
    
    counter_data = counter_data.iloc[0:(config["F_IN"]+config["F_OUT"]+config["N_GRAPHS"]-1), :]
    counter_id = fname.split('/')[-1].split('.csv')[0]

    if counters_df.empty:
        counters_df = pd.DataFrame(counter_data[config['target_col']])
        counters_df.columns = [counter_id]
    else:
        columns = list(counters_df.columns) + [counter_id]
        counters_df = pd.concat([counters_df, counter_data[config['target_col']]], axis=1)
        counters_df.columns = columns 


#Prepare edge_index matrix
counters_aggregated = pd.read_csv(config['counters_nontemporal_aggregated'])
edge_index, n_node, num_edges = preprocessing.construct_edge_index(counters_aggregated)

In [14]:
#Prepare matrices X [N_GRAPHS, N_NODES, F_IN] and Y [N_GRAPHS, N_NODES, F_OUT] 
graphs = []
for i in range(1, config["N_GRAPHS"]+1):
    
    g = Data()
    g.__num_nodes__ = n_node
    g.edge_index = edge_index

    train_test_chunk = counters_df.iloc[(-i-(config['F_IN']+config['F_OUT'])):(-i),:]
    g.x = torch.FloatTensor(train_test_chunk.iloc[:config['F_IN'],:].to_numpy().T)
    g.y = torch.FloatTensor(train_test_chunk.iloc[config['F_IN']:,:].to_numpy().T)
    graphs += [g]

## Train-Test Split

In [15]:
splits = (0.6, 0.1, 0.3)
split_train, split_val, _ = splits
index_train = int(np.floor(config["N_GRAPHS"]*split_train))
index_val = int(index_train + np.floor(config["N_GRAPHS"]*split_val))
train_g = graphs[:index_train]
val_g = graphs[index_train:index_val]
test_g = graphs[index_val:]

print("Size of train data:", len(train_g))
print("Size of validation data:", len(val_g))
print("Size of test data:", len(test_g))

Size of train data: 432
Size of validation data: 72
Size of test data: 216


## Model

In [16]:
import torch
import torch.nn.functional as F
from torch_geometric.nn import GATConv
class ST_GAT(torch.nn.Module):
    """
    Spatio-Temporal Graph Attention Network as presented in https://ieeexplore.ieee.org/document/8903252
    """
    def __init__(self, in_channels, out_channels, n_nodes, heads=8, dropout=0.0):
        """
        Initialize the ST-GAT model
        :param in_channels Number of input channels
        :param out_channels Number of output channels
        :param n_nodes Number of nodes in the graph
        :param heads Number of attention heads to use in graph
        :param dropout Dropout probability on output of Graph Attention Network
        """
        super(ST_GAT, self).__init__()
        self.n_pred = out_channels
        self.heads = heads
        self.dropout = dropout
        self.n_nodes = n_nodes

        self.n_preds = 9
        lstm1_hidden_size = 32
        lstm2_hidden_size = 128

        # single graph attentional layer with 8 attention heads
        self.gat = GATConv(in_channels=in_channels, out_channels=in_channels,
            heads=heads, dropout=0, concat=False)

        # add two LSTM layers
        self.lstm1 = torch.nn.LSTM(input_size=self.n_nodes, hidden_size=lstm1_hidden_size, num_layers=1)
        for name, param in self.lstm1.named_parameters():
            if 'bias' in name:
                torch.nn.init.constant_(param, 0.0)
            elif 'weight' in name:
                torch.nn.init.xavier_uniform_(param)
        self.lstm2 = torch.nn.LSTM(input_size=lstm1_hidden_size, hidden_size=lstm2_hidden_size, num_layers=1)
        for name, param in self.lstm1.named_parameters():
            if 'bias' in name:
                torch.nn.init.constant_(param, 0.0)
            elif 'weight' in name:
                torch.nn.init.xavier_uniform_(param)

        # fully-connected neural network
        self.linear = torch.nn.Linear(lstm2_hidden_size, self.n_nodes*self.n_pred)
        torch.nn.init.xavier_uniform_(self.linear.weight)

    def forward(self, data, device):
        """
        Forward pass of the ST-GAT model
        :param data Data to make a pass on
        :param device Device to operate on
        """
        x, edge_index = data.x, data.edge_index
        # apply dropout
        if device == 'cpu':
            x = torch.FloatTensor(x)
        else:
            x = torch.cuda.FloatTensor(x)

        x = self.gat(x, edge_index)
        x = F.dropout(x, self.dropout, training=self.training)

        # RNN: 2 LSTM
        batch_size = data.num_graphs
        n_node = int(data.num_nodes/batch_size)
        x = torch.reshape(x, (batch_size, n_node, data.num_features))
        x = torch.movedim(x, 2, 0)
        x, _ = self.lstm1(x)
        x, _ = self.lstm2(x)

        x = torch.squeeze(x[-1, :, :])
        x = self.linear(x)

        s = x.shape
        x = torch.reshape(x, (s[0], self.n_nodes, self.n_pred))
        x = torch.reshape(x, (s[0]*self.n_nodes, self.n_pred))
        return x


## Train the model

In [17]:
import torch
import torch.optim as optim
from tqdm import tqdm
import time
import os
import matplotlib.pyplot as plt

from torch.utils.tensorboard import SummaryWriter

# Make a tensorboard writer
writer = SummaryWriter()

def model_train(train_dataloader, val_dataloader, config, device):
    """
    Train the ST-GAT model. Evaluate on validation dataset as you go.
    :param train_dataloader Data loader of training dataset
    :param val_dataloader Dataloader of val dataset
    :param config configuration to use
    :param device Device to evaluate on
    """

    # Make the model. Each datapoint in the graph is 228x12: N x F (N = # nodes, F = time window)
    model = ST_GAT(in_channels=config['F_IN'], out_channels=config['F_OUT'], n_nodes=config['N_NODE'], dropout=config['DROPOUT'])
    optimizer = optim.Adam(model.parameters(), lr=config['INITIAL_LR'], weight_decay=config['WEIGHT_DECAY'])
    loss_fn = torch.nn.MSELoss

    model.to(device)

    # For every epoch, train the model on training dataset. Evaluate model on validation dataset
    for epoch in range(config['EPOCHS']):
        loss = train(model, device, train_dataloader, optimizer, loss_fn, epoch)
        print(f"Loss: {loss:.3f}")
        if epoch % 5 == 0:
            train_mae, train_rmse, train_mape, _, _ = eval(model, device, train_dataloader, 'Train')
            val_mae, val_rmse, val_mape, _, _ = eval(model, device, val_dataloader, 'Valid')
            writer.add_scalar(f"MAE/train", train_mae, epoch)
            writer.add_scalar(f"RMSE/train", train_rmse, epoch)
            writer.add_scalar(f"MAPE/train", train_mape, epoch)
            writer.add_scalar(f"MAE/val", val_mae, epoch)
            writer.add_scalar(f"RMSE/val", val_rmse, epoch)
            writer.add_scalar(f"MAPE/val", val_mape, epoch)

    writer.flush()
    # Save the model
    timestr = time.strftime("%m-%d-%H%M%S")
    torch.save({
            "epoch": epoch,
            "model_state_dict": model.state_dict(),
            "optimizer_state_dict": optimizer.state_dict(),
            "loss": loss,
            }, os.path.join(config["CHECKPOINT_DIR"], f"model_{timestr}.pt"))

    return model

def model_test(model, test_dataloader, device, config):
    """
    Test the ST-GAT model
    :param test_dataloader Data loader of test dataset
    :param device Device to evaluate on
    """
    _, _, _, y_pred, y_truth = eval(model, device, test_dataloader, 'Test')


In [18]:
def z_score(x, mean, std):
    return (x - mean) / std
def un_z_score(x_normed, mean, std):
    return x_normed * std  + mean
def MAPE(v, v_):
    return torch.mean(torch.abs((v_ - v)) /(v + 1e-15) * 100)
def RMSE(v, v_):
    return torch.sqrt(torch.mean((v_ - v) ** 2))
def MAE(v, v_):
    return torch.mean(torch.abs(v_ - v))

In [28]:
@torch.no_grad()
def eval(model, device, dataloader, type=''):
    """
    Evaluation function to evaluate model on data
    :param model Model to evaluate
    :param device Device to evaluate on
    :param dataloader Data loader
    :param type Name of evaluation type, e.g. Train/Val/Test
    """
    model.eval()
    model.to(device)

    mae = 0
    rmse = 0
    mape = 0
    n = 0

    # Evaluate model on all data
    for i, batch in enumerate(dataloader):
        batch = batch.to(device)
        if batch.x.shape[0] == 1:
            pass
        else:
            with torch.no_grad():
                pred = model(batch, device)
            truth = batch.y.view(pred.shape)
            if i == 0:
                y_pred = torch.zeros(len(dataloader), pred.shape[0], pred.shape[1])
                y_truth = torch.zeros(len(dataloader), pred.shape[0], pred.shape[1])
            #truth = un_z_score(truth, dataloader.dataset.mean, dataloader.dataset.std_dev)
            #pred = un_z_score(pred, dataloader.dataset.mean, dataloader.dataset.std_dev)
            y_pred[i, :pred.shape[0], :] = pred
            y_truth[i, :pred.shape[0], :] = truth
            rmse += RMSE(truth, pred)
            mae += MAE(truth, pred)
            mape += MAPE(truth, pred)
            n += 1
    rmse, mae, mape = rmse / n, mae / n, mape / n

    print(f'{type}, MAE: {mae}, RMSE: {rmse}, MAPE: {mape}')

    #get the average score for each metric in each batch
    return rmse, mae, mape, y_pred, y_truth

def train(model, device, dataloader, optimizer, loss_fn, epoch):
    """
    Evaluation function to evaluate model on data
    :param model Model to evaluate
    :param device Device to evaluate on
    :param dataloader Data loader
    :param optimizer Optimizer to use
    :param loss_fn Loss function
    :param epoch Current epoch
    """
    model.train()
    for _, batch in enumerate(tqdm(dataloader, desc=f"Epoch {epoch}")):
        batch = batch.to(device)
        optimizer.zero_grad()
        y_pred = torch.squeeze(model(batch, device))
        loss = loss_fn()(y_pred.float(), torch.squeeze(batch.y).float())
        writer.add_scalar("Loss/train", loss, epoch)
        loss.backward()
        optimizer.step()

    return loss

In [29]:
from torch_geometric.loader import DataLoader

def distance_to_weight(W, sigma2=0.1, epsilon=0.5, gat_version=False):
    """"
    Given distances between all nodes, convert into a weight matrix
    :param W distances
    :param sigma2 User configurable parameter to adjust sparsity of matrix
    :param epsilon User configurable parameter to adjust sparsity of matrix
    :param gat_version If true, use 0/1 weights with self loops. Otherwise, use float
    """
    n = W.shape[0]
    W = W / 10000.
    W2, W_mask = W * W, np.ones([n, n]) - np.identity(n)
    # refer to Eq.10
    W = np.exp(-W2 / sigma2) * (np.exp(-W2 / sigma2) >= epsilon) * W_mask

    # If using the gat version of this, round to 0/1 and include self loops
    if gat_version:
        W[W>0] = 1
        W += np.identity(n)

    return W

# Constant config to use throughout
config = {
    'BATCH_SIZE': 50,
    'EPOCHS': 60,
    'WEIGHT_DECAY': 5e-5,
    'INITIAL_LR': 3e-4,
    'CHECKPOINT_DIR': '../runs',
    'DROPOUT': 0.2,
    # If false, use GCN paper weight matrix, if true, use GAT paper weight matrix
    'USE_GAT_WEIGHTS': False,
    "counter_files_path"                : "../data/counters_temporal_data_2023-03-03T09-24-06/",
    "counters_nontemporal_aggregated"   : "../data/counters_non_temporal_aggregated_data.csv",
    "N_GRAPHS"                          : 30*24,
    "F_IN"                              : 7*24,
    "F_OUT"                             : 7*24,
    "target_col"                        : "Sum"
}

train_dataloader = DataLoader(train_g, batch_size=config['BATCH_SIZE'], shuffle=False)
val_dataloader = DataLoader(val_g, batch_size=config['BATCH_SIZE'], shuffle=False)
test_dataloader = DataLoader(test_g, batch_size=config['BATCH_SIZE'], shuffle=False)

# Get gpu if you can
device = 'cpu'#cuda' if torch.cuda.is_available() else 'cpu'
print(f"Using {device}")

# Configure and train model
config['N_NODE'] = n_node
model = model_train(train_dataloader, val_dataloader, config, device)

Using cpu


Epoch 0: 100%|██████████| 9/9 [00:06<00:00,  1.32it/s]


Loss: 641392.750
Train, MAE: 590.2749633789062, RMSE: 830.9781494140625, MAPE: 57905986404352.0
Valid, MAE: 590.8264770507812, RMSE: 818.2146606445312, MAPE: 90645821652992.0


Epoch 1: 100%|██████████| 9/9 [00:05<00:00,  1.56it/s]


Loss: 641331.562


Epoch 2: 100%|██████████| 9/9 [00:05<00:00,  1.55it/s]


Loss: 641066.062


Epoch 3: 100%|██████████| 9/9 [00:05<00:00,  1.59it/s]


Loss: 640308.375


Epoch 4: 100%|██████████| 9/9 [00:05<00:00,  1.53it/s]


Loss: 639608.812


Epoch 5: 100%|██████████| 9/9 [00:06<00:00,  1.47it/s]


Loss: 638916.875
Train, MAE: 588.205322265625, RMSE: 829.4130249023438, MAPE: 1772169809887232.0
Valid, MAE: 588.7334594726562, RMSE: 816.6260986328125, MAPE: 2537759809994752.0


Epoch 6: 100%|██████████| 9/9 [00:06<00:00,  1.36it/s]


Loss: 638251.562


Epoch 7: 100%|██████████| 9/9 [00:06<00:00,  1.45it/s]


Loss: 637631.125


Epoch 8: 100%|██████████| 9/9 [00:05<00:00,  1.52it/s]


Loss: 637050.625


Epoch 9: 100%|██████████| 9/9 [00:06<00:00,  1.39it/s]


Loss: 636499.875


Epoch 10: 100%|██████████| 9/9 [00:06<00:00,  1.30it/s]


Loss: 635972.812
Train, MAE: 585.8417358398438, RMSE: 827.6145629882812, MAPE: 2928136903720960.0
Valid, MAE: 586.3336791992188, RMSE: 814.801025390625, MAPE: 4183457596964864.0


Epoch 11: 100%|██████████| 9/9 [00:06<00:00,  1.37it/s]


Loss: 635463.000


Epoch 12: 100%|██████████| 9/9 [00:06<00:00,  1.39it/s]


Loss: 634966.812


Epoch 13: 100%|██████████| 9/9 [00:06<00:00,  1.36it/s]


Loss: 634481.562


Epoch 14: 100%|██████████| 9/9 [00:07<00:00,  1.24it/s]


Loss: 634005.562


Epoch 15: 100%|██████████| 9/9 [00:08<00:00,  1.06it/s]


Loss: 633537.188
Train, MAE: 583.8745727539062, RMSE: 826.115966796875, MAPE: 3967258070089728.0
Valid, MAE: 584.3356323242188, RMSE: 813.27978515625, MAPE: 5663894706061312.0


Epoch 16: 100%|██████████| 9/9 [00:07<00:00,  1.18it/s]


Loss: 633075.875


Epoch 17: 100%|██████████| 9/9 [00:07<00:00,  1.23it/s]


Loss: 632620.312


Epoch 18: 100%|██████████| 9/9 [00:06<00:00,  1.38it/s]


Loss: 632170.000


Epoch 19: 100%|██████████| 9/9 [00:06<00:00,  1.46it/s]


Loss: 631724.500


Epoch 20: 100%|██████████| 9/9 [00:07<00:00,  1.21it/s]


Loss: 631283.000
Train, MAE: 582.0499267578125, RMSE: 824.7247924804688, MAPE: 4912820353236992.0
Valid, MAE: 582.4815673828125, RMSE: 811.8677978515625, MAPE: 7013072782753792.0


Epoch 21: 100%|██████████| 9/9 [00:07<00:00,  1.25it/s]


Loss: 630845.125


Epoch 22: 100%|██████████| 9/9 [00:06<00:00,  1.36it/s]


Loss: 630409.938


Epoch 23: 100%|██████████| 9/9 [00:06<00:00,  1.37it/s]


Loss: 629977.938


Epoch 24: 100%|██████████| 9/9 [00:07<00:00,  1.24it/s]


Loss: 629548.938


Epoch 25: 100%|██████████| 9/9 [00:07<00:00,  1.26it/s]


Loss: 629122.250
Train, MAE: 580.298095703125, RMSE: 823.38818359375, MAPE: 5810758965264384.0
Valid, MAE: 580.700927734375, RMSE: 810.510986328125, MAPE: 8294221798703104.0


Epoch 26: 100%|██████████| 9/9 [00:06<00:00,  1.34it/s]


Loss: 628697.500


Epoch 27: 100%|██████████| 9/9 [00:07<00:00,  1.25it/s]


Loss: 628275.438


Epoch 28: 100%|██████████| 9/9 [00:07<00:00,  1.24it/s]


Loss: 627854.875


Epoch 29: 100%|██████████| 9/9 [00:06<00:00,  1.29it/s]


Loss: 627436.438


Epoch 30: 100%|██████████| 9/9 [00:06<00:00,  1.44it/s]


Loss: 627019.750
Train, MAE: 578.5923461914062, RMSE: 822.0852661132812, MAPE: 6676434856706048.0
Valid, MAE: 578.9664306640625, RMSE: 809.1885986328125, MAPE: 9529396410974208.0


Epoch 31: 100%|██████████| 9/9 [00:06<00:00,  1.30it/s]


Loss: 626604.812


Epoch 32: 100%|██████████| 9/9 [00:07<00:00,  1.18it/s]


Loss: 626191.125


Epoch 33: 100%|██████████| 9/9 [00:07<00:00,  1.18it/s]


Loss: 625779.125


Epoch 34: 100%|██████████| 9/9 [00:06<00:00,  1.40it/s]


Loss: 625368.625


Epoch 35: 100%|██████████| 9/9 [00:07<00:00,  1.27it/s]


Loss: 624959.188
