# **Basic GNN Model Hyperparameter Optimization**

This notebook search for the best hyperparameters by implementing a Cross-Validation Setup. The Hyperparameters that will be searched are the Learning Rate, the Weight Decay and the Embedding Size of the Graph Neural Network.
An Early Stopping Mechanism will help terminate trials that are not improving.

The data from the UPFD Framework has been already split in Training, Validation and Test Set and is downloadable by using simple commands inside the Pytorch Geometric environment.

## Download and import Libraries for the Environment 


---



Download the right libraries depending if we are using a CPU or a GPU.

In [1]:

import torch
import numpy as np
import matplotlib.pyplot as plt
import torch.nn.functional as F


#if torch.cuda.is_available():
  #!pip install torch-scatter torch-sparse torch-cluster torch-spline-conv torch-geometric -f https://data.pyg.org/whl/torch-1.10.0+cu102.html
#else:
  #!pip install torch-scatter torch-sparse torch-cluster torch-spline-conv torch-geometric -f https://data.pyg.org/whl/torch-1.10.0+cpu.html

#!pip install optuna
import optuna


from torch.nn import Linear, LogSoftmax
from torch_geometric.loader import DataLoader
from torch_geometric.nn import GCNConv, TopKPooling, global_mean_pool, global_max_pool

from tool_box.upfd_dataset import ext_UPFD
from tool_box.GNN_train import train_step, val_step

# Set GPU as Device if available
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') 
print(f"Training Device: {device}")


Training Device: cpu


### Import data

In [2]:
path = './data/'

In [3]:
train_dataset = ext_UPFD(name = 'original', root = path, n_features=[], g_features=[], split = 'train')
val_dataset = ext_UPFD(name = 'original', root = path, n_features=[], g_features=[], split='val')
test_dataset = ext_UPFD(name = 'original', root = path, n_features=[], g_features=[], split='test')

# Training Functions

In [4]:
class GNN(torch.nn.Module):

    def __init__(self, num_n_feature, num_g_feature, emb_size):
        super(GNN, self).__init__()
        torch.manual_seed(42)
        self.emb_size = emb_size

        self.in_layer = GCNConv(num_n_feature, emb_size)
        self.conv1 = GCNConv(emb_size, emb_size)
       
       # check if we have graph features to concatenate or not
        i = 2
        if num_g_feature:
             self.lin_g = Linear(num_g_feature, emb_size)
             i = 3

        self.out_layer = Linear(i * emb_size, 2)
        self.act = LogSoftmax(dim=-1)
    
    def forward(self, x, edges_idx, batch_idx, g_features):
        #pdb.set_trace()
        x = self.in_layer(x, edges_idx)

        x = self.conv1(x, edges_idx)

        flatten = torch.cat([global_mean_pool(x, batch_idx),
                             global_max_pool(x, batch_idx)], axis=1)

        if g_features.size()[-1]:
            g_ft = self.lin_g(g_features)
            flatten = torch.cat([flatten, g_ft], axis=1)

        out = self.act(self.out_layer(flatten))

        return out

In [5]:
def train(trial,model,optimizer,loss_f, train_loader, val_loader, epochs = 60):
    
  acc_losses_t, acc_losses_v = [], []
  acc_v = 999
  for epoch in range(epochs):
    loss_t, acc_t = train_step(model, train_loader, optimizer, loss_f)
    loss_v, acc_v = val_step(model, val_loader, loss_f)
        
    acc_losses_t.append([loss_t, acc_t])
    acc_losses_v.append([loss_v, acc_v])

    trial.report(acc_v, epoch)

    # Early Stopping
    if trial.should_prune():
      raise optuna.exceptions.TrialPruned()

  return acc_v

# **Training**

Training Function that will be executed every time a trial is done. In this function the hyperparameters are chosen based on suggestions from the hyperparameter optimizer chosen with Optuna.


In [6]:
epochs_max = 60

def objective(trial):

  embedding_space_dim = trial.suggest_categorical("embedding_space_dim",[40,60,80,100,120,140,160])
  learning_rate = trial.suggest_categorical("learning_rate",[0.001, 0.005, 0.01])
  weight_decay = trial.suggest_categorical("weight_decay",[0.001, 0.005, 0.01])

  # If the trial has already been explored, prune it. It may happen because 
  # the hyperparameter optimizer searchs near the most promising values.
  for t in trial.study.trials:
    if t.state != optuna.trial.TrialState.COMPLETE:
      continue
    if t.params == trial.params:
      raise optuna.exceptions.TrialPruned('Duplicate Parameter Set')

  train_loader = DataLoader(train_dataset, batch_size=128, shuffle=True)
  val_loader = DataLoader(train_dataset, batch_size=128, shuffle=True)

  num_node_features = train_dataset.num_features
  num_graph_features = train_dataset.g_features
  model = GNN(num_n_feature = num_node_features, num_g_feature = num_graph_features, emb_size = embedding_space_dim).to(device)
  optimizer = torch.optim.Adam(model.parameters(), lr = learning_rate, weight_decay = weight_decay)
  loss_f = torch.nn.NLLLoss()

  return train(trial,model,optimizer,loss_f, train_loader, val_loader, epochs_max)


In [7]:
study = optuna.create_study(direction = "maximize")
study.optimize(objective,n_trials = 40)

[32m[I 2021-12-30 19:53:55,839][0m A new study created in memory with name: no-name-0bea7114-f75f-48ae-93fc-06303b3db4a8[0m
[32m[I 2021-12-30 19:54:37,792][0m Trial 0 finished with value: 0.8837316176470588 and parameters: {'embedding_space_dim': 80, 'learning_rate': 0.001, 'weight_decay': 0.001}. Best is trial 0 with value: 0.8837316176470588.[0m
[32m[I 2021-12-30 19:55:12,721][0m Trial 1 finished with value: 0.8720894607843137 and parameters: {'embedding_space_dim': 60, 'learning_rate': 0.001, 'weight_decay': 0.001}. Best is trial 0 with value: 0.8837316176470588.[0m
[32m[I 2021-12-30 19:56:05,378][0m Trial 2 finished with value: 0.8731107026143792 and parameters: {'embedding_space_dim': 100, 'learning_rate': 0.001, 'weight_decay': 0.005}. Best is trial 0 with value: 0.8837316176470588.[0m
[32m[I 2021-12-30 19:57:06,075][0m Trial 3 finished with value: 0.8420649509803921 and parameters: {'embedding_space_dim': 120, 'learning_rate': 0.01, 'weight_decay': 0.01}. Best is t

After the training, we pick the model that performed best on the validation set and test it on the Test Set. Obviously no hyperparameters have been chosen by looking on the Test Set.

In [17]:
pruned_trials = study.get_trials(deepcopy = False, states = [optuna.trial.TrialState.PRUNED])
complete_trials = study.get_trials(deepcopy = False, states = [optuna.trial.TrialState.COMPLETE])

# Extract the best hyperparameters 
best_parameters = study.best_params
embedding_space_dim = best_parameters["embedding_space_dim"]
learning_rate = best_parameters["learning_rate"]
weight_decay = best_parameters["weight_decay"]

# Initialize Dataloaders
train_loader = DataLoader(train_dataset, batch_size=128, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=128, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=128, shuffle=False)

# Initialize Model with best Hyperparameters
num_node_features = train_dataset.num_features
num_graph_features = train_dataset.g_features
best_model = GNN(num_n_feature = num_node_features, num_g_feature = num_graph_features, emb_size = embedding_space_dim).to(device)
best_optimizer = torch.optim.Adam(best_model.parameters(), lr = learning_rate, weight_decay = weight_decay)
loss_f = torch.nn.NLLLoss()



# Train the Final Model
acc_losses_train, acc_losses_val, acc_losses_test = [], [], []
epochs = 60

for epoch in range(epochs):
  loss_train, acc_train = train_step(best_model, train_loader, best_optimizer, loss_f)
  loss_val, acc_val = val_step(best_model, val_loader, loss_f)
  loss_test, acc_test = val_step(best_model, test_loader, loss_f)
        
  acc_losses_train.append([loss_train, acc_train])
  acc_losses_val.append([loss_val, acc_val])
  acc_losses_test.append([loss_test, acc_test])


print(f"BEST HYPERPARAMETERS:")
print(f"Embedding Space Dimension = {embedding_space_dim}")
print(f"Learning Rate = {learning_rate}")
print(f"Weight Decay = {weight_decay}")

print(f"Best model final Test loss: {acc_losses_test[-1][1]}")
print(f"Best model final Test accuracy: {acc_losses_test[-1][0]}")
    


BEST HYPERPARAMETERS:
Embedding Space Dimension = 100
Learning Rate = 0.01
Weight Decay = 0.001
Best model final Test loss: 0.8830820540935672
Best model final Test accuracy: 0.3064073716600736


# Plots