In [1]:
import sys
import platform
import time

import torch
import torch_geometric
from torch_geometric.loader import DataLoader
from torchinfo import summary
import numpy as np

from functions import create_loaders, scale_target, train_loop, test_loop
from processed_datasets import FG_dataset, BM_dataset
from nets import SantyxNet
from post_training import create_model_report

## Hyperparameters of the learning process

Hyperparameters are all the settings that are fixed before the model training starts (i.e., everything other than the learnable model parameters). They can be grouped into model-related and process-related hyperparameters: model-related ones include, for example, the activation function and the depth of the hidden layers, while process-related ones include the batch size, the number of epochs and the loss function used for the model optimization.

N.B. The optimizer and the learning rate scheduler are potentially additional hyperparameters. In this project, for the sake of simplicity, these algorithms are fixed.

In [6]:
# Single dictionary collecting every hyperparameter read by the cells below.
HYPERPARAMS = {
    # ---- Learning process ----
    "test set": True,                              # True=Generate train-val-test sets. False=Generate train-val (train with whole FG-dataset)
    "splits": 10,                                  # Splits among which the dataset is partitioned to create train-val-test sets
    "target scaling": "std",                       # Target scaling approach (std=standardization, norm=normalization, etc.)
    "batch size": 32,
    "epochs": 100,
    "loss function": torch.nn.functional.l1_loss,
    "lr0": 1e-3,                                   # Initial learning rate (lr)
    "patience": 5,                                 # Epochs with no improvement after which lr is reduced
    "factor": 0.7,                                 # Decreasing factor applied by the lr scheduler
    "minlr": 1e-7,                                 # Passed to the lr scheduler as min_lr
    "betas": (0.9, 0.999),                         # Adam optimizer: betas
    "eps": 1e-8,                                   # Adam optimizer: eps
    "weight decay": 0,                             # Adam optimizer: weight decay
    "amsgrad": False,                              # Adam optimizer: amsgrad

    # ---- Model structure ----
    "dim": 128,                                    # Depth of the GNN layers
    "sigma": torch.nn.ReLU(),                      # Activation function of the GNN model
    "bias": True,                                  # Bias presence in GNN layers
    "conv normalize": False,                       # GraphSAGE
    "conv root weight": True,
    "pool ratio": 0.25,                            # Graph MultiSet Transformer
    "pool heads": 2,
    "pool seq": ["GMPool_G", "SelfAtt", "GMPool_I"],
    "pool layer norm": False,
}

## Data Splitting and target scaling

The FG-dataset is split among the train, validation and test sets.
The target scaling must be applied using parameters that are independent of the test set, since computing them from test data would lead to "data leakage".
Here, we apply the target scaling with the `scale_target` function, passing the optional parameter `mode="std"` to apply standardization. Alternatively, normalization can be applied by passing `mode="norm"`.

In [7]:
# Step 1: partition the FG-dataset into train/validation/test loaders.
split_options = {
    "batch_size": HYPERPARAMS["batch size"],
    "split": HYPERPARAMS["splits"],
    "test": HYPERPARAMS["test set"],
}
train_loader, val_loader, test_loader = create_loaders(FG_dataset, **split_options)

# Step 2: scale the targets. The returned mean/std are reused later in the
# notebook to express errors and predictions in eV.
scaling_options = {
    "mode": HYPERPARAMS["target scaling"],
    "test": HYPERPARAMS["test set"],
}
train_loader, val_loader, test_loader, mean, std = scale_target(
    train_loader, val_loader, test_loader, **scaling_options
)

# Separate loader over the BM-dataset, for testing extrapolation performance.
BM_dataloader = DataLoader(BM_dataset)

Data split (train/val): 90/10 %
Training data = 2271 Validation data = 242 (Total = 2513)
Target Scaling (Standardization) applied successfully
(Train+Val) mean: -73.77 eV
(Train+Val) standard deviation: 19.02 eV


### Device selection (GPU/CPU)

Having a CUDA-capable GPU is optimal for working with Deep Learning models, as its parallel architecture can be exploited to speed up the training.

In [8]:
# Use the GPU when CUDA is available, otherwise fall back to the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"
print("Current device: {}".format(device))
if device == "cuda":
    # GPU run: report the card and the CUDA/cuDNN versions PyTorch was built with.
    print("Device name: {}".format(torch.cuda.get_device_name(0)))
    print("CUDA Version: {}".format(torch.version.cuda))
    print("CuDNN Version: {}".format(torch.backends.cudnn.version()))
else:  # cpu
    print("Architecture: {}".format(platform.machine()))
    print("Platform: {}".format(platform.platform()))

# Software versions, recorded for reproducibility.
# platform.python_version() returns the complete 'major.minor.patchlevel' string.
# A fixed-width slice of sys.version cuts two-digit minor versions short
# (e.g. '3.10.12' -> '3.10.1') and leaves a trailing space for shorter ones.
print("Python version: {}".format(platform.python_version()))
print("Pytorch version: {}".format(torch.__version__))
print("Pytorch Geometric version: {}".format(torch_geometric.__version__))

Current device: cuda
Device name: NVIDIA GeForce MX450
CUDA Version: 11.3
CuDNN Version: 8200
Python version: 3.9.12 
Pytorch version: 1.10.1
Pytorch Geometric version: 2.0.3


### GNN model instantiation  

In [9]:
# Gather the constructor arguments from HYPERPARAMS, then build the network
# and move it to the selected device.
model_kwargs = {
    "dim": HYPERPARAMS["dim"],
    "sigma": HYPERPARAMS["sigma"],
    "bias": HYPERPARAMS["bias"],
    "conv_normalize": HYPERPARAMS["conv normalize"],
    "conv_root_weight": HYPERPARAMS["conv root weight"],
    "pool_ratio": HYPERPARAMS["pool ratio"],
    "pool_layer_norm": HYPERPARAMS["pool layer norm"],
    "pool_seq": HYPERPARAMS["pool seq"],
    "pool_heads": HYPERPARAMS["pool heads"],
}
model = SantyxNet(**model_kwargs).to(device)
#summary(model)  # Print model building blocks (not architecture!)

## GNN Training

### Optimizer

The optimizer used for the training is Adam, an algorithm for first-order gradient-based optimization of stochastic objective functions, based on adaptive estimates of lower-order moments.

In [10]:
# Adam settings are taken verbatim from HYPERPARAMS.
adam_settings = {
    "lr": HYPERPARAMS["lr0"],
    "betas": HYPERPARAMS["betas"],
    "eps": HYPERPARAMS["eps"],
    "weight_decay": HYPERPARAMS["weight decay"],
    "amsgrad": HYPERPARAMS["amsgrad"],
}
optimizer = torch.optim.Adam(model.parameters(), **adam_settings)

### Learning Rate (LR) Scheduler

The learning rate scheduler adjusts the learning rate during the training, which can provide faster convergence and higher accuracy. The scheduler used here is "Reduce On Loss Plateau Decay" (`ReduceLROnPlateau`).

In [11]:
# Scheduler in 'min' mode: the metric passed to step() is expected to decrease.
plateau_settings = {
    "mode": 'min',
    "factor": HYPERPARAMS["factor"],
    "patience": HYPERPARAMS["patience"],
    "min_lr": HYPERPARAMS["minlr"],
}
lr_scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, **plateau_settings)

### Run Training 

In [12]:
# NOTE(review): best_val_error is initialised here but never read in the visible cells.
best_val_error = None
# Per-epoch histories: loss value, then train / validation / test scores.
loss_list, train_list, val_list, test_list = [], [], [], []
t0 = time.time()
for epoch in range(1, HYPERPARAMS["epochs"]+1):
    # Read the lr before the scheduler step so the log shows the value used in this epoch.
    lr = lr_scheduler.optimizer.param_groups[0]['lr']
    # One optimisation pass over the training set (updates the model parameters).
    loss, train_MAE = train_loop(model, device, train_loader, optimizer, HYPERPARAMS["loss function"])
    # Validation error drives the lr scheduler.
    val_MAE = test_loop(model, val_loader, device, std)
    lr_scheduler.step(val_MAE)
    # The training score is multiplied by std before being logged and stored as eV.
    train_MAE_eV = train_MAE * std

    if not HYPERPARAMS["test set"]:
        print('Epoch {:03d}: LR={:.7f}  Train MAE: {:.6f} eV  Validation MAE: {:.6f} eV '
              .format(epoch, lr, train_MAE_eV, val_MAE))
    else:
        # NOTE(review): this is logged as "Test MAE" but is evaluated on BM_dataloader,
        # not on test_loader -- confirm which dataset is intended here.
        test_MAE = test_loop(model, BM_dataloader, device, std, mean, scaled_graph_label=False)
        print('Epoch {:03d}: LR={:.7f}  Train MAE: {:.4f} eV  Validation MAE: {:.4f} eV '
              'Test MAE: {:.4f} eV'.format(epoch, lr, train_MAE_eV, val_MAE, test_MAE))

    # Record this epoch in the histories.
    loss_list.append(loss)
    train_list.append(train_MAE_eV)
    val_list.append(val_MAE)
    if HYPERPARAMS["test set"]:
        test_list.append(test_MAE)
print("-----------------------------------------------------------------------------------------")
elapsed = time.time() - t0
print("device: {}    Training time: {:.2f} s".format(device, elapsed))


Epoch 001: LR=0.0010000  Train MAE: 25.223948 eV  Validation MAE: 5.685780 eV 
Epoch 002: LR=0.0010000  Train MAE: 4.870587 eV  Validation MAE: 3.906790 eV 
Epoch 003: LR=0.0010000  Train MAE: 5.937455 eV  Validation MAE: 2.404832 eV 
Epoch 004: LR=0.0010000  Train MAE: 4.104703 eV  Validation MAE: 2.389748 eV 


KeyboardInterrupt: 

In [15]:
# Evaluate the trained model on the BM-dataset loader. The call is left as the
# last expression so the notebook also displays the returned value.
test_loop(
    model,
    BM_dataloader,
    device,
    std,
    mean,
    scaled_graph_label=False,
    verbose=1,
)

Dataset size = 40
Mean Absolute Error = 11.123753547668457 eV


11.123753547668457

<method-wrapper '__repr__' of DataLoader object at 0x7f6687537970>

In [None]:
# Per-sample comparison between the DFT energy and the GNN prediction on the BM-dataset.
abs_err_BM = []
# NOTE(review): the model is moved to the CPU here and stays there for the cells
# below, while `device` is left unchanged.
model.to("cpu")
model.eval()
# Inference only: disable gradient tracking so no autograd graph is built.
with torch.no_grad():
    for sample in BM_dataloader.dataset:
        E_DFT = sample.y
        # Undo the target scaling applied to the model output (value * std + mean).
        sample.y_GNN = model(sample).item() * std + mean
        E_GNN = sample.y_GNN
        # Stored as a plain float so the mean below does not depend on the type of sample.y.
        abs_err_BM.append(float(abs(E_GNN - E_DFT)))
        # Printed in the same pass, so this does not rely on y_GNN still being
        # attached to the sample on a second iteration over the dataset.
        print("{} DFT = {:.2f} eV, GNN = {:.2f} eV".format(sample.formula, sample.y, sample.y_GNN))
BM_MAE = np.mean(abs_err_BM)
# Report the aggregate error, which was previously computed but never shown.
print("BM-dataset MAE = {:.2f} eV".format(BM_MAE))

### Save model and performance analysis

In [None]:
# Bundle the inputs for the report: data loaders, target-scaling parameters,
# and the per-epoch score histories collected during training.
report_loaders = (train_loader, val_loader, test_loader)
report_scaling = (mean, std)
report_curves = (train_list, val_list, test_list)
# "test" is the name given to this report.
create_model_report("test", model, report_loaders, report_scaling, HYPERPARAMS, report_curves)
                               

In [None]:
# Reload the saved model file from the "test" model folder.
# NOTE(review): presumably written by create_model_report("test", ...) -- confirm the save path.
# NOTE(review): torch.load unpickles the file; only load checkpoints from a trusted source.
example = torch.load("./Models/test/model.pth")

In [None]:
# Show the textual representation of the reloaded object.
print(example)

In [None]:
# Display the state dictionary of the reloaded object.
# NOTE(review): assumes the file stores a whole model object rather than a bare state dict -- confirm.
example.state_dict()