In [3]:
"""Perform GNN model training with uncertainty quantification."""

import argparse
from os.path import isdir
import os
import time
import sys
sys.path.insert(0, "../src")

import torch
torch.backends.cudnn.deterministic = True 
import toml
from torch_geometric.seed import seed_everything
seed_everything(42)
import numpy as np
from numpy import random

from oxides_ml.training import create_loaders, scale_target, train_loop, test_loop, nll_loss, nll_loss_warmup
from oxides_ml.classes import EarlyStopper
from oxides_ml.nets import GameNetUQ
from oxides_ml.post_training import create_model_report
from oxides_ml.dataset import OxidesGraphDataset

if __name__ == "__main__":
    # Build the graph dataset, split it into train/validation/test dataloaders
    # and store the three loaders under <output_directory>/<output_name>/.
    output_directory = "/home/tvanhout/oxides_ML/models/DATALOADERS"    # Root directory of loaders
    output_name = "Set3"                                                # Name of folder containing particular set

    # Never write into an already existing set: keep asking until the chosen
    # name is non-empty and unused, then create the folder for that final name
    # (previously a re-entered name was neither re-checked nor created, so the
    # torch.save calls below failed on the missing directory).
    while not output_name or isdir(os.path.join(output_directory, output_name)):
        output_name = input("There is already a model with the chosen name in the provided directory, provide a new one: ")
    output_path = os.path.join(output_directory, output_name)
    os.makedirs(output_path, exist_ok=True)

    # Hyperparameters are hard-coded here (not read from a .toml file)
    hyperparameters = {
        'graph': {
            'target': 'adsorption_energy',
            'structure': {'tolerance': 0.25, 'scaling_factor': 1.25, 'second_order': True},
            'features': {'adsorbate': False, 'radical': False, 'valence': False, 'gcn': False, 'magnetization': False},
        },
        'train': {'splits': 5, 'test_set': True, 'batch_size': 16},
        'data': {
            'initial_state': True,
            'augment': True,
            'force_reload': False,
            'vasp_directory': '/BACKUP/database',
            'graph_dataset_path': '/home/tvanhout/oxides_ML/models/graph_datasets',
        },
    }
    vasp_directory = hyperparameters["data"]["vasp_directory"]
    graph_dataset_dir = hyperparameters["data"]["graph_dataset_path"]
    initial_state = hyperparameters['data']['initial_state']
    augment = hyperparameters['data']['augment']
    force_reload = hyperparameters['data']['force_reload']
    graph_settings = hyperparameters["graph"]
    train = hyperparameters["train"]

    # Select device and record its description in device_dict
    device_dict = {}
    device = "cuda" if torch.cuda.is_available() else "cpu"
    if device == "cuda":
        print("Device name: {} (GPU)".format(torch.cuda.get_device_name(0)))
        device_dict["name"] = torch.cuda.get_device_name(0)
        device_dict["CudaDNN_enabled"] = torch.backends.cudnn.enabled
        device_dict["CUDNN_version"] = torch.backends.cudnn.version()
        device_dict["CUDA_version"] = torch.version.cuda
    else:
        print("Device name: CPU")
        device_dict["name"] = "CPU"

    # Load graph dataset
    dataset = OxidesGraphDataset(vasp_directory, graph_dataset_dir, graph_settings, initial_state=initial_state, augment=augment, force_reload=force_reload)
    # The names below are not used further in this block; they are kept in
    # case later notebook cells rely on them.
    ohe_elements = dataset.ohe_elements
    node_feature_list = dataset.node_feature_list
    num_node_features = len(node_feature_list)

    # Create train/validation/test dataloaders.
    # NOTE(review): balance_func is presumably the hook for oversampling
    # (e.g. gas-phase data); none is applied here -- confirm against create_loaders.
    train_loader, val_loader, test_loader = create_loaders(dataset,
                                                           batch_size=train["batch_size"],
                                                           split=train["splits"],
                                                           test=train["test_set"],
                                                           balance_func=None)

    # Save dataloaders (inside the __main__ guard so that importing this
    # module does not hit undefined names)
    torch.save(train_loader, os.path.join(output_path, "train_loader.pth"))
    torch.save(val_loader, os.path.join(output_path, "val_loader.pth"))
    torch.save(test_loader, os.path.join(output_path, "test_loader.pth"))
    


Device name: NVIDIA RTX A2000 12GB (GPU)
Data split (train/val/test): 60/20/20 %
Training data = 702 Validation data = 234 Test data = 234 (Total = 1170)
