## Main Driver Notebook for Training Graph NNs on TU Datasets

### MODELS
- GatedGCN 
- GCN 
- GAT 
- GATTop (selected by default in the USER CONTROLS cell below) 
- GraphSage 
- GIN  
- MoNet  
- MLP  
- RingGNN  
- 3WLGNN   

### DATASET
- DD 
- ENZYMES
- PROTEINS_full   

### TASK
- Graph Classification

In [1]:
"""
    IMPORTING LIBS
"""
import dgl

import numpy as np
import os
import socket
import time
import random
import glob
import argparse, json

import torch
import torch.nn as nn
import torch.nn.functional as F

import torch.optim as optim
from torch.utils.data import DataLoader

from tensorboardX import SummaryWriter
from tqdm import tqdm

class DotDict(dict):
    """Dictionary whose entries are also reachable as attributes."""

    def __init__(self, **kwds):
        # Fill the mapping from the keyword arguments, then alias the instance
        # namespace to the mapping itself so `d.key` and `d['key']` share storage.
        super().__init__(**kwds)
        self.__dict__ = self
        

In [2]:
# """
#     AUTORELOAD IPYTHON EXTENSION FOR RELOADING IMPORTED MODULES
# """

def in_ipynb():
    """Tell whether the code runs where IPython's `get_ipython` is defined.

    Returns:
        bool: True when `get_ipython` can be called and its `config`
        attribute read, False when the name `get_ipython` does not exist.
    """
    try:
        # A plain Python interpreter has no `get_ipython`, so this lookup
        # raises NameError there.
        get_ipython().config
    except NameError:
        return False
    return True
    
# Detect once whether we run inside a notebook; the cells below branch on this flag.
notebook_mode = in_ipynb()
print(notebook_mode)

# Autoreload lets edits to imported modules take effect without a kernel restart.
# NOTE(review): the notebook-to-script export run in the last cell presumably
# keys on the exact text of this guard -- confirm before rewording it.
if notebook_mode == True:
    %load_ext autoreload
    %autoreload 2


True


In [3]:
"""
    IMPORTING CUSTOM MODULES/METHODS
"""

from nets.TUs_graph_classification.load_net import gnn_model # import GNNs
from data.data import LoadData # import dataset


In [4]:
"""
    GPU Setup
"""
def gpu_setup(use_gpu, gpu_id):
    """Select the torch device used for training.

    Sets CUDA_DEVICE_ORDER to "PCI_BUS_ID" and CUDA_VISIBLE_DEVICES to
    `gpu_id`, then returns a CUDA device when CUDA is available and requested,
    and the CPU device otherwise.

    Args:
        use_gpu: truthy to run on the GPU when CUDA is available.
        gpu_id: value written to CUDA_VISIBLE_DEVICES (converted with str()).

    Returns:
        torch.device: torch.device("cuda") or torch.device("cpu").
    """
    os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
    os.environ["CUDA_VISIBLE_DEVICES"] = str(gpu_id)

    cuda_available = torch.cuda.is_available()
    if cuda_available and use_gpu:
        print('cuda available with GPU:',torch.cuda.get_device_name(0))
        return torch.device("cuda")

    # Distinguish "no CUDA" from "CUDA present but the caller asked for CPU";
    # reporting the latter as "not available" is misleading when reading logs.
    if cuda_available:
        print('cuda available but not requested, using CPU')
    else:
        print('cuda not available')
    return torch.device("cpu")


# Device selection used by the notebook cells below: CPU by default.
# For the default GPU use instead:
#   use_gpu = True; gpu_id = 0; device = None
use_gpu = False  # CPU
gpu_id = -1
device = None


In [5]:
# """
#     USER CONTROLS
# """
if notebook_mode == True:

    # Pick exactly one model by leaving a single assignment uncommented.
    #MODEL_NAME = '3WLGNN'
    #MODEL_NAME = 'RingGNN'
    #MODEL_NAME = 'GatedGCN'
    #MODEL_NAME = 'MoNet'
    #MODEL_NAME = 'GCN'
    MODEL_NAME = 'GATTop'
    #MODEL_NAME = 'GraphSage'
    #MODEL_NAME = 'GIN'
    #MODEL_NAME = 'MLP'

    # Pick exactly one dataset the same way.
    DATASET_NAME = 'ENZYMES'
    #DATASET_NAME = 'PROTEINS_full'
    #DATASET_NAME = 'DD'

    out_dir = 'out/TUs_graph_classification/'
    # Read the clock once: two separate strftime calls can straddle a second
    # boundary and give the log and checkpoint directories different suffixes.
    run_stamp = time.strftime('%Hh%Mm%Ss_on_%b_%d_%Y')
    root_log_dir = out_dir + 'logs/' + MODEL_NAME + "_" + DATASET_NAME + "_" + run_stamp
    root_ckpt_dir = out_dir + 'checkpoints/' + MODEL_NAME + "_" + DATASET_NAME + "_" + run_stamp

    print("[I] Loading data (notebook) ...")
    dataset = LoadData(DATASET_NAME)
    trainset, valset, testset = dataset.train, dataset.val, dataset.test
    print("[I] Finished loading.")


[I] Loading data (notebook) ...
[!] Dataset:  ENZYMES
Time taken: 0.5786s
[I] Finished loading.


In [6]:
# """
#     PARAMETERS
# """
if notebook_mode == True:

    # Defaults for model-specific options. The per-model section below
    # overrides the ones the selected model needs.
    n_heads = -1
    edge_feat = False
    pseudo_dim_MoNet = -1
    kernel = -1
    gnn_per_block = -1
    embedding_dim = -1
    pool_ratio = -1
    n_mlp_GIN = -1
    gated = False
    self_loop = False
    #self_loop = True
    max_time = 120

    # Per-model optimisation and architecture hyper-parameters.
    # NOTE: `dropout`, `readout`, `gated`, `learn_eps_GIN` and
    # `neighbor_aggr_GIN` set here are not forwarded; net_params below uses
    # fixed values for those keys.
    if MODEL_NAME == 'GatedGCN':
        seed=41; epochs=1000; batch_size=5; init_lr=5e-5; lr_reduce_factor=0.5; lr_schedule_patience=25; min_lr = 1e-6; weight_decay=0
        L=4; hidden_dim=70; out_dim=hidden_dim; dropout=0.0; readout='mean'

    elif MODEL_NAME == 'GCN':
        seed=41; epochs=1000; batch_size=5; init_lr=5e-5; lr_reduce_factor=0.5; lr_schedule_patience=25; min_lr = 1e-6; weight_decay=0
        L=4; hidden_dim=146; out_dim=hidden_dim; dropout=0.0; readout='mean'

    elif MODEL_NAME in ('GAT', 'GATTop'):
        # Both names share the same settings (previously two identical branches).
        #seed=41; epochs=1000; batch_size=50; init_lr=5e-5; lr_reduce_factor=0.5; lr_schedule_patience=25; min_lr = 1e-6; weight_decay=0
        #L=4; n_heads=8; hidden_dim=19; out_dim=n_heads*hidden_dim; dropout=0.0; readout='mean'
        #print('True hidden dim:',out_dim)

        seed=41; epochs=1000; batch_size=20; init_lr=1e-3; lr_reduce_factor=0.5; lr_schedule_patience=25; min_lr = 1e-6; weight_decay=0
        L=4; n_heads=8; hidden_dim=18; out_dim=n_heads*hidden_dim; dropout=0.0; readout='mean'
        print('True hidden dim:',out_dim)

    elif MODEL_NAME == 'GraphSage':
        seed=41; epochs=1000; batch_size=50; init_lr=5e-5; lr_reduce_factor=0.5; lr_schedule_patience=25; min_lr = 1e-6; weight_decay=0
        L=4; hidden_dim=108; out_dim=hidden_dim; dropout=0.0; readout='mean'

    elif MODEL_NAME == 'MLP':
        seed=41; epochs=1000; batch_size=50; init_lr=5e-4; lr_reduce_factor=0.5; lr_schedule_patience=25; min_lr = 1e-6; weight_decay=0
        L=4; hidden_dim=165; out_dim=hidden_dim; dropout=0.0; readout='mean'

    elif MODEL_NAME == 'DiffPool':
        seed=41; epochs=1000; batch_size=50; init_lr=5e-4; lr_reduce_factor=0.5; lr_schedule_patience=25; min_lr = 1e-6; weight_decay=0
        L=4; hidden_dim=32; out_dim=hidden_dim; dropout=0.0; readout='mean'
        # batch_size is deliberately overridden to 128 here.
        n_heads=8; gnn_per_block=3; embedding_dim=32; batch_size=128; pool_ratio=0.15

    elif MODEL_NAME == 'GIN':
        seed=41; epochs=1000; batch_size=50; init_lr=5e-4; lr_reduce_factor=0.5; lr_schedule_patience=25; min_lr = 1e-6; weight_decay=0
        L=4; hidden_dim=110; out_dim=hidden_dim; dropout=0.0; readout='mean'
        n_mlp_GIN = 2; learn_eps_GIN=True; neighbor_aggr_GIN='sum'

    elif MODEL_NAME == 'MoNet':
        seed=41; epochs=1000; batch_size=50; init_lr=5e-4; lr_reduce_factor=0.5; lr_schedule_patience=25; min_lr = 1e-6; weight_decay=0
        L=4; hidden_dim=90; out_dim=hidden_dim; dropout=0.0; readout='mean'
        pseudo_dim_MoNet=2; kernel=3;

    elif MODEL_NAME == 'RingGNN':
        seed=41; epochs=1000; batch_size=1; init_lr=5e-5; lr_reduce_factor=0.5; lr_schedule_patience=25; min_lr = 1e-6; weight_decay=0
        #L=4; hidden_dim=145; out_dim=hidden_dim; dropout=0.0; readout='mean'
        L=4; hidden_dim=22; out_dim=hidden_dim; dropout=0.0;

    elif MODEL_NAME == '3WLGNN':
        seed=41; epochs=1000; batch_size=1; init_lr=5e-5; lr_reduce_factor=0.5; lr_schedule_patience=25; min_lr = 1e-6; weight_decay=0
        #L=4; hidden_dim=145; out_dim=hidden_dim; dropout=0.0; readout='mean'
        L=3; hidden_dim=76; out_dim=hidden_dim; dropout=0.0;

    else:
        # Fail here with a clear message instead of a NameError on `hidden_dim`
        # further down when the model name has no settings in this cell.
        raise ValueError("No notebook hyper-parameters defined for model '{}'".format(MODEL_NAME))

    # generic new_params
    net_params = {}
    net_params['device'] = device
    net_params['gated'] = False  # for mlpnet baseline
    # Input feature size is read from the first node of the first graph.
    net_params['in_dim'] = dataset.all.graph_lists[0].ndata['feat'][0].shape[0]
    net_params['residual'] = True
    net_params['hidden_dim'] = hidden_dim
    net_params['out_dim'] = out_dim
    num_classes = len(np.unique(dataset.all.graph_labels))
    net_params['n_classes'] = num_classes
    net_params['n_heads'] = n_heads
    net_params['L'] = L  # min L should be 2
    net_params['readout'] = "mean"
    net_params['layer_norm'] = True
    net_params['batch_norm'] = True
    net_params['in_feat_dropout'] = 0.0
    net_params['dropout'] = 0.0
    net_params['edge_feat'] = edge_feat
    net_params['self_loop'] = self_loop

    # specific for MoNet
    net_params['pseudo_dim_MoNet'] = pseudo_dim_MoNet
    net_params['kernel'] = kernel

    # specific for GIN
    net_params['n_mlp_GIN'] = n_mlp_GIN
    net_params['learn_eps_GIN'] = True
    net_params['neighbor_aggr_GIN'] = 'sum'

    # specific for graphsage
    net_params['sage_aggregator'] = 'meanpool'

    # specific for diffpoolnet
    net_params['data_mode'] = 'default'
    net_params['gnn_per_block'] = gnn_per_block
    net_params['embedding_dim'] = embedding_dim
    net_params['pool_ratio'] = pool_ratio
    net_params['linkpred'] = True
    net_params['num_pool'] = 1
    net_params['cat'] = False
    net_params['batch_size'] = batch_size

    # specific for RingGNN
    net_params['radius'] = 2
    num_nodes = [dataset.all[i][0].number_of_nodes() for i in range(len(dataset.all))]
    net_params['avg_node_num'] = int(np.ceil(np.mean(num_nodes)))

    # specific for 3WLGNN
    net_params['depth_of_mlp'] = 2

    # calculate assignment dimension: pool_ratio * largest graph's maximum
    # number of nodes  in the dataset
    # (computed for every model; with the default pool_ratio of -1 the value
    # is negative and only meaningful when pool_ratio was set, i.e. DiffPool)
    max_num_node = max(num_nodes)
    net_params['assign_dim'] = int(max_num_node * net_params['pool_ratio']) * net_params['batch_size']


True hidden dim: 144


In [7]:

"""
    VIEWING MODEL CONFIG AND PARAMS
"""
def view_model_param(MODEL_NAME, net_params):
    """Build the requested model and report its parameter count.

    Args:
        MODEL_NAME: model identifier forwarded to `gnn_model`.
        net_params: network configuration forwarded to `gnn_model`.

    Returns:
        The sum, over `model.parameters()`, of the number of entries in each
        parameter tensor (also printed).
    """
    net = gnn_model(MODEL_NAME, net_params)
    print("MODEL DETAILS:\n")
    # Entries per tensor = product of its dimensions.
    entries_per_tensor = [np.prod(list(weights.data.size())) for weights in net.parameters()]
    n_params = sum(entries_per_tensor)
    print('MODEL/Total parameters:', MODEL_NAME, n_params)
    return n_params

# Notebook only: print the parameter count of the model configured above.
# NOTE(review): the guard text below is presumably matched by the
# notebook-to-script export step -- confirm before rewording it.
if notebook_mode == True:
    view_model_param(MODEL_NAME, net_params)


MODEL DETAILS:

MODEL/Total parameters: GATTop 101922


In [8]:
"""
    TRAINING CODE
"""

def train_val_pipeline(MODEL_NAME, DATASET_NAME, params, net_params, dirs):
    """Train and evaluate one model on the 10 train/val/test splits of a dataset.

    For every split a fresh model is built, trained with Adam and a
    ReduceLROnPlateau schedule driven by the validation loss, logged to
    TensorBoard and checkpointed. Last-epoch train/test accuracies of all
    splits are averaged, printed and written to the results file.

    Args:
        MODEL_NAME: model identifier forwarded to `gnn_model`.
        DATASET_NAME: dataset identifier forwarded to `LoadData`.
        params: dict with 'seed', 'epochs', 'batch_size', 'init_lr',
            'lr_reduce_factor', 'lr_schedule_patience', 'min_lr',
            'weight_decay' and 'max_time' (hours, shared by the 10 splits).
            Optional 'top_feat_warmup_epochs' (default 100) sets how many
            epochs `model.top_feat_active` is held at 0.0 before switching to
            1.0, for models that expose that attribute.
        net_params: dict forwarded to `gnn_model`; 'device', 'self_loop',
            'n_classes' and 'total_param' are also read here.
        dirs: tuple (root_log_dir, root_ckpt_dir, write_file_name,
            write_config_file); the last two are used with '.txt' appended.

    Training can be stopped early with Ctrl + C; results gathered so far are
    still summarised and written.
    """
    avg_test_acc = []
    avg_train_acc = []
    avg_convergence_epochs = []

    t0 = time.time()
    per_epoch_time = []

    dataset = LoadData(DATASET_NAME)

    if MODEL_NAME in ['GCN', 'GAT', 'GATTop']:
        if net_params['self_loop']:
            print("[!] Adding graph self-loops for GCN/GAT models (central node trick).")
            dataset._add_self_loops()

    root_log_dir, root_ckpt_dir, write_file_name, write_config_file = dirs
    device = net_params['device']

    # Write the network and optimization hyper-parameters in folder config/
    with open(write_config_file + '.txt', 'w') as f:
        f.write("""Dataset: {},\nModel: {}\n\nparams={}\n\nnet_params={}\n\n\nTotal Parameters: {}\n\n"""\
                .format(DATASET_NAME, MODEL_NAME, params, net_params, net_params['total_param']))

    # The model family does not change between splits, so resolve the
    # training/evaluation functions once instead of once per split.
    dense_model = MODEL_NAME in ['RingGNN', '3WLGNN']
    if dense_model:
        # import train functions specific for WL-GNNs
        from train.train_TUs_graph_classification import train_epoch_dense as train_epoch, evaluate_network_dense as evaluate_network
    else:
        # import train functions for all other GCNs
        from train.train_TUs_graph_classification import train_epoch_sparse as train_epoch, evaluate_network_sparse as evaluate_network

    # batching exception for Diffpool
    drop_last = True if MODEL_NAME == 'DiffPool' else False

    # Number of initial epochs during which `top_feat_active` stays at 0.0.
    top_feat_warmup_epochs = params.get('top_feat_warmup_epochs', 100)

    # Bound up front so the clean-up and the final report work even when the
    # run is interrupted before the first split has created them.
    writer = None
    model = None

    # At any point you can hit Ctrl + C to break out of training early.
    try:
        for split_number in range(10):
            t0_split = time.time()
            log_dir = os.path.join(root_log_dir, "RUN_" + str(split_number))
            writer = SummaryWriter(log_dir=log_dir)

            # setting seeds
            random.seed(params['seed'])
            np.random.seed(params['seed'])
            torch.manual_seed(params['seed'])
            if device.type == 'cuda':
                torch.cuda.manual_seed(params['seed'])

            print("RUN NUMBER: ", split_number)
            trainset, valset, testset = dataset.train[split_number], dataset.val[split_number], dataset.test[split_number]
            print("Training Graphs: ", len(trainset))
            print("Validation Graphs: ", len(valset))
            print("Test Graphs: ", len(testset))
            print("Number of Classes: ", net_params['n_classes'])

            model = gnn_model(MODEL_NAME, net_params)

            # These attributes are read only when the model exposes them, so
            # models without them no longer fail with AttributeError.
            if hasattr(model, 'h0_sum'):
                print(model.h0_sum)
            uses_top_feat = hasattr(model, 'top_feat_active')
            if uses_top_feat:
                print(model.top_feat_active)

            model = model.to(device)
            optimizer = optim.Adam(model.parameters(), lr=params['init_lr'], weight_decay=params['weight_decay'])
            scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min',
                                                             factor=params['lr_reduce_factor'],
                                                             patience=params['lr_schedule_patience'],
                                                             verbose=True)

            epoch_train_losses, epoch_val_losses = [], []
            epoch_train_accs, epoch_val_accs = [], []

            if dense_model:
                train_loader = DataLoader(trainset, shuffle=True, collate_fn=dataset.collate_dense_gnn)
                val_loader = DataLoader(valset, shuffle=False, collate_fn=dataset.collate_dense_gnn)
                test_loader = DataLoader(testset, shuffle=False, collate_fn=dataset.collate_dense_gnn)
            else:
                train_loader = DataLoader(trainset, batch_size=params['batch_size'], shuffle=True, drop_last=drop_last, collate_fn=dataset.collate)
                val_loader = DataLoader(valset, batch_size=params['batch_size'], shuffle=False, drop_last=drop_last, collate_fn=dataset.collate)
                test_loader = DataLoader(testset, batch_size=params['batch_size'], shuffle=False, drop_last=drop_last, collate_fn=dataset.collate)

            with tqdm(range(params['epochs'])) as t:
                for epoch in t:

                    t.set_description('Epoch %d' % epoch)

                    if uses_top_feat:
                        model.top_feat_active = 0.0 if epoch < top_feat_warmup_epochs else 1.0

                    start = time.time()

                    #with torch.autograd.set_detect_anomaly(True):
                    if dense_model: # since different batch training function for dense GNNs
                        epoch_train_loss, epoch_train_acc, optimizer = train_epoch(model, optimizer, device, train_loader, epoch, params['batch_size'])
                    else:   # for all other models common train function
                        epoch_train_loss, epoch_train_acc, optimizer = train_epoch(model, optimizer, device, train_loader, epoch)

                    epoch_val_loss, epoch_val_acc = evaluate_network(model, device, val_loader, epoch)
                    # Evaluated once per epoch and reused for both the
                    # TensorBoard scalar and the progress bar.
                    # NOTE(review): assumes evaluate_network has no side
                    # effects on the model -- confirm.
                    _, epoch_test_acc = evaluate_network(model, device, test_loader, epoch)

                    epoch_train_losses.append(epoch_train_loss)
                    epoch_val_losses.append(epoch_val_loss)
                    epoch_train_accs.append(epoch_train_acc)
                    epoch_val_accs.append(epoch_val_acc)

                    writer.add_scalar('train/_loss', epoch_train_loss, epoch)
                    writer.add_scalar('val/_loss', epoch_val_loss, epoch)
                    writer.add_scalar('train/_acc', epoch_train_acc, epoch)
                    writer.add_scalar('val/_acc', epoch_val_acc, epoch)
                    writer.add_scalar('test/_acc', epoch_test_acc, epoch)
                    writer.add_scalar('learning_rate', optimizer.param_groups[0]['lr'], epoch)

                    t.set_postfix(time=time.time()-start, lr=optimizer.param_groups[0]['lr'],
                                  train_loss=epoch_train_loss, val_loss=epoch_val_loss,
                                  train_acc=epoch_train_acc, val_acc=epoch_val_acc,
                                  test_acc=epoch_test_acc)

                    per_epoch_time.append(time.time()-start)

                    # Saving checkpoint
                    ckpt_dir = os.path.join(root_ckpt_dir, "RUN_" + str(split_number))
                    os.makedirs(ckpt_dir, exist_ok=True)
                    torch.save(model.state_dict(), '{}.pkl'.format(ckpt_dir + "/epoch_" + str(epoch)))

                    # Keep only the two most recent checkpoints of this split.
                    for file in glob.glob(ckpt_dir + '/*.pkl'):
                        epoch_nb = file.split('_')[-1]
                        epoch_nb = int(epoch_nb.split('.')[0])
                        if epoch_nb < epoch-1:
                            os.remove(file)

                    scheduler.step(epoch_val_loss)

                    if optimizer.param_groups[0]['lr'] < params['min_lr']:
                        print("\n!! LR EQUAL TO MIN LR SET.")
                        break

                    # Stop training after params['max_time'] hours
                    if time.time()-t0_split > params['max_time']*3600/10:       # Dividing max_time by 10, since there are 10 runs in TUs
                        print('-' * 89)
                        print("Max_time for one train-val-test split experiment elapsed {:.3f} hours, so stopping".format(params['max_time']/10))
                        break

            _, test_acc = evaluate_network(model, device, test_loader, epoch)
            _, train_acc = evaluate_network(model, device, train_loader, epoch)
            avg_test_acc.append(test_acc)
            avg_train_acc.append(train_acc)
            avg_convergence_epochs.append(epoch)

            print("Test Accuracy [LAST EPOCH]: {:.4f}".format(test_acc))
            print("Train Accuracy [LAST EPOCH]: {:.4f}".format(train_acc))
            print("Convergence Time (Epochs): {:.4f}".format(epoch))

            # Each split owns its writer; close it before the next split
            # replaces it so event files are flushed and handles released.
            writer.close()
            writer = None

    except KeyboardInterrupt:
        print('-' * 89)
        print('Exiting from training early because of KeyboardInterrupt')

    finally:
        # Covers an interrupted or failed split whose writer is still open.
        if writer is not None:
            writer.close()

    print("TOTAL TIME TAKEN: {:.4f}hrs".format((time.time()-t0)/3600))
    print("AVG TIME PER EPOCH: {:.4f}s".format(np.mean(per_epoch_time)))
    print("AVG CONVERGENCE Time (Epochs): {:.4f}".format(np.mean(np.array(avg_convergence_epochs))))
    # Final test accuracy value averaged over 10-fold
    print("""\n\n\nFINAL RESULTS\n\nTEST ACCURACY averaged: {:.4f} with s.d. {:.4f}"""\
          .format(np.mean(np.array(avg_test_acc))*100, np.std(avg_test_acc)*100))
    print("\nAll splits Test Accuracies:\n", avg_test_acc)
    print("""\n\n\nFINAL RESULTS\n\nTRAIN ACCURACY averaged: {:.4f} with s.d. {:.4f}"""\
          .format(np.mean(np.array(avg_train_acc))*100, np.std(avg_train_acc)*100))
    print("\nAll splits Train Accuracies:\n", avg_train_acc)

    # Write the results in out/results folder
    with open(write_file_name + '.txt', 'w') as f:
        f.write("""Dataset: {},\nModel: {}\n\nparams={}\n\nnet_params={}\n\n{}\n\nTotal Parameters: {}\n\n
    FINAL RESULTS\nTEST ACCURACY averaged: {:.4f} with s.d. {:.4f}\nTRAIN ACCURACY averaged: {:.4f} with s.d. {:.4f}\n\n
    Average Convergence Time (Epochs): {:.4f} with s.d. {:.4f}\nTotal Time Taken: {:.4f} hrs\nAverage Time Per Epoch: {:.4f} s\n\n\nAll Splits Test Accuracies: {}"""\
          .format(DATASET_NAME, MODEL_NAME, params, net_params, model, net_params['total_param'],
                  np.mean(np.array(avg_test_acc))*100, np.std(avg_test_acc)*100,
                  np.mean(np.array(avg_train_acc))*100, np.std(avg_train_acc)*100,
                  np.mean(avg_convergence_epochs), np.std(avg_convergence_epochs),
               (time.time()-t0)/3600, np.mean(per_epoch_time), avg_test_acc))

In [9]:
def _cli_bool(text):
    """Interpret a command-line boolean: only the exact string 'True' is true."""
    return text == 'True'


def _apply_cli_overrides(args, target, converters):
    """Copy every command-line option that was supplied into `target`.

    Args:
        args: argparse namespace; options that were not passed are None.
        target: dict updated in place, keyed by option name.
        converters: iterable of (option name, callable) pairs; the callable
            turns the raw command-line string into the stored value.
    """
    for name, convert in converters:
        raw = getattr(args, name)
        if raw is not None:
            target[name] = convert(raw)


def main(notebook_mode=False,config=None):
    """Assemble the run configuration and launch the training pipeline.

    In terminal mode the configuration is read from the JSON file given with
    --config and individual entries can be overridden on the command line. In
    notebook mode the ready-made `config` dict is used as is.

    Args:
        notebook_mode: False to parse command-line arguments, True to use `config`.
        config: dict with 'gpu' ('use', 'id'), 'model', 'dataset', 'out_dir',
            'params' and 'net_params'; only read in notebook mode.

    NOTE(review): the two mode guards below are kept verbatim because the
    notebook-to-script export presumably matches their text -- confirm before
    rewording them.
    """

    # terminal mode
    if notebook_mode==False:

        # Options that override config['params'], with their converters.
        param_casts = (
            ('seed', int),
            ('epochs', int),
            ('batch_size', int),
            ('init_lr', float),
            ('lr_reduce_factor', float),
            ('lr_schedule_patience', int),
            ('min_lr', float),
            ('weight_decay', float),
            ('print_epoch_interval', int),
            ('max_time', float),
        )
        # Options that override config['net_params'], with their converters.
        net_param_casts = (
            ('L', int),
            ('hidden_dim', int),
            ('out_dim', int),
            ('residual', _cli_bool),
            ('edge_feat', _cli_bool),
            ('readout', str),
            ('kernel', int),
            ('n_heads', int),
            ('gated', _cli_bool),
            ('in_feat_dropout', float),
            ('dropout', float),
            ('layer_norm', _cli_bool),
            ('batch_norm', _cli_bool),
            ('sage_aggregator', str),
            ('data_mode', str),
            ('num_pool', int),
            ('gnn_per_block', int),
            ('embedding_dim', int),
            ('pool_ratio', float),
            ('linkpred', _cli_bool),
            ('cat', _cli_bool),
            ('self_loop', _cli_bool),
        )

        parser = argparse.ArgumentParser()
        # The config file is always opened below, so a missing --config is
        # reported by argparse instead of failing later inside open().
        parser.add_argument('--config', required=True, help="Please give a config.json file with training/model/data/param details")
        parser.add_argument('--gpu_id', help="Please give a value for gpu id")
        parser.add_argument('--model', help="Please give a value for model name")
        parser.add_argument('--dataset', help="Please give a value for dataset name")
        parser.add_argument('--out_dir', help="Please give a value for out_dir")
        for option_name, _ in param_casts + net_param_casts:
            parser.add_argument('--' + option_name, help="Please give a value for " + option_name)
        args = parser.parse_args()

        with open(args.config) as f:
            config = json.load(f)

        # device
        if args.gpu_id is not None:
            # Passing a GPU id on the command line also switches the GPU on.
            config['gpu']['id'] = int(args.gpu_id)
            config['gpu']['use'] = True
        device = gpu_setup(config['gpu']['use'], config['gpu']['id'])

        # model, dataset, out_dir
        MODEL_NAME = args.model if args.model is not None else config['model']
        DATASET_NAME = args.dataset if args.dataset is not None else config['dataset']
        dataset = LoadData(DATASET_NAME)
        out_dir = args.out_dir if args.out_dir is not None else config['out_dir']

        # parameters
        params = config['params']
        _apply_cli_overrides(args, params, param_casts)

        # network parameters
        net_params = config['net_params']
        net_params['device'] = device
        net_params['gpu_id'] = config['gpu']['id']
        # Taken after the overrides above so a --batch_size option is honoured.
        net_params['batch_size'] = params['batch_size']
        _apply_cli_overrides(args, net_params, net_param_casts)

    # notebook mode
    if notebook_mode:

        # parameters
        params = config['params']

        # dataset
        DATASET_NAME = config['dataset']
        dataset = LoadData(DATASET_NAME)

        # device
        device = gpu_setup(config['gpu']['use'], config['gpu']['id'])
        out_dir = config['out_dir']

        # GNN model
        MODEL_NAME = config['model']

        # network parameters
        net_params = config['net_params']
        net_params['device'] = device
        net_params['gpu_id'] = config['gpu']['id']
        net_params['batch_size'] = params['batch_size']

    # TUs
    net_params['in_dim'] = dataset.all.graph_lists[0].ndata['feat'][0].shape[0]
    num_classes = len(np.unique(dataset.all.graph_labels))
    net_params['n_classes'] = num_classes

    if MODEL_NAME == 'DiffPool':
        # calculate assignment dimension: pool_ratio * largest graph's maximum
        # number of nodes  in the dataset
        num_nodes = [dataset.all[i][0].number_of_nodes() for i in range(len(dataset.all))]
        max_num_node = max(num_nodes)
        net_params['assign_dim'] = int(max_num_node * net_params['pool_ratio']) * net_params['batch_size']

    if MODEL_NAME == 'RingGNN':
        num_nodes = [dataset.all[i][0].number_of_nodes() for i in range(len(dataset.all))]
        net_params['avg_node_num'] = int(np.ceil(np.mean(num_nodes)))

    # Read the clock once so the four output paths of this run share the same
    # suffix; separate strftime calls can straddle a second boundary.
    run_tag = MODEL_NAME + "_" + DATASET_NAME + "_GPU" + str(config['gpu']['id']) + "_" + time.strftime('%Hh%Mm%Ss_on_%b_%d_%Y')
    root_log_dir = out_dir + 'logs/' + run_tag
    root_ckpt_dir = out_dir + 'checkpoints/' + run_tag
    write_file_name = out_dir + 'results/result_' + run_tag
    write_config_file = out_dir + 'configs/config_' + run_tag
    dirs = root_log_dir, root_ckpt_dir, write_file_name, write_config_file

    os.makedirs(out_dir + 'results', exist_ok=True)
    os.makedirs(out_dir + 'configs', exist_ok=True)

    net_params['total_param'] = view_model_param(MODEL_NAME, net_params)
    train_val_pipeline(MODEL_NAME, DATASET_NAME, params, net_params, dirs)

# Entry point: in a notebook, build the config dict from the cells above and
# run training; otherwise run with command-line arguments.
# NOTE(review): the export step below presumably rewrites this cell textually
# (guard line, else branch) -- confirm before restructuring it.
if notebook_mode==True:
    
    config = {}
    # gpu config
    gpu = {}
    gpu['use'] = use_gpu
    gpu['id'] = gpu_id
    config['gpu'] = gpu
    # GNN model, dataset, out_dir
    config['model'] = MODEL_NAME
    config['dataset'] = DATASET_NAME
    config['out_dir'] = out_dir
    # parameters
    params = {}
    params['seed'] = seed
    params['epochs'] = epochs
    params['batch_size'] = batch_size
    params['init_lr'] = init_lr
    params['lr_reduce_factor'] = lr_reduce_factor 
    params['lr_schedule_patience'] = lr_schedule_patience
    params['min_lr'] = min_lr
    params['weight_decay'] = weight_decay
    params['print_epoch_interval'] = 5
    params['max_time'] = max_time
    config['params'] = params
    # network parameters
    config['net_params'] = net_params
    
    # convert to .py format
    # (per the captured output: exports this notebook to a .py script and cleans it)
    from utils.cleaner_main import *
    cleaner_main('main_TUs_graph_classification')
    
    main(True,config)
    
else:
    
    main()
    

Convert main_TUs_graph_classification.ipynb to main_TUs_graph_classification.py


[NbConvertApp] Converting notebook main_TUs_graph_classification.ipynb to script
[NbConvertApp] Writing 29814 bytes to main_TUs_graph_classification.py


Clean main_TUs_graph_classification.py
Done. 
[!] Dataset:  ENZYMES
Time taken: 0.8533s
cuda not available
MODEL DETAILS:

MODEL/Total parameters: GATTop 101922
[!] Dataset:  ENZYMES
Time taken: 0.5560s
RUN NUMBER:  0
Training Graphs:  480
Validation Graphs:  60
Test Graphs:  60
Number of Classes:  6
True
1.0


Epoch 81:   8%|▍     | 81/1000 [09:26<2:08:40,  8.40s/it, lr=0.001, test_acc=0.55, time=7.66, train_acc=0.94, train_loss=0.192, val_acc=0.567, val_loss=1.72]

Epoch 00081: reducing learning rate of group 0 to 5.0000e-04.


Epoch 107:  11%| | 107/1000 [12:28<1:26:29,  5.81s/it, lr=0.0005, test_acc=0.65, time=4.53, train_acc=0.998, train_loss=0.0227, val_acc=0.717, val_loss=1.67]

Epoch 00107: reducing learning rate of group 0 to 2.5000e-04.


Epoch 133:  13%|▏| 133/1000 [15:24<1:57:03,  8.10s/it, lr=0.00025, test_acc=0.65, time=9.36, train_acc=0.998, train_loss=0.0131, val_acc=0.667, val_loss=1.73

Epoch 00133: reducing learning rate of group 0 to 1.2500e-04.


Epoch 159:  16%|▍  | 159/1000 [18:50<1:36:16,  6.87s/it, lr=0.000125, test_acc=0.6, time=5.85, train_acc=1, train_loss=0.00656, val_acc=0.683, val_loss=1.78]

Epoch 00159: reducing learning rate of group 0 to 6.2500e-05.


Epoch 185:  18%|▎ | 185/1000 [22:27<2:05:28,  9.24s/it, lr=6.25e-5, test_acc=0.6, time=10.3, train_acc=0.998, train_loss=0.00846, val_acc=0.7, val_loss=1.78]

Epoch 00185: reducing learning rate of group 0 to 3.1250e-05.


Epoch 211:  21%|█▋      | 211/1000 [25:53<1:32:40,  7.05s/it, lr=3.13e-5, test_acc=0.6, time=5.6, train_acc=1, train_loss=0.0059, val_acc=0.7, val_loss=1.78]

Epoch 00211: reducing learning rate of group 0 to 1.5625e-05.


Epoch 237:  24%|▏| 237/1000 [29:39<2:26:55, 11.55s/it, lr=1.56e-5, test_acc=0.6, time=14.9, train_acc=0.998, train_loss=0.00729, val_acc=0.717, val_loss=1.8]

Epoch 00237: reducing learning rate of group 0 to 7.8125e-06.


Epoch 263:  26%|██▎      | 263/1000 [33:30<1:42:40,  8.36s/it, lr=7.81e-6, test_acc=0.6, time=9.48, train_acc=1, train_loss=0.008, val_acc=0.7, val_loss=1.8]

Epoch 00263: reducing learning rate of group 0 to 3.9063e-06.


Epoch 289:  29%|█▋    | 289/1000 [37:18<2:20:04, 11.82s/it, lr=3.91e-6, test_acc=0.6, time=13.6, train_acc=1, train_loss=0.00403, val_acc=0.7, val_loss=1.82]

Epoch 00289: reducing learning rate of group 0 to 1.9531e-06.


Epoch 314:  31%|█▉    | 314/1000 [41:16<1:30:09,  7.89s/it, lr=1.95e-6, test_acc=0.6, time=11.8, train_acc=1, train_loss=0.00345, val_acc=0.7, val_loss=1.81]


Epoch 00315: reducing learning rate of group 0 to 9.7656e-07.

!! LR EQUAL TO MIN LR SET.
Test Accuracy [LAST EPOCH]: 0.6000
Train Accuracy [LAST EPOCH]: 1.0000
Convergence Time (Epochs): 314.0000
RUN NUMBER:  1
Training Graphs:  480
Validation Graphs:  60
Test Graphs:  60
Number of Classes:  6
True
1.0


Epoch 67:   7%|▎    | 67/1000 [10:25<2:26:38,  9.43s/it, lr=0.001, test_acc=0.617, time=9.1, train_acc=0.908, train_loss=0.253, val_acc=0.617, val_loss=1.93]

Epoch 00067: reducing learning rate of group 0 to 5.0000e-04.


Epoch 93:   9%|▎  | 93/1000 [13:58<1:37:02,  6.42s/it, lr=0.0005, test_acc=0.733, time=4.94, train_acc=0.996, train_loss=0.054, val_acc=0.683, val_loss=1.85]

Epoch 00093: reducing learning rate of group 0 to 2.5000e-04.


Epoch 119:  12%|▍   | 119/1000 [18:04<2:22:11,  9.68s/it, lr=0.00025, test_acc=0.717, time=11.5, train_acc=1, train_loss=0.028, val_acc=0.683, val_loss=1.84]

Epoch 00119: reducing learning rate of group 0 to 1.2500e-04.


Epoch 145:  14%|▏| 145/1000 [22:04<2:03:35,  8.67s/it, lr=0.000125, test_acc=0.733, time=7.47, train_acc=0.998, train_loss=0.016, val_acc=0.7, val_loss=1.86]

Epoch 00145: reducing learning rate of group 0 to 6.2500e-05.


Epoch 171:  17%|█▎      | 171/1000 [26:08<2:21:02, 10.21s/it, lr=6.25e-5, test_acc=0.733, time=11, train_acc=1, train_loss=0.0114, val_acc=0.7, val_loss=1.9]

Epoch 00171: reducing learning rate of group 0 to 3.1250e-05.


Epoch 197:  20%|▉    | 197/1000 [30:21<2:13:00,  9.94s/it, lr=3.13e-5, test_acc=0.733, time=12.1, train_acc=1, train_loss=0.0114, val_acc=0.7, val_loss=1.98]

Epoch 00197: reducing learning rate of group 0 to 1.5625e-05.


Epoch 223:  22%|▉   | 223/1000 [34:39<2:09:38, 10.01s/it, lr=1.56e-5, test_acc=0.733, time=11, train_acc=1, train_loss=0.00653, val_acc=0.683, val_loss=1.95]

Epoch 00223: reducing learning rate of group 0 to 7.8125e-06.


Epoch 249:  25%|▉   | 249/1000 [38:16<1:46:51,  8.54s/it, lr=7.81e-6, test_acc=0.733, time=9.35, train_acc=1, train_loss=0.00726, val_acc=0.7, val_loss=1.97]

Epoch 00249: reducing learning rate of group 0 to 3.9063e-06.


Epoch 275:  28%|█▋    | 275/1000 [42:17<1:46:05,  8.78s/it, lr=3.91e-6, test_acc=0.717, time=7.71, train_acc=1, train_loss=0.0123, val_acc=0.683, val_loss=2]

Epoch 00275: reducing learning rate of group 0 to 1.9531e-06.


Epoch 300:  30%|█▏  | 300/1000 [46:12<1:47:49,  9.24s/it, lr=1.95e-6, test_acc=0.733, time=10.5, train_acc=1, train_loss=0.011, val_acc=0.683, val_loss=1.96]


Epoch 00301: reducing learning rate of group 0 to 9.7656e-07.

!! LR EQUAL TO MIN LR SET.
Test Accuracy [LAST EPOCH]: 0.7333
Train Accuracy [LAST EPOCH]: 1.0000
Convergence Time (Epochs): 300.0000
RUN NUMBER:  2
Training Graphs:  480
Validation Graphs:  60
Test Graphs:  60
Number of Classes:  6
True
1.0


Epoch 62:   6%|▍      | 62/1000 [09:36<2:14:39,  8.61s/it, lr=0.001, test_acc=0.45, time=7.45, train_acc=0.863, train_loss=0.399, val_acc=0.5, val_loss=2.79]

Epoch 00062: reducing learning rate of group 0 to 5.0000e-04.


Epoch 88:   9%|▎  | 88/1000 [13:39<2:15:15,  8.90s/it, lr=0.0005, test_acc=0.667, time=7.76, train_acc=0.981, train_loss=0.0827, val_acc=0.55, val_loss=2.47]

Epoch 00088: reducing learning rate of group 0 to 2.5000e-04.


Epoch 114:  11%| | 114/1000 [17:17<1:30:48,  6.15s/it, lr=0.00025, test_acc=0.75, time=4.92, train_acc=0.996, train_loss=0.0329, val_acc=0.583, val_loss=2.53

Epoch 00114: reducing learning rate of group 0 to 1.2500e-04.


Epoch 140:  14%|▎ | 140/1000 [20:12<1:41:00,  7.05s/it, lr=0.000125, test_acc=0.7, time=5.32, train_acc=0.994, train_loss=0.0253, val_acc=0.6, val_loss=2.81]

Epoch 00140: reducing learning rate of group 0 to 6.2500e-05.


Epoch 166:  17%|▏| 166/1000 [22:57<1:15:38,  5.44s/it, lr=6.25e-5, test_acc=0.733, time=5.05, train_acc=0.998, train_loss=0.0236, val_acc=0.633, val_loss=2.5

Epoch 00166: reducing learning rate of group 0 to 3.1250e-05.


Epoch 192:  19%|▏| 192/1000 [25:40<1:26:53,  6.45s/it, lr=3.13e-5, test_acc=0.733, time=7.21, train_acc=0.996, train_loss=0.0162, val_acc=0.65, val_loss=2.74

Epoch 00192: reducing learning rate of group 0 to 1.5625e-05.


Epoch 218:  22%|█    | 218/1000 [29:25<1:56:06,  8.91s/it, lr=1.56e-5, test_acc=0.733, time=6.25, train_acc=1, train_loss=0.0105, val_acc=0.65, val_loss=2.9]

Epoch 00218: reducing learning rate of group 0 to 7.8125e-06.


Epoch 244:  24%|▋  | 244/1000 [33:52<2:10:24, 10.35s/it, lr=7.81e-6, test_acc=0.733, time=9.48, train_acc=1, train_loss=0.0153, val_acc=0.633, val_loss=2.82]

Epoch 00244: reducing learning rate of group 0 to 3.9063e-06.


Epoch 270:  27%|█▎   | 270/1000 [37:08<1:29:29,  7.36s/it, lr=3.91e-6, test_acc=0.733, time=8.8, train_acc=1, train_loss=0.0139, val_acc=0.65, val_loss=2.72]

Epoch 00270: reducing learning rate of group 0 to 1.9531e-06.


Epoch 295:  30%|▎| 295/1000 [40:03<1:35:45,  8.15s/it, lr=1.95e-6, test_acc=0.733, time=6.65, train_acc=0.998, train_loss=0.0171, val_acc=0.65, val_loss=2.75


Epoch 00296: reducing learning rate of group 0 to 9.7656e-07.

!! LR EQUAL TO MIN LR SET.
Test Accuracy [LAST EPOCH]: 0.7333
Train Accuracy [LAST EPOCH]: 1.0000
Convergence Time (Epochs): 295.0000
RUN NUMBER:  3
Training Graphs:  480
Validation Graphs:  60
Test Graphs:  60
Number of Classes:  6
True
1.0


Epoch 93:   9%|▋       | 93/1000 [10:36<1:52:25,  7.44s/it, lr=0.001, test_acc=0.6, time=7.31, train_acc=0.96, train_loss=0.13, val_acc=0.517, val_loss=2.16]

Epoch 00093: reducing learning rate of group 0 to 5.0000e-04.


Epoch 126:  13%|▏| 126/1000 [14:21<1:25:55,  5.90s/it, lr=0.0005, test_acc=0.717, time=5.35, train_acc=0.992, train_loss=0.0207, val_acc=0.65, val_loss=1.45]

Epoch 00126: reducing learning rate of group 0 to 2.5000e-04.


Epoch 152:  15%|▊    | 152/1000 [17:07<1:30:47,  6.42s/it, lr=0.00025, test_acc=0.717, time=7, train_acc=1, train_loss=0.00301, val_acc=0.717, val_loss=1.36]

Epoch 00152: reducing learning rate of group 0 to 1.2500e-04.


Epoch 178:  18%|▏| 178/1000 [20:04<1:28:26,  6.46s/it, lr=0.000125, test_acc=0.717, time=6.73, train_acc=1, train_loss=0.00372, val_acc=0.717, val_loss=1.37]

Epoch 00178: reducing learning rate of group 0 to 6.2500e-05.


Epoch 204:  20%|▊   | 204/1000 [22:48<1:28:44,  6.69s/it, lr=6.25e-5, test_acc=0.717, time=8.27, train_acc=1, train_loss=0.00212, val_acc=0.7, val_loss=1.39]

Epoch 00204: reducing learning rate of group 0 to 3.1250e-05.


Epoch 230:  23%|▍ | 230/1000 [25:33<1:17:23,  6.03s/it, lr=3.13e-5, test_acc=0.717, time=5.45, train_acc=1, train_loss=0.00271, val_acc=0.733, val_loss=1.37]

Epoch 00230: reducing learning rate of group 0 to 1.5625e-05.


Epoch 256:  26%|▌ | 256/1000 [28:05<1:19:36,  6.42s/it, lr=1.56e-5, test_acc=0.717, time=7.46, train_acc=1, train_loss=0.00105, val_acc=0.717, val_loss=1.37]

Epoch 00256: reducing learning rate of group 0 to 7.8125e-06.


Epoch 282:  28%|▌ | 282/1000 [31:08<1:12:13,  6.04s/it, lr=7.81e-6, test_acc=0.717, time=5.49, train_acc=1, train_loss=0.00219, val_acc=0.733, val_loss=1.37]

Epoch 00282: reducing learning rate of group 0 to 3.9063e-06.


Epoch 308:  31%|█▌   | 308/1000 [34:35<1:41:42,  8.82s/it, lr=3.91e-6, test_acc=0.717, time=9.27, train_acc=1, train_loss=0.00291, val_acc=0.7, val_loss=1.4]

Epoch 00308: reducing learning rate of group 0 to 1.9531e-06.


Epoch 333:  33%|▋ | 333/1000 [37:55<1:15:57,  6.83s/it, lr=1.95e-6, test_acc=0.717, time=5.61, train_acc=1, train_loss=0.00179, val_acc=0.717, val_loss=1.41]


Epoch 00334: reducing learning rate of group 0 to 9.7656e-07.

!! LR EQUAL TO MIN LR SET.
Test Accuracy [LAST EPOCH]: 0.7167
Train Accuracy [LAST EPOCH]: 1.0000
Convergence Time (Epochs): 333.0000
RUN NUMBER:  4
Training Graphs:  480
Validation Graphs:  60
Test Graphs:  60
Number of Classes:  6
True
1.0


Epoch 54:   5%|▎     | 54/1000 [05:56<1:55:05,  7.30s/it, lr=0.001, test_acc=0.517, time=7.82, train_acc=0.873, train_loss=0.403, val_acc=0.5, val_loss=2.08]

Epoch 00054: reducing learning rate of group 0 to 5.0000e-04.


Epoch 90:   9%|▎  | 90/1000 [10:02<1:39:43,  6.58s/it, lr=0.0005, test_acc=0.617, time=6.74, train_acc=0.992, train_loss=0.054, val_acc=0.617, val_loss=1.61]

Epoch 00090: reducing learning rate of group 0 to 2.5000e-04.


Epoch 116:  12%| | 116/1000 [13:08<1:59:27,  8.11s/it, lr=0.00025, test_acc=0.583, time=9.32, train_acc=0.983, train_loss=0.052, val_acc=0.683, val_loss=1.59

Epoch 00116: reducing learning rate of group 0 to 1.2500e-04.


Epoch 142:  14%|▏| 142/1000 [16:44<2:04:13,  8.69s/it, lr=0.000125, test_acc=0.6, time=8.42, train_acc=0.998, train_loss=0.0138, val_acc=0.633, val_loss=1.74

Epoch 00142: reducing learning rate of group 0 to 6.2500e-05.


Epoch 168:  17%|▏| 168/1000 [19:43<1:24:18,  6.08s/it, lr=6.25e-5, test_acc=0.65, time=5.9, train_acc=0.998, train_loss=0.0123, val_acc=0.633, val_loss=1.68]

Epoch 00168: reducing learning rate of group 0 to 3.1250e-05.


Epoch 194:  19%|▊   | 194/1000 [22:35<1:25:42,  6.38s/it, lr=3.13e-5, test_acc=0.617, time=6.32, train_acc=1, train_loss=0.00889, val_acc=0.6, val_loss=1.73]

Epoch 00194: reducing learning rate of group 0 to 1.5625e-05.


Epoch 220:  22%|▏| 220/1000 [25:09<1:14:30,  5.73s/it, lr=1.56e-5, test_acc=0.617, time=5.76, train_acc=0.998, train_loss=0.0106, val_acc=0.6, val_loss=1.76]

Epoch 00220: reducing learning rate of group 0 to 7.8125e-06.


Epoch 246:  25%|▋  | 246/1000 [27:45<1:15:09,  5.98s/it, lr=7.81e-6, test_acc=0.65, time=5.77, train_acc=1, train_loss=0.00779, val_acc=0.583, val_loss=1.81]

Epoch 00246: reducing learning rate of group 0 to 3.9063e-06.


Epoch 272:  27%|▊  | 272/1000 [30:19<1:19:03,  6.52s/it, lr=3.91e-6, test_acc=0.65, time=5.72, train_acc=1, train_loss=0.00516, val_acc=0.617, val_loss=1.71]

Epoch 00272: reducing learning rate of group 0 to 1.9531e-06.


Epoch 297:  30%|█▍   | 297/1000 [33:24<1:19:05,  6.75s/it, lr=1.95e-6, test_acc=0.65, time=7.76, train_acc=1, train_loss=0.00756, val_acc=0.6, val_loss=1.77]


Epoch 00298: reducing learning rate of group 0 to 9.7656e-07.

!! LR EQUAL TO MIN LR SET.
Test Accuracy [LAST EPOCH]: 0.6500
Train Accuracy [LAST EPOCH]: 1.0000
Convergence Time (Epochs): 297.0000
RUN NUMBER:  5
Training Graphs:  480
Validation Graphs:  60
Test Graphs:  60
Number of Classes:  6
True
1.0


Epoch 46:   5%|▍        | 46/1000 [05:50<2:22:12,  8.94s/it, lr=0.001, test_acc=0.4, time=11, train_acc=0.792, train_loss=0.57, val_acc=0.383, val_loss=2.51]

Epoch 00046: reducing learning rate of group 0 to 5.0000e-04.


Epoch 72:   7%|▎   | 72/1000 [09:25<2:30:30,  9.73s/it, lr=0.0005, test_acc=0.367, time=8.31, train_acc=0.95, train_loss=0.173, val_acc=0.433, val_loss=3.14]

Epoch 00072: reducing learning rate of group 0 to 2.5000e-04.


Epoch 98:  10%| | 98/1000 [13:22<2:04:29,  8.28s/it, lr=0.00025, test_acc=0.683, time=7.02, train_acc=0.992, train_loss=0.0655, val_acc=0.683, val_loss=2.04]

Epoch 00098: reducing learning rate of group 0 to 1.2500e-04.


Epoch 124:  12%|▍   | 124/1000 [17:15<2:08:32,  8.80s/it, lr=0.000125, test_acc=0.667, time=8.72, train_acc=1, train_loss=0.039, val_acc=0.65, val_loss=2.18]

Epoch 00124: reducing learning rate of group 0 to 6.2500e-05.


Epoch 150:  15%|▎ | 150/1000 [21:10<1:58:13,  8.34s/it, lr=6.25e-5, test_acc=0.7, time=6.26, train_acc=0.983, train_loss=0.0519, val_acc=0.65, val_loss=2.14]

Epoch 00150: reducing learning rate of group 0 to 3.1250e-05.


Epoch 176:  18%|▏| 176/1000 [23:57<1:27:35,  6.38s/it, lr=3.13e-5, test_acc=0.667, time=7.51, train_acc=0.996, train_loss=0.0346, val_acc=0.65, val_loss=2.14

Epoch 00176: reducing learning rate of group 0 to 1.5625e-05.


Epoch 202:  20%|█    | 202/1000 [26:48<1:42:21,  7.70s/it, lr=1.56e-5, test_acc=0.7, time=7.23, train_acc=1, train_loss=0.0273, val_acc=0.617, val_loss=2.17]

Epoch 00202: reducing learning rate of group 0 to 7.8125e-06.


Epoch 228:  23%|▉   | 228/1000 [29:48<1:16:24,  5.94s/it, lr=7.81e-6, test_acc=0.683, time=5.94, train_acc=1, train_loss=0.022, val_acc=0.633, val_loss=2.23]

Epoch 00228: reducing learning rate of group 0 to 3.9063e-06.


Epoch 254:  25%|█▊     | 254/1000 [32:34<1:24:38,  6.81s/it, lr=3.91e-6, test_acc=0.683, time=7, train_acc=1, train_loss=0.0218, val_acc=0.65, val_loss=2.16]

Epoch 00254: reducing learning rate of group 0 to 1.9531e-06.


Epoch 279:  28%|▎| 279/1000 [35:17<1:31:11,  7.59s/it, lr=1.95e-6, test_acc=0.683, time=8.68, train_acc=0.998, train_loss=0.0294, val_acc=0.65, val_loss=2.21


Epoch 00280: reducing learning rate of group 0 to 9.7656e-07.

!! LR EQUAL TO MIN LR SET.
Test Accuracy [LAST EPOCH]: 0.6833
Train Accuracy [LAST EPOCH]: 1.0000
Convergence Time (Epochs): 279.0000
RUN NUMBER:  6
Training Graphs:  480
Validation Graphs:  60
Test Graphs:  60
Number of Classes:  6
True
1.0


Epoch 47:   5%|▎     | 47/1000 [05:47<1:50:41,  6.97s/it, lr=0.001, test_acc=0.45, time=6.14, train_acc=0.821, train_loss=0.55, val_acc=0.333, val_loss=2.94]

Epoch 00047: reducing learning rate of group 0 to 5.0000e-04.


Epoch 73:   7%|▎    | 73/1000 [09:05<1:49:48,  7.11s/it, lr=0.0005, test_acc=0.6, time=6.56, train_acc=0.938, train_loss=0.193, val_acc=0.467, val_loss=3.35]

Epoch 00073: reducing learning rate of group 0 to 2.5000e-04.


Epoch 99:  10%|▏ | 99/1000 [11:49<1:35:37,  6.37s/it, lr=0.00025, test_acc=0.667, time=6.7, train_acc=0.988, train_loss=0.0602, val_acc=0.583, val_loss=2.89]

Epoch 00099: reducing learning rate of group 0 to 1.2500e-04.


Epoch 125:  12%|▏| 125/1000 [14:33<1:27:59,  6.03s/it, lr=0.000125, test_acc=0.717, time=5.2, train_acc=0.996, train_loss=0.045, val_acc=0.617, val_loss=2.98

Epoch 00125: reducing learning rate of group 0 to 6.2500e-05.


Epoch 151:  15%|▊    | 151/1000 [17:09<1:23:05,  5.87s/it, lr=6.25e-5, test_acc=0.733, time=5.3, train_acc=1, train_loss=0.031, val_acc=0.633, val_loss=3.11]

Epoch 00151: reducing learning rate of group 0 to 3.1250e-05.


Epoch 177:  18%|▏| 177/1000 [19:55<1:22:36,  6.02s/it, lr=3.13e-5, test_acc=0.717, time=5.51, train_acc=0.998, train_loss=0.0197, val_acc=0.617, val_loss=3.1

Epoch 00177: reducing learning rate of group 0 to 1.5625e-05.


Epoch 203:  20%|▏| 203/1000 [22:31<1:22:55,  6.24s/it, lr=1.56e-5, test_acc=0.733, time=7.03, train_acc=0.992, train_loss=0.0395, val_acc=0.633, val_loss=3.1

Epoch 00203: reducing learning rate of group 0 to 7.8125e-06.


Epoch 229:  23%|█▏   | 229/1000 [25:09<1:16:07,  5.92s/it, lr=7.81e-6, test_acc=0.733, time=5.8, train_acc=1, train_loss=0.0181, val_acc=0.633, val_loss=3.2]

Epoch 00229: reducing learning rate of group 0 to 3.9063e-06.


Epoch 255:  26%|▎| 255/1000 [28:16<1:44:01,  8.38s/it, lr=3.91e-6, test_acc=0.733, time=8.2, train_acc=0.996, train_loss=0.0332, val_acc=0.633, val_loss=3.18

Epoch 00255: reducing learning rate of group 0 to 1.9531e-06.


Epoch 280:  28%|▊  | 280/1000 [31:08<1:20:03,  6.67s/it, lr=1.95e-6, test_acc=0.733, time=5.85, train_acc=1, train_loss=0.0231, val_acc=0.633, val_loss=3.14]


Epoch 00281: reducing learning rate of group 0 to 9.7656e-07.

!! LR EQUAL TO MIN LR SET.
Test Accuracy [LAST EPOCH]: 0.7333
Train Accuracy [LAST EPOCH]: 1.0000
Convergence Time (Epochs): 280.0000
RUN NUMBER:  7
Training Graphs:  480
Validation Graphs:  60
Test Graphs:  60
Number of Classes:  6
True
1.0


Epoch 110:  11%|▎  | 110/1000 [12:39<1:28:53,  5.99s/it, lr=0.001, test_acc=0.733, time=4.97, train_acc=0.99, train_loss=0.0218, val_acc=0.75, val_loss=1.53]

Epoch 00110: reducing learning rate of group 0 to 5.0000e-04.


Epoch 136:  14%|▏| 136/1000 [15:24<1:26:59,  6.04s/it, lr=0.0005, test_acc=0.75, time=5.53, train_acc=0.998, train_loss=0.00738, val_acc=0.783, val_loss=1.43

Epoch 00136: reducing learning rate of group 0 to 2.5000e-04.


Epoch 162:  16%|▎ | 162/1000 [18:47<2:07:22,  9.12s/it, lr=0.00025, test_acc=0.733, time=8.89, train_acc=1, train_loss=0.00537, val_acc=0.767, val_loss=1.52]

Epoch 00162: reducing learning rate of group 0 to 1.2500e-04.


Epoch 188:  19%|▍ | 188/1000 [22:04<1:36:12,  7.11s/it, lr=0.000125, test_acc=0.75, time=7.76, train_acc=1, train_loss=0.00169, val_acc=0.783, val_loss=1.57]

Epoch 00188: reducing learning rate of group 0 to 6.2500e-05.


Epoch 214:  21%|▏| 214/1000 [24:46<1:19:07,  6.04s/it, lr=6.25e-5, test_acc=0.733, time=5.43, train_acc=0.998, train_loss=0.00522, val_acc=0.8, val_loss=1.56

Epoch 00214: reducing learning rate of group 0 to 3.1250e-05.


Epoch 240:  24%|▉   | 240/1000 [27:30<1:23:20,  6.58s/it, lr=3.13e-5, test_acc=0.75, time=6.89, train_acc=1, train_loss=0.00289, val_acc=0.767, val_loss=1.6]

Epoch 00240: reducing learning rate of group 0 to 1.5625e-05.


Epoch 266:  27%|▌ | 266/1000 [30:21<1:36:01,  7.85s/it, lr=1.56e-5, test_acc=0.733, time=8.56, train_acc=1, train_loss=0.00139, val_acc=0.783, val_loss=1.57]

Epoch 00266: reducing learning rate of group 0 to 7.8125e-06.


Epoch 292:  29%|▉  | 292/1000 [33:44<1:12:49,  6.17s/it, lr=7.81e-6, test_acc=0.75, time=5.83, train_acc=1, train_loss=0.00149, val_acc=0.783, val_loss=1.59]

Epoch 00292: reducing learning rate of group 0 to 3.9063e-06.


Epoch 318:  32%|▉  | 318/1000 [36:58<1:16:25,  6.72s/it, lr=3.91e-6, test_acc=0.733, time=7.97, train_acc=1, train_loss=0.00153, val_acc=0.75, val_loss=1.58]

Epoch 00318: reducing learning rate of group 0 to 1.9531e-06.


Epoch 343:  34%|█  | 343/1000 [40:36<1:17:47,  7.10s/it, lr=1.95e-6, test_acc=0.733, time=8.51, train_acc=1, train_loss=0.00163, val_acc=0.767, val_loss=1.6]


Epoch 00344: reducing learning rate of group 0 to 9.7656e-07.

!! LR EQUAL TO MIN LR SET.
Test Accuracy [LAST EPOCH]: 0.7333
Train Accuracy [LAST EPOCH]: 1.0000
Convergence Time (Epochs): 343.0000
RUN NUMBER:  8
Training Graphs:  480
Validation Graphs:  60
Test Graphs:  60
Number of Classes:  6
True
1.0


Epoch 55:   6%|▎    | 55/1000 [07:00<1:50:37,  7.02s/it, lr=0.001, test_acc=0.55, time=6.49, train_acc=0.829, train_loss=0.493, val_acc=0.483, val_loss=1.96]

Epoch 00055: reducing learning rate of group 0 to 5.0000e-04.


Epoch 81:   8%|▍    | 81/1000 [10:34<1:59:23,  7.79s/it, lr=0.0005, test_acc=0.6, time=7.43, train_acc=0.931, train_loss=0.197, val_acc=0.617, val_loss=1.78]

Epoch 00081: reducing learning rate of group 0 to 2.5000e-04.


Epoch 107:  11%| | 107/1000 [13:55<1:53:28,  7.62s/it, lr=0.00025, test_acc=0.617, time=7.84, train_acc=0.988, train_loss=0.0623, val_acc=0.717, val_loss=1.5

Epoch 00107: reducing learning rate of group 0 to 1.2500e-04.


Epoch 133:  13%|▎ | 133/1000 [17:10<1:41:29,  7.02s/it, lr=0.000125, test_acc=0.667, time=6.63, train_acc=1, train_loss=0.0192, val_acc=0.683, val_loss=1.61]

Epoch 00133: reducing learning rate of group 0 to 6.2500e-05.


Epoch 159:  16%|▏| 159/1000 [20:12<1:43:18,  7.37s/it, lr=6.25e-5, test_acc=0.6, time=7.97, train_acc=0.998, train_loss=0.0241, val_acc=0.633, val_loss=1.67]

Epoch 00159: reducing learning rate of group 0 to 3.1250e-05.


Epoch 185:  18%|▉    | 185/1000 [23:12<1:30:53,  6.69s/it, lr=3.13e-5, test_acc=0.6, time=7.22, train_acc=1, train_loss=0.0184, val_acc=0.667, val_loss=1.64]

Epoch 00185: reducing learning rate of group 0 to 1.5625e-05.


Epoch 211:  21%|▏| 211/1000 [26:27<1:28:18,  6.72s/it, lr=1.56e-5, test_acc=0.617, time=6.35, train_acc=0.998, train_loss=0.0189, val_acc=0.633, val_loss=1.6

Epoch 00211: reducing learning rate of group 0 to 7.8125e-06.


Epoch 237:  24%|█▏   | 237/1000 [29:42<1:42:28,  8.06s/it, lr=7.81e-6, test_acc=0.617, time=6.27, train_acc=1, train_loss=0.0167, val_acc=0.65, val_loss=1.7]

Epoch 00237: reducing learning rate of group 0 to 3.9063e-06.


Epoch 263:  26%|█   | 263/1000 [32:59<1:35:09,  7.75s/it, lr=3.91e-6, test_acc=0.617, time=8.41, train_acc=1, train_loss=0.0156, val_acc=0.65, val_loss=1.67]

Epoch 00263: reducing learning rate of group 0 to 1.9531e-06.


Epoch 288:  29%|▎| 288/1000 [36:02<1:29:07,  7.51s/it, lr=1.95e-6, test_acc=0.617, time=8.38, train_acc=0.998, train_loss=0.0205, val_acc=0.633, val_loss=1.7


Epoch 00289: reducing learning rate of group 0 to 9.7656e-07.

!! LR EQUAL TO MIN LR SET.
Test Accuracy [LAST EPOCH]: 0.6167
Train Accuracy [LAST EPOCH]: 0.9979
Convergence Time (Epochs): 288.0000
RUN NUMBER:  9
Training Graphs:  480
Validation Graphs:  60
Test Graphs:  60
Number of Classes:  6
True
1.0


Epoch 68:   7%|▍      | 68/1000 [08:33<1:41:07,  6.51s/it, lr=0.001, test_acc=0.517, time=7.27, train_acc=0.9, train_loss=0.307, val_acc=0.55, val_loss=2.03]

Epoch 00068: reducing learning rate of group 0 to 5.0000e-04.


Epoch 100:  10%|▏ | 100/1000 [11:53<1:29:05,  5.94s/it, lr=0.0005, test_acc=0.617, time=6.1, train_acc=0.992, train_loss=0.0435, val_acc=0.65, val_loss=1.55]

Epoch 00100: reducing learning rate of group 0 to 2.5000e-04.


Epoch 127:  13%|▍  | 127/1000 [14:42<1:23:09,  5.71s/it, lr=0.00025, test_acc=0.583, time=5.35, train_acc=1, train_loss=0.0115, val_acc=0.683, val_loss=1.49]

Epoch 00127: reducing learning rate of group 0 to 1.2500e-04.


Epoch 153:  15%|▏| 153/1000 [17:33<1:33:55,  6.65s/it, lr=0.000125, test_acc=0.6, time=6.18, train_acc=0.998, train_loss=0.0109, val_acc=0.683, val_loss=1.61

Epoch 00153: reducing learning rate of group 0 to 6.2500e-05.


Epoch 179:  18%|▉    | 179/1000 [20:19<1:28:31,  6.47s/it, lr=6.25e-5, test_acc=0.617, time=5.8, train_acc=1, train_loss=0.00706, val_acc=0.7, val_loss=1.57]

Epoch 00179: reducing learning rate of group 0 to 3.1250e-05.


Epoch 205:  20%|█▏    | 205/1000 [23:10<1:20:30,  6.08s/it, lr=3.13e-5, test_acc=0.6, time=6.29, train_acc=1, train_loss=0.00703, val_acc=0.7, val_loss=1.56]

Epoch 00205: reducing learning rate of group 0 to 1.5625e-05.


Epoch 257:  26%|█   | 257/1000 [28:26<1:16:56,  6.21s/it, lr=7.81e-6, test_acc=0.6, time=6.08, train_acc=1, train_loss=0.00563, val_acc=0.733, val_loss=1.56]

Epoch 00257: reducing learning rate of group 0 to 3.9063e-06.


Epoch 283:  28%|▎| 283/1000 [31:10<1:12:03,  6.03s/it, lr=3.91e-6, test_acc=0.6, time=6.3, train_acc=0.998, train_loss=0.00931, val_acc=0.733, val_loss=1.59]

Epoch 00283: reducing learning rate of group 0 to 1.9531e-06.


Epoch 308:  31%|▎| 308/1000 [33:53<1:16:08,  6.60s/it, lr=1.95e-6, test_acc=0.6, time=6.66, train_acc=0.998, train_loss=0.00828, val_acc=0.733, val_loss=1.56


Epoch 00309: reducing learning rate of group 0 to 9.7656e-07.

!! LR EQUAL TO MIN LR SET.
Test Accuracy [LAST EPOCH]: 0.6000
Train Accuracy [LAST EPOCH]: 1.0000
Convergence Time (Epochs): 308.0000
TOTAL TIME TAKEN: 6.2716hrs
AVG TIME PER EPOCH: 7.3937s
AVG CONVERGENCE Time (Epochs): 303.7000



FINAL RESULTS

TEST ACCURACY averaged: 68.0000 with s.d. 5.5176

All splits Test Accuracies:
 [0.6, 0.7333333333333333, 0.7333333333333333, 0.7166666666666667, 0.65, 0.6833333333333333, 0.7333333333333333, 0.7333333333333333, 0.6166666666666667, 0.6]



FINAL RESULTS

TRAIN ACCURACY averaged: 99.9792 with s.d. 0.0625

All splits Train Accuracies:
 [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.9979166666666667, 1.0]
