In [1]:
import os
import random
import warnings
warnings.filterwarnings(action='ignore')

import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split

import torch
import torch.nn as nn

from torch_geometric.datasets import CoraFull, Planetoid, CitationFull
from torch_geometric.transforms import NormalizeFeatures
import optuna
from optuna.trial import TrialState

import torchmetrics


from models import GAT, GraphSAGE, GIN
from utils import train_model, test_model, train_constrative_model, valid_model
from mean_average_distance import MAD, MADGap
from virtualnode import VirtualClassNode_init, UnidirectionalVirtualClassNode_init, VirtualClassNode, UnidirectionalVirtualClassNode

torch.manual_seed(42)
torch.cuda.manual_seed(42)
np.random.seed(42)
random.seed(42)
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False


In [2]:
SAVE_PATH = 'results'
EARLY_STOPPING = 50

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

dataset = CitationFull(root='dataset/Cora', name='Cora', transform=NormalizeFeatures())
device

device(type='cuda', index=0)

In [3]:
data = dataset[0]
df = pd.DataFrame(data.x)
df['y'] = data.y
train, valid = train_test_split(df, stratify=df.y, test_size=0.4)
valid, test = train_test_split(valid, stratify=valid.y, test_size=0.5)
data.train_mask = torch.zeros(data.num_nodes, dtype=torch.bool)
data.train_mask[train.index]=True
data.valid_mask = torch.zeros(data.num_nodes, dtype=torch.bool)
data.valid_mask[valid.index]=True
data.test_mask = torch.zeros(data.num_nodes, dtype=torch.bool)
data.test_mask[test.index]=True
data.to(device)

Data(x=[19793, 8710], edge_index=[2, 126842], y=[19793], train_mask=[19793], valid_mask=[19793], test_mask=[19793])

In [4]:
mad = MAD(device=device, global_flag=True)
madgap = MADGap(device, 3, 8)

In [5]:
f1score = torchmetrics.F1Score(task = "multiclass", average="micro", num_classes=dataset.num_classes).to(device)

In [6]:
tuning_result = pd.DataFrame({
                            'trial' : [],
                            'model' : [],
                            'virtualnode' : [],
                            'vnode_init' : [],
                            'temperature' : [],
                            'constrative coef' : [],
                            'gaussian_noise_scale' : [],
                            'lr' : [],
                            'train_acc' : [],
                            'train_loss' : [],
                            'val_acc' : [],
                            'val_loss' : [],
                            'test_acc' : [],
                            'macro f1' : [],
                            'micro f1'
                            'minor f1' : [],
                            'mad' : [],
                            'madgap' : []                            
                            })

tuning_result.to_csv("training_res.csv", index=False)

In [7]:
def objective(trial):

    Vnodes = trial.suggest_categorical("vnode", ["uni", "bi"])

    # vnode_init = trial.suggest_categorical("class_mean", ["random", "zero"])
    
    if Vnodes == "uni":
        vc = UnidirectionalVirtualClassNode()
        data_for_tuning = vc.forward(data)
    else:
        vc = VirtualClassNode()
        data_for_tuning = vc.forward(data)
        
    temperature = trial.suggest_float("temperature", 0.1, 10)
    constrative_coef = trial.suggest_loguniform("contrastive_loss_coef", 1e-5, 1)
    cnode_weight = trial.suggest_loguniform("cnode_weight", 1, 10)
    lr = trial.suggest_loguniform("lr", 5e-4, 1e-2)
    positive_sampling = trial.suggest_categorical("positive_sampling", [True, False])

    g_noise = trial.suggest_categorical("g_noise", [0.0, 1e-6, 1e-5, 1e-4, 1e-3, 1e-2])

    model_ = trial.suggest_categorical("model", ["sage", "gat", "gin"])
    if model_ == "sage":
        model = GraphSAGE(in_channels=dataset.num_features, hidden_channels=256, number_of_classes=dataset.num_classes, num_of_hidden_layers=4, device=device, noise_level=g_noise)
    elif model_ == "gat":
        model = GAT(in_channels=dataset.num_features, hidden_channels=476, number_of_classes=dataset.num_classes, num_of_hidden_layers=4, device=device, heads=1, noise_level=g_noise)
    elif model_ == "gin":
        model = GIN(in_channels=dataset.num_features, hidden_channels=415, number_of_classes=dataset.num_classes, num_of_hidden_layers=4, device=device, noise_level=g_noise)
    print('='*110)
    print(f'VC : {Vnodes}, lr : {lr:.5f}, temp : {temperature:.5f}, constrative coef : {constrative_coef:.5f}')
    print(f'noise : {g_noise:.4f}, cnode weight: {cnode_weight:.2f}, positive_sampling: {positive_sampling} ')
    max_loss = 10000
    early_stopping_count = 0
    print(f'Model: {model.name} | Number of parameters: {model.get_n_params()}')
    print('')
    model = model.to(device)
    data_for_tuning = data_for_tuning.to(device)
    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.AdamW(model.parameters(), lr=lr, weight_decay=5e-4)
    losses = []
    accs = []
    val_losses = []
    val_accs = []
    for epoch in range(2000):
        loss, acc = train_constrative_model(model, data_for_tuning, optimizer, criterion, 
                                            constrative_coef=constrative_coef, 
                                            temperature=temperature, 
                                            cnode_weight=cnode_weight,
                                            positive_sampling=positive_sampling)
        losses.append(loss.item())
        accs.append(100*acc)
        val_loss, val_acc = valid_model(model, data_for_tuning, criterion, 
                                        constrative_coef=constrative_coef, temperature=temperature, 
                                        cnode_weight=cnode_weight,
                                        positive_sampling = positive_sampling)
        val_accs.append(100*val_acc)
        if val_loss < max_loss:
            max_loss = val_loss
            early_stopping_count = 0
        else:
            early_stopping_count += 1
            if early_stopping_count > EARLY_STOPPING:
                print("Early stopping..")
                break
        if epoch%20==0:
            print(f'Epoch: {epoch:03d}, Train Loss: {loss:.4f}, Train Acc: {100*acc:.2f}, Valid Loss: {val_loss:.4f}, Valid Acc: {100*val_acc:.2f}')
        if epoch > 1000:
            if val_acc < 0.4:
                print('underfitting...')
                break
    report = test_model(model, data_for_tuning)
    result = pd.DataFrame(report).T
    result_sliced = result.iloc[:-3 if len(result) < 23 else 20, :]
    test_acc = result.loc['accuracy'][0]
    result.loc['minorities-f1',:] = result_sliced.mean(axis=0)
    result.to_csv(os.path.join(SAVE_PATH, f'{model.name}_layers{model.num_of_hidden_layers}_neurons{model.hidden_channels}'+'.csv'))
    result = model(data_for_tuning.x.to(device), data_for_tuning.edge_index.to(device))[0].cpu()
    global_mad = mad(result).item()
    mad_gap = madgap(result, data_for_tuning.edge_index).item()

    model.eval()
    out, _ = model(data_for_tuning.x, data_for_tuning.edge_index)
    pred = out.argmax(dim=-1)
    f1 = f1score(data_for_tuning.y[data_for_tuning.valid_mask], pred[data_for_tuning.valid_mask])

    exp_result_dict = {
        'trial' : trial.number,
        'model' : model.name,
        'virtualnode' : vc,
        #'vnode_init' : vnode_init,
        'temperature' : temperature,
        'constrative coef' : constrative_coef,
        'gaussian_noise_scale'  : g_noise,
        'lr' : lr,
        'train_acc' : acc,
        'train_loss' : loss,
        'val_acc' : val_acc,
        'val_loss' : val_loss,
        'test_acc' : test_acc,
        'macro f1' : pd.DataFrame(report).T.loc['macro avg', 'f1-score'],
        'micro f1' : pd.DataFrame(report).T.loc['weighted avg', 'f1-score'],
        'minor f1' : pd.DataFrame(report).T[:-3].sort_values(by='support', ascending=False)[-11:].mean()['f1-score'],
        'mad' : global_mad,
        'madgap' : mad_gap                            
    }
    
    
    tuning_result = pd.read_csv("training_res.csv")
    exp_result_dict = pd.DataFrame(exp_result_dict, index=[trial.number])
    pd.concat([tuning_result, exp_result_dict], axis=0).to_csv("training_res.csv", index=False)
    
    torch.save(model.state_dict() , "model_file/{}_{}.pt".format(model.name, str(trial.number)))
                    
    print(f'global_mad: {global_mad}')
    print(f'madgap: {mad_gap}')
    print(f'Test Acc: {100*test_acc}')
    
    print('==========================================', end='\n\n')

    return f1


In [8]:
study = optuna.create_study(direction="maximize")
study.optimize(objective, n_trials=100)


pruned_trials = study.get_trials(deepcopy=False, states=[TrialState.PRUNED])
complete_trials = study.get_trials(deepcopy=False, states=[TrialState.COMPLETE])

[32m[I 2023-06-06 18:46:55,930][0m A new study created in memory with name: no-name-b81fdf8c-3802-4f5d-8e50-f0778a9a1cec[0m


VC : bi, lr : 0.00153, temp : 3.85288, constrative coef : 0.01068
noise : 0.0000, cnode weight: 5.48, positive_sampling: False 
Model: GIN | Number of parameters: 4826865

Epoch: 000, Train Loss: 42.1312, Train Acc: 0.09, Valid Loss: 46.6906, Valid Acc: 2.31
Epoch: 020, Train Loss: 4.1099, Train Acc: 45.45, Valid Loss: 123.3125, Valid Acc: 0.69
Epoch: 040, Train Loss: 2.0100, Train Acc: 66.67, Valid Loss: 84.7812, Valid Acc: 2.43
Epoch: 060, Train Loss: 1.3225, Train Acc: 88.87, Valid Loss: 3.8145, Valid Acc: 17.47
Epoch: 080, Train Loss: 0.8644, Train Acc: 90.94, Valid Loss: 0.5685, Valid Acc: 24.27
Epoch: 100, Train Loss: 0.5239, Train Acc: 94.83, Valid Loss: 0.3204, Valid Acc: 27.53
Epoch: 120, Train Loss: 0.3917, Train Acc: 95.94, Valid Loss: 0.2185, Valid Acc: 29.31
Epoch: 140, Train Loss: 0.3040, Train Acc: 97.46, Valid Loss: 0.1490, Valid Acc: 29.64
Epoch: 160, Train Loss: 0.2163, Train Acc: 98.09, Valid Loss: 0.1076, Valid Acc: 30.70
Epoch: 180, Train Loss: 0.1884, Train Acc: 9

In [None]:


print("Study statistics: ")
print("  Number of finished trials: ", len(study.trials))
print("  Number of pruned trials: ", len(pruned_trials))
print("  Number of complete trials: ", len(complete_trials))

print("Best trial:")
trial = study.best_trial

print("  Value: ", trial.value)

print("  Params: ")
for key, value in trial.params.items():
    print("    {}: {}".format(key, value))



In [5]:
hyperparameters = {
    'virtualnode' : [UnidirectionalVirtualClassNode(), VirtualClassNode()],
    'temperature' : np.linspace(0.1, 1, num=10),
    'constrative_coef' : np.logspace(-4, -1, 6),
    'lr': np.logspace(-4, -2, 5)
}

tuning_result = pd.DataFrame({
                            'model' : [],
                            'virtualnode' : [],
                            'temperature' : [],
                            'constrative coef' : [],
                            'lr' : [],
                            'train_acc' : [],
                            'train_loss' : [],
                            'val_acc' : [],
                            'val_loss' : [],
                            'test_acc' : [],
                            'macro f1' : [],
                            'micro f1'
                            'minor f1' : [],
                            'mad' : [],
                            'madgap' : []                            
                            })
tuning_result.to_csv('')

In [6]:
for virtualnode in hyperparameters['virtualnode']:
    if virtualnode is None:
        vc = 'None'
        data_for_tuning = data
        constrative_flag = False
    else:
        vc = virtualnode
        data_for_tuning = vc.forward(data)
        constrative_flag = True
        
    for temperature in hyperparameters['temperature']:
        for constrative_coef in hyperparameters['constrative_coef']:
            for lr in hyperparameters['lr']:
                models = [GraphSAGE(in_channels=dataset.num_features, hidden_channels=256, number_of_classes=dataset.num_classes, num_of_hidden_layers=4, device=device)]
                for model in models:
                    print(f'VC : {vc}, temp : {temperature:.5f}, constrative coef : {constrative_coef:.5f}, lr : {lr:.5f} ')
                    max_loss = 10000
                    early_stopping_count = 0
                    print(f'Model: {model.name} | Number of parameters: {model.get_n_params()}')
                    model = model.to(device)
                    data_for_tuning = data_for_tuning.to(device)
                    criterion = nn.CrossEntropyLoss()
                    optimizer = torch.optim.AdamW(model.parameters(), lr=lr, weight_decay=5e-4)
                    losses = []
                    accs = []
                    val_losses = []
                    val_accs = []
                    for epoch in range(5000):
                        loss, acc = train_constrative_model(model, data_for_tuning, optimizer, criterion, 
                                                            constrative_coef=constrative_coef, temperature=temperature)
                        losses.append(loss.item())
                        accs.append(100*acc)
                        val_loss, val_acc = valid_model(model, data_for_tuning, criterion, constrative_flag=constrative_flag, 
                                                        constrative_coef=constrative_coef, temperature=temperature)
                        val_accs.append(100*val_acc)
                        if val_loss < max_loss:
                            max_loss = val_loss
                            early_stopping_count = 0
                        else:
                            early_stopping_count += 1
                            if early_stopping_count > EARLY_STOPPING:
                                print("Early stopping..")
                                break
                        if epoch%10==0:
                            print(f'Epoch: {epoch:03d}, Train Loss: {loss:.4f}, Train Acc: {100*acc:.2f}, Valid Loss: {val_loss:.4f}, Valid Acc: {100*val_acc:.2f}')
                        if epoch > 200:
                            if val_acc < 0.1:
                                print('underfitting...')
                                break
                    report = test_model(model, data_for_tuning)
                    result = pd.DataFrame(report).T
                    result_sliced = result.iloc[:-3 if len(result) < 23 else 20, :]
                    test_acc = result.loc['accuracy'][0]
                    result.loc['minorities-f1',:] = result_sliced.mean(axis=0)
                    result.to_csv(os.path.join(SAVE_PATH, f'{model.name}_layers{model.num_of_hidden_layers}_neurons{model.hidden_channels}'+'.csv'))
                    result = model(data_for_tuning.x.to(device), data_for_tuning.edge_index.to(device))[0].cpu()
                    global_mad = mad(result).item()
                    mad_gap = madgap(result, data_for_tuning.edge_index).item()
                    
                    exp_result_dict = {
                        'model' : model.name,
                        'virtualnode' : vc,
                        'temperature' : temperature,
                        'constrative coef' : constrative_coef,
                        'lr' : lr,
                        'train_acc' : acc,
                        'train_loss' : loss,
                        'val_acc' : val_acc,
                        'val_loss' : val_loss,
                        'test_acc' : test_acc,
                        'macro f1' : pd.DataFrame(report).T.loc['macro avg', 'f1-score'],
                        'micro f1' : pd.DataFrame(report).T.loc['weighted avg', 'f1-score'],
                        'minor f1' : pd.DataFrame(report).T[:-3].sort_values(by='support', ascending=False)[-11:].mean()['f1-score'],
                        'mad' : global_mad,
                        'madgap' : mad_gap                            
                    }
                    
                    tuning_result = tuning_result.append(exp_result_dict, ignore_index=True)
                    
                    print(f'global_mad: {global_mad}')
                    print(f'madgap: {mad_gap}')
                    print(f'Test Acc: {100*test_acc}')
                    
                    print('==========================================', end='\n\n')
                    del model
                    torch.cuda.empty_cache()   
                    
    del data_for_tuning     
    torch.cuda.empty_cache()        


VC : UnidirectionalVirtualClassNode(), temp : 0.10000, constrative coef : 0.00010, lr : 0.00010 
Model: GraphSAGE | Number of parameters: 4853760
Epoch: 000, Train Loss: 4.3002, Train Acc: 1.00, Valid Loss: 4.3164, Valid Acc: 0.79
Epoch: 010, Train Loss: 4.2717, Train Acc: 4.07, Valid Loss: 4.2959, Valid Acc: 3.92
Epoch: 020, Train Loss: 4.2500, Train Acc: 4.24, Valid Loss: 4.2786, Valid Acc: 4.64
Epoch: 030, Train Loss: 4.2269, Train Acc: 4.59, Valid Loss: 4.2478, Valid Acc: 4.64
Epoch: 040, Train Loss: 4.1707, Train Acc: 4.63, Valid Loss: 4.1765, Valid Acc: 4.27
Epoch: 050, Train Loss: 4.0989, Train Acc: 4.31, Valid Loss: 4.1030, Valid Acc: 4.27
Epoch: 060, Train Loss: 4.0783, Train Acc: 4.52, Valid Loss: 4.0855, Valid Acc: 4.64
Epoch: 070, Train Loss: 4.0666, Train Acc: 4.69, Valid Loss: 4.0743, Valid Acc: 4.64
Epoch: 080, Train Loss: 4.0565, Train Acc: 4.93, Valid Loss: 4.0664, Valid Acc: 4.64
Epoch: 090, Train Loss: 4.0510, Train Acc: 4.65, Valid Loss: 4.0586, Valid Acc: 4.64
Epoc