In [55]:
from copy import deepcopy

import torch
import torch.optim as optim
import torch.nn.functional as F

from dhg import Hypergraph
from dhg.data import Cooking200, CocitationCora, CoauthorshipCora, CocitationPubmed, CoauthorshipDBLP, CocitationCiteseer, News20, Yelp3k, Tencent2k
from dhg.random import set_seed
from dhg.metrics import HypergraphVertexClassificationEvaluator as Evaluator

from utils import *
from models import *
import pandas as pd

import matplotlib.pyplot as plt

In [57]:
dataset_names = ['CocitationCora', 'CocitationCiteseer', 'CoauthorshipCora', 'CoauthorshipDBLP', 'CocitationPubmed', 'Cooking200', 'Tencent2k']


for dataset in dataset_names:
    data = load_dataset(dataset)
    print(dataset)
    print(f"Number of classes : {data['num_classes']}")
    print(f"Number of nodes : {data['num_vertices']}")
    try:
        print(f"Feature size : {len(data['features'])}")    
    except AssertionError as e:
        print(f"No features for dataset: {dataset}")
    print(f"Number of edges : {data['num_edges']}")
    print('\n')

CocitationCora
Number of classes : 7
Number of nodes : 2708
Feature size : 2708
Number of edges : 1579


CocitationCiteseer
Number of classes : 6
Number of nodes : 3312
Feature size : 3312
Number of edges : 1079


CoauthorshipCora
Number of classes : 7
Number of nodes : 2708
Feature size : 2708
Number of edges : 1072


CoauthorshipDBLP
Number of classes : 6
Number of nodes : 41302
Feature size : 41302
Number of edges : 22363


CocitationPubmed
Number of classes : 3
Number of nodes : 19717
Feature size : 19717
Number of edges : 7963


Cooking200
Number of classes : 20
Number of nodes : 7403
No features for dataset: Cooking200
Number of edges : 2755


Tencent2k
Number of classes : 2
Number of nodes : 2146
Feature size : 2146
Number of edges : 6378




In [58]:
"""drop_rates = [0.05, 0.1, 0.2, 0.5, 0.8]
drop_methods = ['dropnode', 'dropedge', 'drophyperedge']
depths = [1, 2, 4, 8, 12]
epochs = 500"""
drop_rates = [0.5]
drop_methods = ['no dropout', 'dropnode', 'dropedge', 'drophyperedge']
depths = [1]
epochs = 200
lambda_smoothing = 0.5

selected_params = [{
    'depth' : depth,
    'drop_method': drop_method,
    'drop_rate' : drop_rate
} for drop_rate in drop_rates
for drop_method in drop_methods
for depth in depths]

print(len(selected_params))

4


In [None]:
set_seed(0)



model_names = ['HGNNP', 'UniSAGE', 'UniGCN', 'UniGIN', 'UniGAT', 'HNHN']
#model_names = ['UniSAGE']
dataset_names = ['CocitationCora', 'CocitationCiteseer', 'CoauthorshipCora', 'CoauthorshipDBLP', 'CocitationPubmed', 'Cooking200', 'Tencent2k']
#dataset_names = ['CocitationCiteseer']



df_results = pd.DataFrame()


for dataset in dataset_names:
    data = load_dataset(dataset)
    device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
    evaluator = Evaluator(["accuracy"])
    # Add code to check for features before using the adjacency matrix
    try:
        X, lbl = data["features"], data["labels"]  
    except AssertionError as e:
        X, lbl = torch.eye(data["num_vertices"]), data["labels"]

    ids = list(range(data['num_vertices']))
    relations = data["edge_list"]
    
    #G = Hypergraph(data["num_vertices"], data["edge_list"])
    
    train_mask = data["train_mask"]
    val_mask = data["val_mask"]
    test_mask = data["test_mask"]

    for model in model_names:
        for params in selected_params:
            depth = params['depth']
            drop_method = params['drop_method']
            init_drop_rate = params['drop_rate']
            oversmoothing = []
            loss_values = []
            val_accuracy = []
            epoch_values = []
            

            net = load_model(model, X, data, depth)
            optimizer = optim.Adam(net.parameters(), lr=0.01, weight_decay=5e-4)
            X, lbl = X.to(device), lbl.to(device)
            G = Hypergraph(data["num_vertices"], data["edge_list"])
            G = G.to(device)
            net = net.to(device)

            best_state = None
            best_epoch, best_val = 0, 0

            '''for epoch in range(epochs):
                # Dropout settings
                set_seed(epoch)

                if epoch == 0:
                    energy = dirichlet_energy(G,X)
                drop_rate = torch.sigmoid(energy) * init_drop_rate
                

                G = dropout_hgnn(drop_method, drop_rate, ids, relations)
                X = G.smoothing(X, G.L_HGNN, lambda_smoothing)
                energy = dirichlet_energy(G,X)
                oversmoothing.append(energy.item())

                print(f"Epoch {epoch}:")
                print(f"Oversmoothing measure : {energy}")
                print(f"Old drop rate : {init_drop_rate} New drop rate : {drop_rate}")

            plt.plot(oversmoothing,'-x', label = 'oversmoothing')
            plt.show()
            oversmoothing = []'''

            for epoch in range(epochs): 
                
                set_seed(epoch)

                if epoch == 0:
                    energy = dirichlet_energy(G,X)
                    
                drop_rate = torch.sigmoid(energy) * init_drop_rate

                
                print(f"Oversmoothing measure : {energy}")
                print(f"Old drop rate : {init_drop_rate} New drop rate : {drop_rate}")
                
                # train
                G = dropout_hgnn(drop_method, drop_rate, ids, relations)
                loss_item, energy = train(net, X, G, lbl, train_mask, optimizer, epoch)

                

                # validation
                if epoch % 10 == 0:
                    
                    with torch.no_grad():
                        val_res = infer(net, X, G, lbl, val_mask, evaluator)
                    if val_res > best_val:
                        print(f"update best: {val_res:.5f}")
                        best_epoch = epoch
                        best_val = val_res
                        best_state = deepcopy(net.state_dict())
                    oversmoothing.append(energy.item())
                    loss_values.append(loss_item)
                    val_accuracy.append(val_res)
                    epoch_values.append(epoch)
                print("\n")
                
                if epoch % 20 == 0:
                    
                    res = infer(net, X, G, lbl, test_mask, evaluator, test=True)
                    results = {
                        'model' : model,
                        'data' : dataset,
                        'depth' : depth,
                        'epochs' : epoch,
                        'best_val_accuracy' : best_val,
                        'test_accuracy' : res['accuracy'],
                        'drop_method': drop_method,
                        'drop_rate' : init_drop_rate,
                        'oversmoothing' : oversmoothing,
                        'val_accuracy' : val_accuracy
                    }
                    df_results = pd.concat([df_results, pd.DataFrame([results])], ignore_index = True)
                    print("\n")
                

            print("\ntrain finished")
            print(f"best val: {best_val:.5f}")
            # test
            print("test...")
            net.load_state_dict(best_state)        
            res = infer(net, X, G, lbl, test_mask, evaluator, test=True)
            print(f"final result: epoch: {best_epoch}")
            print(f"parameters used: {params}")
            print(res)
            results = {
                'model' : model,
                'data' : dataset,
                'depth' : depth,
                'epochs' : epoch,
                'best_val_accuracy' : best_val,
                'test_accuracy' : res['accuracy'],
                'drop_method': drop_method,
                'drop_rate' : init_drop_rate,
                'oversmoothing' : oversmoothing,
                'val_accuracy' : val_accuracy
            }

            df_results = pd.concat([df_results, pd.DataFrame([results])], ignore_index = True)

            #plt.plot(epoch_values, oversmoothing,'-x', label = 'oversmoothing')
            #plt.title(f"Oversmoothing variations following epochs using {drop_method}")
            #plt.show()
            #plt.plot(epoch_values, loss_values,'-o', label = 'loss')
            #plt.plot(epoch_values, val_accuracy,'-+', label = 'Accuracy')
            #plt.legend(prop={'size': 12}, title_fontsize=12)
            #plt.title(f"Accuracy variations following epochs using {drop_method}")
            #plt.show()

Oversmoothing measure : 0.028680048882961273
Old drop rate : 0.5 New drop rate : 0.25358474254608154
Epoch: 0, Time: 0.12800s, Loss: 1.94596
update best: 0.16861




Oversmoothing measure : 60.41443634033203
Old drop rate : 0.5 New drop rate : 0.5
Epoch: 1, Time: 0.12397s, Loss: 2.32825


Oversmoothing measure : 21093.982421875
Old drop rate : 0.5 New drop rate : 0.5
Epoch: 2, Time: 0.13238s, Loss: 1.12595


Oversmoothing measure : 14025.412109375
Old drop rate : 0.5 New drop rate : 0.5
Epoch: 3, Time: 0.12924s, Loss: 0.80919


Oversmoothing measure : 15086.171875
Old drop rate : 0.5 New drop rate : 0.5
Epoch: 4, Time: 0.12896s, Loss: 0.86148


Oversmoothing measure : 17810.767578125
Old drop rate : 0.5 New drop rate : 0.5
Epoch: 5, Time: 0.13530s, Loss: 0.81446


Oversmoothing measure : 18906.765625
Old drop rate : 0.5 New drop rate : 0.5
Epoch: 6, Time: 0.12634s, Loss: 0.79906


Oversmoothing measure : 19368.26171875
Old drop rate : 0.5 New drop rate : 0.5
Epoch: 7, Time: 0.12595s, L

  _De = _De.scatter_reduce(0, index=hg.v2e_dst, src=hg.D_v.clone()._values()[hg.v2e_src], reduce="mean")




Oversmoothing measure : 262.3531188964844
Old drop rate : 0.5 New drop rate : 0.5
Epoch: 1, Time: 0.11749s, Loss: 1.95343


Oversmoothing measure : 51063.0625
Old drop rate : 0.5 New drop rate : 0.5
Epoch: 2, Time: 0.11720s, Loss: 1.26350


Oversmoothing measure : 45764.28515625
Old drop rate : 0.5 New drop rate : 0.5
Epoch: 3, Time: 0.11384s, Loss: 0.89894


Oversmoothing measure : 45390.62890625
Old drop rate : 0.5 New drop rate : 0.5
Epoch: 4, Time: 0.11692s, Loss: 0.87221


Oversmoothing measure : 56743.66015625
Old drop rate : 0.5 New drop rate : 0.5
Epoch: 5, Time: 0.12188s, Loss: 0.84676


Oversmoothing measure : 62383.5078125
Old drop rate : 0.5 New drop rate : 0.5
Epoch: 6, Time: 0.12461s, Loss: 0.83556


Oversmoothing measure : 65826.8046875
Old drop rate : 0.5 New drop rate : 0.5
Epoch: 7, Time: 0.12377s, Loss: 0.82640


Oversmoothing measure : 67288.8359375
Old drop rate : 0.5 New drop rate : 0.5
Epoch: 8, Time: 0.12108s, Loss: 0.81492


Oversmoothing measure : 68039.7031

In [None]:
for dataset in dataset_names:
    for model in model_names:
        oversmoothing_values = df_results[(df_results['epochs']== epochs-1) & (df_results['data']== dataset) & (df_results['model']== model)]
        for method in drop_methods:
            values = oversmoothing_values[oversmoothing_values['drop_method']== method]['oversmoothing'].values[0]
            
            plt.plot(values,'-x', label = method)
            plt.title(f"Oversmoothing variations following epochs on {model} - {dataset}")
        plt.legend(prop={'size': 12}, title_fontsize=12)
        plt.show()

In [None]:
for dataset in dataset_names:
    for model in model_names:
        accuracy_values = df_results[(df_results['epochs']== epochs-1) & (df_results['data']== dataset) & (df_results['model']== model)]
        for method in drop_methods:
            values = accuracy_values[accuracy_values['drop_method']== method]['val_accuracy'].values[0]
            
            plt.plot(values,'-x', label = method)
            plt.title(f"Accuracy variations following epochs on {model} - {dataset}")
        plt.legend(prop={'size': 12}, title_fontsize=12)
        plt.show()
        

In [124]:
#for dataset in dataset_names:

accuracy_values = df_results[(df_results['epochs']== epochs-1)]
accuracy_values[164:].head(4)
        
        

Unnamed: 0,model,data,depth,epochs,best_val_accuracy,test_accuracy,drop_method,drop_rate,oversmoothing,val_accuracy
1814,HNHN,Tencent2k,1,199,0.65,0.911793,no dropout,0.5,"[300.66986083984375, 226.75689697265625, 227.9...","[0.5, 0.5, 0.5, 0.5, 0.6000000238418579, 0.625..."
1825,HNHN,Tencent2k,1,199,0.675,0.904602,dropnode,0.5,"[159.92495727539062, 177.8432159423828, 219.99...","[0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5500000119209..."
1836,HNHN,Tencent2k,1,199,0.7,0.894535,dropedge,0.5,"[288.6014099121094, 337.2334289550781, 382.043...","[0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.4000000059604..."
1847,HNHN,Tencent2k,1,199,0.7,0.892138,drophyperedge,0.5,"[287.60882568359375, 375.4235534667969, 341.21...","[0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.6999999880790..."
