In [35]:
from copy import deepcopy

import torch
import torch.optim as optim
import torch.nn.functional as F

from dhg import Hypergraph
from dhg.data import Cooking200, CocitationCora, CoauthorshipCora, CocitationPubmed, CoauthorshipDBLP, CocitationCiteseer, News20, Yelp3k, Tencent2k
from dhg.models import HGNN, HGNNP, HyperGCN, UniSAGE, UniGCN, UniGAT
from dhg.random import set_seed
from dhg.metrics import HypergraphVertexClassificationEvaluator as Evaluator

In [54]:
def load_dataset(name):
    match name:
        case 'CocitationCora':
            return CocitationCora()
        case 'CoauthorshipCora':
            return CoauthorshipCora()
        case 'CoauthorshipDBLP':
            return CoauthorshipDBLP()
        case 'CocitationCiteseer':
            return CocitationCiteseer()
        case 'CocitationPubmed':
            return CocitationPubmed()
        case 'Cooking200':
            return Cooking200()
        case 'News20':
            return News20()
        case 'Yelp3k':
            return Yelp3k()
        case 'Tencent2k':
            return Tencent2k()
    
def load_model(name, X, data, depth):
    match name:
        case 'HGNN':
            return HGNN(X.shape[1], depth, data["num_classes"], use_bn=True)
        case 'HGNNP':
            return HGNNP(X.shape[1], depth, data["num_classes"], use_bn=True)
        case 'HyperGCN':
            return HyperGCN(X.shape[1], depth, data["num_classes"], use_bn=True)
        case 'UniSAGE':
            return UniSAGE(X.shape[1], depth, data["num_classes"], use_bn=True)
        case 'UniGCN':
            return UniGCN(X.shape[1], depth, data["num_classes"], use_bn=True)
        case 'UniGAT':
            return UniGAT(X.shape[1], depth, data["num_classes"], num_heads = 2, use_bn=True)

In [45]:
dataset_names = ['CocitationCora', 'CocitationCiteseer', 'CoauthorshipCora', 'CoauthorshipDBLP', 'CocitationPubmed', 'Cooking200', 'Tencent2k']


for dataset in dataset_names:
    data = load_dataset(dataset)
    print(dataset)
    print(f"Number of classes : {data['num_classes']}")
    print(f"Number of nodes : {data['num_vertices']}")
    print(f"Number of edges : {data['num_edges']}")
    try:
        print(f"Feature size : {len(data['features'])}")    
    except AssertionError as e:
        print(f"No features for dataset: {dataset}")
    print('\n')

CocitationCora
Number of classes : 7
Number of nodes : 2708
Number of edges : 1579
Feature size : 2708


CocitationCiteseer
Number of classes : 6
Number of nodes : 3312
Number of edges : 1079
Feature size : 3312


CoauthorshipCora
Number of classes : 7
Number of nodes : 2708
Number of edges : 1072
Feature size : 2708


CoauthorshipDBLP
Number of classes : 6
Number of nodes : 41302
Number of edges : 22363
Feature size : 41302


CocitationPubmed
Number of classes : 3
Number of nodes : 19717
Number of edges : 7963
Feature size : 19717


Cooking200
Number of classes : 20
Number of nodes : 7403
Number of edges : 2755
No features for dataset: Cooking200


Tencent2k
Number of classes : 2
Number of nodes : 2146
Number of edges : 6378
Feature size : 2146




In [52]:
from utils import train, infer
import pandas as pd

set_seed(0)

#dataset_names = ['CocitationCora', 'CoauthorshipCora', 'CoauthorshipDBLP', 'CocitationCiteseer', 'CocitationPubmed', 'Cooking200', 'News20']
#model_names = ['UniSAGE', 'UniGCN', 'UniGAT']
model_names = ['HGNN', 'HGNNP', 'HyperGCN', 'UniSAGE', 'UniGCN', 'UniGAT']
#dataset_names = ['CocitationCora']
#model_names = ['UniSAGE', 'UniGCN', 'UniGAT', 'HGNNP']
df_results = pd.DataFrame()

for dataset in dataset_names:
    data = load_dataset(dataset)
    device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
    evaluator = Evaluator(["accuracy", "f1_score", {"f1_score": {"average": "micro"}}])
    # Add code to check for features before using the adjacency matrix
    try:
        X, lbl = data["features"], data["labels"]  
    except AssertionError as e:
        X, lbl = torch.eye(data["num_vertices"]), data["labels"]
    G = Hypergraph(data["num_vertices"], data["edge_list"])
    train_mask = data["train_mask"]
    val_mask = data["val_mask"]
    test_mask = data["test_mask"]
    depth = 8
    for model in model_names:
        net = load_model(model, X, data, depth)
        optimizer = optim.Adam(net.parameters(), lr=0.01, weight_decay=5e-4)

        X, lbl = X.to(device), lbl.to(device)
        G = G.to(device)
        net = net.to(device)

        best_state = None
        best_epoch, best_val = 0, 0
        epochs = 100
        for epoch in range(epochs):
            # train
            train(net, X, G, lbl, train_mask, optimizer, epoch)
            # validation
            if epoch % 10 == 0:
                with torch.no_grad():
                    val_res = infer(net, X, G, lbl, val_mask, evaluator)
                if val_res > best_val:
                    print(f"update best: {val_res:.5f}")
                    best_epoch = epoch
                    best_val = val_res
                    best_state = deepcopy(net.state_dict())
        print("\ntrain finished")
        print(f"best val: {best_val:.5f}")
        # test
        print("test...")
        net.load_state_dict(best_state)        
        res = infer(net, X, G, lbl, test_mask, evaluator, test=True)
        print(f"final result: epoch: {best_epoch}")
        print(res)
        results = {
            'model' : model,
            'data' : dataset,
            'depth' : ,
            'epochs' : epochs,
            'val_accuracy' : best_val,
            'test_accuracy' : res['accuracy']
            
        }
        df_results = pd.concat([df_results, pd.DataFrame([results])], ignore_index = True)
    del data


Epoch: 0, Time: 0.01019s, Loss: 1.97627
update best: 0.10164
Epoch: 1, Time: 0.00355s, Loss: 1.83854
Epoch: 2, Time: 0.00304s, Loss: 1.61787
Epoch: 3, Time: 0.00276s, Loss: 1.57382
Epoch: 4, Time: 0.00249s, Loss: 1.51825
Epoch: 5, Time: 0.00270s, Loss: 1.45406
Epoch: 6, Time: 0.00265s, Loss: 1.41142
Epoch: 7, Time: 0.00287s, Loss: 1.37888
Epoch: 8, Time: 0.00342s, Loss: 1.40612
Epoch: 9, Time: 0.00299s, Loss: 1.33508
Epoch: 10, Time: 0.00312s, Loss: 1.30342
Epoch: 11, Time: 0.00313s, Loss: 1.29989
Epoch: 12, Time: 0.00347s, Loss: 1.26926
Epoch: 13, Time: 0.00294s, Loss: 1.25242
Epoch: 14, Time: 0.00327s, Loss: 1.25831
Epoch: 15, Time: 0.00253s, Loss: 1.21446
Epoch: 16, Time: 0.00280s, Loss: 1.17669
Epoch: 17, Time: 0.00277s, Loss: 1.22362
Epoch: 18, Time: 0.00264s, Loss: 1.15991
Epoch: 19, Time: 0.00287s, Loss: 1.15240
Epoch: 20, Time: 0.00253s, Loss: 1.10231
Epoch: 21, Time: 0.00247s, Loss: 1.06947
Epoch: 22, Time: 0.00239s, Loss: 1.12460
Epoch: 23, Time: 0.00249s, Loss: 1.09476
Epoch

In [53]:
df_results.shape

(42, 3)