In [1]:
import os
import random

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.metrics import f1_score

import torch
import torch.nn as nn
import torch.nn.functional as F

import torch_geometric
from torch_geometric.datasets import CoraFull, Planetoid, CitationFull
from torch_geometric.transforms import NormalizeFeatures
import torch_geometric.nn as gnn 

from models import GAT, GraphSAGE, GIN
from utils import train_model, test_model, train_constrative_model, valid_model
from mean_average_distance import MAD, MADGap
from virtualnode import VirtualClassNode, UnidirectionalVirtualClassNode

torch.manual_seed(42)
torch.cuda.manual_seed(42)
np.random.seed(42)
random.seed(42)
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False

SAVE_PATH = 'results'
LR = 0.01

device = torch.device("cuda:1" if torch.cuda.is_available() else "cpu")
device

  warn(f"Failed to load image Python extension: {e}")


device(type='cuda', index=1)

In [2]:
dataset = CitationFull(root='dataset/Cora', name='Cora', transform=NormalizeFeatures())

In [3]:
data = dataset[0]
df = pd.DataFrame(data.x)
df['y'] = data.y
train, valid = train_test_split(df, stratify=df.y, test_size=0.4)
valid, test = train_test_split(valid, stratify=valid.y, test_size=0.5)
data.train_mask = torch.zeros(data.num_nodes, dtype=torch.bool)
data.train_mask[train.index]=True
data.valid_mask = torch.zeros(data.num_nodes, dtype=torch.bool)
data.valid_mask[valid.index]=True
data.test_mask = torch.zeros(data.num_nodes, dtype=torch.bool)
data.test_mask[test.index]=True

In [4]:
vc = UnidirectionalVirtualClassNode()
data = vc.forward(data)

In [5]:
sage = GraphSAGE(in_channels=dataset.num_features, hidden_channels=256, number_of_classes=dataset.num_classes, num_of_hidden_layers=4, device=device)

models = [sage]

for model in models:
    print(f'model: {model.name}, params: {model.num_of_parameters}')


model: GraphSAGE, params: 4853760


In [6]:
model_losses = []
model_accs = []
for model in models:
    print(f'Model: {model.name} | Number of parameters: {model.get_n_params()}')
    model = model.to(device)
    data = data.to(device)
    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.AdamW(model.parameters(), lr=LR, weight_decay=5e-4)
    losses = []
    accs = []
    val_losses = []
    val_accs = []
    for epoch in range(1000):
        loss, acc = train_constrative_model(model, data, optimizer, criterion, constrative_coef=0.01, temperature=0.2)
        losses.append(loss.item())
        accs.append(100*acc)
        
        val_loss, val_acc = valid_model(model, data, criterion, constrative_flag=True, constrative_coef=0.01, temperature=0.2)
        val_accs.append(100*val_acc)
        print(f'Epoch: {epoch:03d}, Train Loss: {loss:.4f}, Train Acc: {100*acc:.2f}, Valid Loss: {val_loss:.4f}, Valid Acc: {100*val_acc:.2f}')
    model_losses.append(losses, val_losses)
    model_accs.append(accs, val_accs)
    report = test_model(model, data)
    result = pd.DataFrame(report).T
    result_sliced = result.iloc[:-3 if len(result) < 23 else 20, :]
    acc = result.loc['accuracy'][0]
    result.loc['minorities-f1',:] = result_sliced.mean(axis=0)
    result.to_csv(os.path.join(SAVE_PATH, f'{model.name}_layers{model.num_of_hidden_layers}_neurons{model.hidden_channels}'+'.csv'))
    print(f'Test Acc: {100*acc}')
    print('==========================================', end='\n\n')

Model: GraphSAGE | Number of parameters: 4853760
Epoch: 000, Train Loss: 4.2495, Train Acc: 1.00, Valid Loss: 4.0877, Valid Acc: 4.27
Epoch: 001, Train Loss: 4.0871, Train Acc: 4.55, Valid Loss: 7.6091, Valid Acc: 3.92
Epoch: 002, Train Loss: 7.5444, Train Acc: 4.04, Valid Loss: 4.1346, Valid Acc: 3.13
Epoch: 003, Train Loss: 4.1269, Train Acc: 3.15, Valid Loss: 4.2219, Valid Acc: 4.27
Epoch: 004, Train Loss: 4.2290, Train Acc: 4.42, Valid Loss: 4.8327, Valid Acc: 2.95
Epoch: 005, Train Loss: 4.8525, Train Acc: 3.26, Valid Loss: 20.7826, Valid Acc: 0.17
Epoch: 006, Train Loss: 20.8279, Train Acc: 0.15, Valid Loss: 5.9752, Valid Acc: 2.36
Epoch: 007, Train Loss: 5.9914, Train Acc: 2.35, Valid Loss: 6.2595, Valid Acc: 2.61
Epoch: 008, Train Loss: 6.2466, Train Acc: 2.69, Valid Loss: 5.3292, Valid Acc: 4.64
Epoch: 009, Train Loss: 5.3271, Train Acc: 4.68, Valid Loss: 4.8523, Valid Acc: 3.57
Epoch: 010, Train Loss: 4.8822, Train Acc: 2.90, Valid Loss: 5.1532, Valid Acc: 0.42
Epoch: 011, Tr

TypeError: list.append() takes exactly one argument (2 given)

In [None]:
data

Data(x=[19863, 8710], edge_index=[2, 150592], y=[19863], train_mask=[19863], valid_mask=[19793], test_mask=[19793], edge_type=[150592])

In [None]:
torch.arange(20).unique().shape[0]

20

In [10]:
# MAD and MADGap Usage Example
mad = MAD(device=device, global_flag=True)
result = model(dataset.data.x.to(device), dataset.data.edge_index.to(device))[0].cpu()
print(mad(result))
madgap = MADGap(device, 3, 8)
print(madgap(result, dataset.data.edge_index))

tensor(0.4984, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.2384, device='cuda:0', grad_fn=<SubBackward0>)
