In [1]:
import os
import random

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.metrics import f1_score

import torch
import torch.nn as nn
import torch.nn.functional as F

import torch_geometric
from torch_geometric.datasets import CoraFull, Planetoid, CitationFull
from torch_geometric.transforms import NormalizeFeatures
import torch_geometric.nn as gnn 

from models import GAT, GraphSAGE, GIN
from utils import train_model, test_model

# Reproducibility: seed every random number generator the notebook uses
# (PyTorch CPU, PyTorch CUDA, NumPy and the stdlib `random` module).
SEED = 42
for seed_rng in (torch.manual_seed, torch.cuda.manual_seed, np.random.seed, random.seed):
    seed_rng(SEED)

# Ask cuDNN for deterministic behaviour and switch off its benchmark mode.
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False

# Notebook-wide configuration.
SAVE_PATH = 'results'  # directory that receives the per-model report CSVs
LR = 0.01              # learning rate handed to the optimizer

# Use the GPU when PyTorch can see one, otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
device

  from .autonotebook import tqdm as notebook_tqdm


device(type='cuda')

In [2]:
# Load the 'Cora' graph from the CitationFull collection (stored under
# dataset/Cora) and apply NormalizeFeatures to each graph on access.
dataset = CitationFull(
    root='dataset/Cora',
    name='Cora',
    transform=NormalizeFeatures(),
)

In [3]:
# Take the single graph of the dataset and build a stratified train/validation
# split over its nodes; the training nodes are flagged in `data.train_mask`.
data = dataset[0]

# One row per node: feature columns plus the class label in column 'y'.
df = pd.DataFrame(data.x)
df['y'] = data.y

# `random_state` is pinned so that re-running this cell reproduces the same
# split; without it the split is drawn from NumPy's global RNG state, which
# advances on every call and therefore changes between re-executions.
# NOTE(review): `valid` is not used in any of the cells visible here.
train, valid = train_test_split(df, stratify=df.y, test_size=0.33, random_state=42)

# Boolean node mask: True for every node that landed in the training split.
data.train_mask = torch.zeros(data.num_nodes, dtype=torch.bool)
train_idx = torch.as_tensor(train.index.to_numpy(), dtype=torch.long)
data.train_mask[train_idx] = True

In [4]:
# Constructor arguments common to all three architectures. Only the hidden
# width differs per model (and GAT additionally takes `heads`).
# NOTE(review): the widths 256 / 476 / 415 were presumably picked so that the
# three models end up with comparable parameter counts - confirm.
shared_kwargs = dict(
    in_channels=dataset.num_features,
    number_of_classes=dataset.num_classes,
    num_of_hidden_layers=4,
    device=device,
)

# Instantiate in the fixed order GraphSAGE -> GAT -> GIN; later cells index
# per-model results by position in `models`.
sage = GraphSAGE(hidden_channels=256, **shared_kwargs)
gat = GAT(hidden_channels=476, heads=1, **shared_kwargs)
gin = GIN(hidden_channels=415, **shared_kwargs)

models = [sage, gat, gin]

# Report each model's display name and parameter count.
for model in models:
    print(f'model: {model.name}, params: {model.num_of_parameters}')


model: GraphSAGE, params: 4871750
model: Graph Attention Network, params: 4864790
model: GIN, params: 4855985


In [5]:
# Train every model in `models` on the same graph, record the per-epoch
# training loss and accuracy, then evaluate it and save its report as a CSV
# under SAVE_PATH.
NUM_EPOCHS = 500

model_losses = []  # one list of per-epoch losses per model, in `models` order
model_accs = []    # one list of per-epoch accuracies (in percent) per model

# Loop-invariant setup, hoisted out of the per-model loop.
data = data.to(device)
criterion = nn.CrossEntropyLoss()
os.makedirs(SAVE_PATH, exist_ok=True)  # DataFrame.to_csv does not create missing directories

for model in models:
    print(f'Model: {model.name} | Number of parameters: {model.get_n_params()}')
    model = model.to(device)
    optimizer = torch.optim.AdamW(model.parameters(), lr=LR, weight_decay=5e-4)

    losses = []
    accs = []
    for epoch in range(NUM_EPOCHS):
        loss, acc = train_model(model, data, optimizer, criterion)
        losses.append(loss.item())
        accs.append(100*acc)
        print(f'Epoch: {epoch:03d}, Loss: {loss:.4f}, Acc: {100*acc:.2f}')
    model_losses.append(losses)
    model_accs.append(accs)

    # NOTE(review): `test_model` presumably returns a classification-report
    # style dict (one entry per class followed by three summary entries such
    # as 'accuracy') - confirm against utils.test_model.
    report = test_model(model, data)
    result = pd.DataFrame(report).T

    # Rows averaged into 'minorities-f1': everything except the last three
    # rows when the report has fewer than 23 rows, otherwise the first 20 rows.
    stop = -3 if len(result) < 23 else 20
    result_sliced = result.iloc[:stop, :]

    # Positional access via .iloc: `series[0]` on a string-labelled Series is
    # deprecated positional indexing in pandas 2.x.
    acc = result.loc['accuracy'].iloc[0]
    result.loc['minorities-f1',:] = result_sliced.mean(axis=0)
    result.to_csv(os.path.join(SAVE_PATH, f'{model.name}_layers{model.num_of_hidden_layers}_neurons{model.hidden_channels}'+'.csv'))
    print(f'Test Acc: {100*acc}')
    print('==========================================', end='\n\n')

Model: GraphSAGE | Number of parameters: 4871750
Epoch: 000, Loss: 4.2495, Acc: 1.08
Epoch: 001, Loss: 4.0929, Acc: 4.46
Epoch: 002, Loss: 7.1297, Acc: 4.15
Epoch: 003, Loss: 4.0880, Acc: 3.17
Epoch: 004, Loss: 4.2078, Acc: 3.97
Epoch: 005, Loss: 4.4124, Acc: 0.15
Epoch: 006, Loss: 4.0325, Acc: 5.80
Epoch: 007, Loss: 4.5174, Acc: 7.81
Epoch: 008, Loss: 5.6003, Acc: 2.87
Epoch: 009, Loss: 6.4070, Acc: 1.97
Epoch: 010, Loss: 4.1840, Acc: 8.59
Epoch: 011, Loss: 4.7519, Acc: 5.29
Epoch: 012, Loss: 4.7890, Acc: 2.88
Epoch: 013, Loss: 4.4054, Acc: 3.07
Epoch: 014, Loss: 4.2685, Acc: 6.80
Epoch: 015, Loss: 4.1307, Acc: 8.08
Epoch: 016, Loss: 3.7512, Acc: 11.28
Epoch: 017, Loss: 3.6805, Acc: 13.48
Epoch: 018, Loss: 3.5226, Acc: 13.51
Epoch: 019, Loss: 3.4361, Acc: 14.27
Epoch: 020, Loss: 3.2933, Acc: 16.55
Epoch: 021, Loss: 3.2260, Acc: 16.35
Epoch: 022, Loss: 3.2256, Acc: 16.79
Epoch: 023, Loss: 3.1261, Acc: 16.61
Epoch: 024, Loss: 3.0416, Acc: 19.18
Epoch: 025, Loss: 3.0048, Acc: 20.35
Epoch

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Test Acc: 68.87630128597672

Model: Graph Attention Network | Number of parameters: 4864790
Epoch: 000, Loss: 4.2480, Acc: 0.30
Epoch: 001, Loss: 4.1790, Acc: 4.79
Epoch: 002, Loss: 6.3976, Acc: 4.69
Epoch: 003, Loss: 4.2762, Acc: 5.08
Epoch: 004, Loss: 4.2056, Acc: 8.62
Epoch: 005, Loss: 4.4025, Acc: 4.11
Epoch: 006, Loss: 4.0920, Acc: 4.72
Epoch: 007, Loss: 4.2698, Acc: 6.61
Epoch: 008, Loss: 5.2547, Acc: 3.51
Epoch: 009, Loss: 3.6725, Acc: 9.92
Epoch: 010, Loss: 4.1150, Acc: 8.34
Epoch: 011, Loss: 4.5452, Acc: 8.96
Epoch: 012, Loss: 4.4051, Acc: 9.26
Epoch: 013, Loss: 4.1701, Acc: 8.69
Epoch: 014, Loss: 3.5796, Acc: 16.55
Epoch: 015, Loss: 3.4721, Acc: 14.86
Epoch: 016, Loss: 3.2633, Acc: 13.92
Epoch: 017, Loss: 3.2593, Acc: 13.27
Epoch: 018, Loss: 3.0642, Acc: 22.98
Epoch: 019, Loss: 2.9761, Acc: 20.01
Epoch: 020, Loss: 2.8822, Acc: 24.12
Epoch: 021, Loss: 2.7302, Acc: 27.60
Epoch: 022, Loss: 2.5561, Acc: 29.18
Epoch: 023, Loss: 2.3745, Acc: 35.70
Epoch: 024, Loss: 2.3063, Acc: 37.

In [12]:
# Export the per-epoch training loss of every model, one column per model.
# `model_losses` is filled in `models` order (GraphSAGE, GAT, GIN), hence the
# index used for each column below.
train_results_dir = os.path.join(SAVE_PATH, 'train_results')
os.makedirs(train_results_dir, exist_ok=True)  # DataFrame.to_csv does not create missing directories

train_res_loss = pd.DataFrame({
    'Graph Attention Network_layers4_neurons476': model_losses[1],
    'GraphSAGE_layers4_neurons256': model_losses[0],
    'GIN_layers4_neurons415': model_losses[2],
})
train_res_loss.to_csv(os.path.join(train_results_dir, '0506_mpnn_loss.csv'), index=False)

In [13]:
# Export the per-epoch training accuracy (in percent) of every model, one
# column per model. `model_accs` is filled in `models` order (GraphSAGE, GAT,
# GIN), hence the index used for each column below.
train_results_dir = os.path.join(SAVE_PATH, 'train_results')
os.makedirs(train_results_dir, exist_ok=True)  # DataFrame.to_csv does not create missing directories

train_res_acc = pd.DataFrame({
    'Graph Attention Network_layers4_neurons476': model_accs[1],
    'GraphSAGE_layers4_neurons256': model_accs[0],
    'GIN_layers4_neurons415': model_accs[2],
})
train_res_acc.to_csv(os.path.join(train_results_dir, '0506_mpnn_acc.csv'), index=False)