In [26]:
import torch
import numpy as np
from torch_geometric.datasets import Planetoid, WebKB, LINKXDataset
from torch_geometric.nn import GCNConv
from ogb.nodeproppred import NodePropPredDataset
from torch_geometric.data import Data
#dataset = WebKB(root='/tmp/WebKB', name='Cornell')
#dataset = LINKXDataset(root='./data',name='penn94')
#data = dataset[0]

# Load ogbn-arxiv through OGB's library-agnostic loader and wrap it in a PyG
# `Data` object so the rest of the notebook can use `data.x`, `data.edge_index`,
# `data.y` and the split masks.
dataset = NodePropPredDataset(name='ogbn-arxiv')
print(dataset)
print(dataset.num_classes)
print(dataset.graph.keys())
split_index = dataset.get_idx_split()

# Parse the numpy arrays to tensors.
data = Data(x=torch.from_numpy(dataset.graph['node_feat']).float(),
            edge_index=torch.from_numpy(dataset.graph['edge_index']).long(),
            # OGB documents labels as (num_nodes, num_tasks); drop the trailing
            # task axis so `y` is 1-D like the masks and predictions used below.
            # This is a no-op when the labels are already 1-D.
            y=torch.from_numpy(dataset.labels).long().squeeze(-1))

# `get_idx_split()` yields arrays of node *indices*. Casting them with `.bool()`
# would give one True/False per index instead of one entry per node, so build
# proper per-node boolean masks by scattering True at the listed positions.
num_nodes = data.x.size(0)
data.train_mask = torch.zeros(num_nodes, dtype=torch.bool)
data.train_mask[torch.from_numpy(split_index['train']).long()] = True
data.val_mask = torch.zeros(num_nodes, dtype=torch.bool)
data.val_mask[torch.from_numpy(split_index['valid']).long()] = True
data.test_mask = torch.zeros(num_nodes, dtype=torch.bool)
data.test_mask[torch.from_numpy(split_index['test']).long()] = True

dataset.num_features = dataset.graph['node_feat'].shape[1]
# Store a plain Python int (not a numpy scalar) so it can be passed directly as
# a layer size when the models are built.
dataset.num_classes = int(dataset.labels.max()) + 1
print()
print(f'Dataset: {dataset}:')
print('======================')
print(f'Number of graphs: {len(dataset)}')
print(f'Number of features: {dataset.num_features}')
print(f'Number of classes: {dataset.num_classes}')
print()
print(data)
print('===========================================================================================================')


cornell()
5

Dataset: cornell():
Number of graphs: 1
Number of features: 1703
Number of classes: 5

Data(x=[183, 1703], edge_index=[2, 298], y=[183], train_mask=[183, 10], val_mask=[183, 10], test_mask=[183, 10])


In [27]:
import torch
from sklearn.model_selection import train_test_split
from torch_geometric.utils import to_dense_adj
import numpy as np
'''
      Same function as in LINKX paper: https://github.com/CUAI/Non-Homophily-Large-Scale/blob/82f8f05c5c3ec16bd5b505cc7ad62ab5e09051e6/d>
'''
def rand_train_test_idx(label, train_prop=.5, valid_prop=.25, ignore_negative=True,seed=1234):
    """Randomly split the positions of ``label`` into train/test/validation masks.

    Args:
        label: Tensor of labels. Only its length and shape are used: the
            positions ``0 .. len(label) - 1`` are split, and each returned
            mask has the same shape as ``label``.
        train_prop: Fraction of all positions assigned to the training set.
        valid_prop: Fraction of all positions assigned to the validation set.
            Whatever remains after train and validation becomes the test set.
        ignore_negative: Accepted for signature compatibility only; this
            implementation does not filter out negative labels.
        seed: ``random_state`` passed to both ``train_test_split`` calls.

    Returns:
        Tuple ``(train_mask, test_mask, val_mask)`` of boolean tensors.
        Note the order: the test mask comes before the validation mask.

    Raises:
        ValueError: If the proportions are not positive or leave no room for
            a test set (``train_prop + valid_prop >= 1``).
    """
    if not (0 < train_prop < 1 and valid_prop > 0 and train_prop + valid_prop < 1):
        raise ValueError(
            'train_prop and valid_prop must be positive and sum to less than 1, '
            f'got train_prop={train_prop}, valid_prop={valid_prop}'
        )

    # train_test_split returns (remainder, held_out) where held_out has size
    # `test_size`; the held-out part is used as the training set here.
    remaining_idx, train_split = train_test_split(
        list(range(len(label))), test_size=train_prop, random_state=seed, shuffle=True)

    # `valid_prop` is a fraction of the whole set, so rescale it to the part
    # left after removing the training nodes. With the defaults this is
    # 0.25 / 0.5 = 0.5, the value that was previously hard-wired via train_prop.
    valid_share = valid_prop / (1 - train_prop)
    test_split, val_split = train_test_split(
        remaining_idx, test_size=valid_share, random_state=seed, shuffle=True)

    def _mask_from(indices):
        # All-False mask shaped like `label`, with True at the chosen positions.
        mask = torch.full_like(label, False, dtype=bool)
        mask[indices] = True
        return mask

    train_mask = _mask_from(train_split)
    test_mask = _mask_from(test_split)
    val_mask = _mask_from(val_split)
    return train_mask,test_mask,val_mask
# One seed per random split; every seed contributes one column to each of
# data.train_mask / data.val_mask / data.test_mask.
seeds = [12381, 45891, 63012, 32612, 91738]
train_columns, val_columns, test_columns = [], [], []
for split_seed in seeds:
    # rand_train_test_idx returns the masks in (train, test, val) order.
    train_mask, test_mask, val_mask = rand_train_test_idx(data.y, seed=split_seed)
    train_columns.append(train_mask.unsqueeze(1))
    val_columns.append(val_mask.unsqueeze(1))
    test_columns.append(test_mask.unsqueeze(1))

# Join the per-seed columns side by side along dim 1.
data.train_mask = torch.cat(train_columns, dim=1)
data.val_mask = torch.cat(val_columns, dim=1)
data.test_mask = torch.cat(test_columns, dim=1)

for split_mask in (data.train_mask, data.val_mask, data.test_mask):
    print(split_mask.shape)

torch.Size([183, 5])
torch.Size([183, 5])
torch.Size([183, 5])


In [19]:
# We will use an MLP architecture to classify the nodes.
from torch_geometric.nn.models import MLP

# Input width is the node-feature dimension; one output per class.
mlp = MLP(
    in_channels=data.x.size(1),
    hidden_channels=16,
    out_channels=dataset.num_classes,
    num_layers=2,
)
print(mlp)

MLP(4814, 16, 2)


In [20]:
def train(model,data,mask,optimizer,criterion):
    """Run one full-batch optimisation step and report loss and accuracy.

    Args:
        model: Module called as ``model(data.x, data.edge_index)``.
        data: Object exposing ``x``, ``edge_index`` and ``y``.
        mask: Tensor used to select the training rows of the model output and
            of ``data.y``; its sum is used as the number of selected nodes.
        optimizer: Optimizer whose gradients are zeroed before the forward
            pass and which is stepped after the backward pass.
        criterion: Loss callable applied to the selected outputs and labels.

    Returns:
        Tuple ``(loss, train_acc)``: the loss as a detached 0-dim tensor and
        the accuracy on the selected nodes as a float. The accuracy is
        computed from the outputs produced before the parameter update.
    """
    model.train()
    optimizer.zero_grad()
    out = model(data.x, data.edge_index)
    pred = out.argmax(dim=1)
    train_correct = (pred[mask] == data.y[mask]).sum().item()
    train_acc = train_correct / mask.sum().item()
    loss = criterion(out[mask], data.y[mask])
    loss.backward()
    optimizer.step()
    # Detach so callers that keep or log the value do not hold on to the
    # autograd graph of this step.
    return loss.detach(), train_acc
def test(model,data,mask):
    model.eval()
    out = model(data.x, data.edge_index)
    pred = out.argmax(dim=1)
    test_correct = (pred[mask] == data.y[mask]).sum().item()
    test_acc = test_correct / mask.sum().item()
    return test_acc
def val(model,data,mask):
    """Return the model's accuracy on the validation nodes selected by ``mask``.

    Args:
        model: Module called as ``model(data.x, data.edge_index)``; it is
            switched to eval mode.
        data: Object exposing ``x``, ``edge_index`` and ``y``.
        mask: Tensor used to select the rows to score; its sum is used as
            the number of selected nodes.

    Returns:
        Fraction of selected nodes whose arg-max prediction equals the label.

    Raises:
        ZeroDivisionError: If ``mask`` selects no nodes.
    """
    model.eval()
    # Evaluation needs no gradients; skip building the autograd graph.
    with torch.no_grad():
        out = model(data.x, data.edge_index)
    pred = out.argmax(dim=1)
    val_correct = (pred[mask] == data.y[mask]).sum().item()
    val_acc = val_correct / mask.sum().item()
    return val_acc

In [21]:
# Train a fresh MLP on every random split.
# `results` keeps its original meaning (best validation accuracy per split).
# `test_results` additionally records the test accuracy at the epoch where the
# validation accuracy peaked, so a held-out figure is reported as well: the
# maximum validation accuracy on its own is an optimistic estimate.
results = []
test_results = []
for i in range(data.train_mask.shape[1]):  # one column per random split
    mlp = MLP(in_channels=data.x.shape[1], hidden_channels=64, out_channels=dataset.num_classes, num_layers=2)
    train_mask = data.train_mask[:,i]
    test_mask = data.test_mask[:,i]
    val_mask = data.val_mask[:,i]
    optimizer = torch.optim.Adam(mlp.parameters(), lr=0.01,weight_decay=5e-4)
    criterion = torch.nn.CrossEntropyLoss()
    best_acc = 0
    best_test_acc = 0
    for epoch in range(500):
        loss, train_acc = train(mlp,data,train_mask,optimizer,criterion)
        test_acc = test(mlp,data,test_mask)
        val_acc = val(mlp,data,val_mask)
        print(f'Epoch: {epoch:03d}, Loss: {loss:.4f}, Train Acc: {train_acc:.4f}, Test Acc: {test_acc:.4f}, Val Acc: {val_acc:.4f}')
        if val_acc > best_acc:
            best_acc = val_acc
            # Model selection uses validation only; remember the matching test score.
            best_test_acc = test_acc
    results.append(best_acc)
    test_results.append(best_test_acc)
print(f'Average accuracy: {np.mean(results):.4f} ± {np.std(results):.4f}')
print(f'Average test accuracy at best validation epoch: {np.mean(test_results):.4f} ± {np.std(test_results):.4f}')

Epoch: 000, Loss: 0.7213, Train Acc: 0.4835, Test Acc: 0.5280, Val Acc: 0.5194
Epoch: 001, Loss: 0.5185, Train Acc: 0.7495, Test Acc: 0.5283, Val Acc: 0.5198
Epoch: 002, Loss: 0.4473, Train Acc: 0.7946, Test Acc: 0.5624, Val Acc: 0.5558
Epoch: 003, Loss: 0.4072, Train Acc: 0.8169, Test Acc: 0.6282, Val Acc: 0.6177
Epoch: 004, Loss: 0.3767, Train Acc: 0.8328, Test Acc: 0.6722, Val Acc: 0.6617
Epoch: 005, Loss: 0.3539, Train Acc: 0.8426, Test Acc: 0.6914, Val Acc: 0.6829
Epoch: 006, Loss: 0.3342, Train Acc: 0.8497, Test Acc: 0.7006, Val Acc: 0.6941
Epoch: 007, Loss: 0.3169, Train Acc: 0.8586, Test Acc: 0.7090, Val Acc: 0.7031
Epoch: 008, Loss: 0.3018, Train Acc: 0.8659, Test Acc: 0.7225, Val Acc: 0.7127
Epoch: 009, Loss: 0.2874, Train Acc: 0.8737, Test Acc: 0.7337, Val Acc: 0.7217
Epoch: 010, Loss: 0.2737, Train Acc: 0.8842, Test Acc: 0.7413, Val Acc: 0.7279
Epoch: 011, Loss: 0.2611, Train Acc: 0.8942, Test Acc: 0.7442, Val Acc: 0.7315
Epoch: 012, Loss: 0.2491, Train Acc: 0.9011, Test Ac

KeyboardInterrupt: 

In [14]:
from torch_geometric.nn.models import GAT

# Train a fresh GAT on every random split.
# `results` keeps its original meaning (best validation accuracy per split).
# `test_results` additionally records the test accuracy at the epoch where the
# validation accuracy peaked, so a held-out figure is reported as well: the
# maximum validation accuracy on its own is an optimistic estimate.
results = []
test_results = []
for i in range(data.train_mask.shape[1]):  # one column per random split
    gat = GAT(in_channels=data.x.shape[1], hidden_channels=64, out_channels=dataset.num_classes, num_layers=2)
    train_mask = data.train_mask[:,i]
    test_mask = data.test_mask[:,i]
    val_mask = data.val_mask[:,i]
    optimizer = torch.optim.Adam(gat.parameters(), lr=0.01,weight_decay=5e-4)
    criterion = torch.nn.CrossEntropyLoss()
    best_acc = 0
    best_test_acc = 0
    for epoch in range(500):
        loss, train_acc = train(gat,data,train_mask,optimizer,criterion)
        test_acc = test(gat,data,test_mask)
        val_acc = val(gat,data,val_mask)
        print(f'Epoch: {epoch:03d}, Loss: {loss:.4f}, Train Acc: {train_acc:.4f}, Test Acc: {test_acc:.4f}, Val Acc: {val_acc:.4f}')
        if val_acc > best_acc:
            best_acc = val_acc
            # Model selection uses validation only; remember the matching test score.
            best_test_acc = test_acc
    results.append(best_acc)
    test_results.append(best_test_acc)
print(f'Average accuracy: {np.mean(results):.4f} ± {np.std(results):.4f}')
print(f'Average test accuracy at best validation epoch: {np.mean(test_results):.4f} ± {np.std(test_results):.4f}')

Epoch: 000, Loss: 1.5885, Train Acc: 0.2184, Test Acc: 0.3514, Val Acc: 0.5085
Epoch: 001, Loss: 1.4736, Train Acc: 0.4713, Test Acc: 0.2973, Val Acc: 0.3898
Epoch: 002, Loss: 1.1075, Train Acc: 0.5862, Test Acc: 0.2162, Val Acc: 0.4068
Epoch: 003, Loss: 0.8761, Train Acc: 0.7126, Test Acc: 0.2432, Val Acc: 0.4237
Epoch: 004, Loss: 0.7662, Train Acc: 0.7356, Test Acc: 0.3243, Val Acc: 0.4407
Epoch: 005, Loss: 0.6644, Train Acc: 0.7701, Test Acc: 0.3243, Val Acc: 0.4576
Epoch: 006, Loss: 0.5709, Train Acc: 0.8161, Test Acc: 0.3514, Val Acc: 0.4407
Epoch: 007, Loss: 0.4940, Train Acc: 0.8391, Test Acc: 0.3514, Val Acc: 0.4407
Epoch: 008, Loss: 0.4266, Train Acc: 0.8276, Test Acc: 0.3784, Val Acc: 0.4407
Epoch: 009, Loss: 0.3524, Train Acc: 0.8506, Test Acc: 0.4324, Val Acc: 0.4746
Epoch: 010, Loss: 0.2994, Train Acc: 0.8851, Test Acc: 0.4324, Val Acc: 0.4915
Epoch: 011, Loss: 0.2678, Train Acc: 0.8966, Test Acc: 0.4324, Val Acc: 0.4915
Epoch: 012, Loss: 0.2306, Train Acc: 0.8966, Test Ac

In [15]:
from torch_geometric.nn.models import GCN

# Train a fresh GCN on every random split.
# `results` keeps its original meaning (best validation accuracy per split).
# `test_results` additionally records the test accuracy at the epoch where the
# validation accuracy peaked, so a held-out figure is reported as well: the
# maximum validation accuracy on its own is an optimistic estimate.
results = []
test_results = []
for i in range(data.train_mask.shape[1]):  # one column per random split
    gcn = GCN(in_channels=data.x.shape[1], hidden_channels=64, out_channels=dataset.num_classes, num_layers=2)
    train_mask = data.train_mask[:,i]
    test_mask = data.test_mask[:,i]
    val_mask = data.val_mask[:,i]
    optimizer = torch.optim.Adam(gcn.parameters(), lr=0.01,weight_decay=5e-4)
    criterion = torch.nn.CrossEntropyLoss()
    best_acc = 0
    best_test_acc = 0
    for epoch in range(500):
        loss, train_acc = train(gcn,data,train_mask,optimizer,criterion)
        test_acc = test(gcn,data,test_mask)
        val_acc = val(gcn,data,val_mask)
        print(f'Epoch: {epoch:03d}, Loss: {loss:.4f}, Train Acc: {train_acc:.4f}, Test Acc: {test_acc:.4f}, Val Acc: {val_acc:.4f}')
        if val_acc > best_acc:
            best_acc = val_acc
            # Model selection uses validation only; remember the matching test score.
            best_test_acc = test_acc
    results.append(best_acc)
    test_results.append(best_test_acc)
print(f'Average accuracy: {np.mean(results):.4f} ± {np.std(results):.4f}')
print(f'Average test accuracy at best validation epoch: {np.mean(test_results):.4f} ± {np.std(test_results):.4f}')

Epoch: 000, Loss: 1.5472, Train Acc: 0.3448, Test Acc: 0.4054, Val Acc: 0.5254
Epoch: 001, Loss: 1.8019, Train Acc: 0.4713, Test Acc: 0.3243, Val Acc: 0.3559
Epoch: 002, Loss: 1.2522, Train Acc: 0.5632, Test Acc: 0.2703, Val Acc: 0.4068
Epoch: 003, Loss: 1.0398, Train Acc: 0.6322, Test Acc: 0.2703, Val Acc: 0.4068
Epoch: 004, Loss: 0.8606, Train Acc: 0.6667, Test Acc: 0.3514, Val Acc: 0.4237
Epoch: 005, Loss: 0.7557, Train Acc: 0.7356, Test Acc: 0.3243, Val Acc: 0.4576
Epoch: 006, Loss: 0.7395, Train Acc: 0.7126, Test Acc: 0.3243, Val Acc: 0.4576
Epoch: 007, Loss: 0.6741, Train Acc: 0.7586, Test Acc: 0.3243, Val Acc: 0.4576
Epoch: 008, Loss: 0.5657, Train Acc: 0.8046, Test Acc: 0.3243, Val Acc: 0.4407
Epoch: 009, Loss: 0.4747, Train Acc: 0.8276, Test Acc: 0.3243, Val Acc: 0.5254
Epoch: 010, Loss: 0.4351, Train Acc: 0.7931, Test Acc: 0.3514, Val Acc: 0.5424
Epoch: 011, Loss: 0.4017, Train Acc: 0.8161, Test Acc: 0.3784, Val Acc: 0.5254
Epoch: 012, Loss: 0.3617, Train Acc: 0.8506, Test Ac

In [17]:
from torch_geometric.nn.models import LINKX

# Train a fresh LINKX model on every random split.
# `results` keeps its original meaning (best validation accuracy per split).
# `test_results` additionally records the test accuracy at the epoch where the
# validation accuracy peaked, so a held-out figure is reported as well: the
# maximum validation accuracy on its own is an optimistic estimate.
results = []
test_results = []
for i in range(data.train_mask.shape[1]):  # one column per random split
    linkx = LINKX(in_channels=data.x.shape[1], hidden_channels=64, out_channels=dataset.num_classes, num_layers=2,num_nodes=data.num_nodes)
    train_mask = data.train_mask[:,i]
    test_mask = data.test_mask[:,i]
    val_mask = data.val_mask[:,i]
    optimizer = torch.optim.Adam(linkx.parameters(), lr=0.01,weight_decay=5e-4)
    criterion = torch.nn.CrossEntropyLoss()
    best_acc = 0
    best_test_acc = 0
    for epoch in range(500):
        loss, train_acc = train(linkx,data,train_mask,optimizer,criterion)
        test_acc = test(linkx,data,test_mask)
        val_acc = val(linkx,data,val_mask)
        print(f'Epoch: {epoch:03d}, Loss: {loss:.4f}, Train Acc: {train_acc:.4f}, Test Acc: {test_acc:.4f}, Val Acc: {val_acc:.4f}')
        if val_acc > best_acc:
            best_acc = val_acc
            # Model selection uses validation only; remember the matching test score.
            best_test_acc = test_acc
    results.append(best_acc)
    test_results.append(best_test_acc)
print(f'Average accuracy: {np.mean(results):.4f} ± {np.std(results):.4f}')
print(f'Average test accuracy at best validation epoch: {np.mean(test_results):.4f} ± {np.std(test_results):.4f}')

Epoch: 000, Loss: 1.6502, Train Acc: 0.2069, Test Acc: 0.4324, Val Acc: 0.5254
Epoch: 001, Loss: 0.8423, Train Acc: 0.7931, Test Acc: 0.4595, Val Acc: 0.5593
Epoch: 002, Loss: 0.3739, Train Acc: 0.9770, Test Acc: 0.5135, Val Acc: 0.5932
Epoch: 003, Loss: 0.2055, Train Acc: 0.9885, Test Acc: 0.6486, Val Acc: 0.7119
Epoch: 004, Loss: 0.1282, Train Acc: 0.9885, Test Acc: 0.6216, Val Acc: 0.7119
Epoch: 005, Loss: 0.0792, Train Acc: 1.0000, Test Acc: 0.6486, Val Acc: 0.6780
Epoch: 006, Loss: 0.0471, Train Acc: 1.0000, Test Acc: 0.6216, Val Acc: 0.6780
Epoch: 007, Loss: 0.0285, Train Acc: 1.0000, Test Acc: 0.6216, Val Acc: 0.6610
Epoch: 008, Loss: 0.0185, Train Acc: 1.0000, Test Acc: 0.5946, Val Acc: 0.6610
Epoch: 009, Loss: 0.0127, Train Acc: 1.0000, Test Acc: 0.5946, Val Acc: 0.6441
Epoch: 010, Loss: 0.0091, Train Acc: 1.0000, Test Acc: 0.5946, Val Acc: 0.6271
Epoch: 011, Loss: 0.0069, Train Acc: 1.0000, Test Acc: 0.5946, Val Acc: 0.6271
Epoch: 012, Loss: 0.0050, Train Acc: 1.0000, Test Ac