In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import sys
sys.path.append('..')
from Dataset_Loader_Node_Classification import Dataset_Loader
from gcn.models import GCN
from gcn.utils import accuracy

In [3]:
# Load the Cora citation graph through the project's Dataset_Loader.
# The description previously said "Pubmed", which did not match the dataset
# actually being loaded (dName / dataset_name / source folder are all 'cora').
data_loader = Dataset_Loader(dName='cora', dDescription='Cora citation network')
data_loader.dataset_name = 'cora'
data_loader.dataset_source_folder_path = '../stage_5_data/cora/'

# load() returns a dict; this notebook reads its 'graph' and 'train_test_val' entries.
loaded_data = data_loader.load()

graph_data = loaded_data['graph']
train_test_val_indices = loaded_data['train_test_val']

# Graph tensors: adjacency (stored under utility/A), node features, node labels.
# NOTE(review): whether A is already normalised is decided inside Dataset_Loader - confirm there.
adj = graph_data['utility']['A']
features = graph_data['X']
labels = graph_data['y']

# Node indices for each split.
idx_train = train_test_val_indices['idx_train']
idx_val = train_test_val_indices['idx_val']
idx_test = train_test_val_indices['idx_test']

# Fail early if the loader returned inconsistent node counts: the adjacency
# must be square and agree with the number of feature rows and labels.
assert adj.shape[0] == adj.shape[1] == features.shape[0] == labels.shape[0], \
    "adjacency, features and labels disagree on the number of nodes"

print("Data Loaded:")
print(f"  Features shape: {features.shape}")
print(f"  Adjacency matrix shape: {adj.shape}")
print(f"  Labels shape: {labels.shape}")
print(f"  Number of training samples: {len(idx_train)}")
print(f"  Number of validation samples: {len(idx_val)}")
print(f"  Number of testing samples: {len(idx_test)}")

Loading cora dataset...
Data Loaded:
  Features shape: torch.Size([2708, 1433])
  Adjacency matrix shape: torch.Size([2708, 2708])
  Labels shape: torch.Size([2708])
  Number of training samples: 140
  Number of validation samples: 300
  Number of testing samples: 1000


In [4]:
# --- Hyperparameters (everything a reader might want to tune) ---
seed = 42             # fixed seed so weight init and dropout are repeatable
n_epochs = 200
lr = 0.01
weight_decay = 5e-4   # passed to Adam as its weight_decay argument
hidden_units = 16
dropout_rate = 0.5
cuda_available = torch.cuda.is_available()
device = torch.device('cuda' if cuda_available else 'cpu')

# Seed PyTorch's RNG before the model is built so that parameter
# initialisation and the dropout masks used during training are reproducible.
# NOTE(review): if Dataset_Loader draws the train/val/test split randomly,
# that happens in an earlier cell and is not covered by this seed.
torch.manual_seed(seed)

# Input width comes from the feature matrix; the class count assumes labels
# are integer ids starting at 0, so the largest id + 1 is the class count.
n_features = features.shape[1]
n_classes = labels.max().item() + 1

model = GCN(nfeat=n_features,
            nhid=hidden_units,
            nclass=n_classes,
            dropout=dropout_rate)

# Move the model and every tensor it consumes to the selected device.
# On a CPU-only machine these calls are no-ops.
model.to(device)
features = features.to(device)
adj = adj.to(device)
labels = labels.to(device)
idx_train = idx_train.to(device)
idx_val = idx_val.to(device)
idx_test = idx_test.to(device)

# Build the optimizer only after the model lives on its final device, as the
# torch.optim documentation recommends.
optimizer = optim.Adam(model.parameters(),
                       lr=lr, weight_decay=weight_decay)

print("Model Initialized:")
print(model)

Model Initialized:
GCN(
  (gc1): GraphConvolution (1433 -> 16)
  (gc2): GraphConvolution (16 -> 7)
)


In [5]:
import time

def train_epoch(epoch):
    """Run one full-batch training step, then score the validation split.

    Uses the notebook-level ``model``, ``optimizer``, ``features``, ``adj``,
    ``labels``, ``idx_train`` and ``idx_val``. Prints a one-line progress
    report containing train/validation loss and accuracy plus wall time.

    Args:
        epoch: Zero-based epoch index. Only used for the progress line,
            where it is shown as ``epoch + 1``.

    Returns:
        float: The validation loss measured after this epoch's weight update.
    """
    t = time.time()

    # --- Training step: forward over the whole graph, loss on train nodes only ---
    model.train()
    optimizer.zero_grad()

    output = model(features, adj)

    # NOTE(review): nll_loss expects log-probabilities; presumably the GCN's
    # forward ends in log_softmax - confirm in gcn.models.
    loss_train = F.nll_loss(output[idx_train], labels[idx_train])
    acc_train = accuracy(output[idx_train], labels[idx_train])

    loss_train.backward()
    optimizer.step()

    # --- Validation: separate forward pass in eval mode with the updated weights ---
    # No gradients are needed here, so skip building the autograd graph.
    model.eval()
    with torch.no_grad():
        output_val = model(features, adj)
        loss_val = F.nll_loss(output_val[idx_val], labels[idx_val])
        acc_val = accuracy(output_val[idx_val], labels[idx_val])

    print(f'Epoch: {epoch+1:04d}',
          f'loss_train: {loss_train.item():.4f}',
          f'acc_train: {acc_train.item():.4f}',
          f'loss_val: {loss_val.item():.4f}',
          f'acc_val: {acc_val.item():.4f}',
          f'time: {time.time() - t:.4f}s')
    return loss_val.item()

# Run the full training schedule. train_epoch returns the validation loss for
# each epoch; keep that history instead of discarding it so the run can be
# inspected (or plotted) afterwards.
print("Starting Training...")
t_total = time.time()
val_loss_history = []
for epoch in range(n_epochs):
    val_loss_history.append(train_epoch(epoch))

print("Optimization Finished!")
print(f"Total time elapsed: {time.time() - t_total:.4f}s")

# Summarise the run; guarded so that n_epochs == 0 does not raise on min().
if val_loss_history:
    best_epoch = min(range(len(val_loss_history)), key=val_loss_history.__getitem__)
    print(f"Best validation loss: {val_loss_history[best_epoch]:.4f} "
          f"(epoch {best_epoch + 1:04d})")

Starting Training...
Epoch: 0001 loss_train: 2.0091 acc_train: 0.1214 loss_val: 1.9381 acc_val: 0.2133 time: 0.1037s
Epoch: 0002 loss_train: 1.8785 acc_train: 0.2643 loss_val: 1.8912 acc_val: 0.2867 time: 0.0051s
Epoch: 0003 loss_train: 1.7898 acc_train: 0.3857 loss_val: 1.8391 acc_val: 0.3500 time: 0.0050s
Epoch: 0004 loss_train: 1.6906 acc_train: 0.5000 loss_val: 1.7834 acc_val: 0.3367 time: 0.0056s
Epoch: 0005 loss_train: 1.5828 acc_train: 0.5571 loss_val: 1.7247 acc_val: 0.3567 time: 0.0052s
Epoch: 0006 loss_train: 1.4722 acc_train: 0.5786 loss_val: 1.6658 acc_val: 0.3567 time: 0.0044s
Epoch: 0007 loss_train: 1.3349 acc_train: 0.6571 loss_val: 1.6071 acc_val: 0.3800 time: 0.0051s
Epoch: 0008 loss_train: 1.2346 acc_train: 0.6071 loss_val: 1.5486 acc_val: 0.3867 time: 0.0046s
Epoch: 0009 loss_train: 1.1384 acc_train: 0.6357 loss_val: 1.4913 acc_val: 0.4100 time: 0.0048s
Epoch: 0010 loss_train: 1.0739 acc_train: 0.6357 loss_val: 1.4332 acc_val: 0.4300 time: 0.0053s
Epoch: 0011 loss_tr

In [6]:
def test_model():
    """Evaluate the current model on the test split and print the results.

    Uses the notebook-level ``model``, ``features``, ``adj``, ``labels`` and
    ``idx_test``. Runs a single forward pass over the whole graph in eval
    mode and reports loss and accuracy restricted to the test nodes.

    Returns:
        None. The metrics are only printed.
    """
    model.eval()
    # Pure inference: disable autograd so no graph is built for this pass.
    with torch.no_grad():
        output = model(features, adj)
        loss_test = F.nll_loss(output[idx_test], labels[idx_test])
        acc_test = accuracy(output[idx_test], labels[idx_test])
    print("\nTest set results:",
          f"loss= {loss_test.item():.4f}",
          f"accuracy= {acc_test.item():.4f}")

# Report loss and accuracy on the test split using the model's current weights.
# test_model() prints its results itself and returns nothing.
print("\nEvaluating on Test Set...")
test_model()


Evaluating on Test Set...

Test set results: loss= 0.6196 accuracy= 0.8070
