In [3]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import sys
sys.path.append('..')
from Dataset_Loader_Node_Classification import Dataset_Loader
from gcn.models import GCN
from gcn.utils import accuracy

In [4]:
# Single source of truth for the dataset: the loader's name argument, its
# `dataset_name` attribute and the data folder are all derived from this one
# constant so they cannot drift apart when switching datasets.
DATASET_NAME = 'citeseer'

data_loader = Dataset_Loader(dName=DATASET_NAME)
data_loader.dataset_name = DATASET_NAME
data_loader.dataset_source_folder_path = f'../stage_5_data/{DATASET_NAME}/'

loaded_data = data_loader.load()

# `load()` returns a dict; only the 'graph' and 'train_test_val' entries are
# used by this notebook.
graph_data = loaded_data['graph']
train_test_val_indices = loaded_data['train_test_val']

adj = graph_data['utility']['A']
features = graph_data['X']
labels = graph_data['y']

idx_train = train_test_val_indices['idx_train']
idx_val = train_test_val_indices['idx_val']
idx_test = train_test_val_indices['idx_test']

# Fail fast if the loader hands back inconsistent node counts; every later
# cell indexes features, adj and labels with the same node indices.
assert features.shape[0] == adj.shape[0] == adj.shape[1] == labels.shape[0], (
    "features, adjacency matrix and labels disagree on the number of nodes"
)

print("Data Loaded:")
print(f"  Features shape: {features.shape}")
print(f"  Adjacency matrix shape: {adj.shape}")
print(f"  Labels shape: {labels.shape}")
print(f"  Number of training samples: {len(idx_train)}")
print(f"  Number of validation samples: {len(idx_val)}")
print(f"  Number of testing samples: {len(idx_test)}")

Loading citeseer dataset...
Data Loaded:
  Features shape: torch.Size([3312, 3703])
  Adjacency matrix shape: torch.Size([3312, 3312])
  Labels shape: torch.Size([3312])
  Number of training samples: 120
  Number of validation samples: 300
  Number of testing samples: 1000


  return torch.sparse.FloatTensor(indices, values, shape)


In [5]:
# --- Hyperparameters (everything a reader might want to tune) ---
n_epochs = 200
lr = 0.01
weight_decay = 5e-4
hidden_units = 16
dropout_rate = 0.5
seed = 42
cuda_available = torch.cuda.is_available()

# Seed PyTorch's RNG *before* the model is constructed so that a
# Restart & Run All reproduces the same run. torch.manual_seed seeds the
# generators on all devices (CPU and CUDA).
# NOTE(review): assumes GCN draws its initial weights and dropout masks from
# torch's global RNG -- confirm against gcn.models.
torch.manual_seed(seed)

# Input width is the number of feature columns; the class count is derived
# from the largest label id, i.e. labels are treated as 0..max contiguous ids.
n_features = features.shape[1]
n_classes = labels.max().item() + 1

model = GCN(nfeat=n_features,
            nhid=hidden_units,
            nclass=n_classes,
            dropout=dropout_rate)

optimizer = optim.Adam(model.parameters(),
                       lr=lr, weight_decay=weight_decay)

# Move the model and every tensor it is indexed with onto the GPU together,
# so that later cells never mix CPU and CUDA tensors.
if cuda_available:
    model.cuda()
    features = features.cuda()
    adj = adj.cuda()
    labels = labels.cuda()
    idx_train = idx_train.cuda()
    idx_val = idx_val.cuda()
    idx_test = idx_test.cuda()

print("Model Initialized:")
print(model)

Model Initialized:
GCN(
  (gc1): GraphConvolution (3703 -> 16)
  (gc2): GraphConvolution (16 -> 6)
)


In [6]:
import time

def train_epoch(epoch):
    """Run one optimisation step, then score the model on the validation split.

    The forward pass is computed over all nodes (``features``, ``adj``); the
    loss and accuracy are restricted to ``idx_train`` for the update and to
    ``idx_val`` for the validation report. Uses the notebook-level ``model``,
    ``optimizer``, ``features``, ``adj``, ``labels``, ``idx_train`` and
    ``idx_val``.

    Args:
        epoch: Zero-based epoch index; it is printed one-based.

    Returns:
        float: Validation loss measured after this epoch's parameter update.
    """
    t = time.time()

    # --- Training step (train mode) ---
    model.train()
    optimizer.zero_grad()

    output = model(features, adj)

    loss_train = F.nll_loss(output[idx_train], labels[idx_train])
    acc_train = accuracy(output[idx_train], labels[idx_train])

    loss_train.backward()
    optimizer.step()

    # --- Validation (eval mode) ---
    # Nothing is back-propagated from the validation pass, so disable autograd
    # to avoid building and holding an unused computation graph every epoch.
    model.eval()
    with torch.no_grad():
        output_val = model(features, adj)
        loss_val = F.nll_loss(output_val[idx_val], labels[idx_val])
        acc_val = accuracy(output_val[idx_val], labels[idx_val])

    print(f'Epoch: {epoch+1:04d}',
          f'loss_train: {loss_train.item():.4f}',
          f'acc_train: {acc_train.item():.4f}',
          f'loss_val: {loss_val.item():.4f}',
          f'acc_val: {acc_val.item():.4f}',
          f'time: {time.time() - t:.4f}s')
    return loss_val.item()

# Drive the optimisation: call train_epoch once per epoch for n_epochs epochs.
# The validation loss returned by train_epoch is not used here.
print("Starting Training...")
t_total = time.time()  # wall-clock start of the whole training run
for epoch in range(n_epochs):
    train_epoch(epoch)

print("Optimization Finished!")
print(f"Total time elapsed: {time.time() - t_total:.4f}s")

Starting Training...
Epoch: 0001 loss_train: 1.9884 acc_train: 0.0417 loss_val: 1.7747 acc_val: 0.1967 time: 0.0530s
Epoch: 0002 loss_train: 1.7510 acc_train: 0.1583 loss_val: 1.7782 acc_val: 0.1900 time: 0.0106s
Epoch: 0003 loss_train: 1.6091 acc_train: 0.3417 loss_val: 1.7876 acc_val: 0.2533 time: 0.0087s
Epoch: 0004 loss_train: 1.4304 acc_train: 0.6667 loss_val: 1.8072 acc_val: 0.2167 time: 0.0086s
Epoch: 0005 loss_train: 1.3125 acc_train: 0.7083 loss_val: 1.8424 acc_val: 0.1900 time: 0.0085s
Epoch: 0006 loss_train: 1.1806 acc_train: 0.7417 loss_val: 1.8947 acc_val: 0.1700 time: 0.0087s
Epoch: 0007 loss_train: 1.0690 acc_train: 0.7833 loss_val: 1.9606 acc_val: 0.1500 time: 0.0090s
Epoch: 0008 loss_train: 0.9432 acc_train: 0.7750 loss_val: 2.0202 acc_val: 0.1433 time: 0.0089s
Epoch: 0009 loss_train: 0.8564 acc_train: 0.7750 loss_val: 2.0642 acc_val: 0.1533 time: 0.0088s
Epoch: 0010 loss_train: 0.8193 acc_train: 0.7833 loss_val: 2.0787 acc_val: 0.1767 time: 0.0082s
Epoch: 0011 loss_tr

In [7]:
def test_model():
    """Evaluate the notebook-level ``model`` on the test split and print the result.

    Computes the NLL loss and accuracy over ``idx_test`` from a forward pass on
    ``features`` and ``adj``, then prints both. Returns ``None``.
    """
    model.eval()
    # Pure evaluation: no gradients are needed, so skip autograd bookkeeping.
    with torch.no_grad():
        output = model(features, adj)
        loss_test = F.nll_loss(output[idx_test], labels[idx_test])
        acc_test = accuracy(output[idx_test], labels[idx_test])
    print("\nTest set results:",
          f"loss= {loss_test.item():.4f}",
          f"accuracy= {acc_test.item():.4f}")

# Final step: print loss and accuracy on the test indices (idx_test).
print("\nEvaluating on Test Set...")
test_model()


Evaluating on Test Set...

Test set results: loss= 1.3965 accuracy= 0.6860
