# PyTorch GCN Implementation

This section implements a Graph Convolutional Network (GCN) from scratch using PyTorch, replicating the functionality from your CogDL setup.


In [116]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.nn import Linear
import torch.optim as optim
from torch_geometric.nn import GCNConv
from torch_geometric.data import Data
import numpy as np
from sklearn.metrics import f1_score, accuracy_score
import matplotlib.pyplot as plt
from tqdm import tqdm
import json


In [117]:
class GCN(nn.Module):
    def __init__(self, num_features, hidden_size, num_classes, num_layers=2, dropout=0.5):
        super(GCN, self).__init__()
        self.num_layers = num_layers
        self.dropout = dropout
        
        # Input layer
        self.convs = nn.ModuleList()
        self.convs.append(GCNConv(num_features, hidden_size))
        
        # Hidden layers
        for _ in range(num_layers - 2):
            self.convs.append(GCNConv(hidden_size, hidden_size))
        
        # Output layer
        if num_layers > 1:
            self.convs.append(GCNConv(hidden_size, num_classes))
        else:
            self.convs.append(GCNConv(num_features, num_classes))
    
    def forward(self, x, edge_index):
        for i, conv in enumerate(self.convs):
            x = conv(x, edge_index)
            if i < len(self.convs) - 1:  # Don't apply activation to last layer
                x = F.relu(x)
                x = F.dropout(x, p=self.dropout, training=self.training)
        return x


In [None]:
# Example loading

In [120]:
# Initialize model
# Training configuration (matching your CogDL setup)
graph = torch.load('./experiment_runs/run_2025-09-26_22-12-13/final_graph.pt')
pyg_data = convert_graph_to_pyg(graph)

config = {
    'num_layers': 2,
    'lr': 0.001,
    'hidden_size': 64,
    'epochs': 500,
    'weight_decay': 0,
    'patience': 100,
    'dropout': 0.5
}
# Extract data dimensions
num_features = pyg_data.x.shape[1]
num_classes = pyg_data.y.shape[1]

model = GCN(
    num_features=num_features,
    hidden_size=config['hidden_size'],
    num_classes=num_classes,
    num_layers=config['num_layers'],
    dropout=config.get('dropout', 0.5)
).to(device)

Original edge_index type: <class 'tuple'>
Edge_index is a tuple with 2 elements
First element type: <class 'torch.Tensor'>, shape: torch.Size([124056])
Second element type: <class 'torch.Tensor'>, shape: torch.Size([124056])
Converted edge_index shape: torch.Size([2, 124056])
Converted edge_index dtype: torch.int64


In [121]:
model

GCN(
  (convs): ModuleList(
    (0): GCNConv(768, 64)
    (1): GCNConv(64, 54)
  )
)

In [None]:
# Training

In [122]:
def train_model(model, data, optimizer, criterion, device):
    """Train the model for one epoch"""
    model.train()
    optimizer.zero_grad()
    
    # Forward pass
    logits = model(data.x, data.edge_index)
    
    # Get predictions for training nodes only
    train_logits = logits[data.train_mask]
    train_labels = data.y[data.train_mask]
    
    # Compute loss
    loss = criterion(train_logits, train_labels.float())
    
    # Backward pass
    loss.backward()
    optimizer.step()
    
    return loss.item()

def evaluate_model(model, data, criterion, device, mask_name='val'):
    """Evaluate the model on validation or test set"""
    model.eval()
    
    with torch.no_grad():
        logits = model(data.x, data.edge_index)
        
        if mask_name == 'val':
            mask = data.val_mask
        elif mask_name == 'test':
            mask = data.test_mask
        else:
            mask = data.train_mask
            
        eval_logits = logits[mask]
        eval_labels = data.y[mask]
        
        # Compute loss
        loss = criterion(eval_logits, eval_labels.float())
        
        # Apply sigmoid to convert logits to probabilities
        probs = torch.sigmoid(eval_logits)
        
        # Convert probabilities to binary predictions (threshold at 0.5)
        predictions = (probs > 0.5).float()
        
        # Compute accuracy as fraction of correctly predicted labels
        accuracy = (predictions == eval_labels).float().mean().item()
        
        # Compute F1 score (micro-averaged)
        f1 = f1_score(eval_labels.cpu().numpy(), predictions.cpu().numpy(), average='micro')
        
    return loss.item(), accuracy, f1


In [123]:
def train_gcn(data, config, device='cpu'):
    """Main training function"""
    
    # Extract data dimensions
    num_features = data.x.shape[1]
    num_classes = data.y.shape[1]
    
    # Initialize model
    model = GCN(
        num_features=num_features,
        hidden_size=config['hidden_size'],
        num_classes=num_classes,
        num_layers=config['num_layers'],
        dropout=config.get('dropout', 0.5)
    ).to(device)
    print(model)
    
    # Move data to device
    data = data.to(device)
    
    # Initialize optimizer and loss function
    optimizer = optim.AdamW(model.parameters(), lr=config['lr'], weight_decay=config.get('weight_decay', 0))
    criterion = nn.BCEWithLogitsLoss()
    
    # Training history
    train_losses = []
    val_losses = []
    val_accuracies = []
    val_f1_scores = []
    
    best_val_f1 = 0
    patience_counter = 0
    best_model_state = None
    
    print(f"Starting training with config: {config}")
    print(f"Model parameters: {sum(p.numel() for p in model.parameters()):,}")
    
    # Training loop
    for epoch in tqdm(range(config['epochs']), desc="Training"):
        # Train
        train_loss = train_model(model, data, optimizer, criterion, device)
        
        # Validate
        val_loss, val_acc, val_f1 = evaluate_model(model, data, criterion, device, 'val')
        
        # Store metrics
        train_losses.append(train_loss)
        val_losses.append(val_loss)
        val_accuracies.append(val_acc)
        val_f1_scores.append(val_f1)
        
        # Early stopping
        if val_f1 > best_val_f1:
            best_val_f1 = val_f1
            patience_counter = 0
            best_model_state = model.state_dict().copy()
        else:
            patience_counter += 1
            
        if patience_counter >= config.get('patience', 100):
            print(f"Early stopping at epoch {epoch+1}")
            break
            
        # Print progress
        if (epoch + 1) % 50 == 0:
            print(f"Epoch {epoch+1}: Train Loss: {train_loss:.4f}, Val Loss: {val_loss:.4f}, "
                  f"Val Acc: {val_acc:.4f}, Val F1: {val_f1:.4f}")
    
    # Load best model
    if best_model_state is not None:
        model.load_state_dict(best_model_state)
    
    # Final evaluation
    test_loss, test_acc, test_f1 = evaluate_model(model, data, criterion, device, 'test')
    
    print(f"\nFinal Results:")
    print(f"Best Val F1: {best_val_f1:.4f}")
    print(f"Test Loss: {test_loss:.4f}")
    print(f"Test Accuracy: {test_acc:.4f}")
    print(f"Test F1: {test_f1:.4f}")
    
    return {
        'model': model,
        'train_losses': train_losses,
        'val_losses': val_losses,
        'val_accuracies': val_accuracies,
        'val_f1_scores': val_f1_scores,
        'best_val_f1': best_val_f1,
        'test_loss': test_loss,
        'test_accuracy': test_acc,
        'test_f1': test_f1
    }


In [34]:

def convert_graph_to_pyg(graph):
    """Convert CogDL graph to PyTorch Geometric Data format"""
    
    # Extract features and labels
    x = graph.x  # Node features
    y = graph.y  # Node labels (multi-label)
    
    # Extract edge information and ensure it's in the correct format
    edge_index = graph.edge_index
    
    # Debug: Check edge_index format
    print(f"Original edge_index type: {type(edge_index)}")
    
    # Handle different edge_index formats
    if isinstance(edge_index, tuple):
        # If it's a tuple, it's likely (source_nodes, target_nodes)
        print(f"Edge_index is a tuple with {len(edge_index)} elements")
        print(f"First element type: {type(edge_index[0])}, shape: {edge_index[0].shape if hasattr(edge_index[0], 'shape') else 'no shape'}")
        print(f"Second element type: {type(edge_index[1])}, shape: {edge_index[1].shape if hasattr(edge_index[1], 'shape') else 'no shape'}")
        
        # Convert tuple to tensor format [2, num_edges]
        source_nodes = edge_index[0]
        target_nodes = edge_index[1]
        
        # Stack them to create [2, num_edges] format
        edge_index = torch.stack([source_nodes, target_nodes], dim=0).long()
        
    elif hasattr(edge_index, 'shape'):
        print(f"Original edge_index shape: {edge_index.shape}")
        print(f"Original edge_index dtype: {edge_index.dtype}")
        
        # Convert to proper format if needed
        if hasattr(edge_index, 'to_dense'):
            # If it's a sparse tensor, convert to dense then to edge_index format
            dense_adj = edge_index.to_dense()
            edge_index = torch.nonzero(dense_adj, as_tuple=False).t().contiguous()
        elif edge_index.dim() == 2 and edge_index.shape[0] != 2:
            # If it's [num_edges, 2], transpose to [2, num_edges]
            edge_index = edge_index.t().contiguous()
        elif edge_index.dim() == 3:
            # If it's a batch format, take the first (and only) graph
            edge_index = edge_index[0]
        
        # Ensure it's a long tensor (integer type)
        edge_index = edge_index.long()
    else:
        raise ValueError(f"Unexpected edge_index type: {type(edge_index)}")
    
    # Extract masks
    train_mask = graph.train_mask
    val_mask = graph.val_mask
    test_mask = graph.test_mask
    
    print(f"Converted edge_index shape: {edge_index.shape}")
    print(f"Converted edge_index dtype: {edge_index.dtype}")
    
    # Create PyTorch Geometric Data object
    data = Data(
        x=x,
        edge_index=edge_index,
        y=y,
        train_mask=train_mask,
        val_mask=val_mask,
        test_mask=test_mask
    )
    
    return data



In [124]:
graph = torch.load('./experiment_runs/run_2025-09-26_22-12-13/final_graph.pt')

In [125]:

pyg_data = convert_graph_to_pyg(graph)
print(f"PyTorch Geometric Data:")
print(f"  Nodes: {pyg_data.num_nodes}")
print(f"  Features: {pyg_data.num_features}")
print(f"  Classes: {pyg_data.y.shape[1]}")
print(f"  Edges: {pyg_data.num_edges}")
print(f"  Train nodes: {pyg_data.train_mask.sum()}")
print(f"  Val nodes: {pyg_data.val_mask.sum()}")
print(f"  Test nodes: {pyg_data.test_mask.sum()}")


Original edge_index type: <class 'tuple'>
Edge_index is a tuple with 2 elements
First element type: <class 'torch.Tensor'>, shape: torch.Size([124056])
Second element type: <class 'torch.Tensor'>, shape: torch.Size([124056])
Converted edge_index shape: torch.Size([2, 124056])
Converted edge_index dtype: torch.int64
PyTorch Geometric Data:
  Nodes: 389066
  Features: 768
  Classes: 54
  Edges: 124056
  Train nodes: 327765
  Val nodes: 19444
  Test nodes: 6078


In [126]:
# Training configuration (matching your CogDL setup)
config = {
    'num_layers': 2,
    'lr': 0.001,
    'hidden_size': 256,
    'epochs': 500,
    'weight_decay': 0,
    'patience': 100,
    'dropout': 0.5
}

# Set device
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"Using device: {device}")

# Train the model
results = train_gcn(pyg_data, config, device)


Using device: cuda
GCN(
  (convs): ModuleList(
    (0): GCNConv(768, 256)
    (1): GCNConv(256, 54)
  )
)
Starting training with config: {'num_layers': 2, 'lr': 0.001, 'hidden_size': 256, 'epochs': 500, 'weight_decay': 0, 'patience': 100, 'dropout': 0.5}
Model parameters: 210,742


Training:  10%|█         | 51/500 [00:09<01:20,  5.58it/s]

Epoch 50: Train Loss: 0.0728, Val Loss: 0.0739, Val Acc: 0.9794, Val F1: 0.0000


Training:  20%|██        | 101/500 [00:18<01:11,  5.59it/s]

Epoch 100: Train Loss: 0.0636, Val Loss: 0.0674, Val Acc: 0.9794, Val F1: 0.0000


Training:  21%|██        | 105/500 [00:19<01:11,  5.51it/s]


Early stopping at epoch 106

Final Results:
Best Val F1: 0.0473
Test Loss: 0.1077
Test Accuracy: 0.9753
Test F1: 0.0000


In [None]:
# Example predictions
logits = results['model'](pyg_data.x, pyg_data.edge_index)
labels = pyg_data.y

probs = torch.sigmoid(logits)

predictions = (probs > 0.5).float()
predictions

tensor([[0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        ...,
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.]], device='cuda:0')

In [30]:
# Save the trained model
torch.save(results['model'].state_dict(), 'trained_gcn_pytorch.pt')
print("Model saved as 'trained_gcn_pytorch.pt'")

# You can also save the entire results dictionary for later analysis
import pickle
with open('training_results.pkl', 'wb') as f:
    pickle.dump(results, f)
print("Training results saved as 'training_results.pkl'")


Model saved as 'trained_gcn_pytorch.pt'
Training results saved as 'training_results.pkl'
