In [1]:
import os

# This flag must be in the environment BEFORE `import torch`: PyTorch decides
# whether unsupported MPS ops fall back to the CPU when the library is loaded,
# so setting it after the import has no effect.
os.environ['PYTORCH_ENABLE_MPS_FALLBACK'] = '1'

import matplotlib.pyplot as plt
import networkx as nx
import torch
import torch.nn.functional as F
from sklearn.manifold import TSNE
from torch_geometric.datasets import Planetoid
from torch_geometric.loader import NeighborLoader  # Mini-batch neighbor sampling
from torch_geometric.nn import GATConv

In [2]:
# Load the Cora Planetoid dataset; it holds a single graph, which is kept on
# the CPU for now.
dataset = Planetoid(root='/tmp/Cora', name='Cora')
device = torch.device('cpu')
data = dataset[0].to(device)

In [3]:
# Mini-batch loader that samples a 2-hop neighborhood around training nodes.
# NOTE: the underlying neighbor sampler needs either 'pyg-lib' or
# 'torch-sparse' to be installed; without one of them, iterating this loader
# fails with ImportError.
train_loader = NeighborLoader(
    data,
    num_neighbors=[10, 10],       # Sample 10 neighbors for each of the 2 layers
    batch_size=16,                # Process 16 seed nodes in each mini-batch
    input_nodes=data.train_mask,  # Only use training nodes as seeds
    shuffle=True,                 # Re-draw the batch composition every epoch
)

In [4]:
class GAT(torch.nn.Module):
    """Two-layer graph attention network for node classification.

    Layer 1 uses ``heads`` attention heads whose outputs are concatenated;
    layer 2 uses a single head that emits one score per class. Dropout is
    applied to the inputs of both layers and inside both attention layers.

    Args:
        in_channels: Number of input node features. Defaults to
            ``dataset.num_node_features`` of the notebook-level ``dataset``.
        out_channels: Number of target classes. Defaults to
            ``dataset.num_classes`` of the notebook-level ``dataset``.
        hidden_channels: Output features per attention head in layer 1.
        heads: Number of attention heads in layer 1.
        dropout: Dropout probability used everywhere in the model.
    """

    def __init__(self, in_channels=None, out_channels=None,
                 hidden_channels=8, heads=8, dropout=0.6):
        super().__init__()
        # Fall back to the notebook-level dataset so `GAT()` keeps working.
        if in_channels is None:
            in_channels = dataset.num_node_features
        if out_channels is None:
            out_channels = dataset.num_classes

        self.dropout = dropout
        # Layer 1: `heads` attention heads, each outputting `hidden_channels` features
        self.conv1 = GATConv(in_channels, hidden_channels, heads=heads, dropout=dropout)
        # Layer 2: 1 attention head (for final output), taking heads * hidden_channels inputs
        self.conv2 = GATConv(hidden_channels * heads, out_channels, heads=1,
                             concat=False, dropout=dropout)

    def forward(self, data, edge_index=None):
        """Return per-node log-probabilities over the classes.

        Supports two call styles:
            * ``model(data)`` where ``data`` exposes ``.x`` and ``.edge_index``;
            * ``model(x, edge_index)`` with the tensors passed explicitly.

        Args:
            data: A graph/batch object with ``x`` and ``edge_index`` attributes,
                or the node feature tensor itself when ``edge_index`` is given.
            edge_index: Edge index tensor; omit it when ``data`` is a graph object.

        Returns:
            Log-softmax scores computed along dimension 1.
        """
        if edge_index is None:
            x, edge_index = data.x, data.edge_index
        else:
            # Called as model(x, edge_index): the first argument is the features.
            x = data

        # Apply dropout before the first layer
        x = F.dropout(x, p=self.dropout, training=self.training)
        # First GAT layer with ELU activation
        x = F.elu(self.conv1(x, edge_index))
        # Apply dropout before the second layer
        x = F.dropout(x, p=self.dropout, training=self.training)
        # Second GAT layer
        x = self.conv2(x, edge_index)

        return F.log_softmax(x, dim=1)

In [5]:
# Instantiate the network on the currently selected device and attach an
# Adam optimizer with L2 weight decay.
model = GAT()
model = model.to(device)
optimizer = torch.optim.Adam(
    model.parameters(),
    lr=0.005,
    weight_decay=5e-4,
)

In [6]:
# Use the MPS backend when PyTorch reports it as available, otherwise the CPU.
if torch.backends.mps.is_available():
    device = torch.device('mps')
else:
    device = torch.device('cpu')

# Move the full graph to that device and build a fresh model/optimizer pair there.
data = data.to(device)
model = GAT().to(device)
optimizer = torch.optim.Adam(
    model.parameters(),
    lr=0.005,
    weight_decay=5e-4,
)

In [7]:
def train():
    """Run one training epoch over ``train_loader`` and return the mean loss.

    Uses the notebook-level ``model``, ``optimizer``, ``train_loader`` and
    ``device``. The loss of each mini-batch is computed on its first
    ``batch.batch_size`` rows only (the seed nodes), not on the sampled
    neighbors.

    Returns:
        The loss averaged over all seed nodes seen in the epoch, as a float
        (0.0 if the loader yields no batches).
    """
    model.train()
    total_loss = 0.0
    total_seeds = 0
    # Iterate over mini-batches
    for batch in train_loader:
        batch = batch.to(device)
        optimizer.zero_grad()
        # GAT.forward takes the batch object and reads x / edge_index from it
        out = model(batch)
        # Calculate loss only on the batch's seed nodes
        num_seeds = batch.batch_size
        loss = F.nll_loss(out[:num_seeds], batch.y[:num_seeds])
        loss.backward()
        optimizer.step()
        # Weight by seed count so a smaller final batch does not skew the mean
        total_loss += loss.item() * num_seeds
        total_seeds += num_seeds
    return total_loss / max(total_seeds, 1)

In [8]:
NUM_EPOCHS = 100  # Fewer epochs often needed with mini-batching
LOG_EVERY = 10    # Report the training loss every this many epochs

print("Starting GAT training with mini-batching...")
for epoch in range(1, NUM_EPOCHS + 1):
    loss = train()
    # Surface progress; otherwise the per-epoch loss is computed and thrown away
    if epoch == 1 or epoch % LOG_EVERY == 0:
        print(f"Epoch {epoch:03d} | train loss: {loss:.4f}")

Starting GAT training with mini-batching...


ImportError: 'NeighborSampler' requires either 'pyg-lib' or 'torch-sparse'