# Setup


In [1]:
# === Imports ===
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as func
import scipy as sp
import scipy.sparse as sp
import networkx as nx
import pickle as pkl
from scipy.sparse.linalg import eigsh
import sys
from torch.optim import Adam


# Utility Functions

In [2]:


def parse_index_file(filename):
    """Read a text file holding one integer per line.

    Args:
        filename: Path of the index file to read.

    Returns:
        list[int]: The integers in file order. Blank lines (for example a
        trailing newline at the end of the file) are skipped.

    Raises:
        ValueError: If a non-blank line is not a valid integer.
    """
    # The context manager guarantees the handle is closed even if int() raises.
    with open(filename) as handle:
        return [int(line) for line in handle if line.strip()]


def sample_mask(idx, l):
    """Build a boolean vector of length ``l`` that is True exactly at the positions in ``idx``."""
    selected = np.full(l, False, dtype=np.bool_)  # np.bool_ because the np.bool alias is deprecated
    selected[idx] = True
    return selected


def scipy_coo_to_torch_sparse(coo: sp.coo_matrix, device=None, dtype=torch.float32):
    """Convert a SciPy sparse matrix to a coalesced ``torch.sparse_coo_tensor``.

    Args:
        coo: SciPy sparse matrix; it is converted with ``.tocoo()`` first, so
            any SciPy sparse format is accepted.
        device: Optional target device. ``None`` leaves the tensor where
            ``torch.from_numpy`` created it.
        dtype: Torch dtype of the stored values.

    Returns:
        A coalesced sparse COO tensor with the same shape as ``coo``.
    """
    coo = coo.tocoo()
    indices = torch.from_numpy(np.vstack((coo.row, coo.col))).long()
    values = torch.from_numpy(coo.data).to(dtype)
    tensor = torch.sparse_coo_tensor(indices, values, torch.Size(coo.shape)).coalesce()
    # Compare against None explicitly: a plain truthiness test would treat the
    # valid device index 0 as "no device requested".
    return tensor.to(device) if device is not None else tensor



def normalize_adj(adj):
    """Symmetric normalization:  D^{-1/2} A D^{-1/2}  (returns SciPy COO).

    Args:
        adj: Adjacency matrix; anything ``sp.coo_matrix`` accepts.

    Returns:
        scipy.sparse COO matrix. Rows and columns of nodes whose degree
        (row sum) is not positive are scaled by 0, so they come out as zeros.
    """
    adj = sp.coo_matrix(adj)
    rowsum = np.array(adj.sum(1)).flatten()
    if not np.issubdtype(rowsum.dtype, np.floating):
        rowsum = rowsum.astype(np.float64)
    # With `where=` and no `out=`, NumPy leaves the unselected entries
    # uninitialized, so pre-fill the output with zeros for degree <= 0 nodes.
    d_inv_sqrt = np.zeros_like(rowsum)
    np.power(rowsum, -0.5, out=d_inv_sqrt, where=rowsum > 0)
    D_inv_sqrt = sp.diags(d_inv_sqrt)
    # (A D)^T D = D A^T D, which equals D A D when A is symmetric.
    return (adj.dot(D_inv_sqrt).transpose().dot(D_inv_sqrt)).tocoo()


def preprocess_adj_to_torch(adj, add_self_loops=True, device=None, dtype=torch.float32):
    """Normalize the adjacency (optionally after adding the identity) and return it as a torch sparse tensor."""
    with_loops = adj
    if add_self_loops:
        # A + I: every node also aggregates its own features.
        identity = sp.eye(adj.shape[0], dtype=adj.dtype, format='coo')
        with_loops = adj + identity
    normalized = normalize_adj(with_loops)
    return scipy_coo_to_torch_sparse(normalized, device=device, dtype=dtype)


def preprocess_features_to_dense(features, device=None, dtype=torch.float32):
    """Row-normalize a SciPy sparse feature matrix and return it as a dense torch tensor.

    Args:
        features: SciPy sparse matrix of shape [N, F].
        device: Optional target device; ``None`` keeps the tensor on the CPU.
        dtype: Torch dtype of the returned tensor.

    Returns:
        Dense tensor [N, F] in which every row with a non-zero sum is divided
        by that sum; rows whose sum is zero are scaled by 0.
    """
    rowsum = np.array(features.sum(1)).flatten()
    # NumPy refuses negative integer powers of integer arrays, so promote first.
    if not np.issubdtype(rowsum.dtype, np.floating):
        rowsum = rowsum.astype(np.float64)
    # With `where=` and no `out=`, NumPy leaves the unselected entries
    # uninitialized, so pre-fill the output with zeros for zero-sum rows.
    r_inv = np.zeros_like(rowsum)
    np.power(rowsum, -1, out=r_inv, where=rowsum != 0)
    R_inv = sp.diags(r_inv)
    feats_norm = R_inv.dot(features)          # still SciPy
    # For Cora/Citeseer/Pubmed sizes, dense is fine:
    feats_dense = torch.from_numpy(feats_norm.toarray()).to(dtype)
    # Explicit None check so that the valid device index 0 is honoured.
    return feats_dense.to(device) if device is not None else feats_dense




def load_data_as_torch(dataset_str, device=None, dtype=torch.float32):
    """
    Load citation data (Cora/Citeseer/Pubmed) from ``data/ind.<dataset_str>.*`` and return:
      adj_torch_sparse, features_dense, y_train, y_val, y_test, train_mask, val_mask, test_mask

    Args:
        dataset_str: Dataset name used to build the file names; ``'citeseer'``
            additionally triggers padding of the test block.
        device: Optional target device for every returned tensor; ``None``
            leaves them on the CPU.
        dtype: Torch dtype for the adjacency values, features and labels.

    Returns:
        Tuple of eight tensors. The ``y_*`` tensors have the full label-matrix
        shape and are zero outside their split; masks are ``torch.bool``.
        Train rows are the first ``len(y)`` nodes, validation rows the next
        500, and test rows the indices listed in the ``test.index`` file.
    """
    names = ['x', 'y', 'tx', 'ty', 'allx', 'ally', 'graph']
    objects = []
    for n in names:
        # SECURITY: unpickling can execute arbitrary code; only load dataset
        # files obtained from a trusted source.
        with open(f"data/ind.{dataset_str}.{n}", 'rb') as f:
            objects.append(pkl.load(f, encoding='latin1'))

    x, y, tx, ty, allx, ally, graph = tuple(objects)
    test_idx_reorder = parse_index_file(f"data/ind.{dataset_str}.test.index")
    test_idx_range = np.sort(test_idx_reorder)

    if dataset_str == 'citeseer':
        # Fix isolated nodes: the test indices have gaps, so expand tx/ty to
        # the full contiguous index range and leave the missing rows as zeros.
        test_idx_range_full = range(min(test_idx_reorder), max(test_idx_reorder) + 1)
        tx_extended = sp.lil_matrix((len(test_idx_range_full), x.shape[1]))
        tx_extended[test_idx_range - min(test_idx_range), :] = tx
        tx = tx_extended
        ty_extended = np.zeros((len(test_idx_range_full), y.shape[1]))
        ty_extended[test_idx_range - min(test_idx_range), :] = ty
        ty = ty_extended

    # Build feature matrix; the test rows are permuted into the order given by
    # the index file.
    features = sp.vstack((allx, tx)).tolil()
    features[test_idx_reorder, :] = features[test_idx_range, :]

    # Build adjacency from graph dict
    adj = nx.adjacency_matrix(nx.from_dict_of_lists(graph))  # SciPy sparse

    # Labels (one-hot), permuted the same way as the features
    labels = np.vstack((ally, ty))
    labels[test_idx_reorder, :] = labels[test_idx_range, :]

    # Splits (standard GCN)
    idx_test = test_idx_range.tolist()
    idx_train = range(len(y))
    idx_val = range(len(y), len(y) + 500)

    train_mask_np = sample_mask(idx_train, labels.shape[0])
    val_mask_np = sample_mask(idx_val, labels.shape[0])
    test_mask_np = sample_mask(idx_test, labels.shape[0])

    y_train_np = np.zeros_like(labels)
    y_val_np = np.zeros_like(labels)
    y_test_np = np.zeros_like(labels)
    y_train_np[train_mask_np, :] = labels[train_mask_np, :]
    y_val_np[val_mask_np, :] = labels[val_mask_np, :]
    y_test_np[test_mask_np, :] = labels[test_mask_np, :]

    # ---- conversions to torch ----
    def _to_torch(array, torch_dtype):
        """Convert a NumPy array to a tensor of `torch_dtype`, moved to `device` when one is given."""
        tensor = torch.from_numpy(array).to(torch_dtype)
        # Explicit None check so that the valid device index 0 is honoured.
        return tensor.to(device) if device is not None else tensor

    adj_torch = preprocess_adj_to_torch(adj, add_self_loops=True, device=device, dtype=dtype)
    features_torch = preprocess_features_to_dense(features, device=device, dtype=dtype)

    y_train = _to_torch(y_train_np, dtype)
    y_val = _to_torch(y_val_np, dtype)
    y_test = _to_torch(y_test_np, dtype)

    train_mask = _to_torch(train_mask_np, torch.bool)
    val_mask = _to_torch(val_mask_np, torch.bool)
    test_mask = _to_torch(test_mask_np, torch.bool)

    return adj_torch, features_torch, y_train, y_val, y_test, train_mask, val_mask, test_mask


# Metrics

In [3]:
def masked_softmax_cross_entropy(preds, labels, mask):
    """Cross-entropy averaged over the nodes selected by ``mask``.

    Args:
        preds: Logits, one row per node.
        labels: One-hot (or score) matrix; the target class of each row is its argmax.
        mask: Per-node mask (bool or numeric). It is not modified.

    Returns:
        Scalar tensor: the mean over all nodes of ``loss * mask / mask.mean()``,
        which equals the mean loss over the masked nodes for a 0/1 mask.
    """
    targets = labels.argmax(dim=1)
    per_node_loss = func.cross_entropy(preds, targets, reduction='none')

    # `.float()` returns the very same tensor when the mask is already float32,
    # so normalize out-of-place to avoid mutating the caller's mask.
    weights = mask.float()
    weights = weights / weights.mean()

    return (per_node_loss * weights).mean()


def masked_accuracy(preds, labels, mask):
    """Fraction of correctly classified nodes among those selected by ``mask``.

    The predicted and true classes are the row-wise argmax of ``preds`` and ``labels``.
    """
    hits = (preds.argmax(dim=1) == labels.argmax(dim=1)).float()

    # Rescale the mask so that a plain mean over all nodes becomes a mean
    # over the masked nodes only.
    weights = mask.float()
    weights = weights / weights.mean()
    hits *= weights

    return hits.mean()

# Layers


In [4]:
class GraphConvolution(nn.Module):
    """Single graph-convolution layer computing ``adj @ (input @ weight) + bias``.

    Args:
        in_features: Size of each input feature vector.
        out_features: Size of each output feature vector.
        bias: If True, a bias vector (initialized to zeros) is added to the output.
    """

    def __init__(self, in_features, out_features, bias = True):
        super().__init__()
        # torch.empty only allocates; reset_parameters() fills in the values.
        self.weight = nn.Parameter(torch.empty(in_features, out_features))
        if bias:
            self.bias = nn.Parameter(torch.empty(out_features))
        else:
            self.register_parameter('bias', None)
        self.reset_parameters()

    def reset_parameters(self):
        """Xavier-uniform initialization for the weight, zeros for the bias."""
        # The in-place `xavier_uniform_` replaces the deprecated `xavier_uniform`.
        nn.init.xavier_uniform_(self.weight)
        if self.bias is not None:
            nn.init.zeros_(self.bias)

    def forward(self, input, adj):
        """Apply the layer.

        Args:
            input: Dense matrix with ``in_features`` columns.
            adj: Matrix multiplied from the left; sparse COO tensors go
                through ``torch.spmm``, everything else through ``torch.mm``.

        Returns:
            Tensor with ``out_features`` columns.
        """
        support = torch.mm(input, self.weight)
        output = torch.spmm(adj, support) if adj.is_sparse else torch.mm(adj, support)
        if self.bias is not None:
            output = output + self.bias

        return output

class GCN(nn.Module):
    """Two stacked GraphConvolution layers with ReLU and dropout in between; outputs per-node logits."""

    def __init__(self, in_features, hidden_dim, class_num, dropout, bias= True):
        super(GCN, self).__init__()
        # Layers are created in this order so parameter initialization
        # consumes the RNG in the same sequence.
        self.gcn_1 = GraphConvolution(in_features, hidden_dim, bias)
        self.gcn_2 = GraphConvolution(hidden_dim, class_num, bias)
        self.dropout = nn.Dropout(p=dropout)

    def forward(self, x, adj):
        """Return the logits produced from features ``x`` and adjacency ``adj``."""
        hidden = self.gcn_1(x, adj)
        hidden = func.relu(hidden)
        hidden = self.dropout(hidden)
        return self.gcn_2(hidden, adj)   # logits
    
    

# Training


In [5]:



def train_gcn(dataset='cora',
              hidden_dim=16,
              dropout=0.5,
              lr=0.01,
              weight_decay=5e-4,
              epochs=200,
              patience=10,
              seed=42,
              verbose=False):
    """Train a two-layer GCN with early stopping and evaluate it on the test split.

    Args:
        dataset: Dataset name passed to ``load_data_as_torch``.
        hidden_dim: Width of the hidden graph-convolution layer.
        dropout: Dropout probability applied after the first layer.
        lr: Adam learning rate.
        weight_decay: L2 penalty applied to the first layer only; the output
            layer is never regularized.
        epochs: Maximum number of full-batch training epochs.
        patience: Stop after this many consecutive epochs without a new best
            validation accuracy.
        seed: Seed passed to ``torch.manual_seed``.
        verbose: If True, print train/validation loss and accuracy every epoch.

    Returns:
        ``(model, metrics)`` where the model carries the weights of the best
        validation epoch and ``metrics`` has the keys ``'val_acc'`` and
        ``'test_acc'``.
    """
    torch.manual_seed(seed)
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    # Load data (adj sparse torch, features dense torch, labels one-hot, masks bool)
    adj, features, y_train, y_val, y_test, train_mask, val_mask, test_mask = \
        load_data_as_torch(dataset, device=device, dtype=torch.float32)

    in_features = features.shape[1]
    num_classes = y_train.shape[1]

    model = GCN(in_features, hidden_dim, num_classes, dropout=dropout).to(device)

    # Honour the `weight_decay` argument instead of a hard-coded constant.
    optimizer = Adam([
        {'params': model.gcn_1.parameters(), 'weight_decay': weight_decay},  # regularized
        {'params': model.gcn_2.parameters(), 'weight_decay': 0.0},           # no regularization
    ], lr=lr)

    best_val = -float('inf')
    best_state = None
    wait = 0

    for epoch in range(1, epochs + 1):
        model.train()
        optimizer.zero_grad()

        logits = model(features, adj)  # [N, C]
        loss_train = masked_softmax_cross_entropy(logits, y_train, train_mask)

        loss_train.backward()
        optimizer.step()

        # ---- validation ----
        model.eval()
        with torch.no_grad():
            val_logits = model(features, adj)
            acc_val = masked_accuracy(val_logits, y_val, val_mask)

            if verbose:
                # These values are only needed for logging, so skip them otherwise.
                acc_train = masked_accuracy(logits.detach(), y_train, train_mask)
                loss_val = masked_softmax_cross_entropy(val_logits, y_val, val_mask)
                print(f"Epoch {epoch:03d} | "
                      f"Train Loss {loss_train.item():.4f} Acc {acc_train.item():.4f} | "
                      f"Val Loss {loss_val.item():.4f} Acc {acc_val.item():.4f}")

        # Early stopping on validation accuracy (or use -loss)
        score = acc_val.item()
        if score > best_val:
            best_val = score
            # Snapshot on the CPU so the checkpoint does not alias live parameters.
            best_state = {k: v.cpu().clone() for k, v in model.state_dict().items()}
            wait = 0
        else:
            wait += 1
            if wait >= patience:
                print(f"Early stopping at epoch {epoch}. Best val acc: {best_val:.4f}")
                break

    # Restore the weights of the best validation epoch before testing.
    if best_state is not None:
        model.load_state_dict(best_state)

    model.eval()
    with torch.no_grad():
        logits = model(features, adj)
        test_loss = masked_softmax_cross_entropy(logits, y_test, test_mask).item()
        test_acc = masked_accuracy(logits, y_test, test_mask).item()
    print(f"Test  | Loss {test_loss:.4f} Acc {test_acc:.4f}")

    return model, {'val_acc': best_val, 'test_acc': test_acc}


In [6]:
# Citeseer: train with train_gcn's default hyperparameters.
# Returns the trained model and a dict with 'val_acc' and 'test_acc'.
model, metrics = train_gcn(dataset='citeseer')

  objects.append(pkl.load(f, encoding='latin1'))
  nn.init.xavier_uniform(self.weight)


Early stopping at epoch 78. Best val acc: 0.7000
Test  | Loss 1.3989 Acc 0.6950


In [17]:
# Cora: train with train_gcn's default hyperparameters.
# NOTE(review): the saved output below shows per-epoch log lines that
# train_gcn as defined above does not print by default; it looks stale —
# re-run the notebook top to bottom to refresh it.
model, m = train_gcn(dataset='cora')



  objects.append(pkl.load(f, encoding='latin1'))
  nn.init.xavier_uniform(self.weight)


Epoch 001 | Train Loss 1.9459 Acc 0.1786 | Val Loss 1.9449 Acc 0.1960
Epoch 002 | Train Loss 1.9409 Acc 0.2643 | Val Loss 1.9423 Acc 0.2640
Epoch 003 | Train Loss 1.9341 Acc 0.4071 | Val Loss 1.9374 Acc 0.4340
Epoch 004 | Train Loss 1.9259 Acc 0.6071 | Val Loss 1.9326 Acc 0.4580
Epoch 005 | Train Loss 1.9149 Acc 0.5357 | Val Loss 1.9294 Acc 0.3640
Epoch 006 | Train Loss 1.9093 Acc 0.5500 | Val Loss 1.9265 Acc 0.3460
Epoch 007 | Train Loss 1.9004 Acc 0.4714 | Val Loss 1.9220 Acc 0.3480
Epoch 008 | Train Loss 1.8863 Acc 0.4786 | Val Loss 1.9164 Acc 0.3540
Epoch 009 | Train Loss 1.8754 Acc 0.5500 | Val Loss 1.9100 Acc 0.4040
Epoch 010 | Train Loss 1.8651 Acc 0.5643 | Val Loss 1.9026 Acc 0.4560
Epoch 011 | Train Loss 1.8587 Acc 0.6071 | Val Loss 1.8939 Acc 0.5300
Epoch 012 | Train Loss 1.8428 Acc 0.6214 | Val Loss 1.8834 Acc 0.5880
Epoch 013 | Train Loss 1.8259 Acc 0.7429 | Val Loss 1.8734 Acc 0.6240
Epoch 014 | Train Loss 1.8109 Acc 0.6786 | Val Loss 1.8638 Acc 0.6420
Epoch 015 | Train Lo

In [18]:
# Pubmed: train with train_gcn's default hyperparameters.
# NOTE(review): the saved output below shows per-epoch log lines that
# train_gcn as defined above does not print by default; it looks stale —
# re-run the notebook top to bottom to refresh it.
model, m = train_gcn(dataset='pubmed')


  objects.append(pkl.load(f, encoding='latin1'))
  nn.init.xavier_uniform(self.weight)


Epoch 001 | Train Loss 1.0966 Acc 0.5167 | Val Loss 1.0963 Acc 0.4640
Epoch 002 | Train Loss 1.0896 Acc 0.5167 | Val Loss 1.0931 Acc 0.5600
Epoch 003 | Train Loss 1.0860 Acc 0.6000 | Val Loss 1.0874 Acc 0.6860
Epoch 004 | Train Loss 1.0757 Acc 0.7000 | Val Loss 1.0798 Acc 0.6340
Epoch 005 | Train Loss 1.0676 Acc 0.7000 | Val Loss 1.0727 Acc 0.5640
Epoch 006 | Train Loss 1.0550 Acc 0.7167 | Val Loss 1.0674 Acc 0.5740
Epoch 007 | Train Loss 1.0441 Acc 0.8000 | Val Loss 1.0629 Acc 0.6300
Epoch 008 | Train Loss 1.0307 Acc 0.7500 | Val Loss 1.0584 Acc 0.6580
Epoch 009 | Train Loss 1.0344 Acc 0.7833 | Val Loss 1.0537 Acc 0.6820
Epoch 010 | Train Loss 1.0180 Acc 0.7833 | Val Loss 1.0487 Acc 0.6980
Epoch 011 | Train Loss 1.0080 Acc 0.8000 | Val Loss 1.0427 Acc 0.7040
Epoch 012 | Train Loss 0.9925 Acc 0.8167 | Val Loss 1.0354 Acc 0.7020
Epoch 013 | Train Loss 0.9831 Acc 0.8833 | Val Loss 1.0272 Acc 0.7080
Epoch 014 | Train Loss 0.9557 Acc 0.8833 | Val Loss 1.0183 Acc 0.7020
Epoch 015 | Train Lo