In [15]:
# --------------------------------------------------------------
#  PyG: Train & Evaluate GNN models (GCN / GAT / custom dense GAT) on Cora
# --------------------------------------------------------------

import torch
import torch.nn.functional as F
from torch_geometric.datasets import Planetoid
from torch_geometric.nn import GCNConv, GATConv
from torch_geometric.loader import DataLoader

In [3]:


# ------------------- 1. Load Dataset ---------------------------
dataset = Planetoid(root='/tmp/Cora', name='Cora')
data = dataset[0]  # Single graph

# Device: prefer GPU index 4, but only when that index actually exists.
# On a CUDA machine with fewer than five GPUs fall back to the first GPU
# instead of failing later in `data.to(device)`; without CUDA use the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda:4' if torch.cuda.device_count() > 4 else 'cuda:0')
else:
    device = torch.device('cpu')
data = data.to(device)

# ------------------- 2. Define GCN Model -----------------------
class GCN(torch.nn.Module):
    """Two-layer graph convolutional network that returns raw class logits.

    Args:
        in_channels: Size of each input node feature vector.
        hidden_channels: Output size of the first ``GCNConv`` layer.
        out_channels: Output size of the second ``GCNConv`` layer.
    """

    def __init__(self, in_channels, hidden_channels, out_channels):
        super().__init__()
        self.conv1 = GCNConv(in_channels, hidden_channels)
        self.conv2 = GCNConv(hidden_channels, out_channels)

    def forward(self, x, edge_index):
        """Apply conv1 -> ReLU -> dropout(p=0.5) -> conv2 and return the result."""
        hidden = F.relu(self.conv1(x, edge_index))
        # Dropout is only active while the module is in training mode.
        hidden = F.dropout(hidden, p=0.5, training=self.training)
        return self.conv2(hidden, edge_index)  # raw logits

In [25]:
def edge_index_to_dense_adj(edge_index, num_nodes=None, include_self_loops=True):
    """Convert an edge list into a dense, symmetric 0/1 adjacency matrix.

    Args:
        edge_index: Integer tensor with two rows; column ``k`` holds the
            (row, col) node indices of edge ``k``.
        num_nodes: Number of nodes (side length of the result). When ``None``
            it is inferred as ``edge_index.max() + 1``, or ``0`` if the edge
            list is empty.
        include_self_loops: When ``True`` the diagonal is set to 1.

    Returns:
        A float tensor of shape ``(num_nodes, num_nodes)`` on the same device
        as ``edge_index``. Entry ``[i, j]`` is 1.0 if an edge ``i -> j`` or
        ``j -> i`` is listed (or ``i == j`` with self-loops enabled), else 0.0.
    """
    if num_nodes is None:
        # `max()` raises on an empty tensor, so an empty edge list maps to an
        # empty graph instead of crashing.
        num_nodes = int(edge_index.max()) + 1 if edge_index.numel() > 0 else 0

    # Start from an all-zero dense matrix.
    adj = torch.zeros(num_nodes, num_nodes, dtype=torch.float, device=edge_index.device)

    # Indexed assignment overwrites (it does not accumulate), so duplicate
    # edges still yield exactly 1.0. Writing both directions symmetrises the
    # matrix without building a transposed copy.
    row, col = edge_index
    adj[row, col] = 1.0
    adj[col, row] = 1.0

    # Optionally add self-loops.
    if include_self_loops and num_nodes > 0:
        adj.fill_diagonal_(1.0)

    return adj

# Usage example: build the dense adjacency matrix of the loaded graph and check its shape.
adj_dense = edge_index_to_dense_adj(data.edge_index, num_nodes=data.num_nodes)
adj_dense.shape

torch.Size([2708, 2708])

In [36]:
# Inspect the dense adjacency matrix (self-loops are enabled by default, so the diagonal is 1).
adj_dense

tensor([[1., 0., 0.,  ..., 0., 0., 0.],
        [0., 1., 1.,  ..., 0., 0., 0.],
        [0., 1., 1.,  ..., 0., 0., 0.],
        ...,
        [0., 0., 0.,  ..., 1., 0., 0.],
        [0., 0., 0.,  ..., 0., 1., 1.],
        [0., 0., 0.,  ..., 0., 1., 1.]], device='cuda:4')

In [13]:
class MLP(torch.nn.Module):
    """Graph-agnostic baseline: two linear layers applied to node features only.

    Args:
        in_channels: Size of each input feature vector.
        hidden_channels: Width of the hidden layer.
        out_channels: Size of the output (one logit per class).
    """

    def __init__(self, in_channels, hidden_channels, out_channels):
        super().__init__()
        self.lin1 = torch.nn.Linear(in_channels, hidden_channels)
        self.lin2 = torch.nn.Linear(hidden_channels, out_channels)

    def forward(self, x):
        """Apply lin1 -> ReLU -> dropout(p=0.5) -> lin2 and return the result."""
        # Dropout is only active while the module is in training mode.
        hidden = F.dropout(F.relu(self.lin1(x)), p=0.5, training=self.training)
        return self.lin2(hidden)  # raw logits
    
class LinearModel(torch.nn.Module):
    """Simplest baseline: a single linear layer mapping node features to logits.

    Args:
        in_channels: Size of each input feature vector.
        out_channels: Size of the output (one logit per class).
    """

    def __init__(self, in_channels, out_channels):
        super().__init__()
        self.lin = torch.nn.Linear(in_channels, out_channels)

    def forward(self, x):
        """Return the raw logits ``lin(x)``."""
        return self.lin(x)

In [20]:
class GAT(torch.nn.Module):
    """Two-layer graph attention network built from PyG ``GATConv`` layers.

    Args:
        in_channels: Size of each input node feature vector.
        hidden_channels: Output features per attention head in the first layer.
        out_channels: Number of output classes. Required; the ``None`` default
            only exists for signature compatibility.
        dropout: Dropout probability, applied to the layer inputs and passed
            to both ``GATConv`` layers.
        heads: Number of attention heads in the first layer. The second layer
            consumes their concatenation (``hidden_channels * heads`` features).

    Raises:
        ValueError: If ``out_channels`` is not provided.
    """

    def __init__(self, in_channels, hidden_channels=8, out_channels=None, dropout=0.6, heads=8):
        super().__init__()
        if out_channels is None:
            # Fail early with a clear message instead of an obscure error
            # from inside GATConv.
            raise ValueError("out_channels must be set to the number of output classes")

        # Layer 1: `heads` heads x `hidden_channels` features each, concatenated.
        self.conv1 = GATConv(
            in_channels=in_channels,
            out_channels=hidden_channels,
            heads=heads,
            concat=True,        # concatenate the head outputs
            dropout=dropout,
            add_self_loops=True,
            bias=True
        )
        # Layer 2: a single head producing one score per class.
        self.conv2 = GATConv(
            in_channels=hidden_channels * heads,  # must match layer 1's concatenated width
            out_channels=out_channels,
            heads=1,
            concat=False,       # average instead of concatenate (single head here)
            dropout=dropout,
            add_self_loops=True,
            bias=True
        )
        self.dropout = dropout

    def forward(self, x, edge_index):
        """Return raw class logits for every node."""
        # ---- Layer 1 ----
        x = F.dropout(x, p=self.dropout, training=self.training)
        x = self.conv1(x, edge_index)
        x = F.elu(x)

        # ---- Layer 2 ----
        x = F.dropout(x, p=self.dropout, training=self.training)
        x = self.conv2(x, edge_index)

        # Raw, unnormalised scores: pair with a loss that applies log-softmax
        # itself (e.g. F.cross_entropy).
        return x



In [129]:
class MyGATLayerMultiHead(torch.nn.Module):
    """Dense multi-head attention layer masked by an adjacency matrix.

    The input is projected to ``output_dim`` features and split into
    ``num_head`` heads of ``output_dim // num_head`` features each. Each head
    scores node pairs by the dot product of their projected features, masks
    non-edges, normalises over each node's neighbours and aggregates.

    Args:
        input_dim: Size of each input node feature vector.
        output_dim: Total output size across all heads.
        num_head: Number of attention heads; must divide ``output_dim``.

    Raises:
        ValueError: If ``num_head`` is not positive or does not divide
            ``output_dim``.
    """

    def __init__(self, input_dim, output_dim, num_head):
        super().__init__()
        if num_head <= 0 or output_dim % num_head != 0:
            # Otherwise the per-head split in forward() fails with an opaque
            # view/shape error.
            raise ValueError(
                f"output_dim ({output_dim}) must be divisible by num_head ({num_head})"
            )
        self.input_dim = input_dim
        self.output_dim = output_dim
        self.linear = torch.nn.Linear(input_dim, output_dim)
        self.leaky_relu = torch.nn.LeakyReLU()
        self.num_head = num_head

    def forward(self, H, A):
        """Aggregate neighbour features with per-head attention.

        Args:
            H: Node features with ``n`` rows and ``input_dim`` columns.
            A: Adjacency mask broadcastable to ``(num_head, n, n)``; entries
                equal to 0 mark node pairs that must not attend to each other.
                Every row needs at least one nonzero entry (e.g. a self-loop),
                otherwise that row's softmax is taken over only ``-inf`` and
                yields NaN.

        Returns:
            Tensor of shape ``(n, output_dim)``.
        """
        n, d = H.shape[0], self.output_dim
        h = self.num_head

        H = self.linear(H)                        # (n, d)
        H = H.view(n, h, d // h).transpose(0, 1)  # (h, n, d_h)

        Att = self.leaky_relu(H @ H.transpose(1, 2))  # (h, n, n)
        # Mask with -inf rather than multiplying by A: a multiplied-out score
        # of 0 would still receive weight exp(0) after the softmax.
        Att = Att.masked_fill(A == 0, float("-inf"))
        # Normalise over the LAST axis (the neighbours j of node i), which is
        # the axis the matmul below sums over. dim=1 would normalise over the
        # query axis of the (h, n, n) tensor, so weights would not sum to 1.
        Att = F.softmax(Att, dim=-1)

        H = Att @ H                               # (h, n, d_h)
        return H.transpose(0, 1).reshape(n, d)    # back to (n, d)

class MyGATMultiHead(torch.nn.Module):
    """Two stacked multi-head dense attention layers followed by a linear classifier.

    Args:
        input_dim: Size of each input node feature vector.
        hidden_dim: Output size of both attention layers.
        output_dim: Size of the final projection (one logit per class).
        num_head: Number of heads used by both attention layers.
    """

    def __init__(self, input_dim, hidden_dim, output_dim, num_head=8):
        super().__init__()
        self.input_dim, self.hidden_dim, self.output_dim = input_dim, hidden_dim, output_dim
        self.gat_layer1 = MyGATLayerMultiHead(input_dim, hidden_dim, num_head)
        self.gat_layer2 = MyGATLayerMultiHead(hidden_dim, hidden_dim, num_head)
        self.projector = torch.nn.Linear(hidden_dim, output_dim)
        self.elu = torch.nn.ELU()
        self.dropout = torch.nn.Dropout(p=0.6)

    def forward(self, H, A):
        """Return logits for node features ``H`` under adjacency mask ``A``."""
        # Both attention stages share the same recipe: dropout -> attention -> ELU.
        for attention_layer in (self.gat_layer1, self.gat_layer2):
            H = self.elu(attention_layer(self.dropout(H), A))
        return self.projector(H)

In [130]:
class MyGATLayer(torch.nn.Module):
    """Single-head dense attention layer masked by an adjacency matrix.

    Args:
        input_dim: Size of each input node feature vector.
        output_dim: Size of each output node feature vector.
    """

    def __init__(self, input_dim, output_dim):
        super().__init__()
        self.input_dim, self.output_dim = input_dim, output_dim
        self.linear = torch.nn.Linear(input_dim, output_dim)
        self.leaky_relu = torch.nn.LeakyReLU()

    def forward(self, H, A):
        """Project ``H``, attend over the nonzero entries of ``A`` and aggregate."""
        projected = self.linear(H)
        scores = self.leaky_relu(torch.matmul(projected, projected.T))
        # Multiplying the scores by A would be a broken mask (it leaks weight
        # onto non-edges), so non-edges are set to -inf before the softmax.
        scores = scores.masked_fill(A == 0, -torch.inf)
        weights = F.softmax(scores, dim=1)
        return weights @ projected

class MyGAT(torch.nn.Module):
    """Two stacked single-head dense attention layers; the second emits the logits.

    Args:
        input_dim: Size of each input node feature vector.
        hidden_dim: Output size of the first attention layer.
        output_dim: Output size of the second attention layer (one logit per class).
    """

    def __init__(self, input_dim, hidden_dim, output_dim):
        super().__init__()
        self.input_dim, self.hidden_dim, self.output_dim = input_dim, hidden_dim, output_dim
        self.gat_layer1 = MyGATLayer(input_dim, hidden_dim)
        self.gat_layer2 = MyGATLayer(hidden_dim, output_dim)
        self.elu = torch.nn.ELU()
        self.dropout = torch.nn.Dropout(p=0.6)

    def forward(self, H, A):
        """Return logits for node features ``H`` under adjacency mask ``A``."""
        # Stage 1: dropout -> attention -> ELU.
        hidden = self.elu(self.gat_layer1(self.dropout(H), A))
        # Stage 2: dropout -> attention, with no activation on the logits.
        return self.gat_layer2(self.dropout(hidden), A)
    

In [141]:


# Hyperparameters
# (settings used with the commented-out GCN / MLP / LinearModel baselines below)
# hidden_channels = 16
# learning_rate = 0.01
# weight_decay = 5e-4
epochs = 200  # number of full-batch training steps run by the training loop

# --- Alternative baseline: GCN (its forward takes data.x and data.edge_index) ---
# model = GCN(
#     in_channels=dataset.num_node_features,
#     hidden_channels=hidden_channels,
#     out_channels=dataset.num_classes
# ).to(device)


# --- Alternative baseline: MLP (its forward takes data.x only) ---
# model = MLP(
#     in_channels=dataset.num_node_features,
#     hidden_channels=hidden_channels,
#     out_channels=dataset.num_classes
# ).to(device)

# --- Alternative baseline: LinearModel (its forward takes data.x only) ---
# model = LinearModel(
#     in_channels=dataset.num_node_features,
#     out_channels=dataset.num_classes
# ).to(device)

# Optimizer used with the baselines above:
# optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate, weight_decay=weight_decay)


# ------------------- 3. Model & Optimizer Setup -----------------
# --- Alternative: PyG GAT (its forward takes data.x and data.edge_index) ---
# model = GAT(
#     in_channels=dataset.num_node_features,
#     hidden_channels=8,                    # F' = 8 features per head
#     out_channels=dataset.num_classes,
#     dropout=0.6
# ).to(device)

# Active model: the custom single-head dense GAT (its forward takes data.x and the dense Adj).
model = MyGAT(
    input_dim=dataset.num_node_features,
    hidden_dim=256,                    # hidden width of the first attention layer
    output_dim=dataset.num_classes,
).to(device)

# --- Alternative: custom multi-head dense GAT (its forward takes data.x and the dense Adj) ---
# model = MyGATMultiHead(
#     input_dim=dataset.num_node_features,
#     hidden_dim=256,                    # total hidden width, split across the heads
#     output_dim=dataset.num_classes,
#     num_head = 8
# ).to(device)

# L2 regularization strength
weight_decay = 0.0005  # λ = 5e-4 for Cora & Citeseer

# Note: this learning rate (1e-4) is much smaller than the 0.01 listed for the baselines above.
optimizer = torch.optim.Adam(model.parameters(), lr=0.0001, weight_decay=weight_decay)

# Dense 0/1 adjacency matrix (self-loops included by default) used as the attention mask.
Adj = edge_index_to_dense_adj(data.edge_index, num_nodes=data.num_nodes)

# ------------------- 3. Training & Eval Functions -------------
def train():
    """Run one full-batch optimisation step and return the training loss as a float.

    Uses the notebook-level ``model``, ``optimizer``, ``data`` and ``Adj``.
    The loss is cross-entropy over the nodes selected by ``data.train_mask``.
    """
    model.train()
    optimizer.zero_grad()

    # The custom dense GAT models take (features, dense adjacency).
    # For the PyG models call model(data.x, data.edge_index) instead,
    # and for the MLP / LinearModel baselines call model(data.x).
    logits = model(data.x, Adj)

    train_mask = data.train_mask
    loss = F.cross_entropy(logits[train_mask], data.y[train_mask])
    loss.backward()
    optimizer.step()
    return loss.item()

@torch.no_grad()
def evaluate(mask):
    """Return the classification accuracy on the nodes selected by ``mask``.

    Uses the notebook-level ``model``, ``data`` and ``Adj``; the model is put
    into eval mode and no gradients are tracked.
    """
    model.eval()
    # For the MLP / LinearModel baselines call model(data.x) instead.
    scores = model(data.x, Adj)
    _, predicted = scores[mask].max(dim=1)  # index of the highest logit per node
    n_correct = (predicted == data.y[mask]).sum().item()
    n_total = mask.sum().item()
    return n_correct / n_total

# ------------------- 4. Training Loop -------------------------
# One call to train() is one full-batch optimisation step; validation accuracy
# is reported on the first epoch and then every 10th epoch.
print("Starting training...")
for epoch in range(1, epochs + 1):
    loss = train()
    if epoch % 10 == 0 or epoch == 1:
        val_acc = evaluate(data.val_mask)
        print(f'Epoch: {epoch:03d}, Loss: {loss:.4f}, Val Acc: {val_acc:.4f}')

# ------------------- 5. Final Test Evaluation -----------------
# Evaluated once, after training, with the weights from the last epoch.
test_acc = evaluate(data.test_mask)
print(f'\nFinal Test Accuracy: {test_acc:.4f}')

Starting training...
Epoch: 001, Loss: 1.9464, Val Acc: 0.1580
Epoch: 010, Loss: 1.9150, Val Acc: 0.3760
Epoch: 020, Loss: 1.8860, Val Acc: 0.5760
Epoch: 030, Loss: 1.8390, Val Acc: 0.6660
Epoch: 040, Loss: 1.8097, Val Acc: 0.7080
Epoch: 050, Loss: 1.7621, Val Acc: 0.7440
Epoch: 060, Loss: 1.7195, Val Acc: 0.7620
Epoch: 070, Loss: 1.6851, Val Acc: 0.7740
Epoch: 080, Loss: 1.6134, Val Acc: 0.7720
Epoch: 090, Loss: 1.5640, Val Acc: 0.7760
Epoch: 100, Loss: 1.4756, Val Acc: 0.7800
Epoch: 110, Loss: 1.4317, Val Acc: 0.7840
Epoch: 120, Loss: 1.3514, Val Acc: 0.7840
Epoch: 130, Loss: 1.2166, Val Acc: 0.7800
Epoch: 140, Loss: 1.1521, Val Acc: 0.7800
Epoch: 150, Loss: 1.0278, Val Acc: 0.7800
Epoch: 160, Loss: 1.0162, Val Acc: 0.7760
Epoch: 170, Loss: 0.9658, Val Acc: 0.7800
Epoch: 180, Loss: 0.8103, Val Acc: 0.7800
Epoch: 190, Loss: 0.7730, Val Acc: 0.7800
Epoch: 200, Loss: 0.7173, Val Acc: 0.7740

Final Test Accuracy: 0.7880


In [49]:
# Number of input features per node; this is the value passed as the models' input dimension.
dataset.num_node_features

1433

In [44]:
# Scratch experiment: mask the attention scores by MULTIPLYING with the adjacency matrix.
# Non-edges become 0 (not -inf) before the softmax, so after normalisation they still
# receive weight exp(0): every row sums to 1, but weight leaks onto non-neighbours.
# The model layers therefore use masked_fill(A == 0, -inf) instead of this product.
H = data.x
n, d = H.shape
linear = torch.nn.Linear(d, d).to(device)


H = linear(H)
Att = H @ H.T
Att = F.leaky_relu(Att)
Att = Att * Adj  # multiplicative mask: zeroes the scores of non-edges
print(Att)
Att = F.softmax(Att, dim=1)
print(Att.sum(1))  # rows sum to 1 even though non-edges kept nonzero weight
H = Att @ H


tensor([[3.5847, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.0000],
        [0.0000, 7.8410, 1.5073,  ..., 0.0000, 0.0000, 0.0000],
        [0.0000, 1.5073, 6.6483,  ..., 0.0000, 0.0000, 0.0000],
        ...,
        [0.0000, 0.0000, 0.0000,  ..., 6.3402, 0.0000, 0.0000],
        [0.0000, 0.0000, 0.0000,  ..., 0.0000, 5.0924, 1.0883],
        [0.0000, 0.0000, 0.0000,  ..., 0.0000, 1.0883, 4.9292]],
       device='cuda:4', grad_fn=<MulBackward0>)
tensor([1.0000, 1.0000, 1.0000,  ..., 1.0000, 1.0000, 1.0000], device='cuda:4',
       grad_fn=<SumBackward1>)


In [76]:
# Scratch check: a one-element Parameter initialised to 1; its repr shows requires_grad=True.
torch.nn.Parameter(torch.ones(1))

Parameter containing:
tensor([1.], requires_grad=True)

In [105]:
# Scratch walkthrough of the multi-head attention computation on the real features:
# project to d=128, split into h=8 heads, attend within the graph, merge the heads.
H = data.x
linear = torch.nn.Linear(H.shape[1], 128).to(device)


n, d = H.shape[0], 128
h = 8
H = linear(H)
H = H.view(n, h, d//h)
H = H.transpose(0,1) # h, n, d_h
Att = H @ H.transpose(1,2) # h, n, n
Att = F.leaky_relu(Att)
# `Att = Att * Adj` would be a broken mask (it leaks weight onto non-edges),
# so non-edges are set to -inf before the softmax instead.
Att = Att.masked_fill(Adj==0, -torch.inf)
# Normalise over the last axis (each node's neighbours), the axis that
# `Att @ H` sums over. On this 3-D tensor dim=1 would be the query axis.
Att = F.softmax(Att, dim=-1)
H = Att @ H # h, n, d_h
H = H.transpose(0,1)
# The transpose makes the tensor non-contiguous, so .contiguous() is needed before .view().
H = H.contiguous().view(n,d)
H

tensor([[ 0.0055,  0.0516,  0.0024,  ..., -0.0196,  0.1127,  0.0521],
        [-0.0165,  0.1021,  0.0360,  ...,  0.0028,  0.0469, -0.0018],
        [ 0.0524,  0.0039,  0.0172,  ...,  0.0198, -0.0031, -0.0038],
        ...,
        [-0.0537,  0.0097,  0.0401,  ...,  0.0306,  0.0290,  0.0543],
        [ 0.0275,  0.0190, -0.0480,  ...,  0.0496,  0.0347,  0.0139],
        [ 0.0166, -0.0029, -0.0681,  ..., -0.0406,  0.0346,  0.0456]],
       device='cuda:4', grad_fn=<ViewBackward0>)

RuntimeError: view size is not compatible with input tensor's size and stride (at least one dimension spans across two contiguous subspaces). Use .reshape(...) instead.


In [93]:
# Shape check of H. NOTE(review): the recorded (2708, 1433) matches the raw feature
# matrix, so this was presumably run while H still held data.x -- confirm on re-run.
H.shape

torch.Size([2708, 1433])

In [98]:
# Divisors of 1433 (the node feature dimension). Only 1 and 1433 divide it, so the raw
# feature width cannot be split evenly across several attention heads.
[i for i in range(1,1434) if 1433 % i == 0]

[1, 1433]