### GCN

In [18]:
import os
import numpy as np
import torch
import torch.nn.functional as F
from torch_geometric.nn import GCNConv
from torch_geometric.utils import from_scipy_sparse_matrix
import scipy.sparse as sp
import json

# =========================
# 1. Load and Preprocess Data
# =========================

# Load adjacency matrix, features, labels, and splits
adj = sp.load_npz('data/data/adj.npz')
feat = np.load('data/data/features.npy')
labels = np.load('data/data/labels.npy')
splits = json.load(open('data/data/splits.json'))
idx_train, idx_test = splits['idx_train'], splits['idx_test']

# Convert to PyTorch tensors
x = torch.from_numpy(feat).float()
edge_index, _ = from_scipy_sparse_matrix(adj)

# Initialize all labels as -1 and set only train labels
y = torch.full((x.shape[0],), -1, dtype=torch.long)
y[idx_train] = torch.from_numpy(labels).long()

# Convert indices to tensors
idx_train = torch.tensor(idx_train, dtype=torch.long)
idx_test = torch.tensor(idx_test, dtype=torch.long)

# Filter training indices to only valid labels
idx_train = idx_train[y[idx_train] != -1]

# =========================
# 2. Define GCN Model
# =========================

class GCN(torch.nn.Module):
    def __init__(self, num_features, hidden_dim, num_classes, dropout=0.5):
        super().__init__()
        self.conv1 = GCNConv(num_features, hidden_dim)
        self.conv2 = GCNConv(hidden_dim, num_classes)
        self.dropout = dropout

    def forward(self, x, edge_index):
        x = self.conv1(x, edge_index)
        x = F.relu(x)
        x = F.dropout(x, p=self.dropout, training=self.training)
        x = self.conv2(x, edge_index)
        return x

# =========================
# 3. Training Setup
# =========================

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
x, edge_index, y = x.to(device), edge_index.to(device), y.to(device)
idx_train = idx_train.to(device)
idx_test = idx_test.to(device)

num_features = x.shape[1]
num_classes = y[idx_train].max().item() + 1  # number of classes in train

model = GCN(num_features, hidden_dim=64, num_classes=num_classes, dropout=0.5).to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=0.01, weight_decay=5e-4)
criterion = torch.nn.CrossEntropyLoss()

# =========================
# 4. Training Loop
# =========================

best_acc = 0
best_pred = None

for epoch in range(200):
    model.train()
    optimizer.zero_grad()
    out = model(x, edge_index)
    loss = criterion(out[idx_train], y[idx_train])
    loss.backward()
    optimizer.step()

    # Evaluate on train set
    model.eval()
    _, pred = out.max(dim=1)
    correct = int((pred[idx_train] == y[idx_train]).sum())
    acc = correct / len(idx_train)

    if acc > best_acc:
        best_acc = acc
        best_pred = pred.detach().cpu().numpy()

    if epoch % 10 == 0:
        print(f"Epoch {epoch} | Loss: {loss.item():.4f} | Train Acc: {acc*100:.2f}%")

# =========================
# 5. Predict and Save Results
# =========================

model.eval()
with torch.no_grad():
    out = model(x, edge_index)
    pred = out.argmax(dim=1).cpu().numpy()


# Save in current working directory
output_dir = ''  
output_path = os.path.join(output_dir, 'your_team_submission_GCN.txt')

# Save the file
np.savetxt(output_path, submission, fmt='%d %d')

# Confirm and preview results
print(f"Submission saved to: {os.path.abspath(output_path)}")
with open(output_path, 'r') as f:
    for _ in range(10):
        print(f.readline().strip())


Epoch 0 | Loss: 1.9456 | Train Acc: 16.53%
Epoch 10 | Loss: 0.2910 | Train Acc: 93.35%
Epoch 20 | Loss: 0.0755 | Train Acc: 98.59%
Epoch 30 | Loss: 0.0477 | Train Acc: 99.19%
Epoch 40 | Loss: 0.0404 | Train Acc: 99.19%
Epoch 50 | Loss: 0.0360 | Train Acc: 99.80%
Epoch 60 | Loss: 0.0449 | Train Acc: 99.60%
Epoch 70 | Loss: 0.0396 | Train Acc: 99.40%
Epoch 80 | Loss: 0.0340 | Train Acc: 100.00%
Epoch 90 | Loss: 0.0311 | Train Acc: 99.80%
Epoch 100 | Loss: 0.0318 | Train Acc: 100.00%
Epoch 110 | Loss: 0.0306 | Train Acc: 100.00%
Epoch 120 | Loss: 0.0288 | Train Acc: 99.80%
Epoch 130 | Loss: 0.0322 | Train Acc: 99.60%
Epoch 140 | Loss: 0.0270 | Train Acc: 100.00%
Epoch 150 | Loss: 0.0279 | Train Acc: 99.80%
Epoch 160 | Loss: 0.0284 | Train Acc: 99.80%
Epoch 170 | Loss: 0.0271 | Train Acc: 99.60%
Epoch 180 | Loss: 0.0247 | Train Acc: 99.80%
Epoch 190 | Loss: 0.0216 | Train Acc: 100.00%
Submission saved to: E:\Data_Mining\your_team_submission_GCN.txt
2051 6
1788 3
1233 0
926 5
2053 1
2083 3


### GraphSage

In [15]:
import os
import numpy as np
import torch
import torch.nn.functional as F
from torch_geometric.nn import SAGEConv
from torch_geometric.utils import from_scipy_sparse_matrix
import scipy.sparse as sp
import json

# =========================
# 1. Load and Preprocess Data
# =========================

# Load adjacency matrix, features, labels, and splits
adj = sp.load_npz('data/data/adj.npz')
feat = np.load('data/data/features.npy')
labels = np.load('data/data/labels.npy')
splits = json.load(open('data/data/splits.json'))
idx_train, idx_test = splits['idx_train'], splits['idx_test']

# Convert to PyTorch tensors
x = torch.from_numpy(feat).float()
edge_index, _ = from_scipy_sparse_matrix(adj)

# Initialize all labels as -1 and assign labels for training nodes
y = torch.full((x.shape[0],), -1, dtype=torch.long)
y[idx_train] = torch.from_numpy(labels).long()

# Convert indices to tensors
idx_train = torch.tensor(idx_train, dtype=torch.long)
idx_test = torch.tensor(idx_test, dtype=torch.long)

# Filter invalid label indices
idx_train = idx_train[y[idx_train] != -1]

# =========================
# 2. Define GraphSAGE Model
# =========================

class GraphSAGE(torch.nn.Module):
    def __init__(self, in_channels, hidden_channels, out_channels, dropout=0.5):
        super().__init__()
        self.conv1 = SAGEConv(in_channels, hidden_channels)
        self.conv2 = SAGEConv(hidden_channels, out_channels)
        self.dropout = dropout

    def forward(self, x, edge_index):
        x = self.conv1(x, edge_index)
        x = F.relu(x)
        x = F.dropout(x, p=self.dropout, training=self.training)
        x = self.conv2(x, edge_index)
        return x

# =========================
# 3. Training Setup
# =========================

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
x, edge_index, y = x.to(device), edge_index.to(device), y.to(device)
idx_train = idx_train.to(device)
idx_test = idx_test.to(device)

num_features = x.shape[1]
num_classes = y[idx_train].max().item() + 1

model = GraphSAGE(num_features, hidden_channels=64, out_channels=num_classes, dropout=0.5).to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=0.01, weight_decay=5e-4)
criterion = torch.nn.CrossEntropyLoss()

# =========================
# 4. Training Loop
# =========================

best_acc = 0
best_pred = None

for epoch in range(200):
    model.train()
    optimizer.zero_grad()
    out = model(x, edge_index)
    loss = criterion(out[idx_train], y[idx_train])
    loss.backward()
    optimizer.step()

    # Train accuracy
    model.eval()
    _, pred = out.max(dim=1)
    correct = int((pred[idx_train] == y[idx_train]).sum())
    acc = correct / len(idx_train)

    if acc > best_acc:
        best_acc = acc
        best_pred = pred.detach().cpu().numpy()

    if epoch % 10 == 0:
        print(f"Epoch {epoch} | Loss: {loss.item():.4f} | Train Acc: {acc*100:.2f}%")

# =========================
# 5. Predict and Save Results
# =========================

model.eval()
with torch.no_grad():
    out = model(x, edge_index)
    pred = out.argmax(dim=1).cpu().numpy()

# Prepare submission
# Save in current working directory
output_dir = ''  # leave empty to save in current script directory
output_path = os.path.join(output_dir, 'your_team_submission_sage.txt')

# Save the file
np.savetxt(output_path, submission, fmt='%d %d')

# Confirm and preview results
print(f"Submission saved to: {os.path.abspath(output_path)}")
with open(output_path, 'r') as f:
    for _ in range(10):
        print(f.readline().strip())



Epoch 0 | Loss: 1.9266 | Train Acc: 27.82%
Epoch 10 | Loss: 0.0900 | Train Acc: 97.38%
Epoch 20 | Loss: 0.0064 | Train Acc: 100.00%
Epoch 30 | Loss: 0.0046 | Train Acc: 99.80%
Epoch 40 | Loss: 0.0060 | Train Acc: 100.00%
Epoch 50 | Loss: 0.0081 | Train Acc: 100.00%
Epoch 60 | Loss: 0.0106 | Train Acc: 100.00%
Epoch 70 | Loss: 0.0132 | Train Acc: 100.00%
Epoch 80 | Loss: 0.0111 | Train Acc: 100.00%
Epoch 90 | Loss: 0.0101 | Train Acc: 100.00%
Epoch 100 | Loss: 0.0133 | Train Acc: 100.00%
Epoch 110 | Loss: 0.0104 | Train Acc: 100.00%
Epoch 120 | Loss: 0.0079 | Train Acc: 100.00%
Epoch 130 | Loss: 0.0093 | Train Acc: 100.00%
Epoch 140 | Loss: 0.0101 | Train Acc: 100.00%
Epoch 150 | Loss: 0.0079 | Train Acc: 100.00%
Epoch 160 | Loss: 0.0094 | Train Acc: 100.00%
Epoch 170 | Loss: 0.0069 | Train Acc: 100.00%
Epoch 180 | Loss: 0.0080 | Train Acc: 100.00%
Epoch 190 | Loss: 0.0072 | Train Acc: 100.00%
Submission saved to: E:\Data_Mining\your_team_submission_sage.txt
2051 6
1788 3
1233 0
926 5
2

### GAT

In [16]:
import os
import numpy as np
import torch
import torch.nn.functional as F
from torch_geometric.nn import GATConv
from torch_geometric.utils import from_scipy_sparse_matrix
import scipy.sparse as sp
import json

# =========================
# 1. Load and Preprocess Data
# =========================

# Load adjacency matrix, features, labels, and splits
adj = sp.load_npz('data/data/adj.npz')
feat = np.load('data/data/features.npy')
labels = np.load('data/data/labels.npy')
splits = json.load(open('data/data/splits.json'))
idx_train, idx_test = splits['idx_train'], splits['idx_test']

# Convert to PyTorch tensors
x = torch.from_numpy(feat).float()
edge_index, _ = from_scipy_sparse_matrix(adj)

# Initialize label tensor with -1
y = torch.full((x.shape[0],), -1, dtype=torch.long)
y[idx_train] = torch.from_numpy(labels).long()

# Convert index arrays to torch tensors
idx_train = torch.tensor(idx_train, dtype=torch.long)
idx_test = torch.tensor(idx_test, dtype=torch.long)

# Remove any invalid label indices from training set
idx_train = idx_train[y[idx_train] != -1]

# =========================
# 2. Define GAT Model
# =========================

class GAT(torch.nn.Module):
    def __init__(self, in_channels, hidden_channels, out_channels, heads=8, dropout=0.6):
        super().__init__()
        self.gat1 = GATConv(in_channels, hidden_channels, heads=heads, dropout=dropout)
        self.gat2 = GATConv(hidden_channels * heads, out_channels, heads=1, concat=False, dropout=dropout)
        self.dropout = dropout

    def forward(self, x, edge_index):
        x = F.dropout(x, p=self.dropout, training=self.training)
        x = self.gat1(x, edge_index)
        x = F.elu(x)
        x = F.dropout(x, p=self.dropout, training=self.training)
        x = self.gat2(x, edge_index)
        return x

# =========================
# 3. Training Setup
# =========================

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
x, edge_index, y = x.to(device), edge_index.to(device), y.to(device)
idx_train = idx_train.to(device)
idx_test = idx_test.to(device)

num_features = x.shape[1]
num_classes = y[idx_train].max().item() + 1

model = GAT(num_features, hidden_channels=8, out_channels=num_classes, heads=8, dropout=0.6).to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=0.005, weight_decay=5e-4)
criterion = torch.nn.CrossEntropyLoss()

# =========================
# 4. Training Loop
# =========================

best_acc = 0
best_pred = None

for epoch in range(200):
    model.train()
    optimizer.zero_grad()
    out = model(x, edge_index)
    loss = criterion(out[idx_train], y[idx_train])
    loss.backward()
    optimizer.step()

    # Evaluate on train set
    model.eval()
    _, pred = out.max(dim=1)
    correct = int((pred[idx_train] == y[idx_train]).sum())
    acc = correct / len(idx_train)

    if acc > best_acc:
        best_acc = acc
        best_pred = pred.detach().cpu().numpy()

    if epoch % 10 == 0:
        print(f"Epoch {epoch} | Loss: {loss.item():.4f} | Train Acc: {acc*100:.2f}%")

# =========================
# 5. Predict and Save Results
# =========================

model.eval()
with torch.no_grad():
    out = model(x, edge_index)
    pred = out.argmax(dim=1).cpu().numpy()

# Prepare submission
output_dir = ''  # leave empty to save in current script directory
output_path = os.path.join(output_dir, 'your_team_submission_gat.txt')

# Save the file
np.savetxt(output_path, submission, fmt='%d %d')

# Confirm and preview results
print(f"Submission saved to: {os.path.abspath(output_path)}")
with open(output_path, 'r') as f:
    for _ in range(10):
        print(f.readline().strip())


Epoch 0 | Loss: 1.9975 | Train Acc: 15.52%
Epoch 10 | Loss: 1.1594 | Train Acc: 69.56%
Epoch 20 | Loss: 0.8810 | Train Acc: 77.42%
Epoch 30 | Loss: 0.7279 | Train Acc: 77.62%
Epoch 40 | Loss: 0.6735 | Train Acc: 78.23%
Epoch 50 | Loss: 0.6071 | Train Acc: 80.44%
Epoch 60 | Loss: 0.6539 | Train Acc: 79.84%
Epoch 70 | Loss: 0.6094 | Train Acc: 83.27%
Epoch 80 | Loss: 0.5884 | Train Acc: 81.25%
Epoch 90 | Loss: 0.5487 | Train Acc: 81.85%
Epoch 100 | Loss: 0.5980 | Train Acc: 80.04%
Epoch 110 | Loss: 0.6034 | Train Acc: 80.04%
Epoch 120 | Loss: 0.5743 | Train Acc: 81.85%
Epoch 130 | Loss: 0.5121 | Train Acc: 84.27%
Epoch 140 | Loss: 0.4923 | Train Acc: 82.66%
Epoch 150 | Loss: 0.5141 | Train Acc: 82.86%
Epoch 160 | Loss: 0.5359 | Train Acc: 82.06%
Epoch 170 | Loss: 0.5571 | Train Acc: 80.65%
Epoch 180 | Loss: 0.5273 | Train Acc: 82.86%
Epoch 190 | Loss: 0.5612 | Train Acc: 81.25%
Submission saved to: E:\Data_Mining\your_team_submission_gat.txt
2051 6
1788 3
1233 0
926 5
2053 1
2083 3
2370 

### Baseline MLP

In [17]:
import os
import numpy as np
import torch
import torch.nn.functional as F
from torch_geometric.utils import from_scipy_sparse_matrix
import scipy.sparse as sp
import json

# =========================
# 1. Load and Preprocess Data
# =========================

# Load adjacency matrix, features, labels, and splits
adj = sp.load_npz('data/data/adj.npz')
feat = np.load('data/data/features.npy')
labels = np.load('data/data/labels.npy')
splits = json.load(open('data/data/splits.json'))
idx_train, idx_test = splits['idx_train'], splits['idx_test']

# Convert features to torch tensor
x = torch.from_numpy(feat).float()

# Initialize label tensor with -1
y = torch.full((x.shape[0],), -1, dtype=torch.long)
y[idx_train] = torch.from_numpy(labels).long()

# Convert index arrays to torch tensors
idx_train = torch.tensor(idx_train, dtype=torch.long)
idx_test = torch.tensor(idx_test, dtype=torch.long)

# Remove invalid label indices from training set
idx_train = idx_train[y[idx_train] != -1]

# =========================
# 2. Define MLP Model
# =========================

class MLP(torch.nn.Module):
    def __init__(self, in_channels, hidden_channels, out_channels, dropout=0.5):
        super().__init__()
        self.fc1 = torch.nn.Linear(in_channels, hidden_channels)
        self.fc2 = torch.nn.Linear(hidden_channels, out_channels)
        self.dropout = dropout

    def forward(self, x):
        x = F.relu(self.fc1(x))
        x = F.dropout(x, p=self.dropout, training=self.training)
        x = self.fc2(x)
        return x

# =========================
# 3. Training Setup
# =========================

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
x, y = x.to(device), y.to(device)
idx_train = idx_train.to(device)
idx_test = idx_test.to(device)

num_features = x.shape[1]
num_classes = y[idx_train].max().item() + 1

model = MLP(num_features, hidden_channels=64, out_channels=num_classes, dropout=0.5).to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=0.01, weight_decay=5e-4)
criterion = torch.nn.CrossEntropyLoss()

# =========================
# 4. Training Loop
# =========================

best_acc = 0
best_pred = None

for epoch in range(200):
    model.train()
    optimizer.zero_grad()
    out = model(x)
    loss = criterion(out[idx_train], y[idx_train])
    loss.backward()
    optimizer.step()

    # Evaluate on train set
    model.eval()
    _, pred = out.max(dim=1)
    correct = int((pred[idx_train] == y[idx_train]).sum())
    acc = correct / len(idx_train)

    if acc > best_acc:
        best_acc = acc
        best_pred = pred.detach().cpu().numpy()

    if epoch % 10 == 0:
        print(f"Epoch {epoch} | Loss: {loss.item():.4f} | Train Acc: {acc*100:.2f}%")

# =========================
# 5. Predict and Save Results
# =========================

model.eval()
with torch.no_grad():
    out = model(x)
    pred = out.argmax(dim=1).cpu().numpy()

# Prepare submission
output_dir = ''  # leave empty to save in current script directory
output_path = os.path.join(output_dir, 'your_team_submission_mlp.txt')

# Save the file
np.savetxt(output_path, submission, fmt='%d %d')

# Confirm and preview results
print(f"Submission saved to: {os.path.abspath(output_path)}")
with open(output_path, 'r') as f:
    for _ in range(10):
        print(f.readline().strip())


Epoch 0 | Loss: 1.9420 | Train Acc: 16.53%
Epoch 10 | Loss: 0.4451 | Train Acc: 93.15%
Epoch 20 | Loss: 0.0798 | Train Acc: 98.59%
Epoch 30 | Loss: 0.0367 | Train Acc: 99.19%
Epoch 40 | Loss: 0.0248 | Train Acc: 99.60%
Epoch 50 | Loss: 0.0272 | Train Acc: 99.40%
Epoch 60 | Loss: 0.0261 | Train Acc: 100.00%
Epoch 70 | Loss: 0.0252 | Train Acc: 100.00%
Epoch 80 | Loss: 0.0282 | Train Acc: 99.80%
Epoch 90 | Loss: 0.0293 | Train Acc: 99.60%
Epoch 100 | Loss: 0.0243 | Train Acc: 100.00%
Epoch 110 | Loss: 0.0183 | Train Acc: 99.80%
Epoch 120 | Loss: 0.0225 | Train Acc: 100.00%
Epoch 130 | Loss: 0.0191 | Train Acc: 100.00%
Epoch 140 | Loss: 0.0193 | Train Acc: 99.80%
Epoch 150 | Loss: 0.0185 | Train Acc: 100.00%
Epoch 160 | Loss: 0.0277 | Train Acc: 100.00%
Epoch 170 | Loss: 0.0161 | Train Acc: 100.00%
Epoch 180 | Loss: 0.0170 | Train Acc: 100.00%
Epoch 190 | Loss: 0.0176 | Train Acc: 100.00%
Submission saved to: E:\Data_Mining\your_team_submission_mlp.txt
2051 6
1788 3
1233 0
926 5
2053 1
20