In [None]:
# Move the uploaded kaggle.json API token into ~/.kaggle (created if missing)
# and restrict it to owner read/write (mode 600).
!mkdir -p ~/.kaggle
!mv kaggle.json ~/.kaggle/
!chmod 600 ~/.kaggle/kaggle.json

# Download the dataset archive with the Kaggle CLI into the current directory.
!kaggle datasets download -d shay2030/processed-graphs-torch-filefor-evaluating-models --force

Dataset URL: https://www.kaggle.com/datasets/shay2030/processed-graphs-torch-filefor-evaluating-models
License(s): unknown
Downloading processed-graphs-torch-filefor-evaluating-models.zip to /content
 97% 417M/428M [00:05<00:00, 30.2MB/s]
100% 428M/428M [00:05<00:00, 88.6MB/s]


In [None]:
import os
import zipfile

# Where the downloaded archive lives and where its contents are unpacked.
zip_path = "/content/processed-graphs-torch-filefor-evaluating-models.zip"
extract_path = "/content/processed_graphs"

# Create the destination (a no-op when it already exists), then unpack every
# member of the archive into it.
os.makedirs(extract_path, exist_ok=True)
with zipfile.ZipFile(zip_path, mode="r") as archive:
    archive.extractall(path=extract_path)

print(f"Files extracted to {extract_path}")


Files extracted to /content/processed_graphs


In [None]:
!pip install torch_geometric

Collecting torch_geometric
  Downloading torch_geometric-2.6.1-py3-none-any.whl.metadata (63 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/63.1 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m63.1/63.1 kB[0m [31m2.5 MB/s[0m eta [36m0:00:00[0m
Downloading torch_geometric-2.6.1-py3-none-any.whl (1.1 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.1/1.1 MB[0m [31m25.2 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: torch_geometric
Successfully installed torch_geometric-2.6.1


In [None]:
import torch
import os
file_path = os.path.join(extract_path, "processed_graphs.pt")

# The file appears to hold a pickled collection of graph objects (they are
# indexed and fed to a torch_geometric DataLoader below) rather than a plain
# tensor state dict, so the restricted loader (weights_only=True) cannot be used.
# SECURITY: weights_only=False unpickles arbitrary Python objects and can execute
# code embedded in the file -- only load files from a source you trust.
graphs = torch.load(file_path, weights_only=False)

print(f"Loaded {len(graphs)} graphs successfully!")

  graphs = torch.load(file_path)


# GGNN

In [None]:
import torch
import torch.nn.functional as F
from torch.nn import Linear, GRU, BatchNorm1d, Dropout
from torch_geometric.nn import MessagePassing, global_mean_pool

class GGNNLayer(MessagePassing):
    """Single gated message-passing step: linear projection, summed neighbour
    messages, then a GRU update of the node state.

    Args:
        in_channels: size of each incoming node feature vector.
        out_channels: size of the projected features and of the GRU state.
    """

    def __init__(self, in_channels, out_channels):
        # Neighbour messages are combined by summation.
        super().__init__(aggr='add')
        self.lin = Linear(in_channels, out_channels)
        self.gru = GRU(out_channels, out_channels)

    def forward(self, x, edge_index):
        """Return the updated node features for the graph described by ``edge_index``."""
        projected = self.lin(x)
        aggregated = self.propagate(edge_index, x=projected)
        # The GRU runs for one time step: the aggregated messages are a
        # length-1 input sequence and the projected features the initial state.
        updated, _last_hidden = self.gru(aggregated.unsqueeze(0), projected.unsqueeze(0))
        return updated.squeeze(0)

    def message(self, x_j):
        """Use each neighbour's features, unchanged, as its message."""
        return x_j

class GGNN(torch.nn.Module):
    """Graph-level regressor: stacked GGNNLayer blocks, mean pooling, and an MLP
    head that also consumes per-graph global features.

    Args:
        num_node_features: width of the input node feature vectors.
        num_global_features: number of global features supplied per graph.
        hidden_dim: width of every message-passing layer.
        num_layers: number of GGNNLayer blocks applied in ``forward``.
        dropout: dropout probability used after each block and in the head.
    """

    def __init__(self, num_node_features, num_global_features, hidden_dim=128, num_layers=3, dropout=0.3):
        super().__init__()
        self.num_layers = num_layers
        self.dropout = dropout
        self.layers = torch.nn.ModuleList()
        self.bns = torch.nn.ModuleList()
        # First block maps raw node features to the hidden width.
        self.layers.append(GGNNLayer(num_node_features, hidden_dim))
        self.bns.append(BatchNorm1d(hidden_dim))

        for _ in range(num_layers - 1):
            self.layers.append(GGNNLayer(hidden_dim, hidden_dim))
            self.bns.append(BatchNorm1d(hidden_dim))

        self.fc1 = Linear(hidden_dim + num_global_features, 128)
        self.fc2 = Linear(128, 1)

    def forward(self, x, edge_index, batch, global_features):
        """Predict one scalar per graph.

        Args:
            x: node feature matrix for the whole mini-batch.
            edge_index: edge list passed to every GGNNLayer.
            batch: graph-assignment vector passed to ``global_mean_pool``.
            global_features: per-graph features; reshaped to one row per graph.

        Returns:
            Tensor with one prediction per graph (the trailing size-1 dimension
            of the head output is removed).
        """
        for i in range(self.num_layers):
            x = self.layers[i](x, edge_index)
            x = self.bns[i](x)
            x = F.leaky_relu(x)
            x = F.dropout(x, p=self.dropout, training=self.training)

        x = global_mean_pool(x, batch)

        # The pooled tensor has one row per graph, so its first dimension is the
        # batch size; reading it avoids the device->host sync of batch.max().item().
        global_features = global_features.view(x.size(0), -1)

        x = torch.cat([x, global_features], dim=1)
        x = F.leaky_relu(self.fc1(x))
        x = F.dropout(x, p=self.dropout, training=self.training)
        x = self.fc2(x)

        # Drop only the trailing feature dimension. A bare squeeze() would also
        # collapse the batch dimension when the mini-batch holds a single graph,
        # producing a 0-d tensor that no longer lines up with the targets.
        return x.squeeze(-1)

In [None]:
from torch_geometric.loader import DataLoader
import torch

# Seed PyTorch's global RNG before any loaders are built.
torch.manual_seed(42)

# Fractions of the dataset; the test split takes whatever is left over after
# the train and validation sizes have been rounded down.
train_ratio, val_ratio, test_ratio = 0.8, 0.1, 0.1

num_samples = len(graphs)
train_size = int(train_ratio * num_samples)
val_size = int(val_ratio * num_samples)
test_size = num_samples - train_size - val_size

# Contiguous slices taken in the dataset's stored order (no shuffling here).
val_end = train_size + val_size
train_graphs, val_graphs, test_graphs = (
    graphs[:train_size],
    graphs[train_size:val_end],
    graphs[val_end:],
)

# Only the training loader shuffles; evaluation order stays fixed.
train_loader = DataLoader(train_graphs, batch_size=32, shuffle=True)
val_loader = DataLoader(val_graphs, batch_size=32, shuffle=False)
test_loader = DataLoader(test_graphs, batch_size=32, shuffle=False)

print(f"Train samples: {len(train_graphs)}")
print(f"Validation samples: {len(val_graphs)}")
print(f"Test samples: {len(test_graphs)}")


Train samples: 80
Validation samples: 10
Test samples: 10


In [None]:
import torch
from torch.optim import Adam
from tqdm import tqdm
from torch.optim.lr_scheduler import CosineAnnealingLR

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Training-loop settings. They must be defined before the scheduler is built,
# because CosineAnnealingLR reads EPOCHS at construction time (defining them
# afterwards raises NameError on a fresh kernel).
EPOCHS = 10
PATIENCE = 5

# Model input sizes are taken from the first graph in the dataset.
num_node_features = graphs[0].x.shape[1]
num_global_features = graphs[0].global_features.shape[0]

model = GGNN(num_node_features, num_global_features, hidden_dim=128).to(device)

optimizer = Adam(model.parameters(), lr=5e-4, weight_decay=5e-4)
scheduler = CosineAnnealingLR(optimizer, T_max=EPOCHS)
loss_fn = torch.nn.MSELoss()
# Gradient scaling is only active when training on a CUDA device.
scaler = torch.amp.GradScaler(enabled=(device.type == 'cuda'))

# Early-stopping bookkeeping.
best_val_loss = float('inf')
patience_counter = 0

def train(model, loader):
    """Run one training epoch and return the mean per-batch loss.

    Uses the module-level ``device``, ``optimizer``, ``scaler``, ``loss_fn`` and
    ``scheduler``. The scheduler is stepped once, after all batches.

    Args:
        model: network called as ``model(x, edge_index, batch, global_features)``.
        loader: iterable of mini-batches exposing ``x``, ``edge_index``,
            ``batch``, ``global_features`` and ``y``.

    Returns:
        Sum of the batch losses divided by the number of batches.
    """
    model.train()
    total_loss = 0

    # Mixed precision follows the selected device instead of being hard-coded
    # to CUDA: it is active on GPU and cleanly disabled (no warning) on CPU,
    # matching how the GradScaler is enabled.
    use_amp = device.type == 'cuda'

    for batch in tqdm(loader, desc="Training", leave=False):
        batch = batch.to(device)

        optimizer.zero_grad()

        with torch.amp.autocast(device_type=device.type, enabled=use_amp):
            output = model(batch.x, batch.edge_index, batch.batch, batch.global_features)
            loss = loss_fn(output, batch.y)

        scaler.scale(loss).backward()
        # Unscale before clipping so the norm threshold applies to true gradients.
        scaler.unscale_(optimizer)
        torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
        scaler.step(optimizer)
        scaler.update()

        total_loss += loss.item()

    scheduler.step()
    return total_loss / len(loader)

def validate(model, loader):
    """Evaluate ``model`` on ``loader`` without gradients and return the mean per-batch loss.

    Uses the module-level ``device`` and ``loss_fn``.

    Args:
        model: network called as ``model(x, edge_index, batch, global_features)``.
        loader: iterable of mini-batches exposing ``x``, ``edge_index``,
            ``batch``, ``global_features`` and ``y``.

    Returns:
        Sum of the batch losses divided by the number of batches.
    """
    model.eval()
    total_loss = 0

    # Autocast follows the selected device rather than being hard-coded to
    # CUDA, so a CPU run does not request an unavailable backend.
    use_amp = device.type == 'cuda'

    with torch.no_grad(), torch.amp.autocast(device_type=device.type, enabled=use_amp):
        for batch in loader:
            batch = batch.to(device)
            output = model(batch.x, batch.edge_index, batch.batch, batch.global_features)
            loss = loss_fn(output, batch.y)
            total_loss += loss.item()

    return total_loss / len(loader)

# Train for up to EPOCHS epochs, checkpointing whenever validation improves and
# stopping once PATIENCE consecutive epochs bring no improvement.
for epoch in range(1, EPOCHS + 1):
    train_loss = train(model, train_loader)
    val_loss = validate(model, val_loader)

    print(f"Epoch {epoch}: Train Loss = {train_loss:.4f}, Val Loss = {val_loss:.4f}")

    if val_loss < best_val_loss:
        # New best: remember it, reset the patience counter, save the weights.
        best_val_loss, patience_counter = val_loss, 0
        torch.save(model.state_dict(), "ggnn_model_final.pth")
        continue

    # No improvement this epoch.
    patience_counter += 1
    if patience_counter >= PATIENCE:
        print("Early stopping triggered!")
        break




Epoch 1: Train Loss = 1.9305, Val Loss = 1.7693




Epoch 2: Train Loss = 1.5987, Val Loss = 1.7467




Epoch 3: Train Loss = 1.4937, Val Loss = 1.7052




Epoch 4: Train Loss = 1.4183, Val Loss = 1.6845




Epoch 5: Train Loss = 1.3465, Val Loss = 1.6641




Epoch 6: Train Loss = 1.2773, Val Loss = 1.6708




Epoch 7: Train Loss = 1.2122, Val Loss = 1.6557




Epoch 8: Train Loss = 1.1471, Val Loss = 1.6401




Epoch 9: Train Loss = 1.0848, Val Loss = 1.6237




Epoch 10: Train Loss = 1.0419, Val Loss = 1.6182


In [None]:
# Save the model's weights as they stand after the training loop.
# state_dict must be *called*: passing the bound method would pickle the method
# object instead of a weights dictionary that load_state_dict() can read.
# NOTE: this reuses the filename the training loop writes its best-validation
# checkpoint to, so that file is replaced by the current weights.
torch.save(model.state_dict(), "ggnn_model_final.pth")

# Graphormer

In [None]:
# Inspect the tensor shapes of the first graph in the dataset.
sample_graph = graphs[0]
print("Shape of node features:", sample_graph.x.shape)
print("Shape of labels:", sample_graph.y.shape)

Shape of node features: torch.Size([25, 6])
Shape of labels: torch.Size([1])


In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch_geometric.nn import global_mean_pool, TransformerConv

class Graphormer(nn.Module):
    """Graph-level model: linear embedding, stacked TransformerConv layers,
    mean pooling, and a linear head that can also take per-graph global features.

    Args:
        input_dim: width of the input node feature vectors.
        hidden_dim: width of the node embeddings; must be divisible by ``num_heads``.
        output_dim: number of values predicted per graph.
        num_heads: attention heads per TransformerConv layer.
        num_layers: number of TransformerConv layers.
        global_feature_dim: number of global features per graph appended to the
            pooled embedding before the head (0 when none are passed).

    Raises:
        ValueError: if ``hidden_dim`` is not divisible by ``num_heads``.
    """

    def __init__(self, input_dim, hidden_dim, output_dim, num_heads, num_layers, global_feature_dim=0):
        super(Graphormer, self).__init__()
        # Each layer emits num_heads * (hidden_dim // num_heads) channels, which
        # only equals hidden_dim (the next layer's input width) when this holds.
        if hidden_dim % num_heads != 0:
            raise ValueError("hidden_dim must be divisible by num_heads")
        self.embedding = nn.Linear(input_dim, hidden_dim)
        self.transformer_layers = nn.ModuleList([
            TransformerConv(hidden_dim, hidden_dim // num_heads, heads=num_heads)
            for _ in range(num_layers)
        ])
        self.global_pool = global_mean_pool
        self.fc = nn.Linear(hidden_dim + global_feature_dim, output_dim)
        # Was misspelled `droput`, which made forward() fail with AttributeError
        # on `self.dropout`.
        self.dropout = nn.Dropout(0.1)

    def forward(self, x, edge_index, batch, global_features=None):
        """Return a ``(num_graphs, output_dim)`` tensor of predictions.

        Args:
            x: node feature matrix for the whole mini-batch.
            edge_index: edge list passed to every TransformerConv layer.
            batch: graph-assignment vector used for mean pooling.
            global_features: optional per-graph features; when given they are
                reshaped to one row per graph and concatenated to the pooled
                embedding, so ``global_feature_dim`` must match their width.
        """
        x = self.embedding(x)
        for transformer in self.transformer_layers:
            x = transformer(x, edge_index)
            x = F.relu(x)
            x = self.dropout(x)
        x = self.global_pool(x, batch)
        if global_features is not None:
            global_features = global_features.view(x.shape[0], -1)
            x = torch.cat([x, global_features], dim=1)
        x = self.fc(x)
        return x

In [None]:
import torch
from torch.optim import Adam
from torch.optim.lr_scheduler import CosineAnnealingLR
from tqdm import tqdm

def train_graphormer(model, loader, optimizer, scheduler, loss_fn, device):
    """Run one training epoch and return the mean per-batch loss.

    Args:
        model: network called as ``model(x, edge_index, batch, global_features)``.
        loader: iterable of mini-batches exposing ``x``, ``edge_index``, ``batch``,
            ``y`` and optionally ``global_features``.
        optimizer: optimizer stepped once per batch.
        scheduler: learning-rate scheduler stepped once per epoch.
        loss_fn: callable ``loss_fn(prediction, target)`` returning a scalar tensor.
        device: device each batch is moved to.

    Returns:
        Sum of the batch losses divided by the number of batches.
    """
    model.train()
    total_loss = 0
    for batch in tqdm(loader, desc="Training", leave=False):
        batch = batch.to(device)
        optimizer.zero_grad()
        global_features = batch.global_features if hasattr(batch, 'global_features') else None
        output = model(batch.x, batch.edge_index, batch.batch, global_features)
        # A single-output head yields (B, 1) while the targets are (B,). Left
        # as is, the loss would broadcast the pair to (B, B) and compare every
        # prediction with every target, so drop the trailing size-1 dimension.
        if output.dim() == batch.y.dim() + 1 and output.size(-1) == 1:
            output = output.squeeze(-1)
        loss = loss_fn(output, batch.y)
        loss.backward()
        torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
        optimizer.step()
        total_loss += loss.item()
    scheduler.step()
    return total_loss / len(loader)

def validate_graphormer(model, loader, loss_fn, device):
    """Evaluate ``model`` on ``loader`` without gradients and return the mean per-batch loss.

    Args:
        model: network called as ``model(x, edge_index, batch, global_features)``.
        loader: iterable of mini-batches exposing ``x``, ``edge_index``, ``batch``,
            ``y`` and optionally ``global_features``.
        loss_fn: callable ``loss_fn(prediction, target)`` returning a scalar tensor.
        device: device each batch is moved to.

    Returns:
        Sum of the batch losses divided by the number of batches.
    """
    model.eval()
    total_loss = 0
    with torch.no_grad():
        for batch in loader:
            batch = batch.to(device)
            global_features = batch.global_features if hasattr(batch, 'global_features') else None
            output = model(batch.x, batch.edge_index, batch.batch, global_features)
            # A single-output head yields (B, 1) while the targets are (B,).
            # Drop the trailing size-1 dimension so the loss is element-wise
            # rather than broadcast to a (B, B) comparison.
            if output.dim() == batch.y.dim() + 1 and output.size(-1) == 1:
                output = output.squeeze(-1)
            loss = loss_fn(output, batch.y)
            total_loss += loss.item()
    return total_loss / len(loader)

if __name__ == "__main__":
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    # Training-loop settings, defined up front so the scheduler can use EPOCHS.
    PATIENCE = 5
    EPOCHS = 10

    sample_graph = graphs[0]
    input_dim = sample_graph.x.shape[1]
    output_dim = 1
    # The train/validate helpers forward batch.global_features whenever the
    # batch has them, and the model concatenates them before its final linear
    # layer. The head therefore has to be sized for them; a fixed 0 here gives
    # a shape mismatch in `fc` as soon as global features are present.
    if hasattr(sample_graph, 'global_features'):
        global_feature_dim = sample_graph.global_features.numel()
    else:
        global_feature_dim = 0

    model = Graphormer(
        input_dim=input_dim,
        hidden_dim=128,
        output_dim=output_dim,
        num_heads=8,
        num_layers=4,
        global_feature_dim=global_feature_dim
    ).to(device)

    optimizer = Adam(model.parameters(), lr=1e-3, weight_decay=1e-3)
    # The annealing horizon is tied to the number of epochs actually run
    # (it was a stray 30 while EPOCHS is 10), matching the GGNN setup.
    scheduler = CosineAnnealingLR(optimizer, T_max=EPOCHS)
    loss_fn = nn.MSELoss()

    # Early-stopping bookkeeping.
    best_val_loss = float('inf')
    patience_counter = 0

    for epoch in range(1, EPOCHS + 1):
        train_loss = train_graphormer(model, train_loader, optimizer, scheduler, loss_fn, device)
        val_loss = validate_graphormer(model, val_loader, loss_fn, device)
        print(f"Epoch {epoch}: Train Loss = {train_loss:.4f}, Val Loss = {val_loss:.4f}")

        if val_loss < best_val_loss:
            # New best validation loss: reset patience and checkpoint the weights.
            best_val_loss = val_loss
            patience_counter = 0
            torch.save(model.state_dict(), "graphormer_model_final.pth")
        else:
            patience_counter += 1
            if patience_counter >= PATIENCE:
                print("Early stopping triggered!")
                break



Epoch 1: Train Loss = 0.2059, Val Loss = 0.0888




Epoch 2: Train Loss = 0.1591, Val Loss = 0.1343




Epoch 3: Train Loss = 0.0916, Val Loss = 0.1010




Epoch 4: Train Loss = 0.1104, Val Loss = 0.1205




Epoch 5: Train Loss = 0.1137, Val Loss = 0.0923




Epoch 6: Train Loss = 0.0822, Val Loss = 0.0934
Early stopping triggered!
