In [None]:
# Place the Kaggle API token under ~/.kaggle.
# NOTE(review): assumes kaggle.json is already in the current working directory
# (e.g. uploaded to the Colab session) -- confirm before running.
!mkdir -p ~/.kaggle
!mv kaggle.json ~/.kaggle/
# Restrict the token file to owner read/write only.
!chmod 600 ~/.kaggle/kaggle.json

# Fetch the dataset archive into the working directory (the run log shows /content).
!kaggle datasets download -d shay2030/processed-graphs-torch-filefor-evaluating-models --force

Dataset URL: https://www.kaggle.com/datasets/shay2030/processed-graphs-torch-filefor-evaluating-models
License(s): unknown
Downloading processed-graphs-torch-filefor-evaluating-models.zip to /content
 99% 422M/428M [00:03<00:00, 171MB/s]
100% 428M/428M [00:03<00:00, 142MB/s]


In [None]:
import os
import zipfile

# Downloaded Kaggle archive and the directory its contents are unpacked into.
zip_path = "/content/processed-graphs-torch-filefor-evaluating-models.zip"
extract_path = "/content/processed_graphs"

# Make sure the destination exists; an already-present directory is not an error.
os.makedirs(name=extract_path, exist_ok=True)

# Unpack every member of the archive; the context manager closes the file afterwards.
with zipfile.ZipFile(file=zip_path, mode="r") as zip_ref:
    zip_ref.extractall(path=extract_path)

print(f"Files extracted to {extract_path}")


Files extracted to /content/processed_graphs


In [None]:
!pip install torch_geometric

Collecting torch_geometric
  Downloading torch_geometric-2.6.1-py3-none-any.whl.metadata (63 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/63.1 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m63.1/63.1 kB[0m [31m2.6 MB/s[0m eta [36m0:00:00[0m
Downloading torch_geometric-2.6.1-py3-none-any.whl (1.1 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.1/1.1 MB[0m [31m27.0 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: torch_geometric
Successfully installed torch_geometric-2.6.1


In [None]:
import torch
file_path = os.path.join(extract_path, "processed_graphs.pt")

# The file stores a collection of pickled graph objects (presumably
# torch_geometric Data instances -- TODO confirm), not a plain state dict, so
# full unpickling is needed. Passing weights_only explicitly silences the
# FutureWarning and keeps the load working on PyTorch releases whose default
# is weights_only=True.
# NOTE(review): unpickling can execute arbitrary code -- only load archives
# that come from a source you trust.
graphs = torch.load(file_path, weights_only=False)

print(f"Loaded {len(graphs)} graphs successfully!")

  graphs = torch.load(file_path)


Loaded 679269 graphs successfully!


# GCN Architecture

In [None]:
import torch.nn.functional as F
from torch_geometric.nn import GCNConv, global_mean_pool, BatchNorm
import torch

class GCNModel(torch.nn.Module):
    """Two-layer GCN regressor that combines pooled node embeddings with graph-level features.

    Pipeline: (GCNConv -> BatchNorm -> LeakyReLU) twice, mean-pool the nodes of
    each graph, concatenate that graph's ``global_features``, then a two-layer
    MLP head that emits one scalar per graph.

    Args:
        num_node_features: Width of each node feature vector.
        num_global_features: Number of graph-level features per graph.
        hidden_dim: Output width of both graph convolutions.
        dropout: Dropout probability applied after the first MLP layer.
    """

    def __init__(self, num_node_features, num_global_features, hidden_dim=64, dropout=0.3):
        super().__init__()
        self.conv1 = GCNConv(num_node_features, hidden_dim)
        self.bn1 = BatchNorm(hidden_dim)
        self.conv2 = GCNConv(hidden_dim, hidden_dim)
        self.bn2 = BatchNorm(hidden_dim)
        self.fc1 = torch.nn.Linear(hidden_dim + num_global_features, 64)
        self.fc2 = torch.nn.Linear(64, 1)

        self.dropout = dropout

    def forward(self, x, edge_index, batch, global_features):
        """Predict one value per graph in the batch.

        Args:
            x: Node feature matrix for all graphs in the batch.
            edge_index: Edge list passed to both graph convolutions.
            batch: Per-node graph index used for mean pooling.
            global_features: Graph-level features for the whole batch; reshaped
                here to one row per graph.

        Returns:
            Tensor of shape ``(num_graphs,)`` with one prediction per graph.
        """
        x = F.leaky_relu(self.bn1(self.conv1(x, edge_index)))
        x = F.leaky_relu(self.bn2(self.conv2(x, edge_index)))
        x = global_mean_pool(x, batch)

        # Pooling yields one row per graph, so the row count is the batch size.
        # Reading it from the shape avoids an extra ``batch.max().item()`` sync.
        global_features = global_features.view(x.size(0), -1)

        x = torch.cat([x, global_features], dim=1)
        x = F.leaky_relu(self.fc1(x))
        x = F.dropout(x, p=self.dropout, training=self.training)
        x = self.fc2(x)

        # Drop only the trailing unit dimension: a bare squeeze() would also
        # collapse the batch dimension when the batch holds a single graph.
        return x.squeeze(-1)


In [None]:
from torch_geometric.loader import DataLoader
import torch

# Fix the global PyTorch RNG seed before any loaders or models are created.
torch.manual_seed(42)
num_samples = len(graphs)

# 80 / 10 / 10 split; the test part takes whatever remains after integer truncation.
train_ratio, val_ratio, test_ratio = 0.8, 0.1, 0.1
train_size, val_size = int(train_ratio * num_samples), int(val_ratio * num_samples)
test_size = num_samples - train_size - val_size

# Contiguous slices in storage order: [train | validation | test].
train_graphs, val_graphs, test_graphs = (
    graphs[:train_size],
    graphs[train_size:train_size + val_size],
    graphs[train_size + val_size:],
)

# Only the training loader reshuffles between epochs.
train_loader = DataLoader(train_graphs, shuffle=True, batch_size=32)
val_loader = DataLoader(val_graphs, shuffle=False, batch_size=32)
test_loader = DataLoader(test_graphs, shuffle=False, batch_size=32)

print(f"Train samples: {len(train_graphs)}")
print(f"Validation samples: {len(val_graphs)}")
print(f"Test samples: {len(test_graphs)}")


Train samples: 543415
Validation samples: 67926
Test samples: 67928


In [None]:
import torch
import torch.nn.functional as F
from torch.optim import AdamW
from torch.cuda.amp import GradScaler, autocast

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# torch.cuda.amp.GradScaler() is deprecated; use the torch.amp constructor and
# disable loss scaling outright when training falls back to the CPU.
scaler = torch.amp.GradScaler("cuda", enabled=device.type == "cuda")

# Input widths are read off the first training graph.
num_node_features = train_graphs[0].x.shape[1]
num_global_features = train_graphs[0].global_features.shape[0]

# GCN regressor trained with AdamW against a mean-squared-error objective.
model = GCNModel(num_node_features, num_global_features).to(device)
optimizer = AdamW(model.parameters(), lr=0.001, weight_decay=1e-5)
loss_fn = torch.nn.MSELoss()

def train(model, loader):
    """Run one mixed-precision optimisation pass over ``loader``.

    Uses the notebook-level ``device``, ``optimizer``, ``scaler`` and ``loss_fn``.

    Args:
        model: Module called as ``model(x, edge_index, batch, global_features)``.
        loader: Sized iterable of batches exposing ``x``, ``edge_index``,
            ``batch``, ``global_features``, ``y`` and ``.to(device)``.

    Returns:
        The mean of the per-batch losses over the pass.
    """
    model.train()
    total_loss = 0

    for batch in loader:
        batch = batch.to(device)

        optimizer.zero_grad()

        # torch.cuda.amp.autocast() is deprecated; torch.autocast takes the
        # device type explicitly and is switched off when running on the CPU.
        with torch.autocast(device_type=device.type, enabled=device.type == "cuda"):
            output = model(batch.x, batch.edge_index, batch.batch, batch.global_features)
            loss = loss_fn(output, batch.y)

        # Scale the loss before backward, then let the scaler drive the
        # optimizer step and refresh its scale factor.
        scaler.scale(loss).backward()
        scaler.step(optimizer)
        scaler.update()

        total_loss += loss.item()

    return total_loss / len(loader)

def validate(model, loader):
    """Evaluate ``model`` on ``loader`` without tracking gradients.

    Uses the notebook-level ``device`` and ``loss_fn``.

    Returns:
        The mean of the per-batch losses.
    """
    model.eval()
    loss_sum = 0

    with torch.no_grad():
        for graph_batch in loader:
            graph_batch = graph_batch.to(device)
            prediction = model(
                graph_batch.x,
                graph_batch.edge_index,
                graph_batch.batch,
                graph_batch.global_features,
            )
            loss_sum += loss_fn(prediction, graph_batch.y).item()

    mean_loss = loss_sum / len(loader)
    return mean_loss

num_epochs, batch_size = 10, 128

# Rebuild the train/validation loaders with the batch size used for training.
train_loader = DataLoader(train_graphs, shuffle=True, batch_size=batch_size)
val_loader = DataLoader(val_graphs, shuffle=False, batch_size=batch_size)

# Each epoch: one training pass, then one validation pass, then a one-line report.
for epoch in range(1, num_epochs + 1):
    train_loss, val_loss = train(model, train_loader), validate(model, val_loader)
    print(f"Epoch {epoch}: Train Loss = {train_loss:.4f}, Val Loss = {val_loss:.4f}")


  scaler = GradScaler()
  with autocast():


[1;30;43mStreaming output truncated to the last 5000 lines.[0m
Global features shape after fixing: torch.Size([128, 1028])
Node features shape after pooling: torch.Size([128, 64])
Global features shape after fixing: torch.Size([128, 1028])
Node features shape after pooling: torch.Size([128, 64])
Global features shape after fixing: torch.Size([128, 1028])
Node features shape after pooling: torch.Size([128, 64])
Global features shape after fixing: torch.Size([128, 1028])
Node features shape after pooling: torch.Size([128, 64])
Global features shape after fixing: torch.Size([128, 1028])
Node features shape after pooling: torch.Size([128, 64])
Global features shape after fixing: torch.Size([128, 1028])
Node features shape after pooling: torch.Size([128, 64])
Global features shape after fixing: torch.Size([128, 1028])
Node features shape after pooling: torch.Size([128, 64])
Global features shape after fixing: torch.Size([128, 1028])
Node features shape after pooling: torch.Size([128, 64])

In [None]:
# Persist only the trained GCN's parameters (its state dict), not the module object.
torch.save(obj=model.state_dict(), f="gcn_model_final.pth")

# GAT Architecture

In [None]:
from torch_geometric.nn import GATConv

class GATModel(torch.nn.Module):
    """Two-layer graph-attention regressor that combines pooled node embeddings with graph-level features.

    Pipeline: multi-head GATConv -> BatchNorm -> LeakyReLU -> dropout, then a
    single-head GATConv with the same post-processing, mean-pool the nodes of
    each graph, concatenate that graph's ``global_features``, and finish with
    a two-layer MLP head that emits one scalar per graph.

    Args:
        num_node_features: Width of each node feature vector.
        num_global_features: Number of graph-level features per graph.
        hidden_dim: Per-head output width of the attention layers.
        heads: Number of attention heads in the first layer (its output width
            is ``hidden_dim * heads``).
        dropout: Dropout probability, passed to the attention layers and also
            applied after each block and after the first MLP layer.
    """

    def __init__(self, num_node_features, num_global_features, hidden_dim=64, heads=4, dropout=0.3):
        super().__init__()
        self.conv1 = GATConv(num_node_features, hidden_dim, heads=heads, dropout=dropout)
        self.bn1 = BatchNorm(hidden_dim * heads)
        self.conv2 = GATConv(hidden_dim * heads, hidden_dim, heads=1, dropout=dropout)
        self.bn2 = BatchNorm(hidden_dim)
        self.fc1 = torch.nn.Linear(hidden_dim + num_global_features, 64)
        self.fc2 = torch.nn.Linear(64, 1)

        self.dropout = dropout

    def forward(self, x, edge_index, batch, global_features):
        """Predict one value per graph in the batch.

        Args:
            x: Node feature matrix for all graphs in the batch.
            edge_index: Edge list passed to both attention layers.
            batch: Per-node graph index used for mean pooling.
            global_features: Graph-level features for the whole batch; reshaped
                here to one row per graph.

        Returns:
            Tensor of shape ``(num_graphs,)`` with one prediction per graph.
        """
        x = F.leaky_relu(self.bn1(self.conv1(x, edge_index)))
        x = F.dropout(x, p=self.dropout, training=self.training)

        x = F.leaky_relu(self.bn2(self.conv2(x, edge_index)))
        x = F.dropout(x, p=self.dropout, training=self.training)

        x = global_mean_pool(x, batch)

        # Pooling yields one row per graph, so the row count is the batch size.
        # Reading it from the shape avoids an extra ``batch.max().item()`` sync.
        global_features = global_features.view(x.size(0), -1)

        x = torch.cat([x, global_features], dim=1)

        x = F.leaky_relu(self.fc1(x))
        x = F.dropout(x, p=self.dropout, training=self.training)
        x = self.fc2(x)

        # Drop only the trailing unit dimension: a bare squeeze() would also
        # collapse the batch dimension when the batch holds a single graph.
        return x.squeeze(-1)


In [None]:
import torch
import torch.nn.functional as F
from torch.optim import Adam

# Prefer the GPU when PyTorch can see one; otherwise run on the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

# Input widths are read off the first training graph.
num_node_features = train_graphs[0].x.size(1)
num_global_features = train_graphs[0].global_features.size(0)

# Fresh float32 GAT regressor on the chosen device, trained with Adam against MSE.
model = GATModel(num_node_features, num_global_features)
model = model.to(device, dtype=torch.float32)
optimizer = Adam(model.parameters(), lr=1e-3)
loss_fn = torch.nn.MSELoss()

def train(model, loader):
    """Run one full-precision optimisation pass over ``loader``.

    Uses the notebook-level ``device``, ``optimizer`` and ``loss_fn``.

    Returns:
        The mean of the per-batch losses over the pass.
    """
    model.train()
    running_loss = 0

    for graph_batch in loader:
        graph_batch = graph_batch.to(device)
        optimizer.zero_grad()

        prediction = model(
            graph_batch.x,
            graph_batch.edge_index,
            graph_batch.batch,
            graph_batch.global_features,
        )
        batch_loss = loss_fn(prediction, graph_batch.y)
        batch_loss.backward()
        optimizer.step()

        running_loss += batch_loss.item()

    mean_loss = running_loss / len(loader)
    return mean_loss

def validate(model, loader):
    """Evaluate ``model`` on ``loader`` without tracking gradients.

    Uses the notebook-level ``device`` and ``loss_fn``.

    Returns:
        The mean of the per-batch losses.
    """
    model.eval()
    loss_sum = 0

    with torch.no_grad():
        for graph_batch in loader:
            graph_batch = graph_batch.to(device)
            prediction = model(
                graph_batch.x,
                graph_batch.edge_index,
                graph_batch.batch,
                graph_batch.global_features,
            )
            loss_sum += loss_fn(prediction, graph_batch.y).item()

    mean_loss = loss_sum / len(loader)
    return mean_loss

num_epochs = 10

# Each epoch: one training pass, then one validation pass, then a one-line report.
for epoch in range(1, num_epochs + 1):
    train_loss, val_loss = train(model, train_loader), validate(model, val_loader)
    print(f"Epoch {epoch}: Train Loss = {train_loss:.4f}, Val Loss = {val_loss:.4f}")


[1;30;43mStreaming output truncated to the last 5000 lines.[0m
Global features shape after fixing: torch.Size([128, 1028])
Node features shape after pooling: torch.Size([128, 64])
Global features shape after fixing: torch.Size([128, 1028])
Node features shape after pooling: torch.Size([128, 64])
Global features shape after fixing: torch.Size([128, 1028])
Node features shape after pooling: torch.Size([128, 64])
Global features shape after fixing: torch.Size([128, 1028])
Node features shape after pooling: torch.Size([128, 64])
Global features shape after fixing: torch.Size([128, 1028])
Node features shape after pooling: torch.Size([128, 64])
Global features shape after fixing: torch.Size([128, 1028])
Node features shape after pooling: torch.Size([128, 64])
Global features shape after fixing: torch.Size([128, 1028])
Node features shape after pooling: torch.Size([128, 64])
Global features shape after fixing: torch.Size([128, 1028])
Node features shape after pooling: torch.Size([128, 64])

In [None]:
# Call state_dict() so the parameter dictionary is saved. Passing the bound
# method itself would pickle the method (and with it the whole model object),
# which load_state_dict() cannot consume later.
torch.save(model.state_dict(), "gat_model_final.pth")

# GraphSAGE

In [None]:
import torch
import torch.nn.functional as F
from torch.nn import BatchNorm1d
from torch_geometric.nn import SAGEConv, global_mean_pool

class GraphSAGEModel(torch.nn.Module):
    """Two-layer GraphSAGE regressor that combines pooled node embeddings with graph-level features.

    Pipeline: (SAGEConv -> BatchNorm1d -> LeakyReLU) twice, mean-pool the nodes
    of each graph, concatenate that graph's ``global_features``, then a
    two-layer MLP head that emits one scalar per graph.

    Args:
        num_node_features: Width of each node feature vector.
        num_global_features: Number of graph-level features per graph.
        hidden_dim: Output width of both SAGE convolutions.
        dropout: Dropout probability applied after the first MLP layer.
    """

    def __init__(self, num_node_features, num_global_features, hidden_dim=128, dropout=0.3):
        super().__init__()
        self.conv1 = SAGEConv(num_node_features, hidden_dim)
        self.bn1 = BatchNorm1d(hidden_dim)
        self.conv2 = SAGEConv(hidden_dim, hidden_dim)
        self.bn2 = BatchNorm1d(hidden_dim)
        self.fc1 = torch.nn.Linear(hidden_dim + num_global_features, 128)
        self.fc2 = torch.nn.Linear(128, 1)
        self.dropout = dropout

    def forward(self, x, edge_index, batch, global_features):
        """Predict one value per graph in the batch.

        Args:
            x: Node feature matrix for all graphs in the batch.
            edge_index: Edge list passed to both SAGE convolutions.
            batch: Per-node graph index used for mean pooling.
            global_features: Graph-level features for the whole batch; reshaped
                here to one row per graph.

        Returns:
            Tensor of shape ``(num_graphs,)`` with one prediction per graph.
        """
        x = F.leaky_relu(self.bn1(self.conv1(x, edge_index)))
        x = F.leaky_relu(self.bn2(self.conv2(x, edge_index)))
        x = global_mean_pool(x, batch)

        # Pooling yields one row per graph, so the row count is the batch size.
        batch_size = x.shape[0]
        global_features = global_features.view(batch_size, -1)

        x = torch.cat([x, global_features], dim=1)
        x = F.leaky_relu(self.fc1(x))
        x = F.dropout(x, p=self.dropout, training=self.training)
        x = self.fc2(x)

        # Drop only the trailing unit dimension: a bare squeeze() would also
        # collapse the batch dimension when the batch holds a single graph.
        return x.squeeze(-1)


In [None]:
import torch
from torch.optim import Adam

# Prefer the GPU when PyTorch can see one; otherwise run on the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

# Input widths are read off the first training graph.
num_node_features = train_graphs[0].x.size(1)
num_global_features = train_graphs[0].global_features.size(0)

# Fresh float32 GraphSAGE regressor, trained with weight-decayed Adam against MSE.
model = GraphSAGEModel(num_node_features, num_global_features)
model = model.to(device, dtype=torch.float32)
optimizer = Adam(model.parameters(), lr=1e-3, weight_decay=1e-4)
loss_fn = torch.nn.MSELoss()

def train(model, loader):
    """Run one full-precision optimisation pass over ``loader``.

    Uses the notebook-level ``device``, ``optimizer`` and ``loss_fn``.

    Returns:
        The mean of the per-batch losses over the pass.
    """
    model.train()
    running_loss = 0

    for graph_batch in loader:
        graph_batch = graph_batch.to(device)
        optimizer.zero_grad()

        prediction = model(
            graph_batch.x,
            graph_batch.edge_index,
            graph_batch.batch,
            graph_batch.global_features,
        )
        batch_loss = loss_fn(prediction, graph_batch.y)
        batch_loss.backward()
        optimizer.step()

        running_loss += batch_loss.item()

    mean_loss = running_loss / len(loader)
    return mean_loss

def validate(model, loader):
    """Evaluate ``model`` on ``loader`` without tracking gradients.

    Uses the notebook-level ``device`` and ``loss_fn``.

    Returns:
        The mean of the per-batch losses.
    """
    model.eval()
    loss_sum = 0

    with torch.no_grad():
        for graph_batch in loader:
            graph_batch = graph_batch.to(device)
            prediction = model(
                graph_batch.x,
                graph_batch.edge_index,
                graph_batch.batch,
                graph_batch.global_features,
            )
            loss_sum += loss_fn(prediction, graph_batch.y).item()

    mean_loss = loss_sum / len(loader)
    return mean_loss

num_epochs = 30

# Each epoch: one training pass, then one validation pass, then a one-line report.
for epoch in range(1, num_epochs + 1):
    train_loss, val_loss = train(model, train_loader), validate(model, val_loader)
    print(f"Epoch {epoch}: Train Loss = {train_loss:.4f}, Val Loss = {val_loss:.4f}")

Epoch 1: Train Loss = 1.9384, Val Loss = 1.6944
Epoch 2: Train Loss = 1.5581, Val Loss = 1.7150
Epoch 3: Train Loss = 1.4222, Val Loss = 1.6633
Epoch 4: Train Loss = 1.3404, Val Loss = 1.7304
Epoch 5: Train Loss = 1.2772, Val Loss = 1.6734
Epoch 6: Train Loss = 1.2317, Val Loss = 1.6747
Epoch 7: Train Loss = 1.1915, Val Loss = 1.7009
Epoch 8: Train Loss = 1.1542, Val Loss = 1.6977
Epoch 9: Train Loss = 1.1213, Val Loss = 1.6689
Epoch 10: Train Loss = 1.0770, Val Loss = 1.6475
Epoch 11: Train Loss = 1.0412, Val Loss = 1.6503
Epoch 12: Train Loss = 1.0134, Val Loss = 1.6688
Epoch 13: Train Loss = 0.9884, Val Loss = 1.6651
Epoch 14: Train Loss = 0.9746, Val Loss = 1.6446
Epoch 15: Train Loss = 0.9591, Val Loss = 1.6625
Epoch 16: Train Loss = 0.9480, Val Loss = 1.6814
Epoch 17: Train Loss = 0.9407, Val Loss = 1.6704
Epoch 18: Train Loss = 0.9319, Val Loss = 1.6677
Epoch 19: Train Loss = 0.9279, Val Loss = 1.6458
Epoch 20: Train Loss = 0.9206, Val Loss = 1.6680
Epoch 21: Train Loss = 0.9173

In [None]:
# Call state_dict() so the parameter dictionary is saved. Passing the bound
# method itself would pickle the method (and with it the whole model object),
# which load_state_dict() cannot consume later.
torch.save(model.state_dict(), "gsage_model_final.pth")

# Graph Isomorphism Network (GIN)

In [None]:
import torch
import torch.nn.functional as F
from torch.nn import Sequential, Linear, ReLU, BatchNorm1d
from torch_geometric.nn import GINConv, global_mean_pool

class GINModel(torch.nn.Module):
    """Two-layer GIN regressor that combines pooled node embeddings with graph-level features.

    Each GINConv wraps a two-layer ReLU MLP and is followed by BatchNorm1d and
    LeakyReLU. Node embeddings are mean-pooled per graph, concatenated with
    that graph's ``global_features`` and passed through a two-layer MLP head
    that emits one scalar per graph.

    Args:
        num_node_features: Width of each node feature vector.
        num_global_features: Number of graph-level features per graph.
        hidden_dim: Width of the MLPs inside both GIN convolutions.
        dropout: Dropout probability applied after the first MLP-head layer.
    """

    def __init__(self, num_node_features, num_global_features, hidden_dim=128, dropout=0.4):
        super().__init__()
        self.conv1 = GINConv(
            Sequential(Linear(num_node_features, hidden_dim), ReLU(), Linear(hidden_dim, hidden_dim), ReLU())
        )
        self.bn1 = BatchNorm1d(hidden_dim)
        self.conv2 = GINConv(
            Sequential(Linear(hidden_dim, hidden_dim), ReLU(), Linear(hidden_dim, hidden_dim), ReLU())
        )
        self.bn2 = BatchNorm1d(hidden_dim)
        self.fc1 = Linear(hidden_dim + num_global_features, 128)
        self.fc2 = Linear(128, 1)
        self.dropout = dropout

    def forward(self, x, edge_index, batch, global_features):
        """Predict one value per graph in the batch.

        Args:
            x: Node feature matrix for all graphs in the batch.
            edge_index: Edge list passed to both GIN convolutions.
            batch: Per-node graph index used for mean pooling.
            global_features: Graph-level features for the whole batch; reshaped
                here to one row per graph.

        Returns:
            Tensor of shape ``(num_graphs,)`` with one prediction per graph.
        """
        x = F.leaky_relu(self.bn1(self.conv1(x, edge_index)))
        x = F.leaky_relu(self.bn2(self.conv2(x, edge_index)))
        x = global_mean_pool(x, batch)

        # Pooling yields one row per graph, so the row count is the batch size.
        # Reading it from the shape avoids an extra ``batch.max().item()`` sync.
        global_features = global_features.view(x.size(0), -1)

        x = torch.cat([x, global_features], dim=1)
        x = F.leaky_relu(self.fc1(x))
        x = F.dropout(x, p=self.dropout, training=self.training)
        x = self.fc2(x)

        # Drop only the trailing unit dimension: a bare squeeze() would also
        # collapse the batch dimension when the batch holds a single graph.
        return x.squeeze(-1)

In [None]:
import torch
import torch.nn.functional as F
from torch.optim import Adam

# Prefer the GPU when PyTorch can see one; otherwise run on the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

# Input widths are read off the first training graph.
num_node_features = train_graphs[0].x.size(1)
num_global_features = train_graphs[0].global_features.size(0)

# Fresh float32 GIN regressor, trained with weight-decayed Adam against MSE.
model = GINModel(num_node_features, num_global_features, hidden_dim=128, dropout=0.4)
model = model.to(device, dtype=torch.float32)
optimizer = Adam(model.parameters(), lr=3e-4, weight_decay=1e-4)
loss_fn = torch.nn.MSELoss()

def train(model, loader):
    """Run one full-precision optimisation pass over ``loader``.

    Uses the notebook-level ``device``, ``optimizer`` and ``loss_fn``.

    Returns:
        The mean of the per-batch losses over the pass.
    """
    model.train()
    running_loss = 0

    for graph_batch in loader:
        graph_batch = graph_batch.to(device)
        optimizer.zero_grad()

        prediction = model(
            graph_batch.x,
            graph_batch.edge_index,
            graph_batch.batch,
            graph_batch.global_features,
        )
        batch_loss = loss_fn(prediction, graph_batch.y)
        batch_loss.backward()
        optimizer.step()

        running_loss += batch_loss.item()

    mean_loss = running_loss / len(loader)
    return mean_loss

def validate(model, loader):
    """Evaluate ``model`` on ``loader`` without tracking gradients.

    Uses the notebook-level ``device`` and ``loss_fn``.

    Returns:
        The mean of the per-batch losses.
    """
    model.eval()
    loss_sum = 0

    with torch.no_grad():
        for graph_batch in loader:
            graph_batch = graph_batch.to(device)
            prediction = model(
                graph_batch.x,
                graph_batch.edge_index,
                graph_batch.batch,
                graph_batch.global_features,
            )
            loss_sum += loss_fn(prediction, graph_batch.y).item()

    mean_loss = loss_sum / len(loader)
    return mean_loss

num_epochs = 30
# Each epoch: one training pass, then one validation pass, then a one-line report.
for epoch in range(1, num_epochs + 1):
    train_loss, val_loss = train(model, train_loader), validate(model, val_loader)
    print(f"Epoch {epoch}: Train Loss = {train_loss:.4f}, Val Loss = {val_loss:.4f}")

Epoch 1: Train Loss = 2.3380, Val Loss = 1.8058
Epoch 2: Train Loss = 1.7823, Val Loss = 1.6682
Epoch 3: Train Loss = 1.6425, Val Loss = 1.7804
Epoch 4: Train Loss = 1.5402, Val Loss = 1.7001
Epoch 5: Train Loss = 1.4625, Val Loss = 1.7058
Epoch 6: Train Loss = 1.4024, Val Loss = 1.7118
Epoch 7: Train Loss = 1.3565, Val Loss = 1.6765
Epoch 8: Train Loss = 1.3184, Val Loss = 1.7088
Epoch 9: Train Loss = 1.2808, Val Loss = 1.6710
Epoch 10: Train Loss = 1.2506, Val Loss = 1.6931
Epoch 11: Train Loss = 1.2265, Val Loss = 1.6732
Epoch 12: Train Loss = 1.2028, Val Loss = 1.6915
Epoch 13: Train Loss = 1.1823, Val Loss = 1.7050
Epoch 14: Train Loss = 1.1659, Val Loss = 1.6616
Epoch 15: Train Loss = 1.1525, Val Loss = 1.6727
Epoch 16: Train Loss = 1.1404, Val Loss = 1.6702
Epoch 17: Train Loss = 1.1278, Val Loss = 1.6816
Epoch 18: Train Loss = 1.1147, Val Loss = 1.6828
Epoch 19: Train Loss = 1.1030, Val Loss = 1.6709
Epoch 20: Train Loss = 1.0948, Val Loss = 1.6568
Epoch 21: Train Loss = 1.0804

In [None]:
# Persist only the trained GIN's parameters (its state dict), not the module object.
torch.save(obj=model.state_dict(), f='gin_model_final.pth')