### loading the data 


In [21]:
import torch

# Check if CUDA (GPU support) is available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

print("Using device:", device)
print("CUDA available:", torch.cuda.is_available())


Using device: cuda
CUDA available: True


In [4]:
!pip install torch_geometric
# !pip install torch
!pip install networkx
# !pip install torch-geometric

Collecting torch_geometric
  Using cached torch_geometric-2.6.1-py3-none-any.whl.metadata (63 kB)
Collecting aiohttp (from torch_geometric)
  Downloading aiohttp-3.12.13-cp310-cp310-win_amd64.whl.metadata (7.9 kB)
Collecting fsspec (from torch_geometric)
  Using cached fsspec-2025.5.1-py3-none-any.whl.metadata (11 kB)
Collecting pyparsing (from torch_geometric)
  Using cached pyparsing-3.2.3-py3-none-any.whl.metadata (5.0 kB)
Collecting tqdm (from torch_geometric)
  Using cached tqdm-4.67.1-py3-none-any.whl.metadata (57 kB)
Collecting aiohappyeyeballs>=2.5.0 (from aiohttp->torch_geometric)
  Using cached aiohappyeyeballs-2.6.1-py3-none-any.whl.metadata (5.9 kB)
Collecting aiosignal>=1.1.2 (from aiohttp->torch_geometric)
  Using cached aiosignal-1.4.0-py3-none-any.whl.metadata (3.7 kB)
Collecting async-timeout<6.0,>=4.0 (from aiohttp->torch_geometric)
  Downloading async_timeout-5.0.1-py3-none-any.whl.metadata (5.1 kB)
Collecting attrs>=17.3.0 (from aiohttp->torch_geometric)
  Using cac

ERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
torch 2.5.1 requires sympy==1.13.1, but you have sympy 1.13.3 which is incompatible.




In [13]:
import networkx as nx
from torch_geometric.utils import from_networkx

# Load the .graphml file
G_nx = nx.read_graphml("all_documents_newww.graphml")

# Optional: Convert node attributes to float tensors (if needed)
for node_id in G_nx.nodes:
    attrs = G_nx.nodes[node_id]
    for k, v in attrs.items():
        try:
            G_nx.nodes[node_id][k] = float(v)
        except:
            pass  # Skip non-numeric attributes

# Convert to PyTorch Geometric format
from torch_geometric.data import Data

data = from_networkx(G_nx)

# Now data is ready to be used with GAT
print(data)


Data(edge_index=[2, 0], Text=[480], ValueType=[480], EndsWithColon=[480], left_spacing=[480], right_spacing=[480], IsHorizontalNeighbourKey=[480], IsVerticalNeighbourKey=[480], Label=[480], num_nodes=480)


In [14]:
import torch
num_nodes = data.num_nodes
data.x = torch.eye(num_nodes)  # One-hot features
print(data.x)


tensor([[1., 0., 0.,  ..., 0., 0., 0.],
        [0., 1., 0.,  ..., 0., 0., 0.],
        [0., 0., 1.,  ..., 0., 0., 0.],
        ...,
        [0., 0., 0.,  ..., 1., 0., 0.],
        [0., 0., 0.,  ..., 0., 1., 0.],
        [0., 0., 0.,  ..., 0., 0., 1.]])


In [15]:
data.x.shape

torch.Size([480, 480])

In [None]:
import networkx as nx
import torch
from torch_geometric.utils import from_networkx

# Step 1: Load .graphml file
G = nx.read_graphml("all_documents_newww.graphml")

# Optional: convert node attributes to float (if needed)
for node in G.nodes:
    for key, val in G.nodes[node].items():
        try:
            G.nodes[node][key] = float(val)
        except:
            pass  # Skip non-numeric attributes

# Step 2: Convert to PyTorch Geometric Data
data = from_networkx(G)

# If node features are missing, create identity or random features
if not hasattr(data, 'x'):
    num_nodes = data.num_nodes
    data.x = torch.eye(num_nodes)  # one-hot as fallback
    # Or use: data.x = torch.rand(num_nodes, feature_dim)
# print(data.x)
# Step 3: Save to .pt file
torch.save(data, "graph_data.pt")
print("Saved as graph_data.pt")


None
Saved as graph_data.pt


In [15]:
import torch
from torch_geometric.data import Data
data = torch.load("graph_data.pt",weights_only=False)



In [35]:
# print(data)
print(data.edge_index)
# there is no edge index in the data

tensor([], size=(2, 0), dtype=torch.int64)


In [None]:
data.ValueType[0] # alphanumeric, # numeric etc.

'[0, 0, 0, 0, 0, 0, 0, 1, 0]'

### model testing

In [22]:
import json
import torch
import matplotlib.pyplot as plt
import pandas as pd
from torch.nn import CrossEntropyLoss
from torch_geometric.loader import DataLoader
from torch_geometric.nn.models import GAT
import os
from torch_geometric.data import Data
from torch_geometric.data.data import DataEdgeAttr, DataTensorAttr
from torch_geometric.data.storage import GlobalStorage
import torch.serialization

In [41]:
!pip install torch-scatter -f https://data.pyg.org/whl/torch-2.0.0+cu118.html

Looking in links: https://data.pyg.org/whl/torch-2.0.0+cu118.html
Collecting torch-scatter
  Downloading https://data.pyg.org/whl/torch-2.0.0%2Bcu118/torch_scatter-2.1.2%2Bpt20cu118-cp310-cp310-win_amd64.whl (3.7 MB)
     ---------------------------------------- 0.0/3.7 MB ? eta -:--:--
     ---------------------------------------- 0.0/3.7 MB ? eta -:--:--
     ---------------------------------------- 0.0/3.7 MB ? eta -:--:--
     ---------------------------------------- 0.0/3.7 MB ? eta -:--:--
     -- ------------------------------------- 0.3/3.7 MB ? eta -:--:--
     -- ------------------------------------- 0.3/3.7 MB ? eta -:--:--
     -------- ------------------------------- 0.8/3.7 MB 1.8 MB/s eta 0:00:02
     ---------------- ----------------------- 1.6/3.7 MB 2.5 MB/s eta 0:00:01
     --------------------------------- ------ 3.1/3.7 MB 3.6 MB/s eta 0:00:01
     ---------------------------------------- 3.7/3.7 MB 3.8 MB/s eta 0:00:00
Installing collected packages: torch-scatter


In [None]:
# ✅ NodeFormer-style Graph Transformer for Node-Level Classification

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch_geometric.nn import knn_graph
from torch_scatter import scatter_mean

class NodeFormerLayer(nn.Module):
    def __init__(self, in_dim, out_dim, k=16):
        super().__init__()
        self.k = k
        self.attn_proj = nn.Linear(in_dim, out_dim)
        self.val_proj = nn.Linear(in_dim, out_dim)
        self.out_proj = nn.Linear(out_dim, out_dim)

    def forward(self, x, batch):
        # x: [N, F]  -- node features
        # batch: [N] -- batch IDs

        edge_index = knn_graph(x, self.k, batch=batch, loop=False)
        row, col = edge_index

        # Attention score between i and j
        q = self.attn_proj(x)  # [N, D]
        v = self.val_proj(x)

        attn_score = (q[row] * q[col]).sum(dim=-1) / (q.size(-1) ** 0.5)  # [E]
        attn_score = F.softmax(attn_score, dim=0)

        # Weighted aggregation
        out = attn_score.unsqueeze(-1) * v[col]  # [E, D]
        out = scatter_mean(out, row, dim=0, dim_size=x.size(0))  # [N, D]

        return self.out_proj(out) + x  # Residual


class NodeFormer(nn.Module):
    def __init__(self, in_dim, hidden_dim, out_dim, num_layers=2, k=16):
        super().__init__()
        self.input_proj = nn.Linear(in_dim, hidden_dim)
        self.layers = nn.ModuleList([
            NodeFormerLayer(hidden_dim, hidden_dim, k=k)
            for _ in range(num_layers)
        ])
        self.classifier = nn.Linear(hidden_dim, out_dim)

    def forward(self, x, batch):
        x = self.input_proj(x)
        for layer in self.layers:
            x = layer(x, batch)
        return self.classifier(x)


# # Example usage:
# if __name__ == '__main__':
#     from torch_geometric.datasets import Planetoid
#     from torch_geometric.loader import DataLoader
#     from torch_geometric.utils import to_dense_batch
    
#     dataset = Planetoid(root="./data", name="Cora")
#     data = dataset[0]

#     model = NodeFormer(in_dim=dataset.num_node_features, hidden_dim=64, out_dim=dataset.num_classes)
#     optimizer = torch.optim.Adam(model.parameters(), lr=0.005)
#     criterion = nn.CrossEntropyLoss()

#     model.train()
#     for epoch in range(100):
#         optimizer.zero_grad()
#         out = model(data.x, batch=torch.zeros_like(data.y))
#         loss = criterion(out[data.train_mask], data.y[data.train_mask])
#         loss.backward()
#         optimizer.step()
#         print(f"Epoch {epoch} | Loss: {loss.item():.4f}")


OSError: [WinError 127] The specified procedure could not be found

In [None]:

def smooth_curve(data, weight=0.9):
    smoothed = []
    last = data[0]
    for point in data:
        smoothed_val = last * weight + (1 - weight) * point
        smoothed.append(smoothed_val)
        last = smoothed_val
    return smoothed


def train_single_config(config, train_loader, val_loader, in_channels, num_classes, run_name, model_dir, results_dir, plots_dir):
    model = GAT(
        in_channels=in_channels,
        hidden_channels=config['hidden_channels'],
        num_layers=config['num_layers'],
        out_channels=num_classes,
        dropout=config['dropout'],
        heads=config['heads'],
        v2=True,
        edge_dim=1,
        jk='lstm'
    )

    all_labels = torch.cat([data.y for data in train_loader.dataset])
    class_counts = torch.bincount(all_labels, minlength=num_classes)
    class_weights = 1.0 / (class_counts.float() + 1e-6)
    class_weights = class_weights / class_weights.sum()

    criterion = CrossEntropyLoss(weight=class_weights)
    optimizer = torch.optim.Adam(model.parameters(), lr=0.0005, weight_decay=5e-4)
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.5, patience=10)

    training_loss, validation_loss, validation_acc = [], [], []

    for epoch in range(500):
        model.train()
        total_loss = 0
        for data in train_loader:
            optimizer.zero_grad()
            # out = model(data.x, data.edge_index, edge_weight=data.edge_attr)
            model = NodeFormer(in_dim=data.x, hidden_dim=64, out_dim=4)
            loss = criterion(out, data.y)
            loss.backward()
            optimizer.step()
            total_loss += loss.item()

        avg_train_loss = total_loss / len(train_loader)
        training_loss.append(avg_train_loss)

        model.eval()
        val_total_loss = 0
        correct = 0
        total = 0
        with torch.no_grad():
            for data in val_loader:
                out = model(data.x, data.edge_index, edge_weight=data.edge_attr)
                loss = criterion(out, data.y)
                val_total_loss += loss.item()
                pred = out.argmax(dim=1)
                correct += (pred == data.y).sum().item()
                total += data.y.size(0)

        avg_val_loss = val_total_loss / len(val_loader)
        val_accuracy = correct / total
        validation_loss.append(avg_val_loss)
        validation_acc.append(val_accuracy)

        scheduler.step(avg_val_loss)

        print(f"Epoch {epoch + 1:03d} | Train Loss: {avg_train_loss:.4f} | Val Loss: {avg_val_loss:.4f} | Val Acc: {val_accuracy:.4f}")

    os.makedirs(model_dir, exist_ok=True)
    os.makedirs(results_dir, exist_ok=True)
    os.makedirs(plots_dir, exist_ok=True)

    model_path = os.path.join(model_dir, f"{run_name}.pth")
    csv_path = os.path.join(results_dir, f"{run_name}.csv")
    plot_path = os.path.join(plots_dir, f"{run_name}.png")

    torch.save(model.state_dict(), model_path)

    df = pd.DataFrame({
        'Epoch': list(range(1, len(training_loss)+1)),
        'TrainLoss': training_loss,
        'ValLoss': validation_loss,
        'ValAcc': validation_acc
    })
    df.to_csv(csv_path, index=False)

    plt.figure()
    plt.plot(smooth_curve(training_loss), label='Train')
    plt.plot(validation_loss, label='Val')
    plt.title(run_name)
    plt.xlabel('Epoch')
    plt.ylabel('Loss')
    plt.legend()
    plt.savefig(plot_path)
    plt.close()




In [9]:
!pip install optuna

Collecting optuna
  Using cached optuna-4.4.0-py3-none-any.whl.metadata (17 kB)
Collecting alembic>=1.5.0 (from optuna)
  Downloading alembic-1.16.3-py3-none-any.whl.metadata (7.3 kB)
Collecting colorlog (from optuna)
  Using cached colorlog-6.9.0-py3-none-any.whl.metadata (10 kB)
Collecting sqlalchemy>=1.4.2 (from optuna)
  Downloading sqlalchemy-2.0.41-cp310-cp310-win_amd64.whl.metadata (9.8 kB)
Collecting Mako (from alembic>=1.5.0->optuna)
  Using cached mako-1.3.10-py3-none-any.whl.metadata (2.9 kB)
Collecting greenlet>=1 (from sqlalchemy>=1.4.2->optuna)
  Downloading greenlet-3.2.3-cp310-cp310-win_amd64.whl.metadata (4.2 kB)
Using cached optuna-4.4.0-py3-none-any.whl (395 kB)
Downloading alembic-1.16.3-py3-none-any.whl (246 kB)
Downloading sqlalchemy-2.0.41-cp310-cp310-win_amd64.whl (2.1 MB)
   ---------------------------------------- 0.0/2.1 MB ? eta -:--:--
   ---------------------------------- ----- 1.8/2.1 MB 10.0 MB/s eta 0:00:01
   ---------------------------------------- 2.

In [23]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
device

device(type='cuda')

In [35]:
data_list = torch.load("datacheckpoint_training_(15).pt", map_location='cuda', weights_only=False)
data_list

[Data(x=[103, 18], edge_index=[2, 79], edge_attr=[79, 1], y=[103]),
 Data(x=[111, 18], edge_index=[2, 84], edge_attr=[84, 1], y=[111]),
 Data(x=[35, 18], edge_index=[2, 25], edge_attr=[25, 1], y=[35]),
 Data(x=[76, 18], edge_index=[2, 56], edge_attr=[56, 1], y=[76]),
 Data(x=[256, 18], edge_index=[2, 194], edge_attr=[194, 1], y=[256]),
 Data(x=[48, 18], edge_index=[2, 36], edge_attr=[36, 1], y=[48]),
 Data(x=[92, 18], edge_index=[2, 70], edge_attr=[70, 1], y=[92]),
 Data(x=[138, 18], edge_index=[2, 106], edge_attr=[106, 1], y=[138]),
 Data(x=[40, 18], edge_index=[2, 30], edge_attr=[30, 1], y=[40]),
 Data(x=[81, 18], edge_index=[2, 60], edge_attr=[60, 1], y=[81]),
 Data(x=[113, 18], edge_index=[2, 86], edge_attr=[86, 1], y=[113]),
 Data(x=[185, 18], edge_index=[2, 143], edge_attr=[143, 1], y=[185]),
 Data(x=[85, 18], edge_index=[2, 64], edge_attr=[64, 1], y=[85]),
 Data(x=[73, 18], edge_index=[2, 56], edge_attr=[56, 1], y=[73]),
 Data(x=[54, 18], edge_index=[2, 41], edge_attr=[41, 1], y

In [29]:
data_list[0].x.size(1)

18

In [13]:
import torch
import json
import os
import pandas as pd
import matplotlib.pyplot as plt
import optuna
from torch.nn import CrossEntropyLoss
from torch_geometric.loader import DataLoader
from torch_geometric.data import Data

# from your_model_file import GAT  # Replace with actual import
# from your_utils import smooth_curve  # Replace if defined elsewhere

def smooth_curve(data, weight=0.9):
    smoothed = []
    last = data[0]
    for point in data:
        smoothed_val = last * weight + (1 - weight) * point
        smoothed.append(smoothed_val)
        last = smoothed_val
    return smoothed



# Load data 
with torch.serialization.safe_globals([Data]):
    data_list = torch.load("datacheckpoint_training_(15).pt", map_location='cuda', weights_only=False)

labels = json.load(open("label_encoding.json"))
batch_size = 1

train_split = int(len(data_list) * 0.8)
train_data = data_list[:train_split]
val_data = data_list[train_split:]

train_loader = DataLoader(train_data, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_data, batch_size=batch_size)

in_channels = data_list[0].x.size(1)
# in_channels =18
num_classes = len(labels)

model_dir = "C:\\Users\\User\\OneDrive\\Desktop\\GAT-model testing\\GAT-test\\models"
results_dir = "C:\\Users\\User\\OneDrive\\Desktop\\GAT-model testing\\GAT-test\\results"
plots_dir = "C:\\Users\\User\\OneDrive\\Desktop\\GAT-model testing\\GAT-test\\plots"
os.makedirs(model_dir, exist_ok=True)
os.makedirs(results_dir, exist_ok=True)
os.makedirs(plots_dir, exist_ok=True)

def objective(trial):
    config = {
        'hidden_channels': trial.suggest_categorical('hidden_channels', [64, 128, 256]),
        'num_layers': trial.suggest_int('num_layers', 1, 3),
        'heads': trial.suggest_categorical('heads', [1, 2, 4, 8]),
        'dropout': trial.suggest_float('dropout', 0.0, 0.5),
    }

    model = GAT(
        in_channels=in_channels,
        hidden_channels=config['hidden_channels'],
        num_layers=config['num_layers'],
        out_channels=num_classes,
        dropout=config['dropout'],
        heads=config['heads'],
        v2=True,
        edge_dim=1,
        jk='lstm'
    ).to(device)  # 🚀 Move model to GPU

    all_labels = torch.cat([data.y for data in train_loader.dataset])
    class_counts = torch.bincount(all_labels, minlength=num_classes)
    class_weights = 1.0 / (class_counts.float() + 1e-6)
    class_weights = class_weights / class_weights.sum()
    class_weights = class_weights.to(device)  # 🎯 Move weights to GPU

    criterion = CrossEntropyLoss(weight=class_weights)
    optimizer = torch.optim.Adam(model.parameters(), lr=0.0005, weight_decay=5e-4)
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.5, patience=10)

    best_val_acc = 0
    training_loss, validation_loss, validation_acc = [], [], []

    for epoch in range(500):
        model.train()
        total_loss = 0
        for data in train_loader:
            data = data.to(device)  #  Move batch to GPU
            optimizer.zero_grad()
            out = model(data.x, data.edge_index, edge_weight=data.edge_attr)
            loss = criterion(out, data.y)
            loss.backward()
            optimizer.step()
            total_loss += loss.item()

        avg_train_loss = total_loss / len(train_loader)
        training_loss.append(avg_train_loss)

        model.eval()
        val_loss = 0
        correct, total = 0, 0
        with torch.no_grad():
            for data in val_loader:
                data = data.to(device)  # 🚀 Move validation data to GPU
                out = model(data.x, data.edge_index, edge_weight=data.edge_attr)
                loss = criterion(out, data.y)
                val_loss += loss.item()
                pred = out.argmax(dim=1)
                correct += (pred == data.y).sum().item()
                total += data.y.size(0)

        avg_val_loss = val_loss / len(val_loader)
        val_acc = correct / total
        validation_loss.append(avg_val_loss)
        validation_acc.append(val_acc)

        scheduler.step(avg_val_loss)
        print(f"Epoch {epoch + 1:03d} | Train Loss: {avg_train_loss:.4f} | Val Loss: {avg_val_loss:.4f}")
        trial.report(val_acc, epoch)

        if trial.should_prune():
            raise optuna.exceptions.TrialPruned()

        if val_acc > best_val_acc:
            best_val_acc = val_acc

            run_name = f"BestTrial_H{config['hidden_channels']}_L{config['num_layers']}_HD{config['heads']}_DO{int(config['dropout']*10)}"
            torch.save(model.state_dict(), os.path.join(model_dir, f"{run_name}.pth"))

            df = pd.DataFrame({
                'Epoch': list(range(1, len(training_loss)+1)),
                'TrainLoss': training_loss,
                'ValLoss': validation_loss,
                'ValAcc': validation_acc
            })
            df.to_csv(os.path.join(results_dir, f"{run_name}.csv"), index=False)

            plt.figure()
            plt.plot(smooth_curve(training_loss), label='Train')
            plt.plot(validation_loss, label='Val')
            plt.title(run_name)
            plt.xlabel('Epoch')
            plt.ylabel('Loss')
            plt.legend()
            plt.savefig(os.path.join(plots_dir, f"{run_name}.png"))
            plt.close()

    return best_val_acc


In [25]:


# with torch.serialization.safe_globals([Data, DataEdgeAttr, DataTensorAttr, GlobalStorage]):
#     data_list = torch.load(f"DatacheckpointNew_Training.pt", map_location='cpu')

# labels = json.load(open("label_encoding.json"))
# batch_size = 1

# train_split = int(len(data_list) * 0.8)
# train_data = data_list[:train_split]
# val_data = data_list[train_split:]

# train_loader = DataLoader(train_data, batch_size=batch_size, shuffle=True)
# val_loader = DataLoader(val_data, batch_size=batch_size)

# in_channels = data_list[0].x.size(1)
# num_classes = len(labels)
# # num_classes = 4

#     # 🔧 Use only one configuration here:
# config = {
#         'hidden_channels':256,
#         'num_layers': 2,
#         'heads':8,
#         'dropout': 0.2
#     }

# run_name = f"SingleRun_H{config['hidden_channels']}_L{config['num_layers']}_HD{config['heads']}_DO{int(config['dropout']*10)}_Updated"

# model_dir = "C:\\Users\\User\\OneDrive\\Desktop\\GAT-model testing\\models"
# results_dir = "C:\\Users\\User\\OneDrive\\Desktop\\GAT-model testing\\results"
# plots_dir = "C:\\Users\\User\\OneDrive\\Desktop\\GAT-model testing\\plots"

# print(f"\n🚀 Starting {run_name}")
# train_single_config(config, train_loader, val_loader, in_channels, num_classes, run_name, model_dir, results_dir, plots_dir)


In [None]:
# ✅ NodeFormer-style Graph Transformer for Node-Level Classification

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch_geometric.nn import knn_graph
from torch_scatter import scatter_mean

class NodeFormerLayer(nn.Module):
    def __init__(self, in_dim, out_dim, k=16):
        super().__init__()
        self.k = k
        self.attn_proj = nn.Linear(in_dim, out_dim)
        self.val_proj = nn.Linear(in_dim, out_dim)
        self.out_proj = nn.Linear(out_dim, out_dim)

    def forward(self, x, batch):
        # x: [N, F]  -- node features
        # batch: [N] -- batch IDs

        edge_index = knn_graph(x, self.k, batch=batch, loop=False)
        row, col = edge_index

        # Attention score between i and j
        q = self.attn_proj(x)  # [N, D]
        v = self.val_proj(x)

        attn_score = (q[row] * q[col]).sum(dim=-1) / (q.size(-1) ** 0.5)  # [E]
        attn_score = F.softmax(attn_score, dim=0)

        # Weighted aggregation
        out = attn_score.unsqueeze(-1) * v[col]  # [E, D]
        out = scatter_mean(out, row, dim=0, dim_size=x.size(0))  # [N, D]

        return self.out_proj(out) + x  # Residual


class NodeFormer(nn.Module):
    def __init__(self, in_dim, hidden_dim, out_dim, num_layers=2, k=16):
        super().__init__()
        self.input_proj = nn.Linear(in_dim, hidden_dim)
        self.layers = nn.ModuleList([
            NodeFormerLayer(hidden_dim, hidden_dim, k=k)
            for _ in range(num_layers)
        ])
        self.classifier = nn.Linear(hidden_dim, out_dim)

    def forward(self, x, batch):
        x = self.input_proj(x)
        for layer in self.layers:
            x = layer(x, batch)
        return self.classifier(x)


# # Example usage:
# if __name__ == '__main__':
#     from torch_geometric.datasets import Planetoid
#     from torch_geometric.loader import DataLoader
#     from torch_geometric.utils import to_dense_batch
    
#     dataset = Planetoid(root="./data", name="Cora")
#     data = dataset[0]

#     model = NodeFormer(in_dim=dataset.num_node_features, hidden_dim=64, out_dim=dataset.num_classes)
#     optimizer = torch.optim.Adam(model.parameters(), lr=0.005)
#     criterion = nn.CrossEntropyLoss()

#     model.train()
#     for epoch in range(100):
#         optimizer.zero_grad()
#         out = model(data.x, batch=torch.zeros_like(data.y))
#         loss = criterion(out[data.train_mask], data.y[data.train_mask])
#         loss.backward()
#         optimizer.step()
#         print(f"Epoch {epoch} | Loss: {loss.item():.4f}")


In [6]:
print(type(data_list))

<class 'list'>


In [26]:
study = optuna.create_study(direction='maximize')
study.optimize(objective, n_trials=30)

print("Best Trial:")
print("  Accuracy:", study.best_trial.value)
print("  Params:")
for k, v in study.best_trial.params.items():
    print(f"    {k}: {v}")


[I 2025-07-09 13:20:35,716] A new study created in memory with name: no-name-0a68bcc5-fc02-4ea8-b897-5eccf8604221


Epoch 001 | Train Loss: 1.3960 | Val Loss: 1.3876
Epoch 002 | Train Loss: 1.3900 | Val Loss: 1.3786
Epoch 003 | Train Loss: 1.3817 | Val Loss: 1.3713
Epoch 004 | Train Loss: 1.3728 | Val Loss: 1.3639
Epoch 005 | Train Loss: 1.3705 | Val Loss: 1.3588
Epoch 006 | Train Loss: 1.3672 | Val Loss: 1.3502
Epoch 007 | Train Loss: 1.3633 | Val Loss: 1.3441
Epoch 008 | Train Loss: 1.3555 | Val Loss: 1.3378
Epoch 009 | Train Loss: 1.3491 | Val Loss: 1.3302
Epoch 010 | Train Loss: 1.3366 | Val Loss: 1.3179
Epoch 011 | Train Loss: 1.3324 | Val Loss: 1.3026
Epoch 012 | Train Loss: 1.3190 | Val Loss: 1.2843
Epoch 013 | Train Loss: 1.3064 | Val Loss: 1.2622
Epoch 014 | Train Loss: 1.2869 | Val Loss: 1.2319
Epoch 015 | Train Loss: 1.2542 | Val Loss: 1.1926
Epoch 016 | Train Loss: 1.2258 | Val Loss: 1.1429
Epoch 017 | Train Loss: 1.1922 | Val Loss: 1.0888
Epoch 018 | Train Loss: 1.1570 | Val Loss: 1.0191
Epoch 019 | Train Loss: 1.0900 | Val Loss: 0.9348
Epoch 020 | Train Loss: 1.0405 | Val Loss: 0.8658


[I 2025-07-09 13:22:47,848] Trial 0 finished with value: 0.8530927835051546 and parameters: {'hidden_channels': 64, 'num_layers': 3, 'heads': 8, 'dropout': 0.28083252386108903}. Best is trial 0 with value: 0.8530927835051546.


Epoch 500 | Train Loss: 0.4221 | Val Loss: 0.2873
Epoch 001 | Train Loss: 1.3766 | Val Loss: 1.3343
Epoch 002 | Train Loss: 1.3523 | Val Loss: 1.3174
Epoch 003 | Train Loss: 1.3233 | Val Loss: 1.2833
Epoch 004 | Train Loss: 1.2940 | Val Loss: 1.2415
Epoch 005 | Train Loss: 1.2380 | Val Loss: 1.1560
Epoch 006 | Train Loss: 1.1619 | Val Loss: 0.9740
Epoch 007 | Train Loss: 1.0203 | Val Loss: 0.7354
Epoch 008 | Train Loss: 0.8424 | Val Loss: 0.5793
Epoch 009 | Train Loss: 0.7862 | Val Loss: 0.5016
Epoch 010 | Train Loss: 0.7175 | Val Loss: 0.4365
Epoch 011 | Train Loss: 0.6792 | Val Loss: 0.4164
Epoch 012 | Train Loss: 0.6486 | Val Loss: 0.3937
Epoch 013 | Train Loss: 0.6335 | Val Loss: 0.3775
Epoch 014 | Train Loss: 0.6316 | Val Loss: 0.3697
Epoch 015 | Train Loss: 0.6129 | Val Loss: 0.3620
Epoch 016 | Train Loss: 0.6075 | Val Loss: 0.3580
Epoch 017 | Train Loss: 0.5921 | Val Loss: 0.3481
Epoch 018 | Train Loss: 0.5971 | Val Loss: 0.3543
Epoch 019 | Train Loss: 0.5766 | Val Loss: 0.3374


[I 2025-07-09 13:24:29,298] Trial 1 finished with value: 0.8608247422680413 and parameters: {'hidden_channels': 256, 'num_layers': 2, 'heads': 2, 'dropout': 0.14019800291543955}. Best is trial 1 with value: 0.8608247422680413.


Epoch 499 | Train Loss: 0.4003 | Val Loss: 0.2776
Epoch 500 | Train Loss: 0.3910 | Val Loss: 0.2776
Epoch 001 | Train Loss: 1.3885 | Val Loss: 1.3702
Epoch 002 | Train Loss: 1.3809 | Val Loss: 1.3641
Epoch 003 | Train Loss: 1.3750 | Val Loss: 1.3565
Epoch 004 | Train Loss: 1.3688 | Val Loss: 1.3502
Epoch 005 | Train Loss: 1.3622 | Val Loss: 1.3403
Epoch 006 | Train Loss: 1.3532 | Val Loss: 1.3290
Epoch 007 | Train Loss: 1.3429 | Val Loss: 1.3147
Epoch 008 | Train Loss: 1.3291 | Val Loss: 1.2963
Epoch 009 | Train Loss: 1.3098 | Val Loss: 1.2715
Epoch 010 | Train Loss: 1.2859 | Val Loss: 1.2353
Epoch 011 | Train Loss: 1.2488 | Val Loss: 1.1885
Epoch 012 | Train Loss: 1.2059 | Val Loss: 1.1237
Epoch 013 | Train Loss: 1.1439 | Val Loss: 1.0291
Epoch 014 | Train Loss: 1.0557 | Val Loss: 0.8864
Epoch 015 | Train Loss: 0.9380 | Val Loss: 0.7534
Epoch 016 | Train Loss: 0.8179 | Val Loss: 0.6034
Epoch 017 | Train Loss: 0.7453 | Val Loss: 0.5230
Epoch 018 | Train Loss: 0.6833 | Val Loss: 0.4767


[I 2025-07-09 13:26:32,279] Trial 2 finished with value: 0.8737113402061856 and parameters: {'hidden_channels': 64, 'num_layers': 3, 'heads': 2, 'dropout': 0.004153706565613036}. Best is trial 2 with value: 0.8737113402061856.


Epoch 500 | Train Loss: 0.3500 | Val Loss: 0.2800
Epoch 001 | Train Loss: 1.3912 | Val Loss: 1.3785
Epoch 002 | Train Loss: 1.3753 | Val Loss: 1.3626
Epoch 003 | Train Loss: 1.3612 | Val Loss: 1.3458
Epoch 004 | Train Loss: 1.3479 | Val Loss: 1.3287
Epoch 005 | Train Loss: 1.3346 | Val Loss: 1.3010
Epoch 006 | Train Loss: 1.3093 | Val Loss: 1.2709
Epoch 007 | Train Loss: 1.2802 | Val Loss: 1.2277
Epoch 008 | Train Loss: 1.2454 | Val Loss: 1.1787
Epoch 009 | Train Loss: 1.1912 | Val Loss: 1.0889
Epoch 010 | Train Loss: 1.1151 | Val Loss: 0.9570
Epoch 011 | Train Loss: 0.9871 | Val Loss: 0.7969
Epoch 012 | Train Loss: 0.8529 | Val Loss: 0.6456
Epoch 013 | Train Loss: 0.7588 | Val Loss: 0.5326
Epoch 014 | Train Loss: 0.6999 | Val Loss: 0.4726
Epoch 015 | Train Loss: 0.6593 | Val Loss: 0.4378
Epoch 016 | Train Loss: 0.6376 | Val Loss: 0.4348
Epoch 017 | Train Loss: 0.6017 | Val Loss: 0.4055
Epoch 018 | Train Loss: 0.5854 | Val Loss: 0.3899
Epoch 019 | Train Loss: 0.5640 | Val Loss: 0.3823


[I 2025-07-09 13:28:14,279] Trial 3 finished with value: 0.8711340206185567 and parameters: {'hidden_channels': 128, 'num_layers': 2, 'heads': 2, 'dropout': 0.041283092851097325}. Best is trial 2 with value: 0.8737113402061856.


Epoch 500 | Train Loss: 0.3509 | Val Loss: 0.2679
Epoch 001 | Train Loss: 1.3818 | Val Loss: 1.3705
Epoch 002 | Train Loss: 1.3734 | Val Loss: 1.3615
Epoch 003 | Train Loss: 1.3642 | Val Loss: 1.3503
Epoch 004 | Train Loss: 1.3553 | Val Loss: 1.3366
Epoch 005 | Train Loss: 1.3419 | Val Loss: 1.3222
Epoch 006 | Train Loss: 1.3295 | Val Loss: 1.3054
Epoch 007 | Train Loss: 1.3095 | Val Loss: 1.2765
Epoch 008 | Train Loss: 1.2825 | Val Loss: 1.2347
Epoch 009 | Train Loss: 1.2448 | Val Loss: 1.1822
Epoch 010 | Train Loss: 1.1902 | Val Loss: 1.0801
Epoch 011 | Train Loss: 1.0903 | Val Loss: 0.9053
Epoch 012 | Train Loss: 0.9662 | Val Loss: 0.7612
Epoch 013 | Train Loss: 0.8584 | Val Loss: 0.6068
Epoch 014 | Train Loss: 0.7744 | Val Loss: 0.5246
Epoch 015 | Train Loss: 0.7348 | Val Loss: 0.4753
Epoch 016 | Train Loss: 0.6860 | Val Loss: 0.4527
Epoch 017 | Train Loss: 0.6717 | Val Loss: 0.4307
Epoch 018 | Train Loss: 0.6441 | Val Loss: 0.4152
Epoch 019 | Train Loss: 0.6297 | Val Loss: 0.3985


[I 2025-07-09 13:30:33,057] Trial 4 finished with value: 0.865979381443299 and parameters: {'hidden_channels': 128, 'num_layers': 3, 'heads': 4, 'dropout': 0.08093558102389614}. Best is trial 2 with value: 0.8737113402061856.


Epoch 500 | Train Loss: 0.3502 | Val Loss: 0.2785


[I 2025-07-09 13:30:33,292] Trial 5 pruned. 


Epoch 001 | Train Loss: 1.3931 | Val Loss: 1.3883
Epoch 001 | Train Loss: 1.3905 | Val Loss: 1.3704
Epoch 002 | Train Loss: 1.3728 | Val Loss: 1.3527
Epoch 003 | Train Loss: 1.3571 | Val Loss: 1.3338
Epoch 004 | Train Loss: 1.3385 | Val Loss: 1.3125
Epoch 005 | Train Loss: 1.3225 | Val Loss: 1.2849
Epoch 006 | Train Loss: 1.2986 | Val Loss: 1.2542
Epoch 007 | Train Loss: 1.2690 | Val Loss: 1.2185
Epoch 008 | Train Loss: 1.2393 | Val Loss: 1.1718
Epoch 009 | Train Loss: 1.1982 | Val Loss: 1.1224
Epoch 010 | Train Loss: 1.1493 | Val Loss: 1.0659


[I 2025-07-09 13:30:36,303] Trial 6 pruned. 


Epoch 011 | Train Loss: 1.1066 | Val Loss: 1.0037
Epoch 012 | Train Loss: 1.0612 | Val Loss: 0.9367
Epoch 001 | Train Loss: 1.4053 | Val Loss: 1.3971
Epoch 002 | Train Loss: 1.3949 | Val Loss: 1.3827
Epoch 003 | Train Loss: 1.3874 | Val Loss: 1.3695
Epoch 004 | Train Loss: 1.3810 | Val Loss: 1.3576
Epoch 005 | Train Loss: 1.3768 | Val Loss: 1.3457
Epoch 006 | Train Loss: 1.3617 | Val Loss: 1.3334
Epoch 007 | Train Loss: 1.3588 | Val Loss: 1.3238
Epoch 008 | Train Loss: 1.3431 | Val Loss: 1.3122
Epoch 009 | Train Loss: 1.3265 | Val Loss: 1.3017
Epoch 010 | Train Loss: 1.3176 | Val Loss: 1.2896


[I 2025-07-09 13:30:39,167] Trial 7 pruned. 


Epoch 011 | Train Loss: 1.3093 | Val Loss: 1.2779
Epoch 012 | Train Loss: 1.3129 | Val Loss: 1.2650


[I 2025-07-09 13:30:39,487] Trial 8 pruned. 


Epoch 001 | Train Loss: 1.3937 | Val Loss: 1.3502
Epoch 001 | Train Loss: 1.3812 | Val Loss: 1.3602
Epoch 002 | Train Loss: 1.3726 | Val Loss: 1.3428
Epoch 003 | Train Loss: 1.3586 | Val Loss: 1.3247
Epoch 004 | Train Loss: 1.3443 | Val Loss: 1.3072
Epoch 005 | Train Loss: 1.3247 | Val Loss: 1.2886
Epoch 006 | Train Loss: 1.3043 | Val Loss: 1.2655
Epoch 007 | Train Loss: 1.2826 | Val Loss: 1.2418
Epoch 008 | Train Loss: 1.2705 | Val Loss: 1.2130
Epoch 009 | Train Loss: 1.2285 | Val Loss: 1.1776
Epoch 010 | Train Loss: 1.2040 | Val Loss: 1.1348
Epoch 011 | Train Loss: 1.1735 | Val Loss: 1.0923


[I 2025-07-09 13:30:42,598] Trial 9 pruned. 


Epoch 012 | Train Loss: 1.1399 | Val Loss: 1.0423


[I 2025-07-09 13:30:42,898] Trial 10 pruned. 


Epoch 001 | Train Loss: 1.3851 | Val Loss: 1.3673
Epoch 001 | Train Loss: 1.3908 | Val Loss: 1.3855
Epoch 002 | Train Loss: 1.3743 | Val Loss: 1.3680
Epoch 003 | Train Loss: 1.3626 | Val Loss: 1.3472
Epoch 004 | Train Loss: 1.3473 | Val Loss: 1.3339
Epoch 005 | Train Loss: 1.3303 | Val Loss: 1.3115
Epoch 006 | Train Loss: 1.3104 | Val Loss: 1.2807
Epoch 007 | Train Loss: 1.2853 | Val Loss: 1.2376
Epoch 008 | Train Loss: 1.2450 | Val Loss: 1.1813
Epoch 009 | Train Loss: 1.1919 | Val Loss: 1.1007
Epoch 010 | Train Loss: 1.1057 | Val Loss: 0.9830
Epoch 011 | Train Loss: 0.9920 | Val Loss: 0.8021


[I 2025-07-09 13:30:46,554] Trial 11 pruned. 


Epoch 012 | Train Loss: 0.8481 | Val Loss: 0.6074
Epoch 001 | Train Loss: 1.3760 | Val Loss: 1.3603
Epoch 002 | Train Loss: 1.3659 | Val Loss: 1.3506
Epoch 003 | Train Loss: 1.3582 | Val Loss: 1.3396
Epoch 004 | Train Loss: 1.3495 | Val Loss: 1.3301
Epoch 005 | Train Loss: 1.3403 | Val Loss: 1.3154
Epoch 006 | Train Loss: 1.3280 | Val Loss: 1.3012
Epoch 007 | Train Loss: 1.3160 | Val Loss: 1.2843
Epoch 008 | Train Loss: 1.2985 | Val Loss: 1.2646
Epoch 009 | Train Loss: 1.2796 | Val Loss: 1.2376
Epoch 010 | Train Loss: 1.2561 | Val Loss: 1.2051


[I 2025-07-09 13:30:50,234] Trial 12 pruned. 


Epoch 011 | Train Loss: 1.2247 | Val Loss: 1.1669
Epoch 012 | Train Loss: 1.1890 | Val Loss: 1.1118
Epoch 001 | Train Loss: 1.3838 | Val Loss: 1.3581


[I 2025-07-09 13:30:50,807] Trial 13 pruned. 


Epoch 002 | Train Loss: 1.3688 | Val Loss: 1.3496
Epoch 001 | Train Loss: 1.3893 | Val Loss: 1.3752
Epoch 002 | Train Loss: 1.3714 | Val Loss: 1.3484
Epoch 003 | Train Loss: 1.3559 | Val Loss: 1.3259
Epoch 004 | Train Loss: 1.3337 | Val Loss: 1.2991
Epoch 005 | Train Loss: 1.2990 | Val Loss: 1.2449
Epoch 006 | Train Loss: 1.2332 | Val Loss: 1.1020
Epoch 007 | Train Loss: 1.0619 | Val Loss: 0.7717
Epoch 008 | Train Loss: 0.8537 | Val Loss: 0.5367
Epoch 009 | Train Loss: 0.7144 | Val Loss: 0.4762
Epoch 010 | Train Loss: 0.6518 | Val Loss: 0.4298
Epoch 011 | Train Loss: 0.6221 | Val Loss: 0.4057
Epoch 012 | Train Loss: 0.5882 | Val Loss: 0.3781
Epoch 013 | Train Loss: 0.5546 | Val Loss: 0.3692
Epoch 014 | Train Loss: 0.5599 | Val Loss: 0.3747
Epoch 015 | Train Loss: 0.5441 | Val Loss: 0.3577
Epoch 016 | Train Loss: 0.5243 | Val Loss: 0.3331
Epoch 017 | Train Loss: 0.5240 | Val Loss: 0.3355
Epoch 018 | Train Loss: 0.5001 | Val Loss: 0.3227
Epoch 019 | Train Loss: 0.4861 | Val Loss: 0.3299


[I 2025-07-09 13:33:02,796] Trial 14 finished with value: 0.8762886597938144 and parameters: {'hidden_channels': 256, 'num_layers': 3, 'heads': 2, 'dropout': 0.012645607435582129}. Best is trial 14 with value: 0.8762886597938144.


Epoch 500 | Train Loss: 0.3236 | Val Loss: 0.2695


[I 2025-07-09 13:33:03,136] Trial 15 pruned. 


Epoch 001 | Train Loss: 1.3886 | Val Loss: 1.3691
Epoch 001 | Train Loss: 1.3777 | Val Loss: 1.3620
Epoch 002 | Train Loss: 1.3675 | Val Loss: 1.3476
Epoch 003 | Train Loss: 1.3559 | Val Loss: 1.3378
Epoch 004 | Train Loss: 1.3416 | Val Loss: 1.3217
Epoch 005 | Train Loss: 1.3323 | Val Loss: 1.3047
Epoch 006 | Train Loss: 1.3050 | Val Loss: 1.2688
Epoch 007 | Train Loss: 1.2805 | Val Loss: 1.2266
Epoch 008 | Train Loss: 1.2360 | Val Loss: 1.1657
Epoch 009 | Train Loss: 1.1848 | Val Loss: 1.0642
Epoch 010 | Train Loss: 1.0807 | Val Loss: 0.8960
Epoch 011 | Train Loss: 0.9765 | Val Loss: 0.7529


[I 2025-07-09 13:33:07,113] Trial 16 pruned. 


Epoch 012 | Train Loss: 0.8594 | Val Loss: 0.6636
Epoch 001 | Train Loss: 1.3945 | Val Loss: 1.3575
Epoch 002 | Train Loss: 1.3681 | Val Loss: 1.3501
Epoch 003 | Train Loss: 1.3525 | Val Loss: 1.3320
Epoch 004 | Train Loss: 1.3337 | Val Loss: 1.3069
Epoch 005 | Train Loss: 1.3065 | Val Loss: 1.2571
Epoch 006 | Train Loss: 1.2586 | Val Loss: 1.1909
Epoch 007 | Train Loss: 1.1586 | Val Loss: 1.0046
Epoch 008 | Train Loss: 0.9845 | Val Loss: 0.7416
Epoch 009 | Train Loss: 0.7788 | Val Loss: 0.4766
Epoch 010 | Train Loss: 0.6251 | Val Loss: 0.4295
Epoch 011 | Train Loss: 0.6257 | Val Loss: 0.3787
Epoch 012 | Train Loss: 0.5794 | Val Loss: 0.3694
Epoch 013 | Train Loss: 0.5481 | Val Loss: 0.3488
Epoch 014 | Train Loss: 0.5270 | Val Loss: 0.3375
Epoch 015 | Train Loss: 0.5068 | Val Loss: 0.3244
Epoch 016 | Train Loss: 0.5199 | Val Loss: 0.3210
Epoch 017 | Train Loss: 0.4836 | Val Loss: 0.3251
Epoch 018 | Train Loss: 0.4698 | Val Loss: 0.3141
Epoch 019 | Train Loss: 0.4615 | Val Loss: 0.3085


[I 2025-07-09 13:35:17,536] Trial 17 finished with value: 0.8685567010309279 and parameters: {'hidden_channels': 256, 'num_layers': 3, 'heads': 4, 'dropout': 0.004421828845806492}. Best is trial 14 with value: 0.8762886597938144.


Epoch 500 | Train Loss: 0.3151 | Val Loss: 0.2805
Epoch 001 | Train Loss: 1.3868 | Val Loss: 1.3749
Epoch 002 | Train Loss: 1.3747 | Val Loss: 1.3588
Epoch 003 | Train Loss: 1.3587 | Val Loss: 1.3404
Epoch 004 | Train Loss: 1.3351 | Val Loss: 1.3106
Epoch 005 | Train Loss: 1.3149 | Val Loss: 1.2657
Epoch 006 | Train Loss: 1.2339 | Val Loss: 1.1538
Epoch 007 | Train Loss: 1.1584 | Val Loss: 1.0024
Epoch 008 | Train Loss: 1.0416 | Val Loss: 0.8899
Epoch 009 | Train Loss: 1.0055 | Val Loss: 0.7613
Epoch 010 | Train Loss: 0.9567 | Val Loss: 0.7354
Epoch 011 | Train Loss: 0.9272 | Val Loss: 0.6957
Epoch 012 | Train Loss: 0.9255 | Val Loss: 0.6824
Epoch 013 | Train Loss: 0.8882 | Val Loss: 0.6730
Epoch 014 | Train Loss: 0.8992 | Val Loss: 0.6144
Epoch 015 | Train Loss: 0.8560 | Val Loss: 0.6122
Epoch 016 | Train Loss: 0.8828 | Val Loss: 0.6036
Epoch 017 | Train Loss: 0.8864 | Val Loss: 0.6101
Epoch 018 | Train Loss: 0.8675 | Val Loss: 0.6326
Epoch 019 | Train Loss: 0.8198 | Val Loss: 0.5871


[I 2025-07-09 13:37:29,121] Trial 18 finished with value: 0.8685567010309279 and parameters: {'hidden_channels': 256, 'num_layers': 3, 'heads': 1, 'dropout': 0.3296700483130123}. Best is trial 14 with value: 0.8762886597938144.


Epoch 500 | Train Loss: 0.6534 | Val Loss: 0.4209


[I 2025-07-09 13:37:29,420] Trial 19 pruned. 


Epoch 001 | Train Loss: 1.3938 | Val Loss: 1.4053


[I 2025-07-09 13:37:29,728] Trial 20 pruned. 


Epoch 001 | Train Loss: 1.3844 | Val Loss: 1.3650


[I 2025-07-09 13:37:29,970] Trial 21 pruned. 


Epoch 001 | Train Loss: 1.3780 | Val Loss: 1.3711


[I 2025-07-09 13:37:30,210] Trial 22 pruned. 


Epoch 001 | Train Loss: 1.3882 | Val Loss: 1.3611


[I 2025-07-09 13:37:30,436] Trial 23 pruned. 


Epoch 001 | Train Loss: 1.3903 | Val Loss: 1.3695


[I 2025-07-09 13:37:30,669] Trial 24 pruned. 
[I 2025-07-09 13:37:30,835] Trial 25 pruned. 


Epoch 001 | Train Loss: 1.3781 | Val Loss: 1.3519
Epoch 001 | Train Loss: 1.3897 | Val Loss: 1.3663


[I 2025-07-09 13:37:31,116] Trial 26 pruned. 


Epoch 001 | Train Loss: 1.3987 | Val Loss: 1.3778


[I 2025-07-09 13:37:31,385] Trial 27 pruned. 


Epoch 001 | Train Loss: 1.3780 | Val Loss: 1.3647


[I 2025-07-09 13:37:31,721] Trial 28 pruned. 


Epoch 001 | Train Loss: 1.3880 | Val Loss: 1.3676


[I 2025-07-09 13:37:32,001] Trial 29 pruned. 


Epoch 001 | Train Loss: 1.3860 | Val Loss: 1.3702
Best Trial:
  Accuracy: 0.8762886597938144
  Params:
    hidden_channels: 256
    num_layers: 3
    heads: 2
    dropout: 0.012645607435582129


In [27]:
# Save best params to JSON
best_params_path = os.path.join(model_dir, "best_params.json")
with open(best_params_path, "w") as f:
    json.dump(study.best_trial.params, f, indent=4)

# ----------------- FINAL MODEL TRAINING ----------------------

# Build model with best params
best_params = study.best_trial.params

final_model = GAT(
    in_channels=in_channels,
    hidden_channels=best_params['hidden_channels'],
    num_layers=best_params['num_layers'],
    out_channels=num_classes,
    dropout=best_params['dropout'],
    heads=best_params['heads'],
    v2=True,
    edge_dim=1,
    jk='lstm'
).to(device)

# Loss and optimizer setup
all_labels = torch.cat([data.y for data in train_loader.dataset])
class_counts = torch.bincount(all_labels, minlength=num_classes)
class_weights = 1.0 / (class_counts.float() + 1e-6)
class_weights = class_weights / class_weights.sum()
class_weights = class_weights.to(device)

criterion = CrossEntropyLoss(weight=class_weights)
optimizer = torch.optim.Adam(final_model.parameters(), lr=0.0005, weight_decay=5e-4)

# Train final model
for epoch in range(500):
    final_model.train()
    for data in train_loader:
        data = data.to(device)
        optimizer.zero_grad()
        out = final_model(data.x, data.edge_index, edge_weight=data.edge_attr)
        loss = criterion(out, data.y)
        loss.backward()
        optimizer.step()

    print(f"[FINAL TRAIN] Epoch {epoch+1:03d} completed.")

# ----------------- SAVE FULL MODEL ----------------------

full_model_path = os.path.join(model_dir, "GAT_full_model_1.pt")
torch.save(final_model, full_model_path)
print(f"✅ Full model saved to {full_model_path}")

[FINAL TRAIN] Epoch 001 completed.
[FINAL TRAIN] Epoch 002 completed.
[FINAL TRAIN] Epoch 003 completed.
[FINAL TRAIN] Epoch 004 completed.
[FINAL TRAIN] Epoch 005 completed.
[FINAL TRAIN] Epoch 006 completed.
[FINAL TRAIN] Epoch 007 completed.
[FINAL TRAIN] Epoch 008 completed.
[FINAL TRAIN] Epoch 009 completed.
[FINAL TRAIN] Epoch 010 completed.
[FINAL TRAIN] Epoch 011 completed.
[FINAL TRAIN] Epoch 012 completed.
[FINAL TRAIN] Epoch 013 completed.
[FINAL TRAIN] Epoch 014 completed.
[FINAL TRAIN] Epoch 015 completed.
[FINAL TRAIN] Epoch 016 completed.
[FINAL TRAIN] Epoch 017 completed.
[FINAL TRAIN] Epoch 018 completed.
[FINAL TRAIN] Epoch 019 completed.
[FINAL TRAIN] Epoch 020 completed.
[FINAL TRAIN] Epoch 021 completed.
[FINAL TRAIN] Epoch 022 completed.
[FINAL TRAIN] Epoch 023 completed.
[FINAL TRAIN] Epoch 024 completed.
[FINAL TRAIN] Epoch 025 completed.
[FINAL TRAIN] Epoch 026 completed.
[FINAL TRAIN] Epoch 027 completed.
[FINAL TRAIN] Epoch 028 completed.
[FINAL TRAIN] Epoch 

In [16]:
data.x

tensor([[1., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0.],
        [0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0.],
        [1., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 1., 0.],
        [0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0.],
        [1., 0., 0., 1., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0.],
        [1., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0.],
        [1., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 1., 0.],
        [0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0.],
        [1., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0.],
        [1., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0.],
        [1., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 1., 0.],
        [0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0.],
        [1., 0.,

In [34]:
import torch
import json
from torch_geometric.loader import DataLoader

# Load the saved full model
model = torch.load("C:\\Users\\User\\OneDrive\\Desktop\\GAT-model testing\\GAT-test\\models\\GAT_full_model_1.pt")
model.eval()

# Use GPU if available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = model.to(device)

# Load label decoder (to map index -> class name)
label_mapping = json.load(open("label_encoding.json"))
index_to_label = {v: k for k, v in label_mapping.items()}  # reverse mapping

# Load the data you want to predict on

data_list = torch.load("datacheckpoint_01 (1).pt", map_location='cuda', weights_only=False)
test_loader = DataLoader(data_list, batch_size=1, shuffle=False)

# Predict on each sample
predictions = []
model.eval()
with torch.no_grad():
    for data in test_loader:
        data = data.to(device)
        out = model(data.x, data.edge_index, edge_weight=data.edge_attr)
        pred = out.argmax(dim=1).cpu().numpy()
        labels = [index_to_label[int(p)] for p in pred]
        predictions.append(labels)

# Print or save predictions
for i, label in enumerate(predictions):
    print(f"Sample {i+1}: Predicted class → {label}")

# Optional: Save to CSV
import pandas as pd
df = pd.DataFrame({'Sample': list(range(1, len(predictions)+1)), 'Prediction': predictions})
df.to_csv("results/predictions.csv", index=False)
print("✅ Predictions saved to results/predictions.csv")


Sample 1: Predicted class → ['KEY', 'VALUE', 'KEY', 'NON_RELATED', 'NON_RELATED', 'VALUE', 'KEY', 'VALUE', 'OTHER_KEY', 'KEY', 'VALUE', 'KEY', 'NON_RELATED', 'OTHER_KEY', 'VALUE', 'KEY', 'NON_RELATED', 'OTHER_KEY', 'VALUE']
✅ Predictions saved to results/predictions.csv


  model = torch.load("C:\\Users\\User\\OneDrive\\Desktop\\GAT-model testing\\GAT-test\\models\\GAT_full_model_1.pt")


In [37]:
data_list = torch.load("datacheckpoint_01 (1).pt", map_location='cuda', weights_only=False)
data_list[0]

Data(x=[19, 18], edge_index=[2, 13], edge_attr=[13, 1], y=[19])