In [4]:
import mlflow
import mlflow.pytorch
import json
import torch
import matplotlib.pyplot as plt
import pandas as pd
from torch.nn import CrossEntropyLoss
from torch_geometric.loader import DataLoader
from torch_geometric.nn.models import GAT
import os
from torch_geometric.data import Data
from torch_geometric.data.data import DataEdgeAttr, DataTensorAttr
from torch_geometric.data.storage import GlobalStorage
import torch.serialization
import optuna
from sklearn.model_selection import KFold
import numpy as np

# from sklearn.model_selection import KFold
from sklearn.model_selection import StratifiedKFold
# import numpy as np




In [5]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
device

device(type='cuda')

In [20]:

# Load data 
with torch.serialization.safe_globals([Data]):
    data_list = torch.load("training_data\Datacheckpoint_latest_22", map_location='cuda', weights_only=False)

labels = json.load(open("label_encoding.json"))
batch_size = 1

train_split = int(len(data_list) * 0.8)
train_data = data_list[:train_split]
val_data = data_list[train_split:]

train_loader = DataLoader(train_data, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_data, batch_size=batch_size)

in_channels = data_list[0].x.size(1)
# in_channels =18
num_classes = len(labels)

model_dir = "C:\\Users\\User\\OneDrive\\Desktop\\GAT-model testing\\GAT-test\\models\\model"
results_dir = "C:\\Users\\User\\OneDrive\\Desktop\\GAT-model testing\\GAT-test\\results"
plots_dir = "C:\\Users\\User\\OneDrive\\Desktop\\GAT-model testing\\GAT-test\\plots"
os.makedirs(model_dir, exist_ok=True)
os.makedirs(results_dir, exist_ok=True)
os.makedirs(plots_dir, exist_ok=True)

In [21]:
def objective(trial):
    config = {
        'hidden_channels': trial.suggest_categorical('hidden_channels', [64, 128, 256]),
        'num_layers': trial.suggest_int('num_layers', 1, 3),
        'heads': trial.suggest_categorical('heads', [1, 2, 4, 8]),
        'dropout': trial.suggest_float('dropout', 0.0, 0.5),
        'hidden_dim': trial.suggest_categorical('hidden_dim', [64, 128, 256])
    }

    with mlflow.start_run(run_name=f"trial_{trial.number}", nested=True):
        mlflow.log_params(config)
        mlflow.set_tag("model", "GATv2")
        mlflow.set_tag("cv_strategy", "StratifiedKFold")

        # all_labels = torch.tensor([data.y.item() for data in data_list])
        all_labels = torch.cat([data.y.view(-1) for data in data_list])
        skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
        data_indices = list(range(len(data_list)))

        fold_val_acc = []

        for fold, (train_idx, val_idx) in enumerate(skf.split(data_indices, all_labels)):
            train_data = [data_list[i] for i in train_idx]
            val_data = [data_list[i] for i in val_idx]
            train_loader = DataLoader(train_data, batch_size=batch_size, shuffle=True)
            val_loader = DataLoader(val_data, batch_size=batch_size)

            # ----- Model, loss, optimizer setup -----
            model = GAT(
                in_channels=in_channels,
                hidden_channels=config['hidden_channels'],
                num_layers=config['num_layers'],
                out_channels=num_classes,
                dropout=config['dropout'],
                heads=config['heads'],
                v2=True,
                edge_dim=1,
                jk='lstm'
            ).to(device)

            all_train_labels = torch.cat([data.y for data in train_loader.dataset])
            class_weights = 1.0 / (torch.bincount(all_train_labels, minlength=num_classes).float() + 1e-6)
            class_weights = class_weights / class_weights.sum()
            class_weights = class_weights.to(device)

            criterion = CrossEntropyLoss(weight=class_weights)
            optimizer = torch.optim.Adam(model.parameters(), lr=0.0005, weight_decay=5e-4)
            scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.5, patience=10)

            best_val_acc = 0
            best_val_loss = float('inf')
            wait = 0
            patience = 30
            min_delta = 1e-4

            # Track metrics for this fold
            train_acc_list, val_acc_list = [], []
            train_loss_list, val_loss_list = [], []

            for epoch in range(500):
                model.train()
                total_loss = 0
                correct_train = 0
                total_train = 0
                for data in train_loader:
                    data = data.to(device)
                    optimizer.zero_grad()
                    out = model(data.x, data.edge_index, edge_weight=data.edge_attr)
                    loss = criterion(out, data.y)
                    loss.backward()
                    optimizer.step()
                    total_loss += loss.item()
                    correct_train += (out.argmax(dim=1) == data.y).sum().item()
                    total_train += data.y.size(0)

                train_acc = correct_train / total_train
                avg_train_loss = total_loss / len(train_loader)
                train_acc_list.append(train_acc)
                train_loss_list.append(avg_train_loss)

                model.eval()
                val_loss = 0
                correct, total = 0, 0
                with torch.no_grad():
                    for data in val_loader:
                        data = data.to(device)
                        out = model(data.x, data.edge_index, edge_weight=data.edge_attr)
                        loss = criterion(out, data.y)
                        val_loss += loss.item()
                        correct += (out.argmax(dim=1) == data.y).sum().item()
                        total += data.y.size(0)

                avg_val_loss = val_loss / len(val_loader)
                val_acc = correct / total
                val_acc_list.append(val_acc)
                val_loss_list.append(avg_val_loss)

                # Print training and validation accuracy per epoch
                print(f"Fold {fold+1} | Epoch {epoch+1:03d} | Train Acc: {train_acc:.4f} | Val Acc: {val_acc:.4f} | Train Loss: {avg_train_loss:.4f} | Val Loss: {avg_val_loss:.4f}")

                scheduler.step(avg_val_loss)
                trial.report(val_acc, epoch)

                if avg_val_loss < best_val_loss - min_delta:
                    best_val_loss = avg_val_loss
                    best_val_acc = val_acc
                    wait = 0
                else:
                    wait += 1
                    if wait >= patience:
                        print(f" Early stopping at epoch {epoch+1} for fold {fold+1}")
                        break

                if trial.should_prune():
                    raise optuna.exceptions.TrialPruned()

            # Save metrics to CSV for this fold
            fold_run_name = f"Trial{trial.number}_Fold{fold+1}_H{config['hidden_channels']}_L{config['num_layers']}_HD{config['heads']}_DO{int(config['dropout']*10)}"
            fold_csv_path = os.path.join(results_dir, f"{fold_run_name}.csv")
            df = pd.DataFrame({
                'Epoch': list(range(1, len(train_acc_list)+1)),
                'TrainAcc': train_acc_list,
                'ValAcc': val_acc_list,
                'TrainLoss': train_loss_list,
                'ValLoss': val_loss_list
            })
            df.to_csv(fold_csv_path, index=False)

            print(f"✅ Fold {fold+1}: Best Val Acc = {best_val_acc:.4f}")
            fold_val_acc.append(best_val_acc)

        mean_val_acc = np.mean(fold_val_acc)
        mlflow.log_metric("mean_val_acc", mean_val_acc)
        return mean_val_acc


In [22]:
import pandas as pd
import os
study = optuna.create_study(direction='maximize')
study.optimize(objective, n_trials=30)

print("Best Trial:")
# print("  Accuracy:", study.best_trial.value)
print("  Params:")
for k, v in study.best_trial.params.items():
    print(f"    {k}: {v}")

# === No need to retrain: Best model is already saved during Optuna search ===
best_trial = study.best_trial
print("Best model state_dict saved at:", best_trial.user_attrs["best_model_state_path"] if "best_model_state_path" in best_trial.user_attrs else "N/A")
print("Best full model saved at:", best_trial.user_attrs["best_model_full_path"] if "best_model_full_path" in best_trial.user_attrs else "N/A")

# Print best train and validation accuracy from all folds for the best trial
hidden = best_trial.params['hidden_channels']
layers = best_trial.params['num_layers']
heads = best_trial.params['heads']
dropout = int(best_trial.params['dropout']*10)
trial_num = best_trial.number
pattern = f"Trial{trial_num}_Fold*_H{hidden}_L{layers}_HD{heads}_DO{dropout}.csv"
import glob
csv_files = glob.glob(os.path.join("results", pattern))
best_train_acc = None
best_val_acc = None
if csv_files:
    all_train_acc = []
    all_val_acc = []
    for csv_path in csv_files:
        df = pd.read_csv(csv_path)
        if 'TrainAcc' in df:
            all_train_acc.append(df['TrainAcc'].max())
        if 'ValAcc' in df:
            all_val_acc.append(df['ValAcc'].max())
    best_train_acc = max(all_train_acc) if all_train_acc else None
    best_val_acc = max(all_val_acc) if all_val_acc else None
    print(f"Best Training Accuracy (across folds): {best_train_acc:.4f}" if best_train_acc is not None else "Best Training Accuracy: N/A")
    print(f"Best Validation Accuracy (across folds): {best_val_acc:.4f}" if best_val_acc is not None else "Best Validation Accuracy: N/A")
else:
    print(f"Could not find CSV files for best trial with pattern {pattern}")


[I 2025-07-14 16:42:03,795] A new study created in memory with name: no-name-690c0e8f-7cd9-4c46-ab09-5217386ce23e
[W 2025-07-14 16:42:04,069] Trial 0 failed with parameters: {'hidden_channels': 256, 'num_layers': 3, 'heads': 2, 'dropout': 0.24520047567727848, 'hidden_dim': 128} because of the following error: TypeError("can't convert cuda:0 device type tensor to numpy. Use Tensor.cpu() to copy the tensor to host memory first.").
Traceback (most recent call last):
  File "c:\Users\User\anaconda3\envs\ENVGAT\lib\site-packages\optuna\study\_optimize.py", line 201, in _run_trial
    value_or_values = func(trial)
  File "C:\Users\User\AppData\Local\Temp\ipykernel_24368\2898662375.py", line 22, in objective
    for fold, (train_idx, val_idx) in enumerate(skf.split(data_indices, all_labels)):
  File "c:\Users\User\anaconda3\envs\ENVGAT\lib\site-packages\sklearn\model_selection\_split.py", line 881, in split
    y = check_array(y, input_name="y", ensure_2d=False, dtype=None)
  File "c:\Users\U

TypeError: can't convert cuda:0 device type tensor to numpy. Use Tensor.cpu() to copy the tensor to host memory first.