In [1]:
from sklearn.metrics import (
    accuracy_score, hamming_loss, f1_score, roc_auc_score
)
def evaluate_multilabel_model(y_true, y_pred, y_prob=None, name=None):
    """
    Evaluate multilabel classification performance.
    
    Parameters
    ----------
    y_true : np.ndarray
        Ground-truth binary matrix (n_samples x n_labels)
    y_pred : np.ndarray
        Predicted binary matrix (same shape as y_true)
    y_prob : np.ndarray, optional
        Predicted probabilities (for ROC-AUC if available)
    name : str
        Name of the dataframe evaluated
    """

    metrics = {"Nom :": name}
    metrics["Subset accuracy"] = accuracy_score(y_true, y_pred)
    metrics["Hamming loss"] = hamming_loss(y_true, y_pred)
    metrics["Micro F1"] = f1_score(y_true, y_pred, average="micro")
    metrics["Macro F1"] = f1_score(y_true, y_pred, average="macro")
    metrics["Weighted F1"] = f1_score(y_true, y_pred, average="weighted")
    
    if y_prob is not None:
        try:
            metrics["Micro ROC-AUC"] = roc_auc_score(y_true, y_prob, average="micro")
            metrics["Macro ROC-AUC"] = roc_auc_score(y_true, y_prob, average="macro")
        except ValueError:
            metrics["Micro ROC-AUC"] = np.nan
            metrics["Macro ROC-AUC"] = np.nan

    print("\n📊 Multilabel Evaluation Metrics:")
    for k, v in metrics.items():
        if isinstance(v, float):
            print(f"{k:20s}: {v:.4f}")
        else:
            print(f"{k:20s}: {v}")

    return metrics


In [2]:
import deepchem as dc
import pandas as pd
import numpy as np
from sklearn.decomposition import PCA
from tqdm import tqdm

# Helper function to combine ECFP and SMILES datasets
def dataset_to_df_with_smiles(ecfp_dataset, raw_dataset, tasks):
    X_list, y_list, ids_list, smiles_list = [], [], [], []
    
    # Iterate through both datasets in parallel
    for (X_batch, y_batch, w_batch, ids_batch), (X_raw, _, _, _) in zip(
        ecfp_dataset.iterbatches(batch_size=128, pad_batches=False),
        raw_dataset.iterbatches(batch_size=128, pad_batches=False)
    ):
        X_list.append(X_batch)
        y_list.append(y_batch)
        ids_list.extend(ids_batch)
        smiles_list.extend(X_raw)  # SMILES strings are in the raw features

    # Stack numerical and label arrays
    X_all = np.vstack(X_list)
    y_all = np.vstack(y_list)

    # Create DataFrames
    df_X = pd.DataFrame(X_all, columns=[f"fp_{i}" for i in range(X_all.shape[1])])
    df_y = pd.DataFrame(y_all, columns=tasks)
    df_y["mol_id"] = ids_list
    df_y["smiles"] = smiles_list

    # Combine all information into a single DataFrame
    df = pd.concat([df_y, df_X], axis=1)
    return df


# Load SIDER dataset with ECFP features (for model input)
tasks, datasets, transformers = dc.molnet.load_sider(featurizer='ECFP', splitter='scaffold')
train_ecfp, valid_ecfp, test_ecfp = datasets

# Load SIDER dataset again with raw SMILES (for visualization / metadata)
_, datasets_raw, _ = dc.molnet.load_sider(featurizer='Raw', splitter='scaffold')
train_raw, valid_raw, test_raw = datasets_raw

# Convert both representations to DataFrames
df_train = dataset_to_df_with_smiles(train_ecfp, train_raw, tasks)
df_valid = dataset_to_df_with_smiles(valid_ecfp, valid_raw, tasks)
df_test  = dataset_to_df_with_smiles(test_ecfp,  test_raw,  tasks)

feature_cols = [col for col in df_train.columns if col.startswith("fp_")]
label_cols = [col for col in df_train.columns if col not in feature_cols + ['mol_id', 'smiles', 'scaffold']]

X_train = df_train[feature_cols].astype(float).values
y_train = df_train[label_cols].astype(float).values

X_valid = df_valid[feature_cols].astype(float).values
y_valid = df_valid[label_cols].astype(float).values

X_test = df_test[feature_cols].astype(float).values
y_test = df_test[label_cols].astype(float).values

pca = PCA().fit(X_train)
cumulative_variance = pca.explained_variance_ratio_.cumsum()

# Number of components to reach 80% variance
n_components_80 = np.argmax(cumulative_variance >= 0.80) + 1

pca = PCA(n_components=n_components_80)
X_train_pca = pca.fit_transform(X_train)
X_valid_pca = pca.transform(X_valid)
X_test_pca = pca.transform(X_test)

No normalization for SPS. Feature removed!
No normalization for AvgIpc. Feature removed!
No normalization for NumAmideBonds. Feature removed!
No normalization for NumAtomStereoCenters. Feature removed!
No normalization for NumBridgeheadAtoms. Feature removed!
No normalization for NumHeterocycles. Feature removed!
No normalization for NumSpiroAtoms. Feature removed!
No normalization for NumUnspecifiedAtomStereoCenters. Feature removed!
No normalization for Phi. Feature removed!
Skipped loading some Tensorflow models, missing a dependency. No module named 'tensorflow.python'
Skipped loading modules with pytorch-geometric dependency, missing a dependency. No module named 'torchdata.datapipes'
Skipped loading some Jax models, missing a dependency. No module named 'haiku'
Skipped loading some PyTorch models, missing a dependency. No module named 'tensorflow.python'


In [3]:
# Charger le dataset SIDER avec un featurizer de graphes
tasks, datasets, transformers = dc.molnet.load_sider(
    featurizer=dc.feat.ConvMolFeaturizer(), 
    splitter='scaffold'
)

train_dataset, valid_dataset, test_dataset = datasets

print(f"Nombre de tâches : {len(tasks)}")
print(f"Train samples : {len(train_dataset)}")
print(f"Valid samples : {len(valid_dataset)}")
print(f"Test samples  : {len(test_dataset)}")

Nombre de tâches : 27
Train samples : 1141
Valid samples : 143
Test samples  : 143


In [4]:
# --- Retrieve Ground Truth Labels (y_true) ---
print("🏷️ Retrieving ground truth labels (y_true) from original DataFrames...")
# Check if label_cols exists, otherwise define it (robustness)
if 'label_cols' not in locals():
    print("   'label_cols' not found, defining it now from df_train...")
    label_cols = [col for col in df_train.columns if col not in ['mol_id', 'smiles', 'scaffold'] and not col.startswith("fp_")]
    if not label_cols:
        raise ValueError("Could not define label_cols. Make sure df_train is loaded and has label columns.")

y_train = df_train[label_cols].astype(float).values
y_valid = df_valid[label_cols].astype(float).values
y_test = df_test[label_cols].astype(float).values
print(f"✅ Ground truth label arrays created (y_train shape: {y_train.shape})")

🏷️ Retrieving ground truth labels (y_true) from original DataFrames...
✅ Ground truth label arrays created (y_train shape: (1141, 27))


In [5]:
import warnings
from rdkit import RDLogger
warnings.filterwarnings("ignore", category=DeprecationWarning)

lg = RDLogger.logger()
lg.setLevel(RDLogger.CRITICAL) # This tells RDKit's own logger to only show critical errors

In [6]:
import deepchem as dc
import numpy as np
import pandas as pd
from rdkit import Chem
import torch
from torch_geometric.data import Data, Batch # PyTorch Geometric Data format
from torch.utils.data import Dataset, DataLoader

# --- 1. Load SIDER dataset using MoleculeNetDataset ---
print("🔄 Loading SIDER dataset using MoleculeNetDataset...")
sider_tasks, (train_raw_ds, valid_raw_ds, test_raw_ds), transformers = dc.molnet.load_sider(
    featurizer=dc.feat.DummyFeaturizer(), # Load raw SMILES first
    splitter='scaffold',
    transformers=[] # No initial transformers
)
print("✅ SIDER raw datasets loaded.")

# --- 2. Define Featurizer for PyG (Manual Feature Extraction) ---
# We'll extract atom features and adjacency info manually for PyG's format

def smiles_to_pyg_graph(smiles, y):
    """Converts SMILES string to PyTorch Geometric Data object."""
    mol = Chem.MolFromSmiles(smiles)
    if mol is None:
        return None

    # --- Atom Features ---
    atom_features = []
    allowable_features = {
        'possible_atomic_num_list': list(range(1, 119)),
        'possible_formal_charge_list': [-5, -4, -3, -2, -1, 0, 1, 2, 3, 4, 5],
        'possible_chirality_list': [
            Chem.rdchem.ChiralType.CHI_UNSPECIFIED,
            Chem.rdchem.ChiralType.CHI_TETRAHEDRAL_CW,
            Chem.rdchem.ChiralType.CHI_TETRAHEDRAL_CCW,
            Chem.rdchem.ChiralType.CHI_OTHER
        ],
        'possible_hybridization_list': [
            Chem.rdchem.HybridizationType.S, Chem.rdchem.HybridizationType.SP, Chem.rdchem.HybridizationType.SP2,
            Chem.rdchem.HybridizationType.SP3, Chem.rdchem.HybridizationType.SP3D, Chem.rdchem.HybridizationType.SP3D2,
            Chem.rdchem.HybridizationType.UNSPECIFIED
        ],
        'possible_numH_list': [0, 1, 2, 3, 4, 5, 6, 7, 8],
        'possible_implicit_valence_list': [0, 1, 2, 3, 4, 5, 6],
        'possible_degree_list': [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
        'possible_bonds': [
            Chem.rdchem.BondType.SINGLE, Chem.rdchem.BondType.DOUBLE,
            Chem.rdchem.BondType.TRIPLE, Chem.rdchem.BondType.AROMATIC
        ],
    }

    for atom in mol.GetAtoms():
        features = []
        features.append(allowable_features['possible_atomic_num_list'].index(atom.GetAtomicNum()))
        features.append(allowable_features['possible_formal_charge_list'].index(atom.GetFormalCharge()))
        features.append(allowable_features['possible_chirality_list'].index(atom.GetChiralTag()))
        features.append(allowable_features['possible_hybridization_list'].index(atom.GetHybridization()))
        features.append(allowable_features['possible_numH_list'].index(atom.GetTotalNumHs()))
        features.append(allowable_features['possible_implicit_valence_list'].index(atom.GetImplicitValence()))
        features.append(allowable_features['possible_degree_list'].index(atom.GetDegree()))
        features.append(atom.GetIsAromatic())
        features.append(atom.IsInRing())
        atom_features.append(features)
    x = torch.tensor(np.array(atom_features), dtype=torch.float)

    # --- Edge Index (Connectivity) ---
    edge_indices = []
    for bond in mol.GetBonds():
        i = bond.GetBeginAtomIdx()
        j = bond.GetEndAtomIdx()
        edge_indices += [[i, j], [j, i]] # Add edges in both directions
    edge_index = torch.tensor(np.array(edge_indices).T, dtype=torch.long)

    # --- Labels ---
    y_tensor = torch.tensor(y, dtype=torch.float).unsqueeze(0) # Shape [1, n_tasks]

    # Create PyG Data object
    data = Data(x=x, edge_index=edge_index, y=y_tensor)
    return data

# --- 3. Create PyTorch Geometric Datasets ---
class SIDERPyGDataset(Dataset):
    def __init__(self, dc_dataset):
        self.smiles = dc_dataset.ids
        self.labels = dc_dataset.y
        self.graphs = []
        for i in range(len(self.smiles)):
            graph = smiles_to_pyg_graph(self.smiles[i], self.labels[i])
            if graph is not None:
                self.graphs.append(graph)

    def __len__(self):
        return len(self.graphs)

    def __getitem__(self, idx):
        return self.graphs[idx]

print("🧬 Creating PyTorch Geometric datasets...")
train_pyg_dataset = SIDERPyGDataset(train_raw_ds)
valid_pyg_dataset = SIDERPyGDataset(valid_raw_ds)
test_pyg_dataset = SIDERPyGDataset(test_raw_ds)
print("✅ PyG Datasets created.")

# --- 4. Create PyTorch DataLoaders ---
# Use PyG's DataLoader for handling batches of graphs
from torch_geometric.loader import DataLoader as PyGDataLoader

batch_size = 64 # Adjust as needed
train_loader = PyGDataLoader(train_pyg_dataset, batch_size=batch_size, shuffle=True)
valid_loader = PyGDataLoader(valid_pyg_dataset, batch_size=batch_size, shuffle=False)
test_loader = PyGDataLoader(test_pyg_dataset, batch_size=batch_size, shuffle=False)
print("✅ PyG DataLoaders created.")


# --- 5. Retrieve y_true again (needed for evaluation) ---
print("🏷️ Retrieving ground truth labels (y_true) for evaluation...")
if 'label_cols' not in locals():
    label_cols = sider_tasks # Use tasks list from loader
y_train_list = [data.y.numpy().flatten() for data in train_pyg_dataset]
y_valid_list = [data.y.numpy().flatten() for data in valid_pyg_dataset]
y_test_list = [data.y.numpy().flatten() for data in test_pyg_dataset]

y_train_true = np.array(y_train_list)
y_valid_true = np.array(y_valid_list)
y_test_true = np.array(y_test_list)
print(f"✅ Ground truth label arrays created (y_train_true shape: {y_train_true.shape})")

🔄 Loading SIDER dataset using MoleculeNetDataset...
✅ SIDER raw datasets loaded.
🧬 Creating PyTorch Geometric datasets...
✅ PyG Datasets created.
✅ PyG DataLoaders created.
🏷️ Retrieving ground truth labels (y_true) for evaluation...
✅ Ground truth label arrays created (y_train_true shape: (1141, 27))


In [15]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch_geometric.nn import GCNConv, global_mean_pool # Example layers

# --- 6. Define the PyTorch Geometric GNN ---
class SiderGNN_PyG(nn.Module):
    def __init__(self, n_tasks, n_node_features, hidden_channels=128, dense_dim=256, dropout=0.3, num_gc_layers=None): # Increased dimensions
        super(SiderGNN_PyG, self).__init__()
        self.dropout_p = dropout
        self.n_tasks = n_tasks

        # GCN Layers (Wider)
        self.conv1 = GCNConv(n_node_features, hidden_channels)
        self.conv2 = GCNConv(hidden_channels, hidden_channels)
        # self.conv3 = GCNConv(hidden_channels, hidden_channels) # Option: Add a third layer if needed

        # Dense Layers after pooling (Wider)
        self.dense1 = nn.Linear(hidden_channels, dense_dim) # Input from pooling
        self.dropout = nn.Dropout(p=self.dropout_p) # Increased dropout slightly
        self.output_layer = nn.Linear(dense_dim, n_tasks)

    def forward(self, data):
        x, edge_index, batch = data.x, data.edge_index, data.batch

        # Graph Convolutions
        x = self.conv1(x, edge_index)
        x = F.relu(x)
        x = self.conv2(x, edge_index)
        x = F.relu(x)
        # x = self.conv3(x, edge_index) # Option: Add third layer
        # x = F.relu(x)

        # Global Pooling
        x_pooled = global_mean_pool(x, batch)

        # Dense layers
        x = self.dense1(x_pooled)
        x = F.relu(x)
        if self.dropout_p > 0:
            x = self.dropout(x)

        # Output logits
        logits = self.output_layer(x)
        return logits

# --- 7. Instantiate Model, Loss, Optimizer ---
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"Using device: {device}")

n_node_features = train_pyg_dataset[0].num_node_features # Get feature dim from first sample
n_tasks = len(sider_tasks)

model_pyg = SiderGNN_PyG(n_tasks=n_tasks, n_node_features=n_node_features).to(device)
optimizer = optim.Adam(model_pyg.parameters(), lr=0.001)
criterion = nn.BCEWithLogitsLoss() # Multi-label binary classification loss

print("✅ PyTorch Geometric GNN model instantiated.")

# --- 8. Training Loop ---
n_epochs = 200 # Adjust as needed

print(f"🚀 Training PyTorch Geometric GNN for {n_epochs} epochs...")

def train_epoch(model, loader, criterion, optimizer, device):
    model.train()
    total_loss = 0
    for data in loader:
        data = data.to(device)
        optimizer.zero_grad()
        logits = model(data)
        loss = criterion(logits, data.y) # PyG Data objects store 'y' directly
        loss.backward()
        optimizer.step()
        total_loss += loss.item() * data.num_graphs
    return total_loss / len(loader.dataset)

def eval_model(model, loader, criterion, device):
    model.eval()
    total_loss = 0
    all_logits = []
    all_labels = []
    with torch.no_grad():
        for data in loader:
            data = data.to(device)
            logits = model(data)
            loss = criterion(logits, data.y)
            total_loss += loss.item() * data.num_graphs
            all_logits.append(logits.cpu().numpy())
            all_labels.append(data.y.cpu().numpy()) # Store true labels too

    avg_loss = total_loss / len(loader.dataset)
    logits_array = np.concatenate(all_logits, axis=0)
    labels_array = np.concatenate(all_labels, axis=0) # Shape might be [N, 1, n_tasks], need reshape if so

    # Reshape labels if necessary (if they are [N, 1, n_tasks])
    if labels_array.ndim == 3 and labels_array.shape[1] == 1:
        labels_array = labels_array.squeeze(1)

    return avg_loss, logits_array, labels_array


for epoch in range(1, n_epochs + 1):
    train_loss = train_epoch(model_pyg, train_loader, criterion, optimizer, device)
    valid_loss, valid_logits, _ = eval_model(model_pyg, valid_loader, criterion, device) # Ignoring labels from eval here
    print(f"Epoch {epoch:02d}, Train Loss: {train_loss:.4f}, Valid Loss: {valid_loss:.4f}")

print("✅ Training complete.")

# --- 9. Final Evaluation ---
print("⚙️ Generating final predictions...")
_, train_logits_final, _ = eval_model(model_pyg, train_loader, criterion, device)
_, valid_logits_final, _ = eval_model(model_pyg, valid_loader, criterion, device)
_, test_logits_final, test_labels_final = eval_model(model_pyg, test_loader, criterion, device) # Get test labels here

# Use y_true arrays extracted earlier for consistency with evaluation function
# assert np.array_equal(test_labels_final, y_test_true) # Optional check

# Convert final logits to probabilities and predictions
sigmoid = lambda x: 1 / (1 + np.exp(-x))
y_train_prob_pyg = sigmoid(train_logits_final)
y_valid_prob_pyg = sigmoid(valid_logits_final)
y_test_prob_pyg = sigmoid(test_logits_final)

y_train_pred_pyg = (y_train_prob_pyg >= 0.5).astype(int)
y_valid_pred_pyg = (y_valid_prob_pyg >= 0.5).astype(int)
y_test_pred_pyg = (y_test_prob_pyg >= 0.5).astype(int)

print("\n=== PyTorch Geometric GNN Evaluation ===")
metrics_train_pyg = evaluate_multilabel_model(y_train_true, y_train_pred_pyg, y_train_prob_pyg, "Train (GNN PyG)")
metrics_valid_pyg = evaluate_multilabel_model(y_valid_true, y_valid_pred_pyg, y_valid_prob_pyg, "Validation (GNN PyG)")
metrics_test_pyg = evaluate_multilabel_model(y_test_true, y_test_pred_pyg, y_test_prob_pyg, "Test (GNN PyG)")

Using device: cpu
✅ PyTorch Geometric GNN model instantiated.
🚀 Training PyTorch Geometric GNN for 200 epochs...
Epoch 01, Train Loss: 0.5872, Valid Loss: 0.4824
Epoch 02, Train Loss: 0.5352, Valid Loss: 0.4835
Epoch 03, Train Loss: 0.5259, Valid Loss: 0.4792
Epoch 04, Train Loss: 0.5236, Valid Loss: 0.4798
Epoch 05, Train Loss: 0.5206, Valid Loss: 0.4862
Epoch 06, Train Loss: 0.5231, Valid Loss: 0.4832
Epoch 07, Train Loss: 0.5221, Valid Loss: 0.4768
Epoch 08, Train Loss: 0.5201, Valid Loss: 0.4801
Epoch 09, Train Loss: 0.5198, Valid Loss: 0.4817
Epoch 10, Train Loss: 0.5182, Valid Loss: 0.4769
Epoch 11, Train Loss: 0.5183, Valid Loss: 0.4789
Epoch 12, Train Loss: 0.5193, Valid Loss: 0.4869
Epoch 13, Train Loss: 0.5187, Valid Loss: 0.4787
Epoch 14, Train Loss: 0.5180, Valid Loss: 0.4815
Epoch 15, Train Loss: 0.5160, Valid Loss: 0.4806
Epoch 16, Train Loss: 0.5158, Valid Loss: 0.4861
Epoch 17, Train Loss: 0.5166, Valid Loss: 0.4884
Epoch 18, Train Loss: 0.5156, Valid Loss: 0.4750
Epoch

In [18]:
import optuna
import gc

# --- Define the Objective Function for Optuna ---
def objective(trial):
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    n_node_features = train_pyg_dataset[0].num_node_features
    n_tasks = len(sider_tasks)

    # --- Hyperparameters to Tune ---
    lr = trial.suggest_float('lr', 1e-4, 1e-2, log=True)
    hidden_channels = trial.suggest_categorical('hidden_channels', [64, 128, 256])
    dense_dim = trial.suggest_categorical('dense_dim', [128, 256, 512])
    dropout = trial.suggest_float('dropout', 0.1, 0.5)
    num_gc_layers = trial.suggest_int('num_gc_layers', 1, 3) # Test 1, 2, or 3 GCN layers
    n_epochs_objective = 100 # Fixed number of epochs for optimization trial (adjust if needed)

    # --- Instantiate Model, Loss, Optimizer ---
    model = SiderGNN_PyG(
        n_tasks=n_tasks,
        n_node_features=n_node_features,
        hidden_channels=hidden_channels,
        dense_dim=dense_dim,
        dropout=dropout,
        num_gc_layers=num_gc_layers
    ).to(device)
    optimizer = optim.Adam(model.parameters(), lr=lr)
    criterion = nn.BCEWithLogitsLoss()

    # --- Training Loop (with potential simple early stopping based on valid loss) ---
    best_valid_loss = float('inf')
    patience_counter = 0
    patience = 25 # Stop after 25 epochs with no improvement in validation loss

    for epoch in range(1, n_epochs_objective + 1):
        train_loss = train_epoch(model, train_loader, criterion, optimizer, device)
        valid_loss, valid_logits, _ = eval_model(model, valid_loader, criterion, device)

        # Simple Early Stopping Check
        if valid_loss < best_valid_loss:
            best_valid_loss = valid_loss
            patience_counter = 0
        else:
            patience_counter += 1

        # Report intermediate results to Optuna (optional, helps pruning)
        trial.report(valid_loss, epoch)

        # Handle pruning based on intermediate value
        if trial.should_prune():
            print(f"Trial {trial.number} pruned at epoch {epoch}.")
            # Clean up memory before raising TrialPruned
            del model, optimizer, criterion
            gc.collect()
            if device == 'cuda': torch.cuda.empty_cache()
            raise optuna.exceptions.TrialPruned()

        if patience_counter >= patience:
            print(f"Early stopping triggered at epoch {epoch} for trial {trial.number}.")
            break

        # Print progress less frequently
        if epoch % 10 == 0:
            print(f"Trial {trial.number}, Epoch {epoch}, Train Loss: {train_loss:.4f}, Valid Loss: {valid_loss:.4f}")


    # --- Evaluate Final Performance on Validation Set for this trial ---
    # We want to maximize Macro F1 (or Macro AUC)
    _, final_valid_logits, final_valid_labels = eval_model(model, valid_loader, criterion, device)
    sigmoid = lambda x: 1 / (1 + np.exp(-x))
    final_valid_probs = sigmoid(final_valid_logits)
    final_valid_preds = (final_valid_probs >= 0.5).astype(int)

    # Reshape labels if necessary (e.g., if [N, 1, n_tasks])
    if final_valid_labels.ndim == 3 and final_valid_labels.shape[1] == 1:
        final_valid_labels = final_valid_labels.squeeze(1)

    # Calculate the metric to optimize (e.g., Macro F1)
    macro_f1_valid = f1_score(final_valid_labels, final_valid_preds, average="macro", zero_division=0)
    # macro_auc_valid = roc_auc_score(final_valid_labels, final_valid_probs, average="macro") # Alternative

    # Clean up memory
    del model, optimizer, criterion, final_valid_logits, final_valid_probs, final_valid_preds
    gc.collect()
    if device == 'cuda': torch.cuda.empty_cache()

    return macro_f1_valid # Optuna tries to maximize this value

In [19]:
# --- Create and Run the Optuna Study ---
study_name = "sider-gnn-pyg-optimization"
storage_name = f"sqlite:///{study_name}.db" # Saves results to a file

# We aim to maximize Macro F1
study = optuna.create_study(
    study_name=study_name,
    storage=storage_name, # Use SQLite storage to resume if interrupted
    load_if_exists=True,   # Load previous results if the db file exists
    direction="maximize"
)

n_trials = 50 # Number of hyperparameter sets to test (adjust based on time/resources)

print(f"🚀 Starting Optuna Bayesian Optimization for {n_trials} trials...")
study.optimize(objective, n_trials=n_trials)

# --- Output Best Results ---
print("\n✅ Optimization Finished!")
print(f"Number of finished trials: {len(study.trials)}")
print("Best trial:")
trial = study.best_trial

print(f"  Value (Max Macro F1 on Validation): {trial.value:.4f}")
print("  Params: ")
for key, value in trial.params.items():
    print(f"    {key}: {value}")

[I 2025-10-23 16:31:55,754] Using an existing study with name 'sider-gnn-pyg-optimization' instead of creating a new one.


🚀 Starting Optuna Bayesian Optimization for 50 trials...
Trial 3, Epoch 10, Train Loss: 0.5202, Valid Loss: 0.4764
Trial 3, Epoch 20, Train Loss: 0.5152, Valid Loss: 0.4790
Trial 3, Epoch 30, Train Loss: 0.5142, Valid Loss: 0.4777
Trial 3, Epoch 40, Train Loss: 0.5095, Valid Loss: 0.4773
Trial 3, Epoch 50, Train Loss: 0.5077, Valid Loss: 0.4764
Trial 3, Epoch 60, Train Loss: 0.5044, Valid Loss: 0.4731
Trial 3, Epoch 70, Train Loss: 0.5039, Valid Loss: 0.4746
Early stopping triggered at epoch 80 for trial 3.


[I 2025-10-23 16:32:22,474] Trial 3 finished with value: 0.6331369495601278 and parameters: {'lr': 0.0007577676387152359, 'hidden_channels': 64, 'dense_dim': 256, 'dropout': 0.26115381334077103, 'num_gc_layers': 2}. Best is trial 3 with value: 0.6331369495601278.


Trial 4, Epoch 10, Train Loss: 0.5330, Valid Loss: 0.4827
Trial 4, Epoch 20, Train Loss: 0.5278, Valid Loss: 0.4795
Trial 4, Epoch 30, Train Loss: 0.5217, Valid Loss: 0.4779
Trial 4, Epoch 40, Train Loss: 0.5214, Valid Loss: 0.4780
Trial 4, Epoch 50, Train Loss: 0.5195, Valid Loss: 0.4772
Trial 4, Epoch 60, Train Loss: 0.5171, Valid Loss: 0.4786
Trial 4, Epoch 70, Train Loss: 0.5164, Valid Loss: 0.4782
Trial 4, Epoch 80, Train Loss: 0.5147, Valid Loss: 0.4761
Trial 4, Epoch 90, Train Loss: 0.5145, Valid Loss: 0.4764
Trial 4, Epoch 100, Train Loss: 0.5137, Valid Loss: 0.4755


[I 2025-10-23 16:34:09,956] Trial 4 finished with value: 0.636597281845318 and parameters: {'lr': 0.00018166245646355152, 'hidden_channels': 256, 'dense_dim': 128, 'dropout': 0.4096553087655359, 'num_gc_layers': 1}. Best is trial 4 with value: 0.636597281845318.


Trial 5, Epoch 10, Train Loss: 0.5217, Valid Loss: 0.4781
Trial 5, Epoch 20, Train Loss: 0.5149, Valid Loss: 0.4774
Trial 5, Epoch 30, Train Loss: 0.5123, Valid Loss: 0.4747
Trial 5, Epoch 40, Train Loss: 0.5101, Valid Loss: 0.4766
Trial 5, Epoch 50, Train Loss: 0.5084, Valid Loss: 0.4755
Trial 5, Epoch 60, Train Loss: 0.5064, Valid Loss: 0.4740
Trial 5, Epoch 70, Train Loss: 0.5028, Valid Loss: 0.4732
Trial 5, Epoch 80, Train Loss: 0.4997, Valid Loss: 0.4782
Trial 5, Epoch 90, Train Loss: 0.4999, Valid Loss: 0.4719
Trial 5, Epoch 100, Train Loss: 0.4981, Valid Loss: 0.4779


[I 2025-10-23 16:34:51,908] Trial 5 finished with value: 0.6280169343766983 and parameters: {'lr': 0.0004316637250276161, 'hidden_channels': 64, 'dense_dim': 512, 'dropout': 0.383652696906138, 'num_gc_layers': 3}. Best is trial 4 with value: 0.636597281845318.


Trial 6, Epoch 10, Train Loss: 0.5206, Valid Loss: 0.4762
Trial 6, Epoch 20, Train Loss: 0.5152, Valid Loss: 0.4789
Trial 6, Epoch 30, Train Loss: 0.5095, Valid Loss: 0.4754
Trial 6, Epoch 40, Train Loss: 0.5041, Valid Loss: 0.4738
Trial 6, Epoch 50, Train Loss: 0.4984, Valid Loss: 0.4742
Trial 6, Epoch 60, Train Loss: 0.4979, Valid Loss: 0.4717
Trial 6, Epoch 70, Train Loss: 0.4924, Valid Loss: 0.4735
Trial 6, Epoch 80, Train Loss: 0.4892, Valid Loss: 0.4741
Early stopping triggered at epoch 86 for trial 6.


[I 2025-10-23 16:35:44,061] Trial 6 finished with value: 0.6355717502183398 and parameters: {'lr': 0.0018404959877121624, 'hidden_channels': 128, 'dense_dim': 256, 'dropout': 0.45833289009811184, 'num_gc_layers': 2}. Best is trial 4 with value: 0.636597281845318.


Trial 7, Epoch 10, Train Loss: 0.5213, Valid Loss: 0.4799
Trial 7, Epoch 20, Train Loss: 0.5174, Valid Loss: 0.4752
Trial 7, Epoch 30, Train Loss: 0.5145, Valid Loss: 0.4787
Trial 7, Epoch 40, Train Loss: 0.5143, Valid Loss: 0.4774
Early stopping triggered at epoch 50 for trial 7.


[I 2025-10-23 16:36:15,325] Trial 7 finished with value: 0.6296678125568748 and parameters: {'lr': 0.0002291594758135411, 'hidden_channels': 128, 'dense_dim': 512, 'dropout': 0.2965010570142653, 'num_gc_layers': 2}. Best is trial 4 with value: 0.636597281845318.


Trial 8 pruned at epoch 1.


[I 2025-10-23 16:36:16,496] Trial 8 pruned. 


Trial 9 pruned at epoch 1.


[I 2025-10-23 16:36:17,630] Trial 9 pruned. 


Trial 10 pruned at epoch 1.


[I 2025-10-23 16:36:18,536] Trial 10 pruned. 


Trial 11, Epoch 10, Train Loss: 0.5319, Valid Loss: 0.4867
Trial 11, Epoch 20, Train Loss: 0.5260, Valid Loss: 0.4816
Trial 11, Epoch 30, Train Loss: 0.5222, Valid Loss: 0.4797
Trial 11, Epoch 40, Train Loss: 0.5202, Valid Loss: 0.4784
Trial 11, Epoch 50, Train Loss: 0.5185, Valid Loss: 0.4776
Trial 11, Epoch 60, Train Loss: 0.5167, Valid Loss: 0.4778
Trial 11, Epoch 70, Train Loss: 0.5155, Valid Loss: 0.4771
Trial 11, Epoch 80, Train Loss: 0.5143, Valid Loss: 0.4766
Trial 11, Epoch 90, Train Loss: 0.5130, Valid Loss: 0.4773
Trial 11, Epoch 100, Train Loss: 0.5133, Valid Loss: 0.4762


[I 2025-10-23 16:36:57,238] Trial 11 finished with value: 0.6345147998205051 and parameters: {'lr': 0.00013194461230656444, 'hidden_channels': 64, 'dense_dim': 256, 'dropout': 0.3852186691853984, 'num_gc_layers': 1}. Best is trial 4 with value: 0.636597281845318.


Trial 12 pruned at epoch 1.


[I 2025-10-23 16:36:58,151] Trial 12 pruned. 


Trial 13 pruned at epoch 1.


[I 2025-10-23 16:36:59,683] Trial 13 pruned. 


Trial 14 pruned at epoch 1.


[I 2025-10-23 16:37:01,212] Trial 14 pruned. 


Trial 15, Epoch 10, Train Loss: 0.5342, Valid Loss: 0.4851
Trial 15, Epoch 20, Train Loss: 0.5276, Valid Loss: 0.4806
Trial 15, Epoch 30, Train Loss: 0.5236, Valid Loss: 0.4791
Trial 15, Epoch 40, Train Loss: 0.5230, Valid Loss: 0.4788
Trial 15, Epoch 50, Train Loss: 0.5207, Valid Loss: 0.4778
Trial 15, Epoch 60, Train Loss: 0.5187, Valid Loss: 0.4771
Trial 15, Epoch 70, Train Loss: 0.5178, Valid Loss: 0.4775
Trial 15, Epoch 80, Train Loss: 0.5171, Valid Loss: 0.4766
Trial 15, Epoch 90, Train Loss: 0.5152, Valid Loss: 0.4765
Early stopping triggered at epoch 91 for trial 15.


[I 2025-10-23 16:38:41,880] Trial 15 finished with value: 0.6346449911229528 and parameters: {'lr': 0.00010649438499695516, 'hidden_channels': 256, 'dense_dim': 256, 'dropout': 0.4407403167482229, 'num_gc_layers': 2}. Best is trial 4 with value: 0.636597281845318.


Trial 16 pruned at epoch 1.


[I 2025-10-23 16:38:42,903] Trial 16 pruned. 


Trial 17 pruned at epoch 1.


[I 2025-10-23 16:38:44,450] Trial 17 pruned. 


Trial 18 pruned at epoch 1.


[I 2025-10-23 16:38:45,553] Trial 18 pruned. 


Trial 19 pruned at epoch 1.


[I 2025-10-23 16:38:47,183] Trial 19 pruned. 


Trial 20, Epoch 10, Train Loss: 0.5306, Valid Loss: 0.4820
Trial 20, Epoch 20, Train Loss: 0.5241, Valid Loss: 0.4797
Trial 20, Epoch 30, Train Loss: 0.5208, Valid Loss: 0.4754
Trial 20, Epoch 40, Train Loss: 0.5180, Valid Loss: 0.4764
Trial 20, Epoch 50, Train Loss: 0.5161, Valid Loss: 0.4762
Early stopping triggered at epoch 55 for trial 20.


[I 2025-10-23 16:39:24,803] Trial 20 finished with value: 0.6330486707138062 and parameters: {'lr': 0.00020642043006316308, 'hidden_channels': 128, 'dense_dim': 256, 'dropout': 0.49783456492607947, 'num_gc_layers': 2}. Best is trial 4 with value: 0.636597281845318.


Trial 21 pruned at epoch 1.


[I 2025-10-23 16:39:27,199] Trial 21 pruned. 


Trial 22 pruned at epoch 1.


[I 2025-10-23 16:39:28,668] Trial 22 pruned. 


Trial 23 pruned at epoch 1.


[I 2025-10-23 16:39:30,588] Trial 23 pruned. 


Trial 24, Epoch 10, Train Loss: 0.5315, Valid Loss: 0.4831
Trial 24, Epoch 20, Train Loss: 0.5259, Valid Loss: 0.4804
Trial 24, Epoch 30, Train Loss: 0.5228, Valid Loss: 0.4801
Trial 24, Epoch 40, Train Loss: 0.5195, Valid Loss: 0.4774
Trial 24, Epoch 50, Train Loss: 0.5177, Valid Loss: 0.4772
Trial 24, Epoch 60, Train Loss: 0.5175, Valid Loss: 0.4803
Trial 24, Epoch 70, Train Loss: 0.5167, Valid Loss: 0.4801
Trial 24, Epoch 80, Train Loss: 0.5140, Valid Loss: 0.4792
Early stopping triggered at epoch 84 for trial 24.


[I 2025-10-23 16:41:04,715] Trial 24 finished with value: 0.633970144183028 and parameters: {'lr': 0.0001237071389803396, 'hidden_channels': 256, 'dense_dim': 256, 'dropout': 0.44217072142545116, 'num_gc_layers': 2}. Best is trial 4 with value: 0.636597281845318.


Trial 25 pruned at epoch 1.


[I 2025-10-23 16:41:06,219] Trial 25 pruned. 


Trial 26, Epoch 10, Train Loss: 0.5350, Valid Loss: 0.4856
Trial 26, Epoch 20, Train Loss: 0.5287, Valid Loss: 0.4818
Trial 26, Epoch 30, Train Loss: 0.5237, Valid Loss: 0.4789
Trial 26, Epoch 40, Train Loss: 0.5232, Valid Loss: 0.4792
Trial 26, Epoch 50, Train Loss: 0.5199, Valid Loss: 0.4775
Trial 26, Epoch 60, Train Loss: 0.5183, Valid Loss: 0.4764
Trial 26, Epoch 70, Train Loss: 0.5173, Valid Loss: 0.4790
Trial 26, Epoch 80, Train Loss: 0.5154, Valid Loss: 0.4776
Early stopping triggered at epoch 88 for trial 26.


[I 2025-10-23 16:42:43,439] Trial 26 finished with value: 0.6321091914354153 and parameters: {'lr': 0.00010182170609672667, 'hidden_channels': 256, 'dense_dim': 256, 'dropout': 0.40524717527736404, 'num_gc_layers': 3}. Best is trial 4 with value: 0.636597281845318.


Trial 27 pruned at epoch 1.


[I 2025-10-23 16:42:44,918] Trial 27 pruned. 


Trial 28, Epoch 10, Train Loss: 0.5287, Valid Loss: 0.4821
Trial 28, Epoch 20, Train Loss: 0.5232, Valid Loss: 0.4804
Trial 28, Epoch 30, Train Loss: 0.5220, Valid Loss: 0.4786
Trial 28, Epoch 40, Train Loss: 0.5186, Valid Loss: 0.4780
Trial 28, Epoch 50, Train Loss: 0.5183, Valid Loss: 0.4780
Trial 28, Epoch 60, Train Loss: 0.5158, Valid Loss: 0.4770
Early stopping triggered at epoch 68 for trial 28.


[I 2025-10-23 16:43:24,863] Trial 28 finished with value: 0.6309284484849627 and parameters: {'lr': 0.00015382625703583337, 'hidden_channels': 128, 'dense_dim': 256, 'dropout': 0.3622031044172216, 'num_gc_layers': 2}. Best is trial 4 with value: 0.636597281845318.


Trial 29, Epoch 10, Train Loss: 0.5373, Valid Loss: 0.4868
Trial 29, Epoch 20, Train Loss: 0.5282, Valid Loss: 0.4806
Trial 29, Epoch 30, Train Loss: 0.5251, Valid Loss: 0.4797
Trial 29, Epoch 40, Train Loss: 0.5221, Valid Loss: 0.4797
Trial 29, Epoch 50, Train Loss: 0.5214, Valid Loss: 0.4777
Trial 29, Epoch 60, Train Loss: 0.5200, Valid Loss: 0.4782
Trial 29, Epoch 70, Train Loss: 0.5174, Valid Loss: 0.4783
Trial 29, Epoch 80, Train Loss: 0.5177, Valid Loss: 0.4782
Trial 29, Epoch 90, Train Loss: 0.5167, Valid Loss: 0.4785
Trial 29, Epoch 100, Train Loss: 0.5153, Valid Loss: 0.4780


[I 2025-10-23 16:45:12,688] Trial 29 finished with value: 0.6344205596010943 and parameters: {'lr': 0.00010131123897784368, 'hidden_channels': 256, 'dense_dim': 256, 'dropout': 0.47151170065780657, 'num_gc_layers': 3}. Best is trial 4 with value: 0.636597281845318.


Trial 30 pruned at epoch 1.


[I 2025-10-23 16:45:13,806] Trial 30 pruned. 


Trial 31 pruned at epoch 1.


[I 2025-10-23 16:45:15,398] Trial 31 pruned. 


Trial 32 pruned at epoch 1.


[I 2025-10-23 16:45:16,399] Trial 32 pruned. 


Trial 33 pruned at epoch 1.


[I 2025-10-23 16:45:17,874] Trial 33 pruned. 


Trial 34, Epoch 10, Train Loss: 0.5341, Valid Loss: 0.4860
Trial 34, Epoch 20, Train Loss: 0.5266, Valid Loss: 0.4808
Trial 34, Epoch 30, Train Loss: 0.5231, Valid Loss: 0.4794
Trial 34, Epoch 40, Train Loss: 0.5199, Valid Loss: 0.4785
Trial 34, Epoch 50, Train Loss: 0.5194, Valid Loss: 0.4780
Trial 34, Epoch 60, Train Loss: 0.5174, Valid Loss: 0.4766
Trial 34, Epoch 70, Train Loss: 0.5171, Valid Loss: 0.4764
Trial 34, Epoch 80, Train Loss: 0.5154, Valid Loss: 0.4765
Trial 34, Epoch 90, Train Loss: 0.5154, Valid Loss: 0.4774
Trial 34, Epoch 100, Train Loss: 0.5134, Valid Loss: 0.4767


[I 2025-10-23 16:45:56,641] Trial 34 finished with value: 0.6338332615749807 and parameters: {'lr': 0.00013224539007653495, 'hidden_channels': 64, 'dense_dim': 256, 'dropout': 0.4115247796822336, 'num_gc_layers': 1}. Best is trial 4 with value: 0.636597281845318.


Trial 35, Epoch 10, Train Loss: 0.5345, Valid Loss: 0.4869
Trial 35, Epoch 20, Train Loss: 0.5256, Valid Loss: 0.4809
Trial 35, Epoch 30, Train Loss: 0.5246, Valid Loss: 0.4805
Trial 35, Epoch 40, Train Loss: 0.5226, Valid Loss: 0.4805
Trial 35, Epoch 50, Train Loss: 0.5206, Valid Loss: 0.4789
Trial 35, Epoch 60, Train Loss: 0.5184, Valid Loss: 0.4787
Trial 35, Epoch 70, Train Loss: 0.5177, Valid Loss: 0.4773
Trial 35, Epoch 80, Train Loss: 0.5168, Valid Loss: 0.4780
Trial 35, Epoch 90, Train Loss: 0.5166, Valid Loss: 0.4778
Trial 35, Epoch 100, Train Loss: 0.5148, Valid Loss: 0.4778


[I 2025-10-23 16:46:35,758] Trial 35 finished with value: 0.6329771749657422 and parameters: {'lr': 0.0001431085505114432, 'hidden_channels': 64, 'dense_dim': 256, 'dropout': 0.3880748219696136, 'num_gc_layers': 1}. Best is trial 4 with value: 0.636597281845318.


Trial 36 pruned at epoch 1.


[I 2025-10-23 16:46:36,646] Trial 36 pruned. 


Trial 37 pruned at epoch 1.


[I 2025-10-23 16:46:37,526] Trial 37 pruned. 


Trial 38, Epoch 10, Train Loss: 0.5471, Valid Loss: 0.4955
Trial 38, Epoch 20, Train Loss: 0.5299, Valid Loss: 0.4845
Trial 38, Epoch 30, Train Loss: 0.5266, Valid Loss: 0.4814
Trial 38, Epoch 40, Train Loss: 0.5257, Valid Loss: 0.4797
Trial 38, Epoch 50, Train Loss: 0.5232, Valid Loss: 0.4793
Trial 38, Epoch 60, Train Loss: 0.5220, Valid Loss: 0.4800
Trial 38, Epoch 70, Train Loss: 0.5205, Valid Loss: 0.4784
Trial 38, Epoch 80, Train Loss: 0.5198, Valid Loss: 0.4792
Trial 38, Epoch 90, Train Loss: 0.5205, Valid Loss: 0.4782
Trial 38, Epoch 100, Train Loss: 0.5172, Valid Loss: 0.4776


[I 2025-10-23 16:47:17,356] Trial 38 finished with value: 0.6332512478174548 and parameters: {'lr': 0.00010238489617897034, 'hidden_channels': 64, 'dense_dim': 256, 'dropout': 0.46217805206942736, 'num_gc_layers': 1}. Best is trial 4 with value: 0.636597281845318.


Trial 39 pruned at epoch 1.


[I 2025-10-23 16:47:18,278] Trial 39 pruned. 


Trial 40 pruned at epoch 1.


[I 2025-10-23 16:47:19,320] Trial 40 pruned. 


Trial 41 pruned at epoch 1.


[I 2025-10-23 16:47:20,207] Trial 41 pruned. 


Trial 42, Epoch 10, Train Loss: 0.5305, Valid Loss: 0.4814
Trial 42, Epoch 20, Train Loss: 0.5225, Valid Loss: 0.4785
Trial 42, Epoch 30, Train Loss: 0.5205, Valid Loss: 0.4791
Trial 42, Epoch 40, Train Loss: 0.5198, Valid Loss: 0.4783
Trial 42, Epoch 50, Train Loss: 0.5160, Valid Loss: 0.4773
Trial 42, Epoch 60, Train Loss: 0.5148, Valid Loss: 0.4779
Early stopping triggered at epoch 70 for trial 42.


[I 2025-10-23 16:48:36,832] Trial 42 finished with value: 0.6347263974891398 and parameters: {'lr': 0.00016308986897753982, 'hidden_channels': 256, 'dense_dim': 256, 'dropout': 0.3952458632835102, 'num_gc_layers': 1}. Best is trial 4 with value: 0.636597281845318.


Trial 43 pruned at epoch 1.


[I 2025-10-23 16:48:38,446] Trial 43 pruned. 


Trial 44 pruned at epoch 1.


[I 2025-10-23 16:48:39,963] Trial 44 pruned. 


Trial 45, Epoch 10, Train Loss: 0.5320, Valid Loss: 0.4833
Trial 45, Epoch 20, Train Loss: 0.5264, Valid Loss: 0.4795
Trial 45, Epoch 30, Train Loss: 0.5222, Valid Loss: 0.4782
Trial 45, Epoch 40, Train Loss: 0.5215, Valid Loss: 0.4784
Trial 45, Epoch 50, Train Loss: 0.5178, Valid Loss: 0.4787
Trial 45, Epoch 60, Train Loss: 0.5181, Valid Loss: 0.4769
Trial 45, Epoch 70, Train Loss: 0.5169, Valid Loss: 0.4782
Trial 45, Epoch 80, Train Loss: 0.5149, Valid Loss: 0.4782
Trial 45, Epoch 90, Train Loss: 0.5147, Valid Loss: 0.4773
Trial 45, Epoch 100, Train Loss: 0.5118, Valid Loss: 0.4775


[I 2025-10-23 16:50:30,146] Trial 45 finished with value: 0.6336959059009467 and parameters: {'lr': 0.00012143151316247146, 'hidden_channels': 256, 'dense_dim': 256, 'dropout': 0.4271227977257237, 'num_gc_layers': 1}. Best is trial 4 with value: 0.636597281845318.


Trial 46 pruned at epoch 1.


[I 2025-10-23 16:50:31,670] Trial 46 pruned. 


Trial 47 pruned at epoch 1.


[I 2025-10-23 16:50:33,174] Trial 47 pruned. 


Trial 48 pruned at epoch 1.


[I 2025-10-23 16:50:34,221] Trial 48 pruned. 


Trial 49 pruned at epoch 1.


[I 2025-10-23 16:50:35,136] Trial 49 pruned. 


Trial 50 pruned at epoch 1.


[I 2025-10-23 16:50:36,783] Trial 50 pruned. 


Trial 51 pruned at epoch 1.


[I 2025-10-23 16:50:37,903] Trial 51 pruned. 


Trial 52, Epoch 10, Train Loss: 0.5402, Valid Loss: 0.4876
Trial 52, Epoch 20, Train Loss: 0.5314, Valid Loss: 0.4837
Trial 52, Epoch 30, Train Loss: 0.5279, Valid Loss: 0.4806
Trial 52, Epoch 40, Train Loss: 0.5243, Valid Loss: 0.4791
Trial 52, Epoch 50, Train Loss: 0.5218, Valid Loss: 0.4780
Trial 52, Epoch 60, Train Loss: 0.5224, Valid Loss: 0.4802
Trial 52, Epoch 70, Train Loss: 0.5192, Valid Loss: 0.4781
Trial 52, Epoch 80, Train Loss: 0.5183, Valid Loss: 0.4793
Early stopping triggered at epoch 84 for trial 52.


[I 2025-10-23 16:52:08,002] Trial 52 finished with value: 0.6344442710457535 and parameters: {'lr': 0.0001264675905844007, 'hidden_channels': 256, 'dense_dim': 128, 'dropout': 0.42525667965217473, 'num_gc_layers': 1}. Best is trial 4 with value: 0.636597281845318.



✅ Optimization Finished!
Number of finished trials: 53
Best trial:
  Value (Max Macro F1 on Validation): 0.6366
  Params: 
    lr: 0.00018166245646355152
    hidden_channels: 256
    dense_dim: 128
    dropout: 0.4096553087655359
    num_gc_layers: 1


In [20]:
# --- 1. Get Best Hyperparameters ---
if 'study' not in locals(): raise NameError("Optuna 'study' object not found.")
best_params = study.best_params
print("📋 Using Best Hyperparameters:", best_params)

# --- 2. Instantiate Final Model ---
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
n_node_features = train_pyg_dataset[0].num_node_features
n_tasks = len(sider_tasks)

final_model = SiderGNN_PyG( # Make sure SiderGNN_PyG_Opt class is defined
    n_tasks=n_tasks, n_node_features=n_node_features,
    hidden_channels=best_params['hidden_channels'], dense_dim=best_params['dense_dim'],
    dropout=best_params['dropout'], num_gc_layers=best_params['num_gc_layers']
).to(device)

optimizer = optim.Adam(final_model.parameters(), lr=best_params['lr'])
criterion = nn.BCEWithLogitsLoss()
print(f"✅ Final GNN model instantiated on {device}.")

# --- 3. Final Training Loop ---
n_epochs_final = 100 # Adjust as needed (e.g., based on Optuna trial duration)
print(f"🚀 Training Final GNN for {n_epochs_final} epochs...")
for epoch in tqdm(range(1, n_epochs_final + 1), desc="Final Training"):
    train_loss = train_epoch(final_model, train_loader, criterion, optimizer, device) # Assumes train_epoch is defined
    # Optional: print occasional validation loss
    # if epoch % 20 == 0:
    #     valid_loss, _, _ = eval_model(final_model, valid_loader, criterion, device) # Assumes eval_model is defined
    #     print(f"Epoch {epoch}, Train Loss: {train_loss:.4f}, Valid Loss: {valid_loss:.4f}")

print("✅ Final Training complete.")

# --- 4. Final Evaluation on Test Set ---
print("⚙️ Evaluating Final Model on Test Set...")
# Assumes eval_model returns (avg_loss, logits_array, labels_array)
_, test_logits_final, _ = eval_model(final_model, test_loader, criterion, device)

sigmoid = lambda x: 1 / (1 + np.exp(-x))
y_test_prob_final = sigmoid(test_logits_final)
y_test_pred_final = (y_test_prob_final >= 0.5).astype(int)


metrics_test_final_gnn = evaluate_multilabel_model(
    y_test_true, y_test_pred_final, y_test_prob_final, "Test (Best GNN PyG)"
)

📋 Using Best Hyperparameters: {'lr': 0.00018166245646355152, 'hidden_channels': 256, 'dense_dim': 128, 'dropout': 0.4096553087655359, 'num_gc_layers': 1}
✅ Final GNN model instantiated on cpu.
🚀 Training Final GNN for 100 epochs...


Final Training: 100%|███████████████████████████████████████████████████████████████████████████████████| 100/100 [01:28<00:00,  1.12it/s]


✅ Final Training complete.
⚙️ Evaluating Final Model on Test Set...

📊 Multilabel Evaluation Metrics:
Nom :               : Test (Best GNN PyG)
Subset accuracy     : 0.0280
Hamming loss        : 0.2328
Micro F1            : 0.8175
Macro F1            : 0.6109
Weighted F1         : 0.7964
Micro ROC-AUC       : 0.8301
Macro ROC-AUC       : 0.5512


For this specific dataset and scaffold split, a simple GCN architecture, even after hyperparameter optimization, doesn't seem to provide a clear advantage over tuned ECFP-based models like XGBoost, particularly for predicting rare side effects. The inherent difficulty of the scaffold split and the severe label imbalance remain major challenges.

let's try a more complex GNN architecture using PyTorch Geometric. We'll make two main changes:

Use GATConv Layers: Graph Attention Network (GAT) layers allow nodes to weigh the importance of their neighbors' features, which can be more expressive than the simple averaging in GCNConv.

Include Edge Features: We'll add basic bond features (like bond type) to the graph representation, allowing the GNN to learn from bond information as well as atom information.

In [21]:
def smiles_to_pyg_graph_with_edges(smiles, y):
    """Converts SMILES string to PyTorch Geometric Data object with edge features."""
    mol = Chem.MolFromSmiles(smiles)
    if mol is None: return None

    # --- Atom Features (Using simpler features for clarity, adjust if needed) ---
    xs = []
    for atom in mol.GetAtoms():
        x = []
        x.append(atom.GetAtomicNum())
        x.append(atom.GetDegree())
        x.append(atom.GetFormalCharge())
        x.append(atom.GetNumRadicalElectrons())
        x.append(atom.GetHybridization())
        x.append(atom.GetIsAromatic())
        x.append(atom.IsInRing())
        # Add more features if desired...
        xs.append(x)
    x = torch.tensor(xs, dtype=torch.float).view(-1, len(xs[0])) # Node features

    # --- Edge Index and Edge Features ---
    edge_indices = []
    edge_attrs = []
    bond_types = [Chem.rdchem.BondType.SINGLE, Chem.rdchem.BondType.DOUBLE,
                  Chem.rdchem.BondType.TRIPLE, Chem.rdchem.BondType.AROMATIC]

    for bond in mol.GetBonds():
        i = bond.GetBeginAtomIdx()
        j = bond.GetEndAtomIdx()
        bond_type = bond_types.index(bond.GetBondType()) # Get index of bond type

        # Add edge in both directions
        edge_indices += [[i, j], [j, i]]
        # Add bond type feature for both directions
        edge_attrs += [[bond_type], [bond_type]]

    edge_index = torch.tensor(edge_indices, dtype=torch.long).t().contiguous()
    edge_attr = torch.tensor(edge_attrs, dtype=torch.float).view(-1, 1) # Edge features

    # --- Labels ---
    y_tensor = torch.tensor(y, dtype=torch.float).unsqueeze(0) # Shape [1, n_tasks]

    data = Data(x=x, edge_index=edge_index, edge_attr=edge_attr, y=y_tensor)
    return data

# --- Recreate PyTorch Geometric Datasets using the new function ---
class SIDERPyGDatasetWithEdges(Dataset): # Renamed for clarity
    def __init__(self, dc_dataset):
        self.smiles = dc_dataset.ids
        self.labels = dc_dataset.y
        self.graphs = []
        print(f"Processing {len(self.smiles)} SMILES for PyG graphs with edge features...")
        for i in tqdm(range(len(self.smiles)), desc="Creating Graphs"):
            graph = smiles_to_pyg_graph_with_edges(self.smiles[i], self.labels[i])
            if graph is not None:
                self.graphs.append(graph)
        print(f"Successfully created {len(self.graphs)} graphs.")


    def __len__(self):
        return len(self.graphs)

    def __getitem__(self, idx):
        return self.graphs[idx]

print("🧬 Re-creating PyTorch Geometric datasets with edge features...")
# Make sure train_raw_ds, valid_raw_ds, test_raw_ds exist from previous loading
train_pyg_dataset_edge = SIDERPyGDatasetWithEdges(train_raw_ds)
valid_pyg_dataset_edge = SIDERPyGDatasetWithEdges(valid_raw_ds)
test_pyg_dataset_edge = SIDERPyGDatasetWithEdges(test_raw_ds)
print("✅ PyG Datasets with edge features created.")

# --- Create DataLoaders ---
from torch_geometric.loader import DataLoader as PyGDataLoader
batch_size = 64
train_loader_edge = PyGDataLoader(train_pyg_dataset_edge, batch_size=batch_size, shuffle=True)
valid_loader_edge = PyGDataLoader(valid_pyg_dataset_edge, batch_size=batch_size, shuffle=False)
test_loader_edge = PyGDataLoader(test_pyg_dataset_edge, batch_size=batch_size, shuffle=False)
print("✅ PyG DataLoaders with edge features created.")

# --- Retrieve y_true again (if needed, ensure consistency) ---
# Assuming y_train_true, y_valid_true, y_test_true are already correctly defined

🧬 Re-creating PyTorch Geometric datasets with edge features...
Processing 1141 SMILES for PyG graphs with edge features...


Creating Graphs: 100%|███████████████████████████████████████████████████████████████████████████████| 1141/1141 [00:01<00:00, 888.52it/s]


Successfully created 1141 graphs.
Processing 143 SMILES for PyG graphs with edge features...


Creating Graphs: 100%|█████████████████████████████████████████████████████████████████████████████████| 143/143 [00:00<00:00, 808.16it/s]


Successfully created 143 graphs.
Processing 143 SMILES for PyG graphs with edge features...


Creating Graphs: 100%|█████████████████████████████████████████████████████████████████████████████████| 143/143 [00:00<00:00, 609.79it/s]

Successfully created 143 graphs.
✅ PyG Datasets with edge features created.
✅ PyG DataLoaders with edge features created.





In [24]:
 # --- GAT Model Definition ---
class SiderGAT_PyG(nn.Module):
    def __init__(self, n_tasks, n_node_features, n_edge_features, hidden_channels=128, num_heads=4, dense_dim=256, dropout=0.3):
        super(SiderGAT_PyG, self).__init__()
        self.dropout_p = dropout
        self.n_tasks = n_tasks

        # GAT Layers (Using 2 layers, with multiple attention heads)
        # Note: Output dimension is hidden_channels * num_heads for the first layer if concat=True
        self.conv1 = GATConv(n_node_features, hidden_channels, heads=num_heads, dropout=dropout, edge_dim=n_edge_features)
        # For the second layer, input dimension must match output of first layer
        self.conv2 = GATConv(hidden_channels * num_heads, hidden_channels, heads=1, concat=False, dropout=dropout, edge_dim=n_edge_features) # Output is just hidden_channels

        # Dense Layers after pooling
        self.dense1 = nn.Linear(hidden_channels, dense_dim) # Input from pooling
        self.dropout_layer = nn.Dropout(p=self.dropout_p)
        self.output_layer = nn.Linear(dense_dim, n_tasks)

    def forward(self, data):
        x, edge_index, edge_attr, batch = data.x, data.edge_index, data.edge_attr, data.batch

        # Graph Attn Convolutions
        x = self.conv1(x, edge_index, edge_attr=edge_attr)
        x = F.elu(x) # ELU activation often used with GAT
        x = F.dropout(x, p=self.dropout_p, training=self.training) # Dropout between layers

        x = self.conv2(x, edge_index, edge_attr=edge_attr)
        x = F.elu(x)

        # Global Pooling (Mean pooling is common)
        x_pooled = global_mean_pool(x, batch) # Shape: [batch_size, hidden_channels]

        # Dense layers
        x = self.dense1(x_pooled)
        x = F.relu(x) # ReLU here
        x = self.dropout_layer(x)

        # Output logits
        logits = self.output_layer(x)
        return logits


In [25]:
# --- Instantiate GAT Model, Loss, Optimizer ---
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"Using device: {device}")

# Get feature dimensions from the dataset
if not train_pyg_dataset_edge: raise ValueError("Train dataset is empty!")
n_node_features = train_pyg_dataset_edge[0].num_node_features
n_edge_features = train_pyg_dataset_edge[0].num_edge_features
n_tasks = len(sider_tasks)

# Use hyperparameters potentially better suited for GAT
lr_gat = 0.0005 # GATs sometimes benefit from slightly lower LR initially
hidden_gat = 128
heads_gat = 4
dense_gat = 256
dropout_gat = 0.3

model_gat = SiderGAT_PyG(
    n_tasks=n_tasks,
    n_node_features=n_node_features,
    n_edge_features=n_edge_features,
    hidden_channels=hidden_gat,
    num_heads=heads_gat,
    dense_dim=dense_gat,
    dropout=dropout_gat
).to(device)

optimizer_gat = optim.Adam(model_gat.parameters(), lr=lr_gat)
criterion = nn.BCEWithLogitsLoss()

print("✅ PyTorch Geometric GAT model instantiated.")

Using device: cpu
✅ PyTorch Geometric GAT model instantiated.


In [26]:
# --- Training Loop (Reuse train_epoch, eval_model functions) ---
n_epochs_gat = 100 # Start with 100, monitor validation loss

print(f"🚀 Training PyTorch Geometric GAT for {n_epochs_gat} epochs...")
# Ensure train_epoch and eval_model functions are defined from previous cells

best_valid_loss_gat = float('inf')
patience_counter_gat = 0
patience_gat = 20 # Early stopping patience

for epoch in range(1, n_epochs_gat + 1):
    train_loss = train_epoch(model_gat, train_loader_edge, criterion, optimizer_gat, device)
    valid_loss, _, _ = eval_model(model_gat, valid_loader_edge, criterion, device)

    print(f"Epoch {epoch:03d}, Train Loss: {train_loss:.4f}, Valid Loss: {valid_loss:.4f}")

    # Simple Early Stopping
    if valid_loss < best_valid_loss_gat:
        best_valid_loss_gat = valid_loss
        patience_counter_gat = 0
        # Optional: Save the best model state
        # torch.save(model_gat.state_dict(), 'best_gat_model.pth')
    else:
        patience_counter_gat += 1

    if patience_counter_gat >= patience_gat:
        print(f"Early stopping triggered at epoch {epoch}.")
        break

print("✅ GAT Training complete.")
# Optional: Load best model if saved
# model_gat.load_state_dict(torch.load('best_gat_model.pth'))

# --- Final Evaluation for GAT ---
print("⚙️ Generating final GAT predictions...")
_, train_logits_final_gat, _ = eval_model(model_gat, train_loader_edge, criterion, device)
_, valid_logits_final_gat, _ = eval_model(model_gat, valid_loader_edge, criterion, device)
_, test_logits_final_gat, _ = eval_model(model_gat, test_loader_edge, criterion, device)

# Convert final logits to probabilities and predictions
sigmoid = lambda x: 1 / (1 + np.exp(-x))
y_train_prob_gat = sigmoid(train_logits_final_gat)
y_valid_prob_gat = sigmoid(valid_logits_final_gat)
y_test_prob_gat = sigmoid(test_logits_final_gat)

y_train_pred_gat = (y_train_prob_gat >= 0.5).astype(int)
y_valid_pred_gat = (y_valid_prob_gat >= 0.5).astype(int)
y_test_pred_gat = (y_test_prob_gat >= 0.5).astype(int)

print("\n=== PyTorch Geometric GAT Evaluation ===")
# Ensure evaluate_multilabel_model function is defined
metrics_train_gat = evaluate_multilabel_model(y_train_true, y_train_pred_gat, y_train_prob_gat, "Train (GNN GAT)")
metrics_valid_gat = evaluate_multilabel_model(y_valid_true, y_valid_pred_gat, y_valid_prob_gat, "Validation (GNN GAT)")
metrics_test_gat = evaluate_multilabel_model(y_test_true, y_test_pred_gat, y_test_prob_gat, "Test (GNN GAT)")

🚀 Training PyTorch Geometric GAT for 100 epochs...
Epoch 001, Train Loss: 0.5982, Valid Loss: 0.4817
Epoch 002, Train Loss: 0.5433, Valid Loss: 0.4846
Epoch 003, Train Loss: 0.5349, Valid Loss: 0.4846
Epoch 004, Train Loss: 0.5313, Valid Loss: 0.4809
Epoch 005, Train Loss: 0.5289, Valid Loss: 0.4796
Epoch 006, Train Loss: 0.5275, Valid Loss: 0.4751
Epoch 007, Train Loss: 0.5253, Valid Loss: 0.4846
Epoch 008, Train Loss: 0.5266, Valid Loss: 0.4781
Epoch 009, Train Loss: 0.5235, Valid Loss: 0.4817
Epoch 010, Train Loss: 0.5235, Valid Loss: 0.4818
Epoch 011, Train Loss: 0.5235, Valid Loss: 0.4806
Epoch 012, Train Loss: 0.5228, Valid Loss: 0.4748
Epoch 013, Train Loss: 0.5218, Valid Loss: 0.4792
Epoch 014, Train Loss: 0.5200, Valid Loss: 0.4791
Epoch 015, Train Loss: 0.5194, Valid Loss: 0.4774
Epoch 016, Train Loss: 0.5188, Valid Loss: 0.4795
Epoch 017, Train Loss: 0.5222, Valid Loss: 0.4767
Epoch 018, Train Loss: 0.5213, Valid Loss: 0.4745
Epoch 019, Train Loss: 0.5207, Valid Loss: 0.4769

Let's make bayesian optimization

In [None]:
# --- Objective Function ---
def objective_gat(trial):
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    n_node_features = train_pyg_dataset_edge[0].num_node_features
    n_edge_features = train_pyg_dataset_edge[0].num_edge_features
    n_tasks = len(sider_tasks)

    # Suggest Hyperparameters
    lr = trial.suggest_float('lr', 1e-4, 1e-2, log=True)
    hidden_channels = trial.suggest_categorical('hidden_channels', [64, 128, 256])
    num_heads = trial.suggest_categorical('num_heads', [2, 4, 8, 16])
    dense_dim = trial.suggest_categorical('dense_dim', [128, 256, 512])
    dropout = trial.suggest_float('dropout', 0.1, 0.3)
    n_epochs_objective = 100 # Fixed duration for trial

    # Instantiate Model, Loss, Optimizer
    model = SiderGAT_PyG( # Assumes SiderGAT_PyG class is defined
        n_tasks=n_tasks, n_node_features=n_node_features, n_edge_features=n_edge_features,
        hidden_channels=hidden_channels, num_heads=num_heads, dense_dim=dense_dim, dropout=dropout
    ).to(device)
    optimizer = optim.Adam(model.parameters(), lr=lr)
    criterion = nn.BCEWithLogitsLoss()

    # Training Loop with simple early stopping
    best_valid_loss = float('inf')
    patience_counter = 0
    patience = 15 # Stop after 15 epochs without validation loss improvement

    for epoch in range(1, n_epochs_objective + 1):
        train_loss = train_epoch(model, train_loader_edge, criterion, optimizer, device) # Assumes train_epoch defined
        valid_loss, valid_logits, valid_labels = eval_model(model, valid_loader_edge, criterion, device) # Assumes eval_model defined

        if valid_loss < best_valid_loss:
            best_valid_loss = valid_loss
            patience_counter = 0
        else:
            patience_counter += 1

        trial.report(valid_loss, epoch)
        if trial.should_prune():
            del model, optimizer, criterion; gc.collect(); torch.cuda.empty_cache()
            raise optuna.exceptions.TrialPruned()
        if patience_counter >= patience:
            break # Early stopping

    # Evaluate on Validation Set (using last epoch's logits/labels from eval_model)
    sigmoid = lambda x: 1 / (1 + np.exp(-x))
    valid_probs = sigmoid(valid_logits)
    valid_preds = (valid_probs >= 0.5).astype(int)
    if valid_labels.ndim == 3 and valid_labels.shape[1] == 1: valid_labels = valid_labels.squeeze(1)
    macro_f1_valid = f1_score(valid_labels, valid_preds, average="macro", zero_division=0)

    # Clean up
    del model, optimizer, criterion, valid_logits, valid_probs, valid_preds, valid_labels
    gc.collect()
    if device == 'cuda': torch.cuda.empty_cache()

    return macro_f1_valid

# --- Create and Run Study ---
study_name_gat = "sider-gnn-gat-pyg-optimization"
storage_name_gat = f"sqlite:///{study_name_gat}.db"
study_gat = optuna.create_study(study_name=study_name_gat, storage=storage_name_gat, load_if_exists=True, direction="maximize")
n_trials_gat = 50 # Adjust as needed

print(f"🚀 Starting Optuna Bayesian Optimization for GAT ({n_trials_gat} trials)...")
study_gat.optimize(objective_gat, n_trials=n_trials_gat)

# --- Output Best Results ---
print("\n✅ GAT Optimization Finished!")
print("Best GAT trial:")
best_trial_gat = study_gat.best_trial
print(f"  Value (Max Macro F1 on Validation): {best_trial_gat.value:.4f}")
print("  Params: ", best_trial_gat.params)

# --- Remember to retrain the best model separately and evaluate on the test set ---

[I 2025-10-23 17:48:59,942] Using an existing study with name 'sider-gnn-gat-pyg-optimization' instead of creating a new one.


🚀 Starting Optuna Bayesian Optimization for GAT (50 trials)...


[I 2025-10-23 17:49:52,995] Trial 1 finished with value: 0.6328113409179352 and parameters: {'lr': 0.000644397751444598, 'hidden_channels': 64, 'num_heads': 8, 'dense_dim': 256, 'dropout': 0.10972932408803958}. Best is trial 1 with value: 0.6328113409179352.
