In [1]:
%cd E:\SRP
import os,sys
notebook_dir = os.getcwd()
path = os.path.abspath(os.path.join(notebook_dir, "Code"))
sys.path.append(path)
import optuna
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import roc_auc_score,roc_curve,classification_report
from generateSplits import generateSplits
import pandas as pd
from trainModel import trainModel
from Dataset import ModelDataset
from model import Model
from torch.utils.data import DataLoader
import torch
import matplotlib.pyplot as plt
from optuna.storages import RDBStorage,RetryFailedTrialCallback
import logging
import gc

E:\SRP


In [2]:
# Pick the GPU only when CUDA is really usable. `is_available` has to be
# *called*: the bare function object is always truthy, which would select
# "cuda" on CPU-only machines and fail later when tensors are moved there.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# Load the cohort metadata and split it into train / hold-out validation frames.
# NOTE(review): 0.2 is presumably the validation fraction - confirm in generateSplits.
metadata = pd.read_csv("Datasets/BreastDCEDL_spy1/BreastDCEDL_spy1_metadata.csv")
train_df,val_df = generateSplits(metadata,0.2,seed=42)

# Keep only the label (pCR) and the receptor columns, indexed by patient id.
train_df = train_df[["pid","pCR","ER","PR","HER2"]].set_index("pid",drop=True)
val_df = val_df[["pid","pCR","ER","PR","HER2"]].set_index("pid",drop=True)

# 4-fold stratified splitter with a fixed seed, so every trial of the
# hyperparameter search is scored on the same folds.
skf = StratifiedKFold(n_splits=4,shuffle=True,random_state=42)

# File name of the local SQLite copy of the Optuna study.
backup_path = "study_backup.db"

In [None]:
def evaluate(model:torch.nn.Module,val_loader:torch.utils.data.DataLoader):
    """Return the ROC AUC of ``model`` over every batch in ``val_loader``.

    The model is switched to eval mode and moved to the notebook-level
    ``device``. Each batch must unpack as ``(images, mols, labels)``; the
    softmax probability in column 1 of the logits is used as the score that
    is ranked against the labels.

    Returns:
        The result of ``roc_auc_score`` on the collected labels and scores.
    """
    model.eval()
    model.to(device)

    all_scores, all_labels = [], []
    # Gradients are not needed for scoring.
    with torch.no_grad():
        for batch_images, batch_mols, batch_labels in val_loader:
            batch_images = batch_images.to(device)
            batch_mols = batch_mols.to(device)
            batch_logits = model(batch_images, batch_mols)
            class1_prob = torch.softmax(batch_logits, dim=1)[:, 1]
            all_scores.extend(class1_prob.cpu().numpy())
            all_labels.extend(batch_labels.cpu().numpy())

    return roc_auc_score(all_labels, all_scores)
  

In [4]:
def objective(trial:optuna.Trial):
    """Optuna objective: average per-fold score for one hyperparameter set.

    Samples learning rate, weight decay, batch size and optimiser (plus
    momentum when the optimiser is SGD), then trains a fresh ``Model`` on
    each fold produced by the notebook-level ``skf`` over ``train_df``
    (stratified on ``pCR``). After every fold the score returned by
    ``trainModel`` is reported to the trial so the pruner can stop it early.

    Returns:
        The arithmetic mean of the fold scores.

    Raises:
        optuna.TrialPruned: when the pruner decides to stop after a fold.
    """
    optimiser_registry = {"Adam":torch.optim.Adam,"SGD":torch.optim.SGD,"AdamW":torch.optim.AdamW}

    # Search space. Suggestions are made in a fixed order; "momentum" is a
    # conditional parameter that only exists for SGD trials.
    lr = trial.suggest_float("lr",1e-5,1e-3,log=True)
    weight_decay = trial.suggest_float("weight_decay", 1e-8, 1e-1, log=True)
    batch_size = trial.suggest_categorical("batch_size",[4,8,16,32,64,128])
    optimiser_name = trial.suggest_categorical("optimiser_name", ["Adam", "SGD","AdamW"])

    optimiser_kwargs = {"lr": lr, "weight_decay": weight_decay}
    if optimiser_name == "SGD":
        optimiser_kwargs["momentum"] = trial.suggest_float("momentum",0.0,1.0)
    build_optimiser = optimiser_registry[optimiser_name]

    fold_scores = []
    fold_splits = skf.split(train_df, train_df["pCR"])
    for fold_number, (train_index, val_index) in enumerate(fold_splits, start=1):
        # NOTE(review): class_samples presumably sets per-class sampling
        # counts - confirm in ModelDataset.
        train_dataset = ModelDataset(
            train_df.iloc[train_index],
            class_samples={0:3,1:8},
            loading_bar=False,
            caching=True,
        )
        train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)

        # A new model and optimiser per fold so no weights leak between folds.
        model = Model()
        optimiser = build_optimiser(model.parameters(), **optimiser_kwargs)

        val_dataset = ModelDataset(
            train_df.iloc[val_index],
            class_samples={0:1,1:1},
            loading_bar=False,
            caching=True,
        )
        val_loader = DataLoader(val_dataset, batch_size=batch_size)

        model, fold_score = trainModel(
            model,
            train_loader,
            optimiser,
            device=device,
            num_epochs=20,
            val_loader=val_loader,
            patience=5,
        )
        fold_scores.append(fold_score)
        print(f"ROC_AUC = {fold_score}")

        # The step is the 1-based fold number (== number of scores so far).
        trial.report(float(fold_score), step=fold_number)
        if trial.should_prune():
            raise optuna.TrialPruned()

    mean_score = sum(fold_scores) / len(fold_scores)
    print(mean_score)
    return mean_score

In [None]:
# The database URL lives in a separate file so it is not stored in the notebook.
# Strip it: a trailing newline left by an editor would otherwise become part
# of the connection string.
with open("Code/url.txt","r") as f:
    url = f.read().strip()

# RetryFailedTrialCallback is only invoked for *stale* trials, and Optuna only
# detects stale trials when heartbeats are enabled. Without heartbeat_interval
# the callback is never called. Values are in seconds.
storage = RDBStorage(
    url=url,
    heartbeat_interval=60,
    grace_period=120,
    failed_trial_callback=RetryFailedTrialCallback(max_retry=3)
)

# Multivariate TPE; gamma treats the better half of the finished trials
# (at least one) as the "good" group.
sampler = optuna.samplers.TPESampler(multivariate=True,gamma=lambda n:max(1, n // 2))
# No pruning during the first 5 trials, nor during the first reported steps of a trial.
pruner = optuna.pruners.MedianPruner(n_startup_trials=5,n_warmup_steps=2,)

# Route Optuna's log records to stdout only: drop whatever handlers are
# attached (iterating over a copy, since the list is mutated) and add one
# stdout handler to the same logger object.
optuna_logger = optuna.logging.get_logger("optuna")
for handler in optuna_logger.handlers[:]:
    optuna_logger.removeHandler(handler)
optuna_logger.addHandler(logging.StreamHandler(sys.stdout))

# Resume the study when it already exists in the storage, otherwise create it.
study = optuna.create_study(
    storage=storage,
    sampler = sampler,
    pruner=pruner,
    study_name="SRP_Model_Study",
    load_if_exists=True,
    direction="maximize")

  failed_trial_callback=RetryFailedTrialCallback(max_retry=3)


Using an existing study with name 'SRP_Model_Study' instead of creating a new one.


## Optimise Hyperparameters

In [12]:
study.optimize(objective,n_trials=1,show_progress_bar=True)


def release_sqlite_studies():
    """Delete notebook globals that reference SQLite-backed Optuna studies.

    Runs inside a function so that no loop variable survives as a global
    alias of a study; such an alias would keep the study (and its open
    database file) alive across ``gc.collect()``.

    Returns:
        The list of global names that were removed.
    """
    released = []
    for name, value in list(globals().items()):
        if not isinstance(value, optuna.study.Study):
            continue
        backend = value._storage
        # Optuna may wrap an RDBStorage in a caching layer that keeps the real
        # storage in `_backend`; unwrap it when present (private API - verify
        # against the installed Optuna version).
        backend = getattr(backend, "_backend", backend)
        # Only SQLite-backed studies are dropped, never the remote one.
        if isinstance(backend, optuna.storages.RDBStorage) and \
           str(backend.url).startswith("sqlite:///"):
            del globals()[name]
            released.append(name)
    return released


def backup_study():
    """Replace the local SQLite backup with a fresh copy of the remote study."""
    if os.path.exists(backup_path):
        os.remove(backup_path)
    optuna.copy_study(
        from_study_name="SRP_Model_Study",
        from_storage=storage,
        to_storage=f"sqlite:///{backup_path}"
    )


try:
    backup_study()
except PermissionError:
    # The old backup file could not be removed. Drop every reference to
    # SQLite-backed studies, collect garbage, then try once more.
    print("PermissionError: deleting reference to study")
    release_sqlite_studies()
    gc.collect()
    backup_study()

  0%|          | 0/1 [00:00<?, ?it/s]

Dataset initialised with 409 entries.
Dataset initialised with 31 entries.
Epoch 0 Done. Avg Loss: 7.7963
Epoch 1 Done. Avg Loss: 1.2581
Epoch 2 Done. Avg Loss: 0.6034
Epoch 3 Done. Avg Loss: 0.3771
Epoch 4 Done. Avg Loss: 0.2972
Epoch 5 Done. Avg Loss: 0.2124
Epoch 6 Done. Avg Loss: 0.1779
Epoch 7 Done. Avg Loss: 0.1339
Epoch 8 Done. Avg Loss: 0.0911
Epoch 9 Done. Avg Loss: 0.0781
Epoch 10 Done. Avg Loss: 0.0440
Epoch 11 Done. Avg Loss: 0.0375
Epoch 12 Done. Avg Loss: 0.0313
Early stopping triggered at epoch 12 (best AUC: 0.5543).
ROC_AUC = 0.5543478260869565
Dataset initialised with 409 entries.
Dataset initialised with 31 entries.
Epoch 0 Done. Avg Loss: 6.6724
Epoch 1 Done. Avg Loss: 1.3970
Epoch 2 Done. Avg Loss: 0.6514
Epoch 3 Done. Avg Loss: 0.4849
Epoch 4 Done. Avg Loss: 0.3666
Epoch 5 Done. Avg Loss: 0.3136
Early stopping triggered at epoch 5 (best AUC: 0.5000).
ROC_AUC = 0.5
Dataset initialised with 404 entries.
Dataset initialised with 31 entries.
Epoch 0 Done. Avg Loss: 6.9

[I 2025-08-21 23:30:41,370] A new study created in RDB with name: SRP_Model_Study


StorageInternalError: An exception is raised during the commit. This typically happens due to invalid data in the commit, e.g. exceeding max length. 

In [11]:
# Refresh the local backup: delete the previous SQLite file when there is one,
# then copy the study from `storage` into a new database at the same path.
if os.path.exists(backup_path):
    os.remove(backup_path)

optuna.copy_study(
    to_storage="sqlite:///{}".format(backup_path),
    from_storage=storage,
    from_study_name="SRP_Model_Study",
)

A new study created in RDB with name: SRP_Model_Study


[I 2025-08-21 20:27:32,082] A new study created in RDB with name: SRP_Model_Study


### Free up backup study

In [10]:
def release_sqlite_studies():
    """Delete notebook globals that reference SQLite-backed Optuna studies.

    Runs inside a function so that no loop variable survives as a global
    alias of a study; such an alias would keep the study (and its open
    database file) alive across ``gc.collect()``.

    Returns:
        The list of global names that were removed.
    """
    released = []
    for name, value in list(globals().items()):
        if not isinstance(value, optuna.study.Study):
            continue
        backend = value._storage
        # Optuna may wrap an RDBStorage in a caching layer that keeps the real
        # storage in `_backend`; unwrap it when present (private API - verify
        # against the installed Optuna version).
        backend = getattr(backend, "_backend", backend)
        # only delete if the study is backed by SQLite (not your Aiven storage)
        if isinstance(backend, optuna.storages.RDBStorage) and \
           str(backend.url).startswith("sqlite:///"):
            del globals()[name]
            released.append(name)
    return released


release_sqlite_studies()
gc.collect()


7004

In [None]:
def retry_failed_trials(study: optuna.Study, objective_func):
    """Re-run every FAILED trial of ``study`` with its original parameters.

    Each failed trial's parameters are queued with ``study.enqueue_trial``,
    so the objective's own ``suggest_*`` calls return the queued values while
    keeping the distributions the study already uses. Re-suggesting them with
    degenerate ranges (non-log floats, single-choice categoricals) would
    conflict with the distributions recorded for the same parameter names.

    The number of the trial being retried is stored on the new trial as the
    user attribute ``original_trial_number``.

    Args:
        study: Study whose failed trials should be retried.
        objective_func: Objective callable, invoked once per retried trial.

    Note:
        ``study.optimize`` runs queued trials oldest-first. If other trials
        are already waiting in the queue they are executed before the one
        enqueued here.
    """
    failed_trials = [t for t in study.trials if t.state == optuna.trial.TrialState.FAIL]
    print(f"Found {len(failed_trials)} failed trials. Retrying...")

    for failed in failed_trials:
        print(f"Retrying trial #{failed.number} with params {failed.params}")
        study.enqueue_trial(
            failed.params,
            user_attrs={"original_trial_number": failed.number},
        )
        study.optimize(objective_func, n_trials=1)

    print("Retried all failed trials.")


In [None]:
# Re-run each FAILED trial of the study through the regular objective.
retry_failed_trials(study=study, objective_func=objective)

## Delete study

In [19]:
# Destructive: deletes the study from `storage`. Only the exact answer "y"
# proceeds; any other input leaves the study untouched.
if input("Confirm delete? [y/n]") == "y":
    optuna.delete_study(
        storage=storage,
        study_name="SRP_Model_Study",
    )