In [27]:
%cd E:\SRP
import os,sys
notebook_dir = os.getcwd()
path = os.path.abspath(os.path.join(notebook_dir, "Code"))
sys.path.append(path)
import optuna
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import roc_auc_score,roc_curve,classification_report
from generateSplits import generateSplits
import pandas as pd
from trainModel import trainModel
from Dataset import ModelDataset
from model import Model
from torch.utils.data import DataLoader
import torch
import matplotlib.pyplot as plt
from optuna.storages import RDBStorage,RetryFailedTrialCallback
import logging
import gc

E:\SRP


In [12]:
device = torch.device("cuda") if torch.cuda.is_available else torch.device("cpu")

metadata = pd.read_csv("Datasets/BreastDCEDL_spy1/BreastDCEDL_spy1_metadata.csv")
train_df,val_df = generateSplits(metadata,0.2,seed=42)
train_df = train_df[["pid","pCR","ER","PR","HER2"]].set_index("pid",drop=True)
val_df = val_df[["pid","pCR","ER","PR","HER2"]].set_index("pid",drop=True)
skf = StratifiedKFold(n_splits=4,shuffle=True,random_state=42)
backup_path = "study_backup.db"

In [4]:
def evaluate(model:torch.nn.Module,val_loader:torch.utils.data.DataLoader):
    model.eval()
    model.to(device)
    
    y_score = []
    y_true = []
    y_pred = []
    with torch.no_grad():
        for images,mols,labels in val_loader:
            images = images.to(device)
            mols = mols.to(device)
            logits = model(images,mols)
            scores = torch.nn.functional.softmax(logits,dim=1)[:,1]
            y_score.extend(scores.cpu().numpy())
            y_true.extend(labels.cpu().numpy())
    
    return roc_auc_score(y_true,y_score)
  

In [5]:
def objective(trial:optuna.Trial):
    optimisers = {"Adam":torch.optim.Adam,"SGD":torch.optim.SGD,"AdamW":torch.optim.AdamW}
    lr = trial.suggest_float("lr",1e-5,1e-3,log=True)
    weight_decay = trial.suggest_categorical("weight_decay", [0.1, 0.01, 0.001, 0.0001])
    batch_size = trial.suggest_categorical("batch_size",[4,8,16,32,64])
    optimiser_name = trial.suggest_categorical("optimiser_name", ["Adam", "SGD","AdamW"])
    optimiser_class = optimisers[optimiser_name]
    
    scores = []
    
    for train_index,val_index in skf.split(train_df, train_df["pCR"]):
        fold_train_df = train_df.iloc[train_index]
        fold_val_df = train_df.iloc[val_index]
        fold_train_dataset = ModelDataset(fold_train_df,class_samples={0:3,1:8},loading_bar=False)
        fold_train_loader = DataLoader(fold_train_dataset,batch_size=batch_size,shuffle=True)
        model = Model()
        optimiser = optimiser_class(model.parameters(),lr=lr,weight_decay=weight_decay)
        fold_val_dataset = ModelDataset(fold_val_df,class_samples={0:1,1:1},loading_bar=False)
        fold_val_loader = DataLoader(fold_val_dataset,batch_size=batch_size)
        model,score = trainModel(model,fold_train_loader,optimiser,device=device,num_epochs=20,val_loader=fold_val_loader,patience=5)
        scores.append(score)
        print(f"ROC_AUC = {score}")
        
        trial.report(float(score),step=len(scores))
        if trial.should_prune():
            raise optuna.TrialPruned()
 
        
    avg_score = sum(scores)/len(scores)
    print(avg_score)
    return avg_score

In [7]:
with open("Code/url.txt","r") as f:
    url = f.read()

storage = RDBStorage(
    url=url,
    failed_trial_callback=RetryFailedTrialCallback(max_retry=3)
)

pruner = optuna.pruners.MedianPruner(n_startup_trials=5,n_warmup_steps=2,)

optuna.logging.get_logger("optuna").addHandler(logging.StreamHandler(sys.stdout))

study = optuna.create_study(
    storage=storage,
    pruner=pruner,
    study_name="SRP_Model_Study",
    load_if_exists=True,
    direction="maximize")

  failed_trial_callback=RetryFailedTrialCallback(max_retry=3)
[I 2025-08-16 23:33:23,803] A new study created in RDB with name: SRP_Model_Study


A new study created in RDB with name: SRP_Model_Study


In [None]:
study.optimize(objective,n_trials=1,show_progress_bar=True)

if os.path.exists(backup_path):
    os.remove(backup_path)

optuna.copy_study(
    from_study_name="SRP_Model_Study",
    from_storage=storage,
    to_storage=f"sqlite:///{backup_path}"
)

  0%|          | 0/1 [00:00<?, ?it/s]

Dataset initialised with 409 entries.
Dataset initialised with 31 entries.
Epoch 0 Done. Avg Loss: 1.7968
Epoch 1 Done. Avg Loss: 0.5433
Epoch 2 Done. Avg Loss: 0.4174
Epoch 3 Done. Avg Loss: 0.2817
Epoch 4 Done. Avg Loss: 0.1302
Epoch 5 Done. Avg Loss: 0.0813
Epoch 6 Done. Avg Loss: 0.0699
Epoch 7 Done. Avg Loss: 0.0429
Early stopping triggered at epoch 7 (best AUC: 0.5978).
ROC_AUC = 0.5978260869565217
Dataset initialised with 409 entries.
Dataset initialised with 31 entries.
Epoch 0 Done. Avg Loss: 1.1548
Epoch 1 Done. Avg Loss: 0.5221
Epoch 2 Done. Avg Loss: 0.3890
Epoch 3 Done. Avg Loss: 0.2401
Epoch 4 Done. Avg Loss: 0.1051
Epoch 5 Done. Avg Loss: 0.0736
Epoch 6 Done. Avg Loss: 0.0548
Epoch 7 Done. Avg Loss: 0.0423
Epoch 8 Done. Avg Loss: 0.0579
Epoch 9 Done. Avg Loss: 0.0474
Epoch 10 Done. Avg Loss: 0.0252
Epoch 11 Done. Avg Loss: 0.0187
Epoch 12 Done. Avg Loss: 0.0523
Early stopping triggered at epoch 12 (best AUC: 0.5109).
ROC_AUC = 0.5108695652173912
Dataset initialised with 

In [34]:
if os.path.exists(backup_path):
    os.remove(backup_path)

optuna.copy_study(
    from_study_name="SRP_Model_Study",
    from_storage=storage,
    to_storage=f"sqlite:///{backup_path}"
)

A new study created in RDB with name: SRP_Model_Study


[I 2025-08-17 00:48:46,525] A new study created in RDB with name: SRP_Model_Study


### Free up studies

In [33]:
for var in list(globals()):
    if isinstance(globals()[var], optuna.study.Study):
        del globals()[var]

gc.collect()

10401

In [None]:
def retry_failed_trials(study: optuna.Study, objective_func):
    failed_trials = [t for t in study.trials if t.state == optuna.trial.TrialState.FAIL]
    print(f"Found {len(failed_trials)} failed trials. Retrying...")

    for trial in failed_trials:
        print(f"Retrying trial #{trial.number} with params {trial.params}")

        def fixed_objective(trial_):
            trial_.set_user_attr("original_trial_number", trial.number)
            for key, value in trial.params.items():
                if isinstance(value, float):
                    trial_.suggest_float(key, value, value)  # no log param here
                elif isinstance(value, int):
                    trial_.suggest_int(key, value, value)
                else:
                    trial_.suggest_categorical(key, [value])
            return objective_func(trial_)

        study.optimize(fixed_objective, n_trials=1)

    print("Retried all failed trials.")


In [None]:
retry_failed_trials(study,objective)

Found 5 failed trials. Retrying...
Retrying trial #0 with params {'lr': 2.2604443080058902e-05, 'weight_decay': 0.0001, 'batch_size': 8, 'optimiser_name': 'Adam'}


100%|███████████████████████████████████████████████████████████████████████| 93/93 [00:06<00:00, 13.53it/s]


Dataset initialised with 409 entries.
Epoch 0 Done. Avg Loss: 0.7136
Epoch 1 Done. Avg Loss: 0.7070


100%|███████████████████████████████████████████████████████████████████████| 31/31 [00:00<00:00, 68.27it/s]


Dataset initialised with 31 entries.


100%|███████████████████████████████████████████████████████████████████████| 93/93 [00:07<00:00, 13.17it/s]


Dataset initialised with 409 entries.
Epoch 0 Done. Avg Loss: 0.7712
Epoch 1 Done. Avg Loss: 0.7348


100%|███████████████████████████████████████████████████████████████████████| 31/31 [00:00<00:00, 75.91it/s]


Dataset initialised with 31 entries.


100%|███████████████████████████████████████████████████████████████████████| 93/93 [00:06<00:00, 13.46it/s]


Dataset initialised with 404 entries.
Epoch 0 Done. Avg Loss: 0.7040
Epoch 1 Done. Avg Loss: 0.7089


100%|███████████████████████████████████████████████████████████████████████| 31/31 [00:00<00:00, 69.90it/s]


Dataset initialised with 31 entries.


100%|███████████████████████████████████████████████████████████████████████| 93/93 [00:07<00:00, 13.27it/s]


Dataset initialised with 404 entries.
Epoch 0 Done. Avg Loss: 0.7053
Epoch 1 Done. Avg Loss: 0.7089


100%|███████████████████████████████████████████████████████████████████████| 31/31 [00:00<00:00, 71.75it/s]


Dataset initialised with 31 entries.


[I 2025-08-08 18:10:43,848] Trial 15 finished with value: 0.4547650417215635 and parameters: {'lr': 2.2604443080058902e-05, 'weight_decay': 0.1, 'batch_size': 16, 'optimiser_name': 'SGD'}. Best is trial 3 with value: 0.5290129556433903.


0.4547650417215635
Retrying trial #11 with params {}


100%|███████████████████████████████████████████████████████████████████████| 93/93 [00:07<00:00, 12.86it/s]


Dataset initialised with 409 entries.
Epoch 0 Done. Avg Loss: 0.7279


[W 2025-08-08 18:12:11,344] Trial 16 failed with parameters: {'lr': 0.00030469073234648785, 'weight_decay': 0.01, 'batch_size': 16, 'optimiser_name': 'SGD'} because of the following error: KeyboardInterrupt().
Traceback (most recent call last):
  File "E:\SRP\Code\env\Lib\site-packages\optuna\study\_optimize.py", line 196, in _run_trial
    value_or_values = func(trial)
  File "C:\Users\USER\AppData\Local\Temp\ipykernel_19764\139076021.py", line 22, in fixed_objective
    return objective_func(trial_)
  File "C:\Users\USER\AppData\Local\Temp\ipykernel_19764\3261892532.py", line 18, in objective
    model = trainModel(model,fold_train_loader,optimiser,device=device,num_epochs=2)
  File "E:\SRP\Code\trainModel.py", line 26, in trainModel
    optimiser.step()
    ~~~~~~~~~~~~~~^^
  File "E:\SRP\Code\env\Lib\site-packages\torch\optim\optimizer.py", line 485, in wrapper
    out = func(*args, **kwargs)
  File "E:\SRP\Code\env\Lib\site-packages\torch\optim\optimizer.py", line 79, in _use_grad

KeyboardInterrupt: 

: 