<a href="https://colab.research.google.com/github/Sibusisongwenya/WIP-Project/blob/main/tuning.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Mount Google Drive inside the Colab VM; the later cells read the project
# and data from paths under /content/drive.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
import os
import sys
# Append the project root to the Python path
sys.path.append('/content/drive/MyDrive/uc')
!pip install torchbnn
os.chdir("/content/drive/MyDrive/uc")
print("Current working directory:", os.getcwd())

Current working directory: /content/drive/MyDrive/uc


In [None]:
!pip install optuna



In [None]:
#!/usr/bin/env python
# tuning_optuna.py

import os
import random
import logging
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from torchvision import transforms
import optuna
import wandb

# Import modules from your project
from dataset.ucmayo4 import UCMayo4
from utils.magic import BayesianDenseNet121_LLSVI, DenseNet121_LLDropout

# ---------------------------
# Configuration
# ---------------------------
# One seed shared by every random number generator this script touches.
RANDOM_SEED = 35

# Resolve the compute device first; the CUDA generators are only seeded when
# a GPU was actually selected.
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")

random.seed(RANDOM_SEED)
np.random.seed(RANDOM_SEED)
torch.manual_seed(RANDOM_SEED)
if DEVICE.type == "cuda":
    torch.cuda.manual_seed_all(RANDOM_SEED)

# Timestamped INFO-level logging for the whole process.
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s - %(levelname)s - %(message)s",
)

# Directories for your data
TRAIN_DIR = "/content/drive/MyDrive/uc/test_set/train"
VAL_DIR = "/content/drive/MyDrive/uc/test_set/val"

# Combined loss function (MSE + beta * KL)
def combined_loss(outputs, targets, model, epoch, total_epochs, use_bayesian=True):
    """Return the MSE between ``outputs`` and ``targets``, optionally plus a weighted KL term.

    Args:
        outputs: Model predictions.
        targets: Ground-truth values compared against ``outputs``.
        model: The model being trained; its ``kl_loss()`` method is used when present.
        epoch: Zero-based index of the current epoch.
        total_epochs: Total number of epochs, used to scale the KL weight.
        use_bayesian: When false, the KL term is never added.

    Returns:
        The MSE alone when ``use_bayesian`` is false or ``model`` has no
        ``kl_loss`` attribute; otherwise ``mse + beta * model.kl_loss()`` with
        ``beta = min(1.0, (epoch + 1) / total_epochs)``.
    """
    reconstruction = nn.functional.mse_loss(outputs, targets)

    # Guard clause: nothing to add unless the KL term is both requested and available.
    if not (use_bayesian and hasattr(model, "kl_loss")):
        return reconstruction

    # KL weight grows linearly with the epoch and is capped at 1.
    kl_weight = min(1.0, (epoch + 1) / total_epochs)
    return reconstruction + kl_weight * model.kl_loss()

def create_transform(resize=256, normalize=True, augment=True):
    """Build the torchvision preprocessing pipeline for one dataset split.

    Args:
        resize: Side length; images are resized to ``(resize, resize)``.
        normalize: When true, append per-channel normalisation after ``ToTensor``.
        augment: When true, insert a random horizontal flip and a random
            rotation of up to 20 degrees right after the resize.

    Returns:
        A ``transforms.Compose`` applying, in order: resize, optional
        augmentation, tensor conversion, optional normalisation.
    """
    augmentation_steps = (
        [transforms.RandomHorizontalFlip(), transforms.RandomRotation(20)]
        if augment
        else []
    )
    # Channel statistics are presumably the ImageNet values expected by the
    # pretrained backbone -- confirm against the model definition.
    normalization_steps = (
        [transforms.Normalize(mean=[0.485, 0.456, 0.406],
                              std=[0.229, 0.224, 0.225])]
        if normalize
        else []
    )
    return transforms.Compose([
        transforms.Resize((resize, resize)),
        *augmentation_steps,
        transforms.ToTensor(),
        *normalization_steps,
    ])

def _train_one_epoch(model, loader, optimizer, epoch, total_epochs, use_bayesian):
    """Run one optimisation pass over ``loader``; return the mean per-sample training loss."""
    model.train()
    loss_sum, n_seen = 0.0, 0
    for images, targets in loader:
        images = images.to(DEVICE, non_blocking=True)
        # Labels are cast to float and given a trailing dimension; presumably this
        # matches an (N, 1) regression output from the model -- TODO confirm.
        targets = targets.to(DEVICE, non_blocking=True).float().unsqueeze(1)

        optimizer.zero_grad()
        outputs = model(images, sample=False)
        loss = combined_loss(outputs, targets, model, epoch, total_epochs, use_bayesian)
        loss.backward()
        optimizer.step()

        # Weight by batch size so a smaller final batch does not skew the epoch mean.
        batch_sz = images.size(0)
        loss_sum += loss.item() * batch_sz
        n_seen += batch_sz
    return loss_sum / n_seen


def _evaluate(model, loader):
    """Return ``(mean_mse, predictions, labels)`` over ``loader`` with gradients disabled.

    The loss is plain MSE with no KL term. The training loss anneals the KL
    weight with the epoch index, so including it here would make the same
    weights score differently depending on the epoch and would make Bayesian
    and Dropout trials incomparable.
    """
    criterion = nn.MSELoss()
    model.eval()
    preds, labels = [], []
    loss_sum, n_seen = 0.0, 0
    with torch.no_grad():
        for images, targets in loader:
            images = images.to(DEVICE, non_blocking=True)
            targets = targets.to(DEVICE, non_blocking=True).float().unsqueeze(1)

            out = model(images, sample=False)
            batch_sz = images.size(0)
            loss_sum += criterion(out, targets).item() * batch_sz
            n_seen += batch_sz

            # reshape(-1) always yields a 1-D tensor, so tolist() is a list even
            # for a batch of one sample (squeeze() would collapse it to a bare
            # float and break list.extend).
            preds.extend(out.reshape(-1).cpu().tolist())
            labels.extend(targets.reshape(-1).cpu().tolist())
    return loss_sum / n_seen, preds, labels


def objective(trial):
    """
    Optuna objective: train a Bayesian or MC-Dropout DenseNet on the UCMayo4
    data for a fixed number of epochs and return the best combined score.

    Per epoch the score is ``0.65 * val_acc + 0.35 / (1 + val_mse)``, where
    ``val_acc`` is the accuracy after binarising predictions and labels and
    ``val_mse`` is the validation MSE. Each epoch's score is reported to
    Optuna so the study's pruner can stop unpromising trials.

    Args:
        trial: The ``optuna.trial.Trial`` supplying hyperparameter suggestions.

    Returns:
        The highest combined score observed over the epochs of this trial.

    Raises:
        optuna.TrialPruned: When the pruner asks to stop after an epoch.
    """
    # Function-scope import, as in the original code: scikit-learn is only
    # needed here. Hoisted so it is not re-executed every epoch.
    from sklearn.metrics import accuracy_score

    # --------------------------
    # Hyperparameters from Optuna
    # --------------------------
    model_type = trial.suggest_categorical("model_type", ["Bayesian", "Dropout"])
    # NOTE: dropout_rate is sampled for every trial, although only the
    # "Dropout" branch below passes it to the model.
    dropout_rate = trial.suggest_categorical("dropout_rate", [0.2, 0.3, 0.5])
    # suggest_float(log=True) replaces the deprecated suggest_loguniform.
    learning_rate = trial.suggest_float("learning_rate", 1e-4, 1e-2, log=True)
    batch_size = trial.suggest_categorical("batch_size", [16, 32, 64])
    epochs = 25  # e.g., fixed at 25

    # Weights of the two terms of the combined validation metric.
    acc_weight, loss_weight = 0.65, 0.35

    # Initialize a wandb run for each trial
    wandb_run = wandb.init(
        project="my-ucmayo4-tuning-optuna",
        config={
            "model_type": model_type,
            "dropout_rate": dropout_rate,
            "learning_rate": learning_rate,
            "batch_size": batch_size,
            "epochs": epochs
        },
        reinit=True
    )

    # try/finally guarantees the W&B run is closed on normal completion,
    # on pruning, and on any unexpected failure.
    try:
        # --------------------------
        # Data Loading
        # --------------------------
        train_transform = create_transform(256, normalize=True, augment=True)
        val_transform   = create_transform(256, normalize=True, augment=False)

        train_dataset = UCMayo4(root_dir=TRAIN_DIR, transform=train_transform)
        val_dataset   = UCMayo4(root_dir=VAL_DIR,   transform=val_transform)

        train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True,  pin_memory=True)
        val_loader   = DataLoader(val_dataset,   batch_size=batch_size, shuffle=False, pin_memory=True)

        # --------------------------
        # Model Instantiation
        # --------------------------
        if model_type == "Bayesian":
            model = BayesianDenseNet121_LLSVI(pretrained=True).to(DEVICE)
            use_bayesian = True
        else:  # "Dropout"
            model = DenseNet121_LLDropout(pretrained=True, dropout_prob=dropout_rate).to(DEVICE)
            use_bayesian = False

        optimizer = optim.Adam(model.parameters(), lr=learning_rate)

        best_combined_score = -float("inf")

        # --------------------------
        # Training Loop
        # --------------------------
        for epoch in range(epochs):
            train_loss = _train_one_epoch(
                model, train_loader, optimizer, epoch, epochs, use_bayesian
            )

            # --------------------------
            # Validation
            # --------------------------
            val_loss, val_preds, val_labels = _evaluate(model, val_loader)

            # Binarise: predictions below 1.5 and labels below 2 map to class 0,
            # everything else to class 1 (presumably Mayo 0-1 vs 2-3 -- confirm).
            bin_preds = [0 if p < 1.5 else 1 for p in val_preds]
            bin_true  = [0 if y < 2 else 1 for y in val_labels]
            val_acc = accuracy_score(bin_true, bin_preds)

            # Combined metric: 0.65 * accuracy + 0.35 * (1 / (1 + val_loss))
            combined_score = acc_weight * val_acc + loss_weight * (1.0 / (1.0 + val_loss))

            # Log to W&B
            wandb.log({
                "epoch": epoch + 1,
                "train_loss": train_loss,
                "val_loss": val_loss,
                "val_acc": val_acc,
                "combined_score": combined_score
            })

            if combined_score > best_combined_score:
                best_combined_score = combined_score

            # Report the intermediate value so the pruner can stop the trial early.
            trial.report(combined_score, epoch)
            if trial.should_prune():
                raise optuna.TrialPruned()

        # Done training
        return best_combined_score
    finally:
        wandb_run.finish()

def main(n_trials=10, timeout=None):
    """Run the Optuna study over ``objective`` and print the best completed trial.

    Args:
        n_trials: Number of trials passed to ``study.optimize``.
        timeout: Optional time budget in seconds passed to ``study.optimize``;
            ``None`` means no limit.

    Returns:
        None. Results are printed to stdout.
    """
    # TPE is Optuna's default single-objective sampler; passing it explicitly
    # only fixes its seed. Run-to-run reproducibility still depends on the
    # objective itself being deterministic.
    study = optuna.create_study(
        direction="maximize",
        sampler=optuna.samplers.TPESampler(seed=RANDOM_SEED),
    )

    # Optimize the objective
    study.optimize(objective, n_trials=n_trials, timeout=timeout)

    # study.best_trial raises ValueError when no trial has completed (for
    # example when every trial was pruned or failed), so check first.
    completed = study.get_trials(
        deepcopy=False, states=(optuna.trial.TrialState.COMPLETE,)
    )
    if not completed:
        print("No trial completed successfully; there is no best trial to report.")
        return

    # Print the best result
    print("Best trial:")
    best_trial = study.best_trial
    print(f"  Value (combined_score): {best_trial.value}")
    print("  Params:")
    for k, v in best_trial.params.items():
        print(f"    {k}: {v}")

if __name__ == "__main__":
    main()


[I 2025-03-19 08:07:38,883] A new study created in memory with name: no-name-9b5c7e7f-d8ab-40b7-a04e-d14ccaef5e2a
  learning_rate = trial.suggest_loguniform("learning_rate", 1e-4, 1e-2)
[34m[1mwandb[0m: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.
[34m[1mwandb[0m: Currently logged in as: [33mfngwenya[0m ([33mfngwenya-z[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin




0,1
combined_score,▂▂▃▁▃▃▃▃▃▃▁▂▂▁▃▅▆█▇▆▇▆▇█▆
epoch,▁▁▂▂▂▂▃▃▃▄▄▄▅▅▅▅▆▆▆▇▇▇▇██
train_loss,▃▅▇███▇▆▅▄▃▂▁▁▁▁▁▁▁▁▁▁▁▁▁
val_acc,▅▅▆▃█▇██▇▇▃▄▅▁▅▄▁▇▄▁▄▁▄▆▂
val_loss,▄▆▇███▇▆▄▃▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
combined_score,0.73044
epoch,25.0
train_loss,0.79049
val_acc,0.81853
val_loss,0.76411


[I 2025-03-19 08:38:57,694] Trial 0 finished with value: 0.7850370401633455 and parameters: {'model_type': 'Bayesian', 'dropout_rate': 0.3, 'learning_rate': 0.0007386832638599356, 'batch_size': 32}. Best is trial 0 with value: 0.7850370401633455.
  learning_rate = trial.suggest_loguniform("learning_rate", 1e-4, 1e-2)


0,1
combined_score,▁▅▂▃▅▆▅▇█▅▆▇▆▄▄▆▇▆▄▇█▆▇█▆
epoch,▁▁▂▂▂▂▃▃▃▄▄▄▅▅▅▅▆▆▆▇▇▇▇██
train_loss,█▃▃▃▃▂▂▂▂▂▂▂▂▁▂▂▂▁▂▁▁▁▁▁▁
val_acc,▁▅▁▂▅▅▆▆█▆▅▇▅▆▄▆▇▄▄▆█▇▇█▆
val_loss,█▃▆▆▄▂▄▂▂▄▂▂▂▆▅▃▁▂▄▂▁▃▁▁▃

0,1
combined_score,0.88054
epoch,25.0
train_loss,0.17889
val_acc,0.92907
val_loss,0.26519


[I 2025-03-19 09:12:51,777] Trial 1 finished with value: 0.9027397452707924 and parameters: {'model_type': 'Dropout', 'dropout_rate': 0.2, 'learning_rate': 0.0014608462163864502, 'batch_size': 64}. Best is trial 1 with value: 0.9027397452707924.
  learning_rate = trial.suggest_loguniform("learning_rate", 1e-4, 1e-2)


0,1
combined_score,▃▅▃▃▄▆▅▃▇▁▇▄▇▆█▆▇▃▅▄▆▇▄▇▆
epoch,▁▁▂▂▂▂▃▃▃▄▄▄▅▅▅▅▆▆▆▇▇▇▇██
train_loss,█▅▅▄▄▃▃▃▃▃▃▃▂▂▂▂▂▂▂▂▂▁▁▁▁
val_acc,▄▅▄▄▅▇▅▂█▁▇▄▇▅█▅▇▄▇▅▆▇▅▆▇
val_loss,█▅█▇▇▃▃▄▃▇▂▅▂▂▁▂▂▇▅▇▂▂▅▁▃

0,1
combined_score,0.89602
epoch,25.0
train_loss,0.10606
val_acc,0.93802
val_loss,0.22248


[I 2025-03-19 09:44:05,879] Trial 2 finished with value: 0.9070930246549063 and parameters: {'model_type': 'Dropout', 'dropout_rate': 0.2, 'learning_rate': 0.00039247966591329894, 'batch_size': 32}. Best is trial 2 with value: 0.9070930246549063.
  learning_rate = trial.suggest_loguniform("learning_rate", 1e-4, 1e-2)




0,1
combined_score,▃▃▃▃▃▁▃▃▃▂▃▁▂▂▄▇█▆▇▇▇██▇▆
epoch,▁▁▂▂▂▂▃▃▃▄▄▄▅▅▅▅▆▆▆▇▇▇▇██
train_loss,▄▆▇███▇▆▅▄▃▂▁▁▁▁▁▁▁▁▁▁▁▁▁
val_acc,▇██▇█▃█▇▇▆█▃▅▆▄▁▃▁▃▂▁▄▅▂▁
val_loss,▄▆▇███▇▆▄▃▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
combined_score,0.71534
epoch,25.0
train_loss,0.7899
val_acc,0.80767
val_loss,0.83865


[I 2025-03-19 10:14:56,716] Trial 3 finished with value: 0.7689595683058176 and parameters: {'model_type': 'Bayesian', 'dropout_rate': 0.3, 'learning_rate': 0.0003971084237816957, 'batch_size': 16}. Best is trial 2 with value: 0.9070930246549063.
  learning_rate = trial.suggest_loguniform("learning_rate", 1e-4, 1e-2)


0,1
combined_score,▅▃▁▅▇▆██▆█▅▂▇▇█▇█▆▆▆▆▂▂▆▇
epoch,▁▁▂▂▂▂▃▃▃▄▄▄▅▅▅▅▆▆▆▇▇▇▇██
train_loss,█▅▄▄▄▃▃▃▃▃▃▃▂▂▂▂▂▂▂▂▂▂▁▁▁
val_acc,▆▅▁▆▆▇██▅▇▄▂▇▇▇▆█▆█▆▆▄▁▆▇
val_loss,▅▇▆▄▂▃▂▃▃▁▃▆▂▂▂▂▂▃▅▃▃█▄▄▃

0,1
combined_score,0.89581
epoch,25.0
train_loss,0.08683
val_acc,0.93866
val_loss,0.22513


[I 2025-03-19 10:46:05,993] Trial 4 finished with value: 0.9003017557481983 and parameters: {'model_type': 'Dropout', 'dropout_rate': 0.3, 'learning_rate': 0.000290528097140661, 'batch_size': 32}. Best is trial 2 with value: 0.9070930246549063.
  learning_rate = trial.suggest_loguniform("learning_rate", 1e-4, 1e-2)


0,1
combined_score,▁▃█▃█▅▇▇▇▆▆▃▆▇▅▆▇█▇▂█▆▅▄▄
epoch,▁▁▂▂▂▂▃▃▃▄▄▄▅▅▅▅▆▆▆▇▇▇▇██
train_loss,█▅▄▄▄▃▃▃▃▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁
val_acc,▁▄▇▄▇▅▆▇▇▆▆▆▆▇▆▆▇█▆▄▇▆▆▅▅
val_loss,▆▇▁▇▁▃▂▁▃▄▃█▂▂▅▃▂▂▃█▂▄▅▅▆

0,1
combined_score,0.88364
epoch,25.0
train_loss,0.05132
val_acc,0.93035
val_loss,0.25486


[I 2025-03-19 11:20:06,047] Trial 5 finished with value: 0.9046174757485036 and parameters: {'model_type': 'Dropout', 'dropout_rate': 0.3, 'learning_rate': 0.0001974611792950046, 'batch_size': 64}. Best is trial 2 with value: 0.9070930246549063.
  learning_rate = trial.suggest_loguniform("learning_rate", 1e-4, 1e-2)


0,1
combined_score,▁
epoch,▁
train_loss,▁
val_acc,▁
val_loss,▁

0,1
combined_score,0.79695
epoch,1.0
train_loss,0.64195
val_acc,0.86645
val_loss,0.49731


[I 2025-03-19 11:22:37,429] Trial 6 pruned. 
  learning_rate = trial.suggest_loguniform("learning_rate", 1e-4, 1e-2)


0,1
combined_score,▁
epoch,▁
train_loss,▁
val_acc,▁
val_loss,▁

0,1
combined_score,0.70248
epoch,1.0
train_loss,0.95251
val_acc,0.80703
val_loss,0.96724


[I 2025-03-19 11:25:09,392] Trial 7 pruned. 
  learning_rate = trial.suggest_loguniform("learning_rate", 1e-4, 1e-2)




0,1
combined_score,▁
epoch,▁
train_loss,▁
val_acc,▁
val_loss,▁

0,1
combined_score,0.57652
epoch,1.0
train_loss,10963.46964
val_acc,0.8869
val_loss,10349.44976


[I 2025-03-19 11:27:41,905] Trial 8 pruned. 
  learning_rate = trial.suggest_loguniform("learning_rate", 1e-4, 1e-2)




0,1
combined_score,▁
epoch,▁
train_loss,▁
val_acc,▁
val_loss,▁

0,1
combined_score,0.60351
epoch,1.0
train_loss,11373.9464
val_acc,0.92843
val_loss,11163.4185


[I 2025-03-19 11:30:15,676] Trial 9 pruned. 


Best trial:
  Value (combined_score): 0.9070930246549063
  Params:
    model_type: Dropout
    dropout_rate: 0.2
    learning_rate: 0.00039247966591329894
    batch_size: 32


In [None]:
#!pip install ray[tune] wandb

Collecting ray[tune]
  Downloading ray-2.43.0-cp311-cp311-manylinux2014_x86_64.whl.metadata (19 kB)
Collecting tensorboardX>=1.9 (from ray[tune])
  Downloading tensorboardX-2.6.2.2-py2.py3-none-any.whl.metadata (5.8 kB)
Downloading tensorboardX-2.6.2.2-py2.py3-none-any.whl (101 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m101.7/101.7 kB[0m [31m9.0 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading ray-2.43.0-cp311-cp311-manylinux2014_x86_64.whl (67.7 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m67.7/67.7 MB[0m [31m33.5 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: tensorboardX, ray
Successfully installed ray-2.43.0 tensorboardX-2.6.2.2
