In [8]:
# Make Google Drive visible to the Colab runtime; later cells read the project
# folder through this mount point.
from google.colab import drive

DRIVE_MOUNT_POINT = "/content/drive"
drive.mount(DRIVE_MOUNT_POINT)

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [9]:
"""
Experiment: 2026_02_02_exp_015_custom_cnn_lr_sweep
Goal: Find the largest stable LR, the best-generalizing LR range, and whether a scheduler is needed
Dataset:
Notes:
"""

'\nExperiment: 2026_02_02_exp_015_custom_cnn_lr_sweep\nGoal: Finding the largest stable LR, The best generalizing LR range, Whether a scheduler is needed\nDataset:\nNotes:\n'

In [10]:
import os

# All later cells use paths relative to the project folder ("experiments/...",
# "data/..."), so switch the process working directory to it first.
PROJECT_DIR = "/content/drive/My Drive/Colab Notebooks/Data Science Group Project"
os.chdir(PROJECT_DIR)

# Echo the new working directory and its entries as a quick sanity check.
for shown in (os.getcwd(), os.listdir()):
    print(shown)

/content/drive/My Drive/Colab Notebooks/Data Science Group Project
['data', 'experiments', 'JustTests', 'OLD', 'MoveImagesFinal.ipynb', 'Splitting Dataset Into Train_Validation_Test_Sets.ipynb', 'RemovingBlackFinal.ipynb', 'Top-View Image Selection From MRI and CT Dataset.ipynb', 'old experiments']


In [11]:
# =====================================================
# Imports
# =====================================================

import platform
from pathlib import Path
import os
import cv2
import torch
import torch.multiprocessing as mp
from torch import nn
from torch.utils.data import Dataset, DataLoader
from tqdm.auto import tqdm
import albumentations as A
from albumentations.pytorch import ToTensorV2
import numpy as np
import yaml
import json
import pandas as pd
import random
import copy

# =====================================================
# Config & Reproducibility
# =====================================================

def load_config(path):
    """Parse a YAML configuration file.

    Args:
        path: Location of the YAML file (anything accepted by ``open``).

    Returns:
        The object produced by ``yaml.safe_load`` for the file's content
        (the rest of this notebook indexes it like a nested dict).

    Raises:
        OSError: If the file cannot be opened.
        yaml.YAMLError: If the content is not valid YAML.
    """
    # Pin the encoding: without it ``open`` falls back to the platform locale,
    # which can mis-decode a UTF-8 config on some systems.
    with open(path, "r", encoding="utf-8") as f:
        return yaml.safe_load(f)


# =====================================================
# Dataset
# =====================================================

class DualImageDataset(Dataset):
    """In-memory dataset yielding ``(raw image, processed image, label)`` triples.

    The file at ``path`` must have been written with ``torch.save`` and contain
    a 3-item sequence ``(raw_imgs, proc_imgs, labels)`` of tensors that are
    indexed along their first dimension by sample.
    """

    def __init__(self, path):
        """Load all tensors once and normalise their dtypes.

        Args:
            path: File to read with ``torch.load``.

        Raises:
            ValueError: If the three tensors do not hold the same number of samples.
        """
        # Load once; everything is kept in memory for the dataset's lifetime.
        raw_imgs, proc_imgs, labels = torch.load(path)

        # Fail fast on inconsistent files: otherwise a mismatch only surfaces
        # as an IndexError mid-epoch, or silently ignores trailing samples.
        if not (len(raw_imgs) == len(proc_imgs) == len(labels)):
            raise ValueError(
                f"Sample count mismatch in {path}: "
                f"{len(raw_imgs)} raw images, {len(proc_imgs)} processed images, "
                f"{len(labels)} labels"
            )

        # Ensure proper dtype: float inputs for the network, integer class labels.
        self.raw_imgs = raw_imgs.float()
        self.proc_imgs = proc_imgs.float()
        self.labels = labels.long()

    def __len__(self):
        """Return the number of samples (length of the label tensor)."""
        return len(self.labels)

    def __getitem__(self, idx):
        """Return ``(raw_img, proc_img, label)`` for sample ``idx``."""
        return self.raw_imgs[idx], self.proc_imgs[idx], self.labels[idx]

def dataset(transformed_data_dir, cfg):
    """Build the train, validation and test datasets.

    Args:
        transformed_data_dir: Directory (path-like supporting ``/``) that holds
            the saved tensor files.
        cfg: Configuration mapping; ``cfg["data"]`` supplies the file names under
            the keys ``train_path``, ``val_path`` and ``test_path``.

    Returns:
        Tuple ``(train_dataset, val_dataset, test_dataset)`` of
        ``DualImageDataset`` objects.
    """
    split_keys = ("train_path", "val_path", "test_path")
    return tuple(
        DualImageDataset(path=transformed_data_dir / cfg["data"][key])
        for key in split_keys
    )


# =====================================================
# Dataloaders
# =====================================================

def dataloader(cfg, train_dataset, val_dataset, test_dataset):
    """Wrap the three datasets in ``DataLoader`` objects and print their sizes.

    Only the training loader shuffles; validation and test keep dataset order.
    All loaders run in the main process (``num_workers=0``).

    Args:
        cfg: Configuration mapping; ``cfg["training"]["batch_size"]`` is used
            for all three loaders.
        train_dataset: Dataset for training.
        val_dataset: Dataset for validation.
        test_dataset: Dataset for testing.

    Returns:
        Tuple ``(train_dataloader, val_dataloader, test_dataloader)``.
    """
    batch_size = cfg["training"]["batch_size"]

    # Page-locked host memory only helps host-to-GPU copies. Without CUDA it is
    # useless and makes PyTorch emit a warning, so enable it only when a GPU exists.
    pin_memory = torch.cuda.is_available()

    shared_kwargs = dict(batch_size=batch_size, num_workers=0, pin_memory=pin_memory)
    train_dataloader = DataLoader(train_dataset, shuffle=True, **shared_kwargs)
    val_dataloader = DataLoader(val_dataset, shuffle=False, **shared_kwargs)
    test_dataloader = DataLoader(test_dataset, shuffle=False, **shared_kwargs)

    print(f"Number of training samples: {len(train_dataset)}")
    print(f"Number of validation samples: {len(val_dataset)}")
    print(f"Number of testing samples: {len(test_dataset)}")

    print(f"Length of TrainDataloader: {len(train_dataloader)} batches of {batch_size}")
    print(f"Length of ValDataloader: {len(val_dataloader)} batches of {batch_size}")
    print(f"Length of TestDataloader: {len(test_dataloader)} batches of {batch_size}")

    return train_dataloader, val_dataloader, test_dataloader


# =====================================================
# Model
# =====================================================

class CustomCNN(nn.Module):
    """Two-stage convolutional classifier fed with a raw and a processed image.

    The two inputs are concatenated along the channel axis in ``forward``, so
    ``input_shape`` must equal the combined channel count of both images.

    Args:
        input_shape: Number of input channels after concatenation.
        hidden_units: Channel width used by every convolution.
        output_shape: Number of output logits.
        cfg: Configuration mapping; ``cfg["data"]["image_size"]`` gives the
            spatial dimensions used to size the linear layer.
    """

    @staticmethod
    def _conv_stage(in_channels, out_channels):
        """Conv -> ReLU -> Conv -> ReLU -> 2x2 max-pool (halves each spatial dim)."""
        return nn.Sequential(
            nn.Conv2d(in_channels=in_channels, out_channels=out_channels, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.Conv2d(in_channels=out_channels, out_channels=out_channels, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2),
        )

    def __init__(self, input_shape, hidden_units, output_shape, cfg):
        super().__init__()

        self.conv_block_1 = self._conv_stage(input_shape, hidden_units)
        self.conv_block_2 = self._conv_stage(hidden_units, hidden_units)

        # Size the linear layer by pushing one all-zero dummy sample through the
        # convolutional stages instead of hard-coding the flattened length.
        spatial_dims = tuple(cfg["data"]["image_size"])
        with torch.no_grad():
            probe = torch.zeros(1, input_shape, *spatial_dims)
            probe = self.conv_block_2(self.conv_block_1(probe))
            n_features = probe.numel() // probe.size(0)  # features per sample

        self.classifier = nn.Sequential(
            nn.Flatten(),
            nn.Linear(in_features=n_features, out_features=output_shape),
        )

    def forward(self, raw_img, processed_img):
        """Return class logits for a batch of (raw, processed) image pairs."""
        # Stack both views along the channel axis before the first convolution.
        stacked = torch.cat([raw_img, processed_img], dim=1)
        features = self.conv_block_2(self.conv_block_1(stacked))
        return self.classifier(features)


def make_model(cfg, classes, device, hyperparameter_value):
    """Create the network, its loss function and its optimizer for one run.

    Args:
        cfg: Configuration mapping; ``cfg["model"]["input_dim"]`` and
            ``cfg["model"]["hidden_units"]`` size the network, and the whole
            mapping is forwarded to ``CustomCNN``.
        classes: Sequence of class names; its length sets the number of logits.
        device: Device the model is moved to.
        hyperparameter_value: Learning rate handed to Adam.

    Returns:
        Tuple ``(model, loss_func, optimizer)``.
    """
    model_cfg = cfg["model"]
    network = CustomCNN(
        input_shape=model_cfg["input_dim"],
        hidden_units=model_cfg["hidden_units"],
        output_shape=len(classes),
        cfg=cfg,
    )
    network = network.to(device)

    criterion = nn.CrossEntropyLoss()
    adam = torch.optim.Adam(network.parameters(), lr=hyperparameter_value)

    return network, criterion, adam


# =====================================================
# Training / Evaluation Utils
# =====================================================

# =====================================================
# Train
# =====================================================

def train_step(model, train_dataloader, loss_func, optimizer, device):
    """Train ``model`` for one pass over ``train_dataloader``.

    Args:
        model: Network called as ``model(raw_X, processed_X)``.
        train_dataloader: Iterable of ``(raw_X, processed_X, y)`` batches that
            also supports ``len``.
        loss_func: Callable ``loss_func(predictions, y)`` returning a scalar tensor.
        optimizer: Optimizer stepped once per batch.
        device: Device the batches are moved to.

    Returns:
        Tuple ``(train_loss, train_acc)``: the mean of the per-batch losses and
        the accuracy in percent over all samples seen.
    """
    model.train()

    loss_sum = 0
    n_correct = 0
    n_seen = 0

    for raw_X, processed_X, y in train_dataloader:
        raw_X = raw_X.to(device, non_blocking=True)
        processed_X = processed_X.to(device, non_blocking=True)
        y = y.to(device)

        logits = model(raw_X, processed_X)
        loss = loss_func(logits, y)

        # Standard update: clear stale gradients, backpropagate, apply the step.
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Bookkeeping uses the values computed before the weight update.
        loss_sum += loss.item()
        n_correct += (logits.argmax(dim=1) == y).sum().item()
        n_seen += y.size(0)

    train_loss = loss_sum / len(train_dataloader)
    train_acc = 100.0 * n_correct / n_seen

    print(f"Train loss: {train_loss:.5f} | Train acc: {train_acc:.2f}%\n")
    return train_loss, train_acc


# =====================================================
# Validation
# =====================================================

def val_step(model, val_dataloader, loss_func, device):
    """Evaluate ``model`` on ``val_dataloader`` without updating any weights.

    Args:
        model: Network called as ``model(raw_X, processed_X)``.
        val_dataloader: Iterable of ``(raw_X, processed_X, y)`` batches that
            also supports ``len``.
        loss_func: Callable ``loss_func(predictions, y)`` returning a scalar tensor.
        device: Device the batches are moved to.

    Returns:
        Tuple ``(val_loss, val_acc)``: the mean of the per-batch losses and the
        accuracy in percent over all samples seen.
    """
    loss_sum = 0
    n_correct = 0
    n_seen = 0

    model.eval()
    with torch.inference_mode():
        for raw_X, processed_X, y in val_dataloader:
            raw_X = raw_X.to(device, non_blocking=True)
            processed_X = processed_X.to(device, non_blocking=True)
            y = y.to(device)

            logits = model(raw_X, processed_X)

            loss_sum += loss_func(logits, y).item()
            n_correct += (logits.argmax(dim=1) == y).sum().item()
            n_seen += y.size(0)

        val_loss = loss_sum / len(val_dataloader)
        val_acc = 100.0 * n_correct / n_seen

    print(f"Val loss: {val_loss:.5f} | Val acc: {val_acc:.2f}%\n")
    return val_loss, val_acc


def train_and_evaluate(model, epochs, train_dataloader, val_dataloader, loss_func, optimizer,
                       device):
    """Alternate one training pass and one validation pass for ``epochs`` epochs.

    Args:
        model: Network to optimise.
        epochs: Number of train/validate rounds.
        train_dataloader: Batches passed to ``train_step``.
        val_dataloader: Batches passed to ``val_step``.
        loss_func: Loss callable shared by both steps.
        optimizer: Optimizer used by ``train_step``.
        device: Device the batches are moved to.

    Returns:
        Tuple ``(train_loss_list, train_acc_list, val_loss_list, val_acc_list)``
        with one entry per epoch in each list.
    """
    history = {"train_loss": [], "train_acc": [], "val_loss": [], "val_acc": []}

    for _ in tqdm(range(epochs)):
        epoch_train = train_step(model, train_dataloader, loss_func, optimizer, device)
        epoch_val = val_step(model, val_dataloader, loss_func, device)

        # dict order matches (train_loss, train_acc, val_loss, val_acc)
        for series, value in zip(history.values(), (*epoch_train, *epoch_val)):
            series.append(value)

    return history["train_loss"], history["train_acc"], history["val_loss"], history["val_acc"]


# =====================================================
# Test
# =====================================================

def test_step(model, test_data_loader, loss_func, device):
    test_loss, test_acc = 0, 0
    correct = 0
    total = 0
    y_test_list = []
    y_pred_list = []
    y_pred_prob_list = []

    model.eval()
    with torch.inference_mode():
        for raw_X, processed_X, y in test_data_loader:
            raw_X, processed_X, y = raw_X.to(device, non_blocking=True), processed_X.to(device, non_blocking=True), y.to(device)

            test_y_pred = model(raw_X, processed_X)
            y_pred_prob_list.append(torch.softmax(test_y_pred, dim=1))

            test_loss += loss_func(test_y_pred, y).item()

            y_test_list.append(y.cpu())
            y_pred_list.append(test_y_pred.argmax(dim=1).cpu())

            correct += (test_y_pred.argmax(dim=1) == y).sum().item()
            total += y.size(0)

        test_loss /= len(test_data_loader)
        test_acc = 100.0 * correct / total

    print(f"Test loss: {test_loss:.5f} | Test acc: {test_acc:.2f}%\n")
    return y_test_list, y_pred_list, y_pred_prob_list, test_acc

In [12]:
# -----------------------------------------------------------------------------
# Learning-rate sweep driver.
# For every learning rate listed in the experiment config: reseed, train a fresh
# model, record the train/val curves, evaluate on the test set, and keep a
# checkpoint of the run/epoch with the lowest validation loss.
# -----------------------------------------------------------------------------
experiment_path = Path("experiments/2026_02_02_exp_015_custom_cnn_lr_sweep")

config = load_config(experiment_path / "config.yaml")

hyperparameter_values = config["training"]["lr"]

best_loss = float("inf")
best_config = None
best_checkpoint = None
search = 1
metrics_d = {"Metric": ["train_loss_list", "train_acc_list", "val_loss_list", "val_acc_list", "hyperparameter"]}
testing_rows = []

# --- Setup that does not depend on the learning rate (done once) -------------
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False
os.environ["CUBLAS_WORKSPACE_CONFIG"] = ":4096:8"
# torch.use_deterministic_algorithms(True)

device = "cuda" if config["device"] == "cuda" and torch.cuda.is_available() else "cpu"

classes = ['glioma', 'meningioma', 'pituitary']

# The tensor files are identical for every run, so read them from disk once
# instead of once per learning rate.
train_dataset, val_dataset, test_dataset = dataset(Path("data/processed/mri"), config)

for hyperparameter_value in hyperparameter_values:

    # Reseed before each run so every learning rate starts from the same
    # initial weights and sees the same shuffling sequence.
    random.seed(config["seed"])
    np.random.seed(config["seed"])
    torch.manual_seed(config["seed"])
    torch.cuda.manual_seed(config["seed"])
    torch.cuda.manual_seed_all(config["seed"])

    train_dataloader, val_dataloader, test_dataloader = dataloader(config, train_dataset, val_dataset, test_dataset)

    model, loss_func, optimizer = make_model(config, classes, device, hyperparameter_value=hyperparameter_value)

    # The epoch loop lives here (rather than in train_and_evaluate) so the
    # weights of the best epoch can be snapshotted while training is running.
    train_loss_list, train_acc_list, val_loss_list, val_acc_list = [], [], [], []
    run_best_val_loss = float("inf")
    run_best_model_state = None
    run_best_optimizer_state = None

    for epoch in tqdm(range(config["training"]["epochs"])):
        train_loss, train_acc = train_step(model, train_dataloader, loss_func, optimizer, device)
        epoch_val_loss, val_acc = val_step(model, val_dataloader, loss_func, device)

        train_loss_list.append(train_loss)
        train_acc_list.append(train_acc)
        val_loss_list.append(epoch_val_loss)
        val_acc_list.append(val_acc)

        # state_dict() returns references to the live tensors, so a deep copy is
        # required; otherwise later epochs would overwrite the snapshot.
        # Strict "<" keeps the first epoch that reaches the minimum, matching
        # np.argmin below.
        if epoch_val_loss < run_best_val_loss:
            run_best_val_loss = epoch_val_loss
            run_best_model_state = copy.deepcopy(model.state_dict())
            run_best_optimizer_state = copy.deepcopy(optimizer.state_dict())

    metrics_d[f"Search_{search}"] = [train_loss_list, train_acc_list, val_loss_list, val_acc_list, hyperparameter_value]

    # Plain int (not np.int64) so the checkpoint holds only built-in types and tensors.
    best_epoch = int(np.argmin(val_loss_list))
    val_loss = val_loss_list[best_epoch]

    # If any epoch produced NaN, np.argmin points at it and this comparison is
    # False, so diverged runs are never selected. Whenever the branch is taken,
    # the snapshot above belongs to exactly `best_epoch`.
    if val_loss < best_loss:
        best_loss = val_loss
        best_config = copy.deepcopy(config)
        best_config["training"]["lr"] = hyperparameter_value

        best_checkpoint = {
            "epoch": best_epoch,
            "model_state_dict": run_best_model_state,
            "optimizer_state_dict": run_best_optimizer_state,
            "config": best_config,
            "best_val_loss": val_loss,
        }

    print(best_config, best_loss)

    # Test predictions are produced with the weights after the final epoch of this run.
    y_test_list, y_pred_list, y_pred_prob_list, test_accuracy = test_step(model, test_dataloader, loss_func, device)

    y_test_list = torch.cat(y_test_list).numpy()  # true labels
    y_pred_list = torch.cat(y_pred_list).numpy()  # predicted class
    y_pred_prob_list = torch.cat(y_pred_prob_list).cpu().numpy()  # predicted probabilities

    # One row per test sample; probability columns follow the order of `classes`.
    for y_true, y_pred, probs in zip(y_test_list, y_pred_list, y_pred_prob_list):
        testing_rows.append({
            "search_id": search,
            "lr": hyperparameter_value,
            "y_true": y_true,
            "y_pred": y_pred,
            "prob_glioma": probs[0],
            "prob_meningioma": probs[1],
            "prob_pituitary": probs[2],
        })

    search += 1

# Nothing to save when the sweep was empty or no run was selected.
if best_checkpoint is None:
    print("No run was selected as best; checkpoint.pth was not written.")
else:
    torch.save(best_checkpoint, experiment_path / "checkpoint.pth")

# One row per search, one column per metric. index=False drops the "Search_N"
# row labels; rows stay identifiable through the "hyperparameter" column.
metrics_df = pd.DataFrame(metrics_d).set_index("Metric").T
metrics_df.to_csv(experiment_path / "train_val_metrics.csv", index=False)
print("Train/Val metrics saved to train_val_metrics.csv")

test_df = pd.DataFrame(testing_rows)
test_df.to_csv(experiment_path / "test_predictions.csv", index=False)
print("Test predictions saved to test_predictions.csv")

# Record the software/hardware environment next to the results.
env_info = {
    "python_version": platform.python_version(),
    "pytorch_version": torch.__version__,
    "cuda_version": torch.version.cuda,
    "cudnn_version": torch.backends.cudnn.version(),
    "gpu": torch.cuda.get_device_name(0) if torch.cuda.is_available() else None,
    "os": platform.platform(),
}

env_path = experiment_path / "env_info.json"
with open(env_path, "w") as f:
    json.dump(env_info, f, indent=4)

print(f"Environment Info saved to {env_path}")

Number of training samples: 3181
Number of validation samples: 908
Number of testing samples: 456
Length of TrainDataloader: 100 batches of 32
Length of ValDataloader: 29 batches of 32
Length of TestDataloader: 15 batches of 32


  0%|          | 0/15 [00:00<?, ?it/s]

Train loss: 0.76536 | Train acc: 63.97%

Val loss: 0.57520 | Val acc: 73.02%

Train loss: 0.50390 | Train acc: 77.05%

Val loss: 0.45120 | Val acc: 79.74%

Train loss: 0.42237 | Train acc: 82.18%

Val loss: 0.40266 | Val acc: 84.80%

Train loss: 0.36179 | Train acc: 85.04%

Val loss: 0.35972 | Val acc: 86.34%

Train loss: 0.30597 | Train acc: 87.83%

Val loss: 0.33164 | Val acc: 88.33%

Train loss: 0.26171 | Train acc: 90.35%

Val loss: 0.37398 | Val acc: 84.03%

Train loss: 0.27604 | Train acc: 89.15%

Val loss: 0.34796 | Val acc: 87.33%

Train loss: 0.22856 | Train acc: 91.76%

Val loss: 0.27517 | Val acc: 91.30%

Train loss: 0.20478 | Train acc: 92.77%

Val loss: 0.26172 | Val acc: 91.85%

Train loss: 0.18391 | Train acc: 93.40%

Val loss: 0.28027 | Val acc: 91.52%

Train loss: 0.19742 | Train acc: 92.58%

Val loss: 0.29178 | Val acc: 90.42%

Train loss: 0.18328 | Train acc: 92.58%

Val loss: 0.23690 | Val acc: 94.16%

Train loss: 0.14073 | Train acc: 95.47%

Val loss: 0.23871 | Val

  0%|          | 0/15 [00:00<?, ?it/s]

Train loss: 0.65566 | Train acc: 68.19%

Val loss: 0.58209 | Val acc: 73.13%

Train loss: 0.40581 | Train acc: 81.86%

Val loss: 0.36411 | Val acc: 82.38%

Train loss: 0.32904 | Train acc: 86.36%

Val loss: 0.29275 | Val acc: 88.99%

Train loss: 0.25627 | Train acc: 89.85%

Val loss: 0.27648 | Val acc: 89.98%

Train loss: 0.22743 | Train acc: 91.98%

Val loss: 0.26697 | Val acc: 92.40%

Train loss: 0.17552 | Train acc: 93.81%

Val loss: 0.27921 | Val acc: 90.97%

Train loss: 0.19889 | Train acc: 92.61%

Val loss: 0.30722 | Val acc: 89.54%

Train loss: 0.13147 | Train acc: 95.91%

Val loss: 0.23702 | Val acc: 93.94%

Train loss: 0.11123 | Train acc: 96.13%

Val loss: 0.19424 | Val acc: 95.93%

Train loss: 0.08410 | Train acc: 97.30%

Val loss: 0.41625 | Val acc: 87.33%

Train loss: 0.10363 | Train acc: 96.35%

Val loss: 0.24229 | Val acc: 94.49%

Train loss: 0.06986 | Train acc: 97.27%

Val loss: 0.26467 | Val acc: 94.38%

Train loss: 0.04576 | Train acc: 98.62%

Val loss: 0.27807 | Val

  0%|          | 0/15 [00:00<?, ?it/s]

Train loss: 0.61441 | Train acc: 71.27%

Val loss: 0.55360 | Val acc: 73.46%

Train loss: 0.36474 | Train acc: 84.75%

Val loss: 0.33222 | Val acc: 85.68%

Train loss: 0.27634 | Train acc: 89.56%

Val loss: 0.23217 | Val acc: 91.08%

Train loss: 0.19409 | Train acc: 92.58%

Val loss: 0.22703 | Val acc: 93.28%

Train loss: 0.15787 | Train acc: 93.93%

Val loss: 0.22350 | Val acc: 94.05%

Train loss: 0.12621 | Train acc: 95.79%

Val loss: 0.19170 | Val acc: 94.38%

Train loss: 0.10787 | Train acc: 95.82%

Val loss: 0.32515 | Val acc: 90.97%

Train loss: 0.06185 | Train acc: 98.18%

Val loss: 0.19540 | Val acc: 96.48%

Train loss: 0.04363 | Train acc: 98.68%

Val loss: 0.19971 | Val acc: 96.15%

Train loss: 0.03800 | Train acc: 98.87%

Val loss: 0.23743 | Val acc: 96.59%

Train loss: 0.01688 | Train acc: 99.56%

Val loss: 0.24470 | Val acc: 96.70%

Train loss: 0.02112 | Train acc: 99.31%

Val loss: 0.25314 | Val acc: 96.15%

Train loss: 0.04613 | Train acc: 98.43%

Val loss: 0.23313 | Val

  0%|          | 0/15 [00:00<?, ?it/s]

Train loss: 0.60195 | Train acc: 71.80%

Val loss: 0.44834 | Val acc: 80.84%

Train loss: 0.34051 | Train acc: 86.01%

Val loss: 0.31571 | Val acc: 87.11%

Train loss: 0.24541 | Train acc: 90.19%

Val loss: 0.21015 | Val acc: 93.61%

Train loss: 0.16073 | Train acc: 94.00%

Val loss: 0.21318 | Val acc: 94.60%

Train loss: 0.11689 | Train acc: 95.60%

Val loss: 0.26390 | Val acc: 93.50%

Train loss: 0.08776 | Train acc: 96.86%

Val loss: 0.20252 | Val acc: 95.93%

Train loss: 0.06658 | Train acc: 97.61%

Val loss: 0.24492 | Val acc: 93.28%

Train loss: 0.02997 | Train acc: 98.93%

Val loss: 0.21409 | Val acc: 96.92%

Train loss: 0.01071 | Train acc: 99.78%

Val loss: 0.25126 | Val acc: 97.25%

Train loss: 0.01404 | Train acc: 99.56%

Val loss: 0.23405 | Val acc: 96.92%

Train loss: 0.00682 | Train acc: 99.87%

Val loss: 0.26830 | Val acc: 97.14%

Train loss: 0.00584 | Train acc: 99.87%

Val loss: 0.28159 | Val acc: 97.25%

Train loss: 0.00218 | Train acc: 100.00%

Val loss: 0.29517 | Va

  0%|          | 0/15 [00:00<?, ?it/s]

Train loss: 0.60443 | Train acc: 71.02%

Val loss: 0.50996 | Val acc: 75.88%

Train loss: 0.32646 | Train acc: 86.48%

Val loss: 0.28470 | Val acc: 89.65%

Train loss: 0.23467 | Train acc: 90.57%

Val loss: 0.21351 | Val acc: 94.05%

Train loss: 0.15488 | Train acc: 94.44%

Val loss: 0.23010 | Val acc: 94.71%

Train loss: 0.11321 | Train acc: 95.76%

Val loss: 0.23936 | Val acc: 94.93%

Train loss: 0.08112 | Train acc: 97.04%

Val loss: 0.24415 | Val acc: 95.48%

Train loss: 0.06819 | Train acc: 97.55%

Val loss: 0.27934 | Val acc: 94.82%

Train loss: 0.02928 | Train acc: 99.21%

Val loss: 0.34278 | Val acc: 94.93%

Train loss: 0.01733 | Train acc: 99.47%

Val loss: 0.27897 | Val acc: 96.37%

Train loss: 0.01057 | Train acc: 99.75%

Val loss: 0.35937 | Val acc: 96.26%

Train loss: 0.03938 | Train acc: 98.52%

Val loss: 0.27677 | Val acc: 95.70%

Train loss: 0.02038 | Train acc: 99.37%

Val loss: 0.32214 | Val acc: 96.92%

Train loss: 0.00254 | Train acc: 99.97%

Val loss: 0.29755 | Val

  0%|          | 0/15 [00:00<?, ?it/s]

Train loss: 0.61486 | Train acc: 71.68%

Val loss: 0.44307 | Val acc: 83.92%

Train loss: 0.34538 | Train acc: 85.76%

Val loss: 0.27704 | Val acc: 91.41%

Train loss: 0.23212 | Train acc: 90.95%

Val loss: 0.21417 | Val acc: 93.72%

Train loss: 0.14410 | Train acc: 94.78%

Val loss: 0.20483 | Val acc: 94.71%

Train loss: 0.09271 | Train acc: 96.70%

Val loss: 0.24927 | Val acc: 95.81%

Train loss: 0.07022 | Train acc: 97.30%

Val loss: 0.21289 | Val acc: 95.81%

Train loss: 0.04915 | Train acc: 98.11%

Val loss: 0.32456 | Val acc: 94.93%

Train loss: 0.02842 | Train acc: 98.90%

Val loss: 0.37779 | Val acc: 94.60%

Train loss: 0.02975 | Train acc: 98.74%

Val loss: 0.25370 | Val acc: 96.70%

Train loss: 0.05274 | Train acc: 98.21%

Val loss: 0.27920 | Val acc: 95.26%

Train loss: 0.00746 | Train acc: 99.81%

Val loss: 0.31917 | Val acc: 96.26%

Train loss: 0.00801 | Train acc: 99.72%

Val loss: 0.28846 | Val acc: 97.14%

Train loss: 0.00156 | Train acc: 100.00%

Val loss: 0.36257 | Va