# **SETUP**

In [1]:
from __future__ import annotations

import itertools
import os
import pickle
import sys
from contextlib import redirect_stderr, redirect_stdout
from pathlib import Path

import torch
from tqdm.auto import tqdm

# Resolve the project root: the grandparent of this file when `__file__` is
# defined (script execution), otherwise the parent of the current working
# directory (interactive kernels do not define `__file__`).
ROOT_DIR = (
    Path(__file__).resolve().parent.parent
    if "__file__" in globals()
    else Path.cwd().resolve().parent
)

# Put <root>/src at the front of the import path; the project-local imports
# that follow presumably live there (verify against the repository layout).
SRC_DIR = ROOT_DIR.joinpath("src")
sys.path.insert(0, str(SRC_DIR))

from experiments import run_kfold_experiments
from utils.io_utils import DevNull

# Report the accelerators visible to this kernel so the run log records the hardware.
print("CUDA available:", torch.cuda.is_available())
gpu_count = torch.cuda.device_count()
print("Number of GPUs:", gpu_count)
for gpu_index in range(gpu_count):
    gpu_name = torch.cuda.get_device_name(gpu_index)
    print(f"GPU {gpu_index}: {gpu_name}")


  from .autonotebook import tqdm as notebook_tqdm


CUDA available: True
Number of GPUs: 2
GPU 0: NVIDIA L40
GPU 1: NVIDIA L40


In [2]:
# Use the first GPU when CUDA is available, otherwise fall back to the CPU.
DEVICE = "cuda:0" if torch.cuda.is_available() else "cpu"

# Directory that receives the checkpoint and summary files of this sweep.
# The default is a machine-specific absolute path; set the
# BRATS_EXPERIMENT_ROOT environment variable to redirect the outputs without
# editing the notebook.
ROOT = Path(
    os.environ.get(
        "BRATS_EXPERIMENT_ROOT",
        "/home/hoai-linh.dao/Works/BraTS/results/self-experiments/test",
    )
)
K_FOLDS = 8  # number of cross-validation folds passed to the experiment runner
CHECKPOINT_FILE = ROOT / "checkpoint.pkl"  # pickled dict of finished runs, used to resume
SUMMARY_FILE = ROOT / "summary.txt"        # human-readable per-configuration summary
DTS_NAME = "Figshare_x10"                  # dataset identifier passed to the experiment runner

# Active sweep: every (model, case, optimizer) combination below is run.
# NOTE(review): "B16_1K" follows the "{patch}_{net}" naming of the ViT template
# further down, so this list appears to mix a CNN (VGG16) with a ViT — confirm
# that this is intended.
MODEL_NAMES = ["VGG16", "B16_1K"]
CASE_NUMS = ["4"]
OPTIMIZER_NAMES = ["adam"]

# Alternative ViT-only sweep: uncomment this block (and remove the three
# assignments above) to build the model names from patch size x pretraining set.
# MODEL_PATCH_SIZES = ["B16"]
# IMG_NETS = ["21K"]
# MODEL_NAMES = [f"{patch}_{net}" for patch in MODEL_PATCH_SIZES for net in IMG_NETS]
# CASE_NUMS = ["4"]
# OPTIMIZER_NAMES = ["adam"]

# **BENCH TEST**

In [3]:
# ---------------------------------------------------------------------------
# Recover previous progress (if any)
# ---------------------------------------------------------------------------
# SECURITY NOTE: unpickling executes arbitrary code. This is acceptable only
# because the checkpoint is produced by this very cell; never point
# CHECKPOINT_FILE at a file obtained from an untrusted source.
all_results: dict[str, dict] = (
    pickle.loads(CHECKPOINT_FILE.read_bytes()) if CHECKPOINT_FILE.exists() else {}
)

# Failures of this session, keyed like `all_results`. Kept separate (and never
# checkpointed) so that a failed configuration is retried on the next run.
failed_runs: dict[str, str] = {}

# ---------------------------------------------------------------------------
# Build list of runs still pending
# ---------------------------------------------------------------------------
combinations = list(itertools.product(MODEL_NAMES, CASE_NUMS, OPTIMIZER_NAMES))
remaining = [
    (model, case, opt)
    for model, case, opt in combinations
    if f"{model}_case{case}_{opt}" not in all_results
]

# ---------------------------------------------------------------------------
# Run the loop
# ---------------------------------------------------------------------------
ROOT.mkdir(parents=True, exist_ok=True)

# The context manager closes the progress bar even if the loop is interrupted.
with tqdm(remaining, total=len(remaining), desc="Running k-fold experiments") as pbar:
    for model_name, case_num, optimizer in pbar:
        cfg_key = f"{model_name}_case{case_num}_{optimizer}"
        pbar.set_description(cfg_key)

        # Shown in the progress bar only; assumed to mirror the folder layout
        # used by the experiment runner — TODO confirm.
        log_folder = ROOT / "logs" / optimizer / f"{K_FOLDS}fold_{model_name}_case{case_num}"
        pbar.set_postfix(folder=str(log_folder))

        try:
            # Call the function that the setup cell actually imports. The
            # previous spelling, `run_stragified_kfold_experiments`, is not
            # imported anywhere in this notebook, so on a fresh kernel every
            # run raised NameError and was reported as an ordinary failure.
            # NOTE(review): confirm `run_kfold_experiments` accepts exactly
            # these keyword arguments.
            result = run_kfold_experiments(
                model_name      = model_name,
                case_num        = case_num,
                optimizer_name  = optimizer,
                dataset_name    = DTS_NAME,
                experiment_root = ROOT,
                k_folds         = K_FOLDS,
                seed            = 42,  # fixed seed so resumed sweeps stay comparable
                device          = DEVICE
            )
        except Exception as exc:
            # Best-effort sweep: report the failure and move on to the next
            # configuration. KeyboardInterrupt is not an Exception subclass and
            # therefore still aborts the whole cell.
            reason = f"{type(exc).__name__}: {exc}"
            failed_runs[cfg_key] = reason
            tqdm.write(f"[FAIL] {cfg_key}: {reason}")
            continue

        all_results[cfg_key] = result

        # Write to a sibling temp file and rename it over the checkpoint so an
        # interrupt in the middle of the write cannot leave a truncated pickle.
        tmp_checkpoint = CHECKPOINT_FILE.with_name(CHECKPOINT_FILE.name + ".tmp")
        tmp_checkpoint.write_bytes(pickle.dumps(all_results))
        tmp_checkpoint.replace(CHECKPOINT_FILE)

# ---------------------------------------------------------------------------
# Write summary
# ---------------------------------------------------------------------------
with SUMMARY_FILE.open("w", encoding="utf-8") as fh:
    for cfg, res in all_results.items():
        if not isinstance(res, dict):
            fh.write(f"{cfg}: FAILED or no result\n")
            continue
        avg = res.get("avg_val")
        if avg is not None:
            fh.write(f"{cfg}: avg_val = {avg:.4f}\n")
        else:
            fh.write(f"{cfg}: no avg_val found\n")
    # Configurations that raised during this session (not stored in the checkpoint).
    for cfg, reason in failed_runs.items():
        fh.write(f"{cfg}: FAILED ({reason})\n")

if failed_runs:
    print(f"{len(failed_runs)} configuration(s) failed:", ", ".join(failed_runs))
print("All done – see", SUMMARY_FILE)


Train + Validate: 100%|██████████| 1/1 [02:07<00:00, 127.53s/it]home/hoai-linh.dao/Works/BraTS/results/self-experiments/test/logs/adam/8fold_VGG16_case4]

Epoch 1/1 | Train Loss: 0.1662 | Train Acc: 0.9380 | Val Loss: 0.0769 | Val f1_macro: 0.9707






Model weights saved to: /home/hoai-linh.dao/Works/BraTS/results/self-experiments/test/weights/adam/8fold_VGG16_case4/fold1.pth


Train + Validate: 100%|██████████| 1/1 [02:07<00:00, 127.90s/it]

Epoch 1/1 | Train Loss: 0.1577 | Train Acc: 0.9428 | Val Loss: 0.0720 | Val f1_macro: 0.9710






Model weights saved to: /home/hoai-linh.dao/Works/BraTS/results/self-experiments/test/weights/adam/8fold_VGG16_case4/fold2.pth


Train + Validate: 100%|██████████| 1/1 [02:07<00:00, 127.98s/it]

Epoch 1/1 | Train Loss: 0.1527 | Train Acc: 0.9439 | Val Loss: 0.1032 | Val f1_macro: 0.9542






Model weights saved to: /home/hoai-linh.dao/Works/BraTS/results/self-experiments/test/weights/adam/8fold_VGG16_case4/fold3.pth


Train + Validate: 100%|██████████| 1/1 [02:07<00:00, 127.59s/it]

Epoch 1/1 | Train Loss: 0.1641 | Train Acc: 0.9389 | Val Loss: 0.0767 | Val f1_macro: 0.9696






Model weights saved to: /home/hoai-linh.dao/Works/BraTS/results/self-experiments/test/weights/adam/8fold_VGG16_case4/fold4.pth


Train + Validate: 100%|██████████| 1/1 [02:07<00:00, 127.84s/it]

Epoch 1/1 | Train Loss: 0.1577 | Train Acc: 0.9423 | Val Loss: 0.0639 | Val f1_macro: 0.9790






Model weights saved to: /home/hoai-linh.dao/Works/BraTS/results/self-experiments/test/weights/adam/8fold_VGG16_case4/fold5.pth


Train + Validate: 100%|██████████| 1/1 [02:08<00:00, 128.17s/it]

Epoch 1/1 | Train Loss: 0.1684 | Train Acc: 0.9382 | Val Loss: 0.0622 | Val f1_macro: 0.9760






Model weights saved to: /home/hoai-linh.dao/Works/BraTS/results/self-experiments/test/weights/adam/8fold_VGG16_case4/fold6.pth


Train + Validate: 100%|██████████| 1/1 [02:07<00:00, 127.79s/it]

Epoch 1/1 | Train Loss: 0.1686 | Train Acc: 0.9375 | Val Loss: 0.0688 | Val f1_macro: 0.9718






Model weights saved to: /home/hoai-linh.dao/Works/BraTS/results/self-experiments/test/weights/adam/8fold_VGG16_case4/fold7.pth


Train + Validate: 100%|██████████| 1/1 [02:07<00:00, 127.78s/it]

Epoch 1/1 | Train Loss: 0.1618 | Train Acc: 0.9408 | Val Loss: 0.0943 | Val f1_macro: 0.9583



B16_1K_case4_adam:  50%|█████     | 1/2 [17:16<17:16, 1036.04s/it, folder=/home/hoai-linh.dao/Works/BraTS/results/self-experiments/test/logs/adam/8fold_B16_1K_case4]


Model weights saved to: /home/hoai-linh.dao/Works/BraTS/results/self-experiments/test/weights/adam/8fold_VGG16_case4/fold8.pth


Train + Validate: 100%|██████████| 1/1 [03:13<00:00, 193.99s/it]

Epoch 1/1 | Train Loss: 0.1366 | Train Acc: 0.9485 | Val Loss: 0.1278 | Val f1_macro: 0.9448






Model weights saved to: /home/hoai-linh.dao/Works/BraTS/results/self-experiments/test/weights/adam/8fold_B16_1K_case4/fold1.pth


Train + Validate: 100%|██████████| 1/1 [03:14<00:00, 194.93s/it]

Epoch 1/1 | Train Loss: 0.1344 | Train Acc: 0.9495 | Val Loss: 0.0782 | Val f1_macro: 0.9667






Model weights saved to: /home/hoai-linh.dao/Works/BraTS/results/self-experiments/test/weights/adam/8fold_B16_1K_case4/fold2.pth


Train + Validate: 100%|██████████| 1/1 [03:13<00:00, 193.16s/it]

Epoch 1/1 | Train Loss: 0.1314 | Train Acc: 0.9520 | Val Loss: 0.0571 | Val f1_macro: 0.9727






Model weights saved to: /home/hoai-linh.dao/Works/BraTS/results/self-experiments/test/weights/adam/8fold_B16_1K_case4/fold3.pth


Train + Validate:   0%|          | 0/1 [02:11<?, ?it/s]
B16_1K_case4_adam:  50%|█████     | 1/2 [29:18<29:18, 1758.87s/it, folder=/home/hoai-linh.dao/Works/BraTS/results/self-experiments/test/logs/adam/8fold_B16_1K_case4]


KeyboardInterrupt: 