How to train using all the best parameters for each split (but using different models) 4

In [None]:
import os
import pandas as pd

# Set the CUDA device if desired
os.environ["CUDA_VISIBLE_DEVICES"] = "1"

# Path to the CSV file with best parameters per split
csv_path = "/home/mezher/Documents/Deauville_DeepLearning/best_params_summary.csv"
best_params = pd.read_csv(csv_path)

# Loop over each row (each split) and build the command using the tuned parameters
for _, row in best_params.iterrows():
    split = int(row['split'])
    # Build checkpoint path as before
    checkpoint_path = f"/home/mezher/Documents/Deauville_DeepLearning/checkpoints/checkpoint_split{split}_run0.pth"
    
    # Get hyperparameters from the CSV row.
    lr = row['params_lr']              # e.g., 0.0000143417
    batch_size = row['params_batch_size']  # e.g., 128
    wd = row['params_wd']              # e.g., 7.60351e-06
    optimizer = row['params_optimizer']    # e.g., adam
    
    # Use the ft_mode field to decide which flag to pass:
    ft_mode = row['params_ft_mode'].strip().lower()  # should be 'finetune', 'full_retrain' or 'transfer_learning'
    ft_flag = ""
    if ft_mode == "finetune":
        ft_flag = "--finetune"
    elif ft_mode == "transfer_learning":
        ft_flag = "--transfer_learning"
    # for full_retrain, no additional flag is needed
    
    # If lr_scheduler is True, add that flag.
    lr_scheduler_flag = ""
    if str(row['params_lr_scheduler']).strip().lower() in ['true', '1']:
        lr_scheduler_flag = "--lr_scheduler"
    
    # If balance flag is True, add that flag.
    balance_flag = ""
    if str(row['params_balance']).strip().lower() in ['true', '1']:
        balance_flag = "--balance"
    
    # Here we fix the augmentation flag as 4 (or you can also tune it if needed)
    augm_flag = "--augm 4"
    
    # Construct the command string
    command = (
        f"python train.py --split_index {split} --run 4 --nepochs 20 "
        f"--checkpoint {checkpoint_path} "
        f"--lr {lr} --batch_size {batch_size} --wd {wd} "
        f"--optimizer {optimizer} --cls_arch simple {balance_flag} {lr_scheduler_flag} "
        f"--early_stopping {ft_flag} {augm_flag}"
    )
    print("Running command:", command)
    os.system(command)


Similar but just the same checkpoint, probably best approach. 5 

In [None]:
import os
import pandas as pd

# Set the CUDA device if desired
os.environ["CUDA_VISIBLE_DEVICES"] = "1"

# Path to the CSV file with best parameters per split
csv_path = "/home/mezher/Documents/Deauville_DeepLearning/best_params_summary.csv"
best_params = pd.read_csv(csv_path)

# Loop over each row (each split) and build the command using the tuned parameters
for _, row in best_params.iterrows():
    split = int(row['split'])
    # Build checkpoint path as before
    checkpoint_path = f"/home/mezher/Documents/Deauville_DeepLearning/checkpoints/checkpoint_split0_run0.pth"
    
    # Get hyperparameters from the CSV row.
    lr = row['params_lr']              # e.g., 0.0000143417
    batch_size = row['params_batch_size']  # e.g., 128
    wd = row['params_wd']              # e.g., 7.60351e-06
    optimizer = row['params_optimizer']    # e.g., adam
    
    # Use the ft_mode field to decide which flag to pass:
    ft_mode = row['params_ft_mode'].strip().lower()  # should be 'finetune', 'full_retrain' or 'transfer_learning'
    ft_flag = ""
    if ft_mode == "finetune":
        ft_flag = "--finetune"
    elif ft_mode == "transfer_learning":
        ft_flag = "--transfer_learning"
    # for full_retrain, no additional flag is needed
    
    # If lr_scheduler is True, add that flag.
    lr_scheduler_flag = ""
    if str(row['params_lr_scheduler']).strip().lower() in ['true', '1']:
        lr_scheduler_flag = "--lr_scheduler"
    
    # If balance flag is True, add that flag.
    balance_flag = ""
    if str(row['params_balance']).strip().lower() in ['true', '1']:
        balance_flag = "--balance"
    
    # Here we fix the augmentation flag as 4 (or you can also tune it if needed)
    augm_flag = "--augm 4"
    
    # Construct the command string
    command = (
        f"python train.py --split_index {split} --run 5 --nepochs 20 "
        f"--checkpoint {checkpoint_path} "
        f"--lr {lr} --batch_size {batch_size} --wd {wd} "
        f"--optimizer {optimizer} --cls_arch simple {balance_flag} {lr_scheduler_flag} "
        f"--early_stopping {ft_flag} {augm_flag}"
    )
    print("Running command:", command)
    os.system(command)


Different checkpoint modle but same parameter run 12

In [None]:
import os

# FINE TUNED MODEL


os.environ["CUDA_VISIBLE_DEVICES"] = "1"


for split in range(0, 10): 
    checkpoint_path = f"/home/mezher/Documents/Deauville_DeepLearning/checkpoints/checkpoint_split{split}_run0.pth"
    command = (
        f"python train.py --split_index {split} --run 12 --nepochs 20 "
        f"--checkpoint {checkpoint_path} "
        f"--lr 0.0000143417 --batch_size 128 --wd 7.60351e-06 "
        f"--optimizer adam --cls_arch simple --early_stopping --finetune --augm 4"
    )
    print("Running command:", command)
    os.system(command)


Running command: python train.py --split_index 0 --run 12 --nepochs 20 --checkpoint /home/mezher/Documents/Deauville_DeepLearning/checkpoints/checkpoint_split0_run0.pth --lr 0.0000143417 --batch_size 128 --wd 7.60351e-06 --optimizer adam --cls_arch simple --early_stopping --finetune --augm 4
Namespace(output='training_results', normalize=True, checkpoint='/home/mezher/Documents/Deauville_DeepLearning/checkpoints/checkpoint_split0_run0.pth', resume=False, cls_arch='simple', hidden_dim=256, dropout=0.3, optimizer='adam', lr=1.43417e-05, lr_anneal=15, momentum=0.9, wd=7.60351e-06, split_index=0, run=12, batch_size=128, nepochs=20, workers=4, augm=4, balance=False, oversample=False, lr_scheduler=False, early_stopping=True, finetune=True, transfer_learning=False)
Loaded [218/218] keys from checkpoint
Fine-tuning mode enabled: classifier head and last block of feature extractor are trainable.
Datasets train:1948, val:530
Weight of each class, no tumor: 0.2032828282828283, tumor: 0.7967171717

Traceback (most recent call last):
  File "/home/mezher/Documents/Deauville_DeepLearning/train.py", line 389, in <module>
    main()
  File "/home/mezher/Documents/Deauville_DeepLearning/train.py", line 121, in main
    ch = torch.load(args.checkpoint, weights_only=False)
  File "/home/mezher/Documents/Deauville_DeepLearning/thesis/lib/python3.10/site-packages/torch/serialization.py", line 1425, in load
    with _open_file_like(f, "rb") as opened_file:
  File "/home/mezher/Documents/Deauville_DeepLearning/thesis/lib/python3.10/site-packages/torch/serialization.py", line 751, in _open_file_like
    return _open_file(name_or_buffer, mode)
  File "/home/mezher/Documents/Deauville_DeepLearning/thesis/lib/python3.10/site-packages/torch/serialization.py", line 732, in __init__
    super().__init__(open(name, mode))
FileNotFoundError: [Errno 2] No such file or directory: '/home/mezher/Documents/Deauville_DeepLearning/checkpoints/checkpoint_split10_run0.pth'


Running command: python train.py --split_index 11 --run 12 --nepochs 20 --checkpoint /home/mezher/Documents/Deauville_DeepLearning/checkpoints/checkpoint_split11_run0.pth --lr 0.0000143417 --batch_size 128 --wd 7.60351e-06 --optimizer adam --cls_arch simple --early_stopping --finetune --augm 4
Namespace(output='training_results', normalize=True, checkpoint='/home/mezher/Documents/Deauville_DeepLearning/checkpoints/checkpoint_split11_run0.pth', resume=False, cls_arch='simple', hidden_dim=256, dropout=0.3, optimizer='adam', lr=1.43417e-05, lr_anneal=15, momentum=0.9, wd=7.60351e-06, split_index=11, run=12, batch_size=128, nepochs=20, workers=4, augm=4, balance=False, oversample=False, lr_scheduler=False, early_stopping=True, finetune=True, transfer_learning=False)


Traceback (most recent call last):
  File "/home/mezher/Documents/Deauville_DeepLearning/train.py", line 389, in <module>
    main()
  File "/home/mezher/Documents/Deauville_DeepLearning/train.py", line 121, in main
    ch = torch.load(args.checkpoint, weights_only=False)
  File "/home/mezher/Documents/Deauville_DeepLearning/thesis/lib/python3.10/site-packages/torch/serialization.py", line 1425, in load
    with _open_file_like(f, "rb") as opened_file:
  File "/home/mezher/Documents/Deauville_DeepLearning/thesis/lib/python3.10/site-packages/torch/serialization.py", line 751, in _open_file_like
    return _open_file(name_or_buffer, mode)
  File "/home/mezher/Documents/Deauville_DeepLearning/thesis/lib/python3.10/site-packages/torch/serialization.py", line 732, in __init__
    super().__init__(open(name, mode))
FileNotFoundError: [Errno 2] No such file or directory: '/home/mezher/Documents/Deauville_DeepLearning/checkpoints/checkpoint_split11_run0.pth'


Running command: python train.py --split_index 12 --run 12 --nepochs 20 --checkpoint /home/mezher/Documents/Deauville_DeepLearning/checkpoints/checkpoint_split12_run0.pth --lr 0.0000143417 --batch_size 128 --wd 7.60351e-06 --optimizer adam --cls_arch simple --early_stopping --finetune --augm 4
Namespace(output='training_results', normalize=True, checkpoint='/home/mezher/Documents/Deauville_DeepLearning/checkpoints/checkpoint_split12_run0.pth', resume=False, cls_arch='simple', hidden_dim=256, dropout=0.3, optimizer='adam', lr=1.43417e-05, lr_anneal=15, momentum=0.9, wd=7.60351e-06, split_index=12, run=12, batch_size=128, nepochs=20, workers=4, augm=4, balance=False, oversample=False, lr_scheduler=False, early_stopping=True, finetune=True, transfer_learning=False)


Traceback (most recent call last):
  File "/home/mezher/Documents/Deauville_DeepLearning/train.py", line 389, in <module>
    main()
  File "/home/mezher/Documents/Deauville_DeepLearning/train.py", line 121, in main
    ch = torch.load(args.checkpoint, weights_only=False)
  File "/home/mezher/Documents/Deauville_DeepLearning/thesis/lib/python3.10/site-packages/torch/serialization.py", line 1425, in load
    with _open_file_like(f, "rb") as opened_file:
  File "/home/mezher/Documents/Deauville_DeepLearning/thesis/lib/python3.10/site-packages/torch/serialization.py", line 751, in _open_file_like
    return _open_file(name_or_buffer, mode)
  File "/home/mezher/Documents/Deauville_DeepLearning/thesis/lib/python3.10/site-packages/torch/serialization.py", line 732, in __init__
    super().__init__(open(name, mode))
FileNotFoundError: [Errno 2] No such file or directory: '/home/mezher/Documents/Deauville_DeepLearning/checkpoints/checkpoint_split12_run0.pth'


Running command: python train.py --split_index 13 --run 12 --nepochs 20 --checkpoint /home/mezher/Documents/Deauville_DeepLearning/checkpoints/checkpoint_split13_run0.pth --lr 0.0000143417 --batch_size 128 --wd 7.60351e-06 --optimizer adam --cls_arch simple --early_stopping --finetune --augm 4
Namespace(output='training_results', normalize=True, checkpoint='/home/mezher/Documents/Deauville_DeepLearning/checkpoints/checkpoint_split13_run0.pth', resume=False, cls_arch='simple', hidden_dim=256, dropout=0.3, optimizer='adam', lr=1.43417e-05, lr_anneal=15, momentum=0.9, wd=7.60351e-06, split_index=13, run=12, batch_size=128, nepochs=20, workers=4, augm=4, balance=False, oversample=False, lr_scheduler=False, early_stopping=True, finetune=True, transfer_learning=False)


Traceback (most recent call last):
  File "/home/mezher/Documents/Deauville_DeepLearning/train.py", line 389, in <module>
    main()
  File "/home/mezher/Documents/Deauville_DeepLearning/train.py", line 121, in main
    ch = torch.load(args.checkpoint, weights_only=False)
  File "/home/mezher/Documents/Deauville_DeepLearning/thesis/lib/python3.10/site-packages/torch/serialization.py", line 1425, in load
    with _open_file_like(f, "rb") as opened_file:
  File "/home/mezher/Documents/Deauville_DeepLearning/thesis/lib/python3.10/site-packages/torch/serialization.py", line 751, in _open_file_like
    return _open_file(name_or_buffer, mode)
  File "/home/mezher/Documents/Deauville_DeepLearning/thesis/lib/python3.10/site-packages/torch/serialization.py", line 732, in __init__
    super().__init__(open(name, mode))
FileNotFoundError: [Errno 2] No such file or directory: '/home/mezher/Documents/Deauville_DeepLearning/checkpoints/checkpoint_split13_run0.pth'


Running command: python train.py --split_index 14 --run 12 --nepochs 20 --checkpoint /home/mezher/Documents/Deauville_DeepLearning/checkpoints/checkpoint_split14_run0.pth --lr 0.0000143417 --batch_size 128 --wd 7.60351e-06 --optimizer adam --cls_arch simple --early_stopping --finetune --augm 4
Namespace(output='training_results', normalize=True, checkpoint='/home/mezher/Documents/Deauville_DeepLearning/checkpoints/checkpoint_split14_run0.pth', resume=False, cls_arch='simple', hidden_dim=256, dropout=0.3, optimizer='adam', lr=1.43417e-05, lr_anneal=15, momentum=0.9, wd=7.60351e-06, split_index=14, run=12, batch_size=128, nepochs=20, workers=4, augm=4, balance=False, oversample=False, lr_scheduler=False, early_stopping=True, finetune=True, transfer_learning=False)


Traceback (most recent call last):
  File "/home/mezher/Documents/Deauville_DeepLearning/train.py", line 389, in <module>
    main()
  File "/home/mezher/Documents/Deauville_DeepLearning/train.py", line 121, in main
    ch = torch.load(args.checkpoint, weights_only=False)
  File "/home/mezher/Documents/Deauville_DeepLearning/thesis/lib/python3.10/site-packages/torch/serialization.py", line 1425, in load
    with _open_file_like(f, "rb") as opened_file:
  File "/home/mezher/Documents/Deauville_DeepLearning/thesis/lib/python3.10/site-packages/torch/serialization.py", line 751, in _open_file_like
    return _open_file(name_or_buffer, mode)
  File "/home/mezher/Documents/Deauville_DeepLearning/thesis/lib/python3.10/site-packages/torch/serialization.py", line 732, in __init__
    super().__init__(open(name, mode))
FileNotFoundError: [Errno 2] No such file or directory: '/home/mezher/Documents/Deauville_DeepLearning/checkpoints/checkpoint_split14_run0.pth'


Running command: python train.py --split_index 15 --run 12 --nepochs 20 --checkpoint /home/mezher/Documents/Deauville_DeepLearning/checkpoints/checkpoint_split15_run0.pth --lr 0.0000143417 --batch_size 128 --wd 7.60351e-06 --optimizer adam --cls_arch simple --early_stopping --finetune --augm 4
Namespace(output='training_results', normalize=True, checkpoint='/home/mezher/Documents/Deauville_DeepLearning/checkpoints/checkpoint_split15_run0.pth', resume=False, cls_arch='simple', hidden_dim=256, dropout=0.3, optimizer='adam', lr=1.43417e-05, lr_anneal=15, momentum=0.9, wd=7.60351e-06, split_index=15, run=12, batch_size=128, nepochs=20, workers=4, augm=4, balance=False, oversample=False, lr_scheduler=False, early_stopping=True, finetune=True, transfer_learning=False)


Traceback (most recent call last):
  File "/home/mezher/Documents/Deauville_DeepLearning/train.py", line 389, in <module>
    main()
  File "/home/mezher/Documents/Deauville_DeepLearning/train.py", line 121, in main
    ch = torch.load(args.checkpoint, weights_only=False)
  File "/home/mezher/Documents/Deauville_DeepLearning/thesis/lib/python3.10/site-packages/torch/serialization.py", line 1425, in load
    with _open_file_like(f, "rb") as opened_file:
  File "/home/mezher/Documents/Deauville_DeepLearning/thesis/lib/python3.10/site-packages/torch/serialization.py", line 751, in _open_file_like
    return _open_file(name_or_buffer, mode)
  File "/home/mezher/Documents/Deauville_DeepLearning/thesis/lib/python3.10/site-packages/torch/serialization.py", line 732, in __init__
    super().__init__(open(name, mode))
FileNotFoundError: [Errno 2] No such file or directory: '/home/mezher/Documents/Deauville_DeepLearning/checkpoints/checkpoint_split15_run0.pth'


Running command: python train.py --split_index 16 --run 12 --nepochs 20 --checkpoint /home/mezher/Documents/Deauville_DeepLearning/checkpoints/checkpoint_split16_run0.pth --lr 0.0000143417 --batch_size 128 --wd 7.60351e-06 --optimizer adam --cls_arch simple --early_stopping --finetune --augm 4
Namespace(output='training_results', normalize=True, checkpoint='/home/mezher/Documents/Deauville_DeepLearning/checkpoints/checkpoint_split16_run0.pth', resume=False, cls_arch='simple', hidden_dim=256, dropout=0.3, optimizer='adam', lr=1.43417e-05, lr_anneal=15, momentum=0.9, wd=7.60351e-06, split_index=16, run=12, batch_size=128, nepochs=20, workers=4, augm=4, balance=False, oversample=False, lr_scheduler=False, early_stopping=True, finetune=True, transfer_learning=False)


Traceback (most recent call last):
  File "/home/mezher/Documents/Deauville_DeepLearning/train.py", line 389, in <module>
    main()
  File "/home/mezher/Documents/Deauville_DeepLearning/train.py", line 121, in main
    ch = torch.load(args.checkpoint, weights_only=False)
  File "/home/mezher/Documents/Deauville_DeepLearning/thesis/lib/python3.10/site-packages/torch/serialization.py", line 1425, in load
    with _open_file_like(f, "rb") as opened_file:
  File "/home/mezher/Documents/Deauville_DeepLearning/thesis/lib/python3.10/site-packages/torch/serialization.py", line 751, in _open_file_like
    return _open_file(name_or_buffer, mode)
  File "/home/mezher/Documents/Deauville_DeepLearning/thesis/lib/python3.10/site-packages/torch/serialization.py", line 732, in __init__
    super().__init__(open(name, mode))
FileNotFoundError: [Errno 2] No such file or directory: '/home/mezher/Documents/Deauville_DeepLearning/checkpoints/checkpoint_split16_run0.pth'


Running command: python train.py --split_index 17 --run 12 --nepochs 20 --checkpoint /home/mezher/Documents/Deauville_DeepLearning/checkpoints/checkpoint_split17_run0.pth --lr 0.0000143417 --batch_size 128 --wd 7.60351e-06 --optimizer adam --cls_arch simple --early_stopping --finetune --augm 4
Namespace(output='training_results', normalize=True, checkpoint='/home/mezher/Documents/Deauville_DeepLearning/checkpoints/checkpoint_split17_run0.pth', resume=False, cls_arch='simple', hidden_dim=256, dropout=0.3, optimizer='adam', lr=1.43417e-05, lr_anneal=15, momentum=0.9, wd=7.60351e-06, split_index=17, run=12, batch_size=128, nepochs=20, workers=4, augm=4, balance=False, oversample=False, lr_scheduler=False, early_stopping=True, finetune=True, transfer_learning=False)


Traceback (most recent call last):
  File "/home/mezher/Documents/Deauville_DeepLearning/train.py", line 389, in <module>
    main()
  File "/home/mezher/Documents/Deauville_DeepLearning/train.py", line 121, in main
    ch = torch.load(args.checkpoint, weights_only=False)
  File "/home/mezher/Documents/Deauville_DeepLearning/thesis/lib/python3.10/site-packages/torch/serialization.py", line 1425, in load
    with _open_file_like(f, "rb") as opened_file:
  File "/home/mezher/Documents/Deauville_DeepLearning/thesis/lib/python3.10/site-packages/torch/serialization.py", line 751, in _open_file_like
    return _open_file(name_or_buffer, mode)
  File "/home/mezher/Documents/Deauville_DeepLearning/thesis/lib/python3.10/site-packages/torch/serialization.py", line 732, in __init__
    super().__init__(open(name, mode))
FileNotFoundError: [Errno 2] No such file or directory: '/home/mezher/Documents/Deauville_DeepLearning/checkpoints/checkpoint_split17_run0.pth'


Running command: python train.py --split_index 18 --run 12 --nepochs 20 --checkpoint /home/mezher/Documents/Deauville_DeepLearning/checkpoints/checkpoint_split18_run0.pth --lr 0.0000143417 --batch_size 128 --wd 7.60351e-06 --optimizer adam --cls_arch simple --early_stopping --finetune --augm 4
Namespace(output='training_results', normalize=True, checkpoint='/home/mezher/Documents/Deauville_DeepLearning/checkpoints/checkpoint_split18_run0.pth', resume=False, cls_arch='simple', hidden_dim=256, dropout=0.3, optimizer='adam', lr=1.43417e-05, lr_anneal=15, momentum=0.9, wd=7.60351e-06, split_index=18, run=12, batch_size=128, nepochs=20, workers=4, augm=4, balance=False, oversample=False, lr_scheduler=False, early_stopping=True, finetune=True, transfer_learning=False)


Traceback (most recent call last):
  File "/home/mezher/Documents/Deauville_DeepLearning/train.py", line 389, in <module>
    main()
  File "/home/mezher/Documents/Deauville_DeepLearning/train.py", line 121, in main
    ch = torch.load(args.checkpoint, weights_only=False)
  File "/home/mezher/Documents/Deauville_DeepLearning/thesis/lib/python3.10/site-packages/torch/serialization.py", line 1425, in load
    with _open_file_like(f, "rb") as opened_file:
  File "/home/mezher/Documents/Deauville_DeepLearning/thesis/lib/python3.10/site-packages/torch/serialization.py", line 751, in _open_file_like
    return _open_file(name_or_buffer, mode)
  File "/home/mezher/Documents/Deauville_DeepLearning/thesis/lib/python3.10/site-packages/torch/serialization.py", line 732, in __init__
    super().__init__(open(name, mode))
FileNotFoundError: [Errno 2] No such file or directory: '/home/mezher/Documents/Deauville_DeepLearning/checkpoints/checkpoint_split18_run0.pth'


Run 20 - One checkpoint model loded and we use parameter of the best split AUC - basically what Haggstrom does.

In [2]:
import os

#Best parameter from optuna fine tuning

os.environ["CUDA_VISIBLE_DEVICES"] = "1"


for split in range(0,20): 
    checkpoint_path = f"/home/mezher/Documents/Deauville_DeepLearning/checkpoints/checkpoint_split0_run0.pth"
    command = (
        f"python train.py --split_index {split} --run 20 --nepochs 20 "
        f"--checkpoint {checkpoint_path} "
        f"--lr 0.0000143417 --batch_size 128 --wd 7.60351e-06 "
        f"--optimizer adam --cls_arch simple --early_stopping --finetune --augm 4"
    )
    print("Running command:", command)
    os.system(command)


Running command: python train.py --split_index 0 --run 20 --nepochs 20 --checkpoint /home/mezher/Documents/Deauville_DeepLearning/checkpoints/checkpoint_split0_run0.pth --lr 0.0000143417 --batch_size 128 --wd 7.60351e-06 --optimizer adam --cls_arch simple --early_stopping --finetune --augm 4
Namespace(output='training_results', normalize=True, checkpoint='/home/mezher/Documents/Deauville_DeepLearning/checkpoints/checkpoint_split0_run0.pth', resume=False, cls_arch='simple', hidden_dim=256, dropout=0.3, optimizer='adam', lr=1.43417e-05, lr_anneal=15, momentum=0.9, wd=7.60351e-06, split_index=0, run=20, batch_size=128, nepochs=20, workers=4, augm=4, balance=False, oversample=False, lr_scheduler=False, early_stopping=True, finetune=True, transfer_learning=False)
Loaded [218/218] keys from checkpoint
Fine-tuning mode enabled: classifier head and last block of feature extractor are trainable.
Datasets train:1948, val:530
Weight of each class, no tumor: 0.2032828282828283, tumor: 0.7967171717

From scratch - run 6

In [1]:

import os

# FROM SCRATCH MODELS


# Optimal configuration based on your optimization results:
# Learning Rate: 0.00104
# Batch Size: 64
# Optimizer: Adam
# Weight Decay: 0.000138
# Learning Rate Scheduler: Enabled
# Balance Loss: Enabled
# Classifier Architecture: simple

os.environ["CUDA_VISIBLE_DEVICES"] = "0"

for split in range(20):
    command = (
        f"python train.py --split_index {split} --run 6 --nepochs 20 "
        f"--lr 0.00104 --batch_size 64 --wd 0.000138 --oversample --lr_scheduler "
        f"--optimizer adam --cls_arch simple --early_stopping --augm 4"
    )
    print("Running command:", command)
    os.system(command)


Running command: python train.py --split_index 0 --run 6 --nepochs 20 --lr 0.00104 --batch_size 64 --wd 0.000138 --oversample --lr_scheduler --optimizer adam --cls_arch simple --early_stopping --augm 4
Namespace(output='training_results', normalize=True, checkpoint='', resume=False, cls_arch='simple', hidden_dim=256, dropout=0.3, optimizer='adam', lr=0.00104, lr_anneal=15, momentum=0.9, wd=0.000138, split_index=0, run=6, batch_size=64, nepochs=20, workers=4, augm=4, balance=False, oversample=True, lr_scheduler=True, early_stopping=True, finetune=False, transfer_learning=False)
Datasets train:1948, val:530
Weight of each class, no tumor: 0.2032828282828283, tumor: 0.7967171717171717
INFO: Initializing learning rate scheduler
INFO: Initializing early stopping




Class counts: [1536  412]
Using oversampling with WeightedRandomSampler for training.
Epoch: [0/20]	Loss: nan	Train AUC: 0.4990	Val AUC: 0.5029
Epoch: [1/20]	Loss: 0.660246	Train AUC: 0.6843	Val AUC: 0.6470
Epoch: [2/20]	Loss: 0.535453	Train AUC: 0.8308	Val AUC: 0.7738
Epoch: [3/20]	Loss: 0.469450	Train AUC: 0.8558	Val AUC: 0.7820
Epoch: [4/20]	Loss: 0.420316	Train AUC: 0.8299	Val AUC: 0.6994
INFO: Early stopping counter 1 of 5
Epoch: [5/20]	Loss: 0.437765	Train AUC: 0.9301	Val AUC: 0.7377
INFO: Early stopping counter 2 of 5
Epoch: [6/20]	Loss: 0.298763	Train AUC: 0.9591	Val AUC: 0.7738
INFO: Early stopping counter 3 of 5
Epoch: [7/20]	Loss: 0.282108	Train AUC: 0.9768	Val AUC: 0.7515
INFO: Early stopping counter 4 of 5
Epoch: [8/20]	Loss: 0.194704	Train AUC: 0.9831	Val AUC: 0.7868
Epoch: [9/20]	Loss: 0.155622	Train AUC: 0.9831	Val AUC: 0.7928
Epoch: [10/20]	Loss: 0.132341	Train AUC: 0.9909	Val AUC: 0.7852
INFO: Early stopping counter 5 of 5
INFO: Early stopping
Running command: python 



Class counts: [1578  436]
Using oversampling with WeightedRandomSampler for training.
Epoch: [0/20]	Loss: nan	Train AUC: 0.4408	Val AUC: 0.4152
Epoch: [1/20]	Loss: 0.613683	Train AUC: 0.7525	Val AUC: 0.7362
Epoch: [2/20]	Loss: 0.505517	Train AUC: 0.8588	Val AUC: 0.8238
Epoch: [3/20]	Loss: 0.442865	Train AUC: 0.8261	Val AUC: 0.7129
INFO: Early stopping counter 1 of 5
Epoch: [4/20]	Loss: 0.400313	Train AUC: 0.8537	Val AUC: 0.7453
INFO: Early stopping counter 2 of 5
Epoch: [5/20]	Loss: 0.306078	Train AUC: 0.9667	Val AUC: 0.8377
Epoch: [6/20]	Loss: 0.237853	Train AUC: 0.9731	Val AUC: 0.8409
Epoch: [7/20]	Loss: 0.190656	Train AUC: 0.9856	Val AUC: 0.8219
INFO: Early stopping counter 3 of 5
Epoch: [8/20]	Loss: 0.166337	Train AUC: 0.9915	Val AUC: 0.8012
INFO: Early stopping counter 4 of 5
Epoch: [9/20]	Loss: 0.112347	Train AUC: 0.9943	Val AUC: 0.8174
INFO: Early stopping counter 5 of 5
INFO: Early stopping
Running command: python train.py --split_index 2 --run 6 --nepochs 20 --lr 0.00104 --bat



Class counts: [1572  392]
Using oversampling with WeightedRandomSampler for training.
Epoch: [0/20]	Loss: nan	Train AUC: 0.5691	Val AUC: 0.5213
Epoch: [1/20]	Loss: 0.592767	Train AUC: 0.6410	Val AUC: 0.5963
Epoch: [2/20]	Loss: 0.466768	Train AUC: 0.8758	Val AUC: 0.7707
Epoch: [3/20]	Loss: 0.422492	Train AUC: 0.9155	Val AUC: 0.7388
INFO: Early stopping counter 1 of 5
Epoch: [4/20]	Loss: 0.377067	Train AUC: 0.9109	Val AUC: 0.7340
INFO: Early stopping counter 2 of 5
Epoch: [5/20]	Loss: 0.310008	Train AUC: 0.9636	Val AUC: 0.7475
INFO: Early stopping counter 3 of 5
Epoch: [6/20]	Loss: 0.241622	Train AUC: 0.9774	Val AUC: 0.7270
INFO: Early stopping counter 4 of 5
Epoch: [7/20]	Loss: 0.178372	Train AUC: 0.9851	Val AUC: 0.7634
INFO: Early stopping counter 5 of 5
INFO: Early stopping
Running command: python train.py --split_index 3 --run 6 --nepochs 20 --lr 0.00104 --batch_size 64 --wd 0.000138 --oversample --lr_scheduler --optimizer adam --cls_arch simple --early_stopping --augm 4
Namespace(ou



Class counts: [1534  448]
Using oversampling with WeightedRandomSampler for training.
Epoch: [0/20]	Loss: nan	Train AUC: 0.4761	Val AUC: 0.4569
Epoch: [1/20]	Loss: 0.636020	Train AUC: 0.7806	Val AUC: 0.7405
Epoch: [2/20]	Loss: 0.541722	Train AUC: 0.8168	Val AUC: 0.8106
Epoch: [3/20]	Loss: 0.462494	Train AUC: 0.8644	Val AUC: 0.8556
Epoch: [4/20]	Loss: 0.410891	Train AUC: 0.8732	Val AUC: 0.7573
INFO: Early stopping counter 1 of 5
Epoch: [5/20]	Loss: 0.390792	Train AUC: 0.9154	Val AUC: 0.8270
INFO: Early stopping counter 2 of 5
Epoch: [6/20]	Loss: 0.307658	Train AUC: 0.9662	Val AUC: 0.8453
INFO: Early stopping counter 3 of 5
Epoch: [7/20]	Loss: 0.224327	Train AUC: 0.9766	Val AUC: 0.8457
INFO: Early stopping counter 4 of 5
Epoch: [8/20]	Loss: 0.200631	Train AUC: 0.9869	Val AUC: 0.8408
INFO: Early stopping counter 5 of 5
INFO: Early stopping
Running command: python train.py --split_index 4 --run 6 --nepochs 20 --lr 0.00104 --batch_size 64 --wd 0.000138 --oversample --lr_scheduler --optimize



Class counts: [1528  420]
Using oversampling with WeightedRandomSampler for training.
Epoch: [0/20]	Loss: nan	Train AUC: 0.5037	Val AUC: 0.5156
Epoch: [1/20]	Loss: 0.636075	Train AUC: 0.5948	Val AUC: 0.6082
Epoch: [2/20]	Loss: 0.532932	Train AUC: 0.7942	Val AUC: 0.7060
Epoch: [3/20]	Loss: 0.479219	Train AUC: 0.8358	Val AUC: 0.7504
Epoch: [4/20]	Loss: 0.456633	Train AUC: 0.8332	Val AUC: 0.7281
INFO: Early stopping counter 1 of 5
Epoch: [5/20]	Loss: 0.396068	Train AUC: 0.8993	Val AUC: 0.7378
INFO: Early stopping counter 2 of 5
Epoch: [6/20]	Loss: 0.330483	Train AUC: 0.9503	Val AUC: 0.7737
Epoch: [7/20]	Loss: 0.250793	Train AUC: 0.9740	Val AUC: 0.7791
Epoch: [8/20]	Loss: 0.198753	Train AUC: 0.9834	Val AUC: 0.7847
Epoch: [9/20]	Loss: 0.173603	Train AUC: 0.9890	Val AUC: 0.8005
Epoch: [10/20]	Loss: 0.135876	Train AUC: 0.9920	Val AUC: 0.7809
INFO: Early stopping counter 3 of 5
Epoch: [11/20]	Loss: 0.081537	Train AUC: 0.9936	Val AUC: 0.7871
INFO: Early stopping counter 4 of 5
Epoch: [12/20]	Lo



Class counts: [1552  424]
Using oversampling with WeightedRandomSampler for training.
Epoch: [0/20]	Loss: nan	Train AUC: 0.5298	Val AUC: 0.4762
Epoch: [1/20]	Loss: 0.625531	Train AUC: 0.8092	Val AUC: 0.7053
Epoch: [2/20]	Loss: 0.522408	Train AUC: 0.8323	Val AUC: 0.7208
Epoch: [3/20]	Loss: 0.450233	Train AUC: 0.8554	Val AUC: 0.7140
INFO: Early stopping counter 1 of 5
Epoch: [4/20]	Loss: 0.420606	Train AUC: 0.9080	Val AUC: 0.7505
Epoch: [5/20]	Loss: 0.367909	Train AUC: 0.8918	Val AUC: 0.7337
INFO: Early stopping counter 2 of 5
Epoch: [6/20]	Loss: 0.323844	Train AUC: 0.8961	Val AUC: 0.7593
Epoch: [7/20]	Loss: 0.321332	Train AUC: 0.9180	Val AUC: 0.6941
INFO: Early stopping counter 3 of 5
Epoch: [8/20]	Loss: 0.278447	Train AUC: 0.9649	Val AUC: 0.7394
INFO: Early stopping counter 4 of 5
Epoch: [9/20]	Loss: 0.176538	Train AUC: 0.9910	Val AUC: 0.7793
Epoch: [10/20]	Loss: 0.114085	Train AUC: 0.9953	Val AUC: 0.7729
INFO: Early stopping counter 5 of 5
INFO: Early stopping
Running command: python 



Class counts: [1548  446]
Using oversampling with WeightedRandomSampler for training.
Epoch: [0/20]	Loss: nan	Train AUC: 0.4417	Val AUC: 0.5218
Epoch: [1/20]	Loss: 0.633162	Train AUC: 0.7320	Val AUC: 0.7227
Epoch: [2/20]	Loss: 0.548962	Train AUC: 0.8253	Val AUC: 0.7908
Epoch: [3/20]	Loss: 0.502096	Train AUC: 0.8134	Val AUC: 0.7227
INFO: Early stopping counter 1 of 5
Epoch: [4/20]	Loss: 0.441330	Train AUC: 0.7861	Val AUC: 0.6855
INFO: Early stopping counter 2 of 5
Epoch: [5/20]	Loss: 0.435815	Train AUC: 0.9345	Val AUC: 0.8147
Epoch: [6/20]	Loss: 0.337939	Train AUC: 0.9545	Val AUC: 0.8157
Epoch: [7/20]	Loss: 0.268582	Train AUC: 0.9580	Val AUC: 0.8017
INFO: Early stopping counter 3 of 5
Epoch: [8/20]	Loss: 0.250354	Train AUC: 0.9712	Val AUC: 0.8031
INFO: Early stopping counter 4 of 5
Epoch: [9/20]	Loss: 0.217689	Train AUC: 0.9796	Val AUC: 0.8018
INFO: Early stopping counter 5 of 5
INFO: Early stopping
Running command: python train.py --split_index 7 --run 6 --nepochs 20 --lr 0.00104 --bat



Class counts: [1596  410]
Using oversampling with WeightedRandomSampler for training.
Epoch: [0/20]	Loss: nan	Train AUC: 0.5133	Val AUC: 0.5029
Epoch: [1/20]	Loss: 0.644530	Train AUC: 0.7346	Val AUC: 0.6209
Epoch: [2/20]	Loss: 0.518785	Train AUC: 0.7453	Val AUC: 0.7123
Epoch: [3/20]	Loss: 0.493655	Train AUC: 0.8610	Val AUC: 0.7581
Epoch: [4/20]	Loss: 0.423748	Train AUC: 0.8930	Val AUC: 0.7671
Epoch: [5/20]	Loss: 0.405121	Train AUC: 0.9234	Val AUC: 0.7324
INFO: Early stopping counter 1 of 5
Epoch: [6/20]	Loss: 0.325236	Train AUC: 0.9039	Val AUC: 0.7285
INFO: Early stopping counter 2 of 5
Epoch: [7/20]	Loss: 0.309364	Train AUC: 0.9650	Val AUC: 0.7595
INFO: Early stopping counter 3 of 5
Epoch: [8/20]	Loss: 0.244726	Train AUC: 0.9783	Val AUC: 0.7552
INFO: Early stopping counter 4 of 5
Epoch: [9/20]	Loss: 0.185433	Train AUC: 0.9869	Val AUC: 0.7647
INFO: Early stopping counter 5 of 5
INFO: Early stopping
Running command: python train.py --split_index 8 --run 6 --nepochs 20 --lr 0.00104 --bat



Class counts: [1564  410]
Using oversampling with WeightedRandomSampler for training.
Epoch: [0/20]	Loss: nan	Train AUC: 0.5258	Val AUC: 0.5120
Epoch: [1/20]	Loss: 0.629366	Train AUC: 0.7488	Val AUC: 0.6448
Epoch: [2/20]	Loss: 0.505593	Train AUC: 0.8290	Val AUC: 0.6917
Epoch: [3/20]	Loss: 0.454919	Train AUC: 0.8616	Val AUC: 0.7383
Epoch: [4/20]	Loss: 0.408379	Train AUC: 0.9108	Val AUC: 0.7749
Epoch: [5/20]	Loss: 0.360621	Train AUC: 0.8448	Val AUC: 0.6484
INFO: Early stopping counter 1 of 5
Epoch: [6/20]	Loss: 0.320322	Train AUC: 0.9413	Val AUC: 0.7627
INFO: Early stopping counter 2 of 5
Epoch: [7/20]	Loss: 0.255537	Train AUC: 0.9810	Val AUC: 0.7491
INFO: Early stopping counter 3 of 5
Epoch: [8/20]	Loss: 0.191901	Train AUC: 0.9873	Val AUC: 0.7715
INFO: Early stopping counter 4 of 5
Epoch: [9/20]	Loss: 0.142556	Train AUC: 0.9926	Val AUC: 0.7540
INFO: Early stopping counter 5 of 5
INFO: Early stopping
Running command: python train.py --split_index 9 --run 6 --nepochs 20 --lr 0.00104 --bat



Class counts: [1576  416]
Using oversampling with WeightedRandomSampler for training.
Epoch: [0/20]	Loss: nan	Train AUC: 0.4811	Val AUC: 0.4646
Epoch: [1/20]	Loss: 0.611483	Train AUC: 0.6760	Val AUC: 0.6964
Epoch: [2/20]	Loss: 0.502089	Train AUC: 0.7772	Val AUC: 0.6816
INFO: Early stopping counter 1 of 5
Epoch: [3/20]	Loss: 0.496864	Train AUC: 0.8262	Val AUC: 0.8039
Epoch: [4/20]	Loss: 0.453579	Train AUC: 0.8808	Val AUC: 0.8070
Epoch: [5/20]	Loss: 0.435892	Train AUC: 0.8374	Val AUC: 0.7378
INFO: Early stopping counter 2 of 5
Epoch: [6/20]	Loss: 0.377718	Train AUC: 0.8484	Val AUC: 0.7913
INFO: Early stopping counter 3 of 5
Epoch: [7/20]	Loss: 0.341626	Train AUC: 0.9635	Val AUC: 0.8831
Epoch: [8/20]	Loss: 0.259626	Train AUC: 0.9775	Val AUC: 0.8625
INFO: Early stopping counter 4 of 5
Epoch: [9/20]	Loss: 0.197897	Train AUC: 0.9840	Val AUC: 0.8651
INFO: Early stopping counter 5 of 5
INFO: Early stopping
Running command: python train.py --split_index 10 --run 6 --nepochs 20 --lr 0.00104 --ba



Class counts: [1584  386]
Using oversampling with WeightedRandomSampler for training.
Epoch: [0/20]	Loss: nan	Train AUC: 0.5121	Val AUC: 0.5311
Epoch: [1/20]	Loss: 0.622197	Train AUC: 0.8272	Val AUC: 0.7209
Epoch: [2/20]	Loss: 0.469006	Train AUC: 0.8353	Val AUC: 0.7739
Epoch: [3/20]	Loss: 0.448980	Train AUC: 0.8358	Val AUC: 0.6802
INFO: Early stopping counter 1 of 5
Epoch: [4/20]	Loss: 0.411833	Train AUC: 0.9132	Val AUC: 0.7759
Epoch: [5/20]	Loss: 0.363203	Train AUC: 0.9037	Val AUC: 0.7958
Epoch: [6/20]	Loss: 0.317544	Train AUC: 0.8714	Val AUC: 0.7324
INFO: Early stopping counter 2 of 5
Epoch: [7/20]	Loss: 0.307576	Train AUC: 0.9285	Val AUC: 0.7356
INFO: Early stopping counter 3 of 5
Epoch: [8/20]	Loss: 0.217745	Train AUC: 0.9830	Val AUC: 0.8025
Epoch: [9/20]	Loss: 0.155009	Train AUC: 0.9901	Val AUC: 0.7920
INFO: Early stopping counter 4 of 5
Epoch: [10/20]	Loss: 0.110008	Train AUC: 0.9948	Val AUC: 0.8051
Epoch: [11/20]	Loss: 0.098994	Train AUC: 0.9950	Val AUC: 0.7912
INFO: Early stopp



Class counts: [1554  406]
Using oversampling with WeightedRandomSampler for training.
Epoch: [0/20]	Loss: nan	Train AUC: 0.5223	Val AUC: 0.5026
Epoch: [1/20]	Loss: 0.587924	Train AUC: 0.7803	Val AUC: 0.6357
Epoch: [2/20]	Loss: 0.485747	Train AUC: 0.8333	Val AUC: 0.6649
Epoch: [3/20]	Loss: 0.434641	Train AUC: 0.9021	Val AUC: 0.6875
Epoch: [4/20]	Loss: 0.378905	Train AUC: 0.8819	Val AUC: 0.6822
INFO: Early stopping counter 1 of 5
Epoch: [5/20]	Loss: 0.335957	Train AUC: 0.9388	Val AUC: 0.7295
Epoch: [6/20]	Loss: 0.302758	Train AUC: 0.9500	Val AUC: 0.7254
INFO: Early stopping counter 2 of 5
Epoch: [7/20]	Loss: 0.227496	Train AUC: 0.9766	Val AUC: 0.7580
Epoch: [8/20]	Loss: 0.229476	Train AUC: 0.9755	Val AUC: 0.6848
INFO: Early stopping counter 3 of 5
Epoch: [9/20]	Loss: 0.200088	Train AUC: 0.9793	Val AUC: 0.7363
INFO: Early stopping counter 4 of 5
Epoch: [10/20]	Loss: 0.121088	Train AUC: 0.9971	Val AUC: 0.7235
INFO: Early stopping counter 5 of 5
INFO: Early stopping
Running command: python 



Class counts: [1582  422]
Using oversampling with WeightedRandomSampler for training.
Epoch: [0/20]	Loss: nan	Train AUC: 0.5089	Val AUC: 0.5229
Epoch: [1/20]	Loss: 0.637461	Train AUC: 0.7730	Val AUC: 0.7123
Epoch: [2/20]	Loss: 0.504947	Train AUC: 0.8180	Val AUC: 0.6726
INFO: Early stopping counter 1 of 5
Epoch: [3/20]	Loss: 0.485815	Train AUC: 0.7965	Val AUC: 0.6872
INFO: Early stopping counter 2 of 5
Epoch: [4/20]	Loss: 0.440784	Train AUC: 0.9192	Val AUC: 0.7437
Epoch: [5/20]	Loss: 0.338198	Train AUC: 0.9469	Val AUC: 0.7325
INFO: Early stopping counter 3 of 5
Epoch: [6/20]	Loss: 0.305730	Train AUC: 0.9570	Val AUC: 0.7589
Epoch: [7/20]	Loss: 0.258195	Train AUC: 0.9766	Val AUC: 0.7595
INFO: Early stopping counter 4 of 5
Epoch: [8/20]	Loss: 0.197678	Train AUC: 0.9780	Val AUC: 0.7624
Epoch: [9/20]	Loss: 0.191392	Train AUC: 0.9863	Val AUC: 0.7530
INFO: Early stopping counter 5 of 5
INFO: Early stopping
Running command: python train.py --split_index 13 --run 6 --nepochs 20 --lr 0.00104 --ba



Class counts: [1566  378]
Using oversampling with WeightedRandomSampler for training.
Epoch: [0/20]	Loss: nan	Train AUC: 0.4679	Val AUC: 0.4611
Epoch: [1/20]	Loss: 0.609684	Train AUC: 0.7636	Val AUC: 0.7206
Epoch: [2/20]	Loss: 0.471440	Train AUC: 0.8083	Val AUC: 0.7112
INFO: Early stopping counter 1 of 5
Epoch: [3/20]	Loss: 0.473091	Train AUC: 0.7266	Val AUC: 0.6520
INFO: Early stopping counter 2 of 5
Epoch: [4/20]	Loss: 0.426204	Train AUC: 0.9088	Val AUC: 0.7678
Epoch: [5/20]	Loss: 0.358908	Train AUC: 0.9364	Val AUC: 0.7726
Epoch: [6/20]	Loss: 0.317828	Train AUC: 0.9546	Val AUC: 0.7620
INFO: Early stopping counter 3 of 5
Epoch: [7/20]	Loss: 0.232331	Train AUC: 0.9707	Val AUC: 0.7293
INFO: Early stopping counter 4 of 5
Epoch: [8/20]	Loss: 0.234558	Train AUC: 0.9771	Val AUC: 0.7481
INFO: Early stopping counter 5 of 5
INFO: Early stopping
Running command: python train.py --split_index 14 --run 6 --nepochs 20 --lr 0.00104 --batch_size 64 --wd 0.000138 --oversample --lr_scheduler --optimiz



Class counts: [1618  420]
Using oversampling with WeightedRandomSampler for training.
Epoch: [0/20]	Loss: nan	Train AUC: 0.5313	Val AUC: 0.5285
Epoch: [1/20]	Loss: 0.626747	Train AUC: 0.6524	Val AUC: 0.6874
Epoch: [2/20]	Loss: 0.529170	Train AUC: 0.7759	Val AUC: 0.7273
Epoch: [3/20]	Loss: 0.464573	Train AUC: 0.8757	Val AUC: 0.7535
Epoch: [4/20]	Loss: 0.429672	Train AUC: 0.7881	Val AUC: 0.7071
INFO: Early stopping counter 1 of 5
Epoch: [5/20]	Loss: 0.374591	Train AUC: 0.8960	Val AUC: 0.7901
Epoch: [6/20]	Loss: 0.305931	Train AUC: 0.9364	Val AUC: 0.7342
INFO: Early stopping counter 2 of 5
Epoch: [7/20]	Loss: 0.316605	Train AUC: 0.8552	Val AUC: 0.6585
INFO: Early stopping counter 3 of 5
Epoch: [8/20]	Loss: 0.261488	Train AUC: 0.9780	Val AUC: 0.8030
Epoch: [9/20]	Loss: 0.177736	Train AUC: 0.9871	Val AUC: 0.7949
INFO: Early stopping counter 4 of 5
Epoch: [10/20]	Loss: 0.151339	Train AUC: 0.9922	Val AUC: 0.7825
INFO: Early stopping counter 5 of 5
INFO: Early stopping
Running command: python 



Class counts: [1566  410]
Using oversampling with WeightedRandomSampler for training.
Epoch: [0/20]	Loss: nan	Train AUC: 0.4490	Val AUC: 0.4887
Epoch: [1/20]	Loss: 0.574521	Train AUC: 0.8639	Val AUC: 0.6881
Epoch: [2/20]	Loss: 0.470988	Train AUC: 0.8346	Val AUC: 0.6934
Epoch: [3/20]	Loss: 0.425342	Train AUC: 0.8620	Val AUC: 0.6621
INFO: Early stopping counter 1 of 5
Epoch: [4/20]	Loss: 0.386893	Train AUC: 0.9208	Val AUC: 0.7120
Epoch: [5/20]	Loss: 0.335419	Train AUC: 0.9274	Val AUC: 0.6695
INFO: Early stopping counter 2 of 5
Epoch: [6/20]	Loss: 0.294325	Train AUC: 0.9395	Val AUC: 0.7059
INFO: Early stopping counter 3 of 5
Epoch: [7/20]	Loss: 0.218922	Train AUC: 0.9871	Val AUC: 0.7024
INFO: Early stopping counter 4 of 5
Epoch: [8/20]	Loss: 0.127244	Train AUC: 0.9932	Val AUC: 0.7029
INFO: Early stopping counter 5 of 5
INFO: Early stopping
Running command: python train.py --split_index 16 --run 6 --nepochs 20 --lr 0.00104 --batch_size 64 --wd 0.000138 --oversample --lr_scheduler --optimiz



Class counts: [1446  398]
Using oversampling with WeightedRandomSampler for training.
Epoch: [0/20]	Loss: nan	Train AUC: 0.5306	Val AUC: 0.5422
Epoch: [1/20]	Loss: 0.643451	Train AUC: 0.7192	Val AUC: 0.6243
Epoch: [2/20]	Loss: 0.546558	Train AUC: 0.8265	Val AUC: 0.7908
Epoch: [3/20]	Loss: 0.476868	Train AUC: 0.7858	Val AUC: 0.6988
INFO: Early stopping counter 1 of 5
Epoch: [4/20]	Loss: 0.438164	Train AUC: 0.8629	Val AUC: 0.8349
Epoch: [5/20]	Loss: 0.422438	Train AUC: 0.9252	Val AUC: 0.8172
INFO: Early stopping counter 2 of 5
Epoch: [6/20]	Loss: 0.391961	Train AUC: 0.9253	Val AUC: 0.7835
INFO: Early stopping counter 3 of 5
Epoch: [7/20]	Loss: 0.288337	Train AUC: 0.9621	Val AUC: 0.8273
INFO: Early stopping counter 4 of 5
Epoch: [8/20]	Loss: 0.235350	Train AUC: 0.9798	Val AUC: 0.8384
Epoch: [9/20]	Loss: 0.194426	Train AUC: 0.9847	Val AUC: 0.8253
INFO: Early stopping counter 5 of 5
INFO: Early stopping
Running command: python train.py --split_index 17 --run 6 --nepochs 20 --lr 0.00104 --ba



Class counts: [1578  426]
Using oversampling with WeightedRandomSampler for training.
Epoch: [0/20]	Loss: nan	Train AUC: 0.5536	Val AUC: 0.5367
Epoch: [1/20]	Loss: 0.594773	Train AUC: 0.8141	Val AUC: 0.6679
Epoch: [2/20]	Loss: 0.492235	Train AUC: 0.8562	Val AUC: 0.7458
Epoch: [3/20]	Loss: 0.465745	Train AUC: 0.8821	Val AUC: 0.7356
INFO: Early stopping counter 1 of 5
Epoch: [4/20]	Loss: 0.419214	Train AUC: 0.7997	Val AUC: 0.6664
INFO: Early stopping counter 2 of 5
Epoch: [5/20]	Loss: 0.336904	Train AUC: 0.9597	Val AUC: 0.7245
INFO: Early stopping counter 3 of 5
Epoch: [6/20]	Loss: 0.278874	Train AUC: 0.9782	Val AUC: 0.7587
Epoch: [7/20]	Loss: 0.199920	Train AUC: 0.9842	Val AUC: 0.7054
INFO: Early stopping counter 4 of 5
Epoch: [8/20]	Loss: 0.156275	Train AUC: 0.9900	Val AUC: 0.6867
INFO: Early stopping counter 5 of 5
INFO: Early stopping
Running command: python train.py --split_index 18 --run 6 --nepochs 20 --lr 0.00104 --batch_size 64 --wd 0.000138 --oversample --lr_scheduler --optimiz



Class counts: [1588  442]
Using oversampling with WeightedRandomSampler for training.
Epoch: [0/20]	Loss: nan	Train AUC: 0.4689	Val AUC: 0.5470
Epoch: [1/20]	Loss: 0.589107	Train AUC: 0.8196	Val AUC: 0.7166
Epoch: [2/20]	Loss: 0.511927	Train AUC: 0.8046	Val AUC: 0.6742
INFO: Early stopping counter 1 of 5
Epoch: [3/20]	Loss: 0.462244	Train AUC: 0.8626	Val AUC: 0.7566
Epoch: [4/20]	Loss: 0.392867	Train AUC: 0.8947	Val AUC: 0.7431
INFO: Early stopping counter 2 of 5
Epoch: [5/20]	Loss: 0.390470	Train AUC: 0.9200	Val AUC: 0.6427
INFO: Early stopping counter 3 of 5
Epoch: [6/20]	Loss: 0.285238	Train AUC: 0.9701	Val AUC: 0.7489
INFO: Early stopping counter 4 of 5
Epoch: [7/20]	Loss: 0.203809	Train AUC: 0.9825	Val AUC: 0.7296
INFO: Early stopping counter 5 of 5
INFO: Early stopping
Running command: python train.py --split_index 19 --run 6 --nepochs 20 --lr 0.00104 --batch_size 64 --wd 0.000138 --oversample --lr_scheduler --optimizer adam --cls_arch simple --early_stopping --augm 4
Namespace(o



Class counts: [1506  432]
Using oversampling with WeightedRandomSampler for training.
Epoch: [0/20]	Loss: nan	Train AUC: 0.5244	Val AUC: 0.5321
Epoch: [1/20]	Loss: 0.613600	Train AUC: 0.8208	Val AUC: 0.7474
Epoch: [2/20]	Loss: 0.494667	Train AUC: 0.8233	Val AUC: 0.7208
INFO: Early stopping counter 1 of 5
Epoch: [3/20]	Loss: 0.458234	Train AUC: 0.7100	Val AUC: 0.4209
INFO: Early stopping counter 2 of 5
Epoch: [4/20]	Loss: 0.371817	Train AUC: 0.9442	Val AUC: 0.7145
INFO: Early stopping counter 3 of 5
Epoch: [5/20]	Loss: 0.316844	Train AUC: 0.9619	Val AUC: 0.7110
INFO: Early stopping counter 4 of 5
Epoch: [6/20]	Loss: 0.249989	Train AUC: 0.9711	Val AUC: 0.6968
INFO: Early stopping counter 5 of 5
INFO: Early stopping


ACTUALLY WE SHOULD PROBABLY RE RUN OPTUN.PY cause now oversampling and augmentation included

need to rety 6 with those parameters : 

The best result was achieved in the run with a value of 0.88161. For that run (number 14, split 3), the optimal training parameters were:

• Balance: False
• Batch Size: 16
• Classifier Architecture: simple
• Learning Rate: ~0.00444
• LR Scheduler: Enabled (True)
• Optimizer: SGD
• Weight Decay: ~0.00241

Since it used a simple classifier architecture, dropout and hidden dimension parameters are not applicable in this case.

From scratch best parameter for each split - 10 - Nul

In [None]:
import os
import pandas as pd
import math

# Set the CUDA device if desired
os.environ["CUDA_VISIBLE_DEVICES"] = "1"

# Path to the CSV file with best parameters for training from scratch
csv_path = "/home/mezher/Documents/Deauville_DeepLearning/best_params_summary_scratch.csv"
best_params = pd.read_csv(csv_path)

for _, row in best_params.iterrows():
    split = int(row["split"])
    
    # Retrieve hyperparameters from CSV
    lr = row["params_lr"]
    batch_size = row["params_batch_size"]
    wd = row["params_wd"]
    optimizer = row["params_optimizer"]
    cls_arch = row["params_cls_arch"]  # e.g., 'simple' or 'complex'
    
    # If 'params_dropout' or 'params_hidden_dim' are missing or NaN, handle gracefully
    dropout_flag = ""
    if "params_dropout" in row and not pd.isna(row["params_dropout"]):
        dropout_flag = f"--dropout {row['params_dropout']}"
    
    hidden_dim_flag = ""
    if "params_hidden_dim" in row and not pd.isna(row["params_hidden_dim"]):
        # If hidden_dim is a float like 448.0, convert to int if appropriate
        hidden_dim_val = int(row["params_hidden_dim"]) if not math.isnan(row["params_hidden_dim"]) else None
        if hidden_dim_val is not None:
            hidden_dim_flag = f"--hidden_dim {hidden_dim_val}"
    
    # Check if balance is True/1
    balance_flag = ""
    if str(row["params_balance"]).strip().lower() in ["true", "1"]:
        balance_flag = "--balance"

    # Check if LR scheduler is True/1
    lr_scheduler_flag = ""
    if str(row["params_lr_scheduler"]).strip().lower() in ["true", "1"]:
        lr_scheduler_flag = "--lr_scheduler"
    
    # Example augmentation flag (adjust as needed)
    augm_flag = "--augm 4"
    
    # Build the command string
    command = (
        f"python train.py "
        f"--split_index {split} "
        f"--run 10 "
        f"--nepochs 20 "
        f"--lr {lr} "
        f"--batch_size {batch_size} "
        f"--wd {wd} "
        f"--optimizer {optimizer} "
        f"--cls_arch {cls_arch} "     # Classification layer architecture
        f"{dropout_flag} "           # e.g., --dropout 0.48
        f"{hidden_dim_flag} "        # e.g., --hidden_dim 448
        f"{balance_flag} "           # e.g., --balance
        f"{lr_scheduler_flag} "      # e.g., --lr_scheduler
        f"--early_stopping "
        f"{augm_flag}"
    )
    
    print("Running command:", command)
    os.system(command)


Running command: python train.py --split_index 0 --run 10 --nepochs 20 --lr 0.0004194869845244 --batch_size 128 --wd 0.0001030377652283 --optimizer adam --cls_arch complex --dropout 0.4851196771362177 --hidden_dim 448   --early_stopping --augm 4
Namespace(output='training_results', normalize=True, checkpoint='', resume=False, cls_arch='complex', hidden_dim=448, dropout=0.4851196771362177, optimizer='adam', lr=0.0004194869845244, lr_anneal=15, momentum=0.9, wd=0.0001030377652283, split_index=0, run=10, batch_size=128, nepochs=20, workers=4, augm=4, balance=False, oversample=False, lr_scheduler=False, early_stopping=True, finetune=False, transfer_learning=False)
Datasets train:1948, val:530
Weight of each class, no tumor: 0.2032828282828283, tumor: 0.7967171717171717
INFO: Initializing early stopping
Epoch: [0/20]	Loss: nan	Train AUC: 0.5100	Val AUC: 0.5164
Epoch: [1/20]	Loss: 0.486573	Train AUC: 0.7784	Val AUC: 0.7786
Epoch: [2/20]	Loss: 0.392271	Train AUC: 0.8084	Val AUC: 0.7121
INFO: 



Epoch: [0/20]	Loss: nan	Train AUC: 0.5044	Val AUC: 0.5054
Epoch: [1/20]	Loss: 0.451798	Train AUC: 0.8248	Val AUC: 0.7051
Epoch: [2/20]	Loss: 0.352247	Train AUC: 0.8791	Val AUC: 0.7100
Epoch: [3/20]	Loss: 0.302450	Train AUC: 0.9193	Val AUC: 0.7476
Epoch: [4/20]	Loss: 0.245730	Train AUC: 0.9130	Val AUC: 0.7614
Epoch: [5/20]	Loss: 0.256158	Train AUC: 0.9842	Val AUC: 0.7527
INFO: Early stopping counter 1 of 5
Epoch: [6/20]	Loss: 0.166663	Train AUC: 0.9478	Val AUC: 0.7565
INFO: Early stopping counter 2 of 5
Epoch: [7/20]	Loss: 0.107347	Train AUC: 0.9967	Val AUC: 0.7918
Epoch: [8/20]	Loss: 0.076244	Train AUC: 0.9989	Val AUC: 0.7877
INFO: Early stopping counter 3 of 5
Epoch: [9/20]	Loss: 0.048558	Train AUC: 0.9993	Val AUC: 0.7914
INFO: Early stopping counter 4 of 5
Epoch: [10/20]	Loss: 0.047038	Train AUC: 0.9995	Val AUC: 0.7913
INFO: Early stopping counter 5 of 5
INFO: Early stopping
Running command: python train.py --split_index 3 --run 10 --nepochs 20 --lr 0.0044413525595453 --batch_size 16



Epoch: [0/20]	Loss: nan	Train AUC: 0.5230	Val AUC: 0.5242
Epoch: [1/20]	Loss: 0.494933	Train AUC: 0.8167	Val AUC: 0.7935
Epoch: [2/20]	Loss: 0.431691	Train AUC: 0.8045	Val AUC: 0.7416
INFO: Early stopping counter 1 of 5
Epoch: [3/20]	Loss: 0.409694	Train AUC: 0.8830	Val AUC: 0.8066
Epoch: [4/20]	Loss: 0.371018	Train AUC: 0.8735	Val AUC: 0.7535
INFO: Early stopping counter 2 of 5
Epoch: [5/20]	Loss: 0.357411	Train AUC: 0.9155	Val AUC: 0.7904
INFO: Early stopping counter 3 of 5
Epoch: [6/20]	Loss: 0.304487	Train AUC: 0.9196	Val AUC: 0.8553
Epoch: [7/20]	Loss: 0.292766	Train AUC: 0.9631	Val AUC: 0.7967
INFO: Early stopping counter 4 of 5
Epoch: [8/20]	Loss: 0.231660	Train AUC: 0.9709	Val AUC: 0.7765
INFO: Early stopping counter 5 of 5
INFO: Early stopping
Running command: python train.py --split_index 4 --run 10 --nepochs 20 --lr 0.0002686823779312 --batch_size 32 --wd 0.0001273895275569 --optimizer adam --cls_arch simple     --early_stopping --augm 4
Namespace(output='training_results', 



Epoch: [0/20]	Loss: nan	Train AUC: 0.4838	Val AUC: 0.5012
Epoch: [1/20]	Loss: 0.480471	Train AUC: 0.8027	Val AUC: 0.7144
Epoch: [2/20]	Loss: 0.405873	Train AUC: 0.8658	Val AUC: 0.7307
Epoch: [3/20]	Loss: 0.371275	Train AUC: 0.8912	Val AUC: 0.7535
Epoch: [4/20]	Loss: 0.361468	Train AUC: 0.8443	Val AUC: 0.6595
INFO: Early stopping counter 1 of 5
Epoch: [5/20]	Loss: 0.345547	Train AUC: 0.8595	Val AUC: 0.7272
INFO: Early stopping counter 2 of 5
Epoch: [6/20]	Loss: 0.257845	Train AUC: 0.9626	Val AUC: 0.7797
Epoch: [7/20]	Loss: 0.192181	Train AUC: 0.9744	Val AUC: 0.7409
INFO: Early stopping counter 3 of 5
Epoch: [8/20]	Loss: 0.165230	Train AUC: 0.9859	Val AUC: 0.7446
INFO: Early stopping counter 4 of 5
Epoch: [9/20]	Loss: 0.122916	Train AUC: 0.9904	Val AUC: 0.7579
INFO: Early stopping counter 5 of 5
INFO: Early stopping
Running command: python train.py --split_index 6 --run 10 --nepochs 20 --lr 0.0002452885525442 --batch_size 64 --wd 0.0003298415408496 --optimizer adam --cls_arch simple   --



Epoch: [0/20]	Loss: nan	Train AUC: 0.5033	Val AUC: 0.4949
Epoch: [1/20]	Loss: 0.327025	Train AUC: 0.7841	Val AUC: 0.6493
Epoch: [2/20]	Loss: 0.238769	Train AUC: 0.8935	Val AUC: 0.7568
Epoch: [3/20]	Loss: 0.201800	Train AUC: 0.8886	Val AUC: 0.6926
INFO: Early stopping counter 1 of 5
Epoch: [4/20]	Loss: 0.163188	Train AUC: 0.9463	Val AUC: 0.7993
Epoch: [5/20]	Loss: 0.136893	Train AUC: 0.9441	Val AUC: 0.7328
INFO: Early stopping counter 2 of 5
Epoch: [6/20]	Loss: 0.108050	Train AUC: 0.9876	Val AUC: 0.7741
INFO: Early stopping counter 3 of 5
Epoch: [7/20]	Loss: 0.063131	Train AUC: 0.9969	Val AUC: 0.7638
INFO: Early stopping counter 4 of 5
Epoch: [8/20]	Loss: 0.046894	Train AUC: 0.9991	Val AUC: 0.7795
INFO: Early stopping counter 5 of 5
INFO: Early stopping
Running command: python train.py --split_index 8 --run 10 --nepochs 20 --lr 0.0002798076914403 --batch_size 16 --wd 0.0003082771512274 --optimizer adam --cls_arch complex --dropout 0.1504799058962193 --hidden_dim 384 --balance --lr_sched



Epoch: [0/20]	Loss: nan	Train AUC: 0.4999	Val AUC: 0.5123
Epoch: [1/20]	Loss: 0.317192	Train AUC: 0.7284	Val AUC: 0.6157
Epoch: [2/20]	Loss: 0.281781	Train AUC: 0.7984	Val AUC: 0.6841
Epoch: [3/20]	Loss: 0.268158	Train AUC: 0.8351	Val AUC: 0.6970
Epoch: [4/20]	Loss: 0.250808	Train AUC: 0.8569	Val AUC: 0.7453
Epoch: [5/20]	Loss: 0.236830	Train AUC: 0.7299	Val AUC: 0.6613
INFO: Early stopping counter 1 of 5
Epoch: [6/20]	Loss: 0.228957	Train AUC: 0.9343	Val AUC: 0.7791
Epoch: [7/20]	Loss: 0.218600	Train AUC: 0.9205	Val AUC: 0.7481
INFO: Early stopping counter 2 of 5
Epoch: [8/20]	Loss: 0.180942	Train AUC: 0.9468	Val AUC: 0.7287
INFO: Early stopping counter 3 of 5
Epoch: [9/20]	Loss: 0.132589	Train AUC: 0.9898	Val AUC: 0.7609
INFO: Early stopping counter 4 of 5
Epoch: [10/20]	Loss: 0.098112	Train AUC: 0.9940	Val AUC: 0.7637
INFO: Early stopping counter 5 of 5
INFO: Early stopping
Running command: python train.py --split_index 9 --run 10 --nepochs 20 --lr 0.0054920816430505 --batch_size 64



Epoch: [0/20]	Loss: nan	Train AUC: 0.4520	Val AUC: 0.4765
Epoch: [1/20]	Loss: 0.350593	Train AUC: 0.7607	Val AUC: 0.7375
Epoch: [2/20]	Loss: 0.301991	Train AUC: 0.8209	Val AUC: 0.7608
Epoch: [3/20]	Loss: 0.271187	Train AUC: 0.8600	Val AUC: 0.7761
Epoch: [4/20]	Loss: 0.215191	Train AUC: 0.8710	Val AUC: 0.7892
Epoch: [5/20]	Loss: 0.233745	Train AUC: 0.9165	Val AUC: 0.8050
Epoch: [6/20]	Loss: 0.195567	Train AUC: 0.9627	Val AUC: 0.7830
INFO: Early stopping counter 1 of 5
Epoch: [7/20]	Loss: 0.154519	Train AUC: 0.9831	Val AUC: 0.8442
Epoch: [8/20]	Loss: 0.108299	Train AUC: 0.9604	Val AUC: 0.8197
INFO: Early stopping counter 2 of 5
Epoch: [9/20]	Loss: 0.104070	Train AUC: 0.9867	Val AUC: 0.8152
INFO: Early stopping counter 3 of 5
Epoch: [10/20]	Loss: 0.102874	Train AUC: 0.9848	Val AUC: 0.8339
INFO: Early stopping counter 4 of 5
Epoch: [11/20]	Loss: 0.062680	Train AUC: 0.9905	Val AUC: 0.7944
INFO: Early stopping counter 5 of 5
INFO: Early stopping
Running command: python train.py --split_index



Epoch: [0/20]	Loss: nan	Train AUC: 0.4590	Val AUC: 0.4910
Epoch: [1/20]	Loss: 0.343284	Train AUC: 0.4131	Val AUC: 0.4402
INFO: Early stopping counter 1 of 5
Epoch: [2/20]	Loss: 0.349879	Train AUC: 0.6250	Val AUC: 0.5752
Epoch: [3/20]	Loss: 0.349650	Train AUC: 0.5786	Val AUC: 0.6031
Epoch: [4/20]	Loss: 0.351751	Train AUC: 0.5169	Val AUC: 0.5026
INFO: Early stopping counter 2 of 5
Epoch: [5/20]	Loss: 0.351154	Train AUC: 0.4574	Val AUC: 0.4779
INFO: Early stopping counter 3 of 5
Epoch: [6/20]	Loss: 0.341000	Train AUC: 0.6721	Val AUC: 0.6621
Epoch: [7/20]	Loss: 0.330166	Train AUC: 0.7097	Val AUC: 0.6324
INFO: Early stopping counter 4 of 5
Epoch: [8/20]	Loss: 0.320770	Train AUC: 0.7310	Val AUC: 0.6673
Epoch: [9/20]	Loss: 0.310151	Train AUC: 0.7233	Val AUC: 0.5889
INFO: Early stopping counter 5 of 5
INFO: Early stopping
Running command: python train.py --split_index 12 --run 10 --nepochs 20 --lr 0.0018325450716217 --batch_size 32 --wd 0.0080493558068109 --optimizer sgd --cls_arch simple   --

Traceback (most recent call last):
  File "/home/mezher/Documents/Deauville_DeepLearning/train.py", line 389, in <module>
    main()
  File "/home/mezher/Documents/Deauville_DeepLearning/train.py", line 304, in main
    val_loss = validate_loss(val_loader, model, criterion)
  File "/home/mezher/Documents/Deauville_DeepLearning/train.py", line 94, in validate_loss
    running_loss += loss.item() * input.size(0)
KeyboardInterrupt


Running command: python train.py --split_index 13 --run 10 --nepochs 20 --lr 0.0020462138489769 --batch_size 32 --wd 0.0011857387115643 --optimizer sgd --cls_arch simple   --balance --lr_scheduler --early_stopping --augm 4
Namespace(output='training_results', normalize=True, checkpoint='', resume=False, cls_arch='simple', hidden_dim=256, dropout=0.3, optimizer='sgd', lr=0.0020462138489769, lr_anneal=15, momentum=0.9, wd=0.0011857387115643, split_index=13, run=10, batch_size=32, nepochs=20, workers=4, augm=4, balance=True, oversample=False, lr_scheduler=True, early_stopping=True, finetune=False, transfer_learning=False)
Datasets train:1944, val:534
Weight of each class, no tumor: 0.2032828282828283, tumor: 0.7967171717171717
Balance loss with weights: [np.float64(0.2032828282828283), np.float64(0.7967171717171717)]
INFO: Initializing learning rate scheduler
INFO: Initializing early stopping




Epoch: [0/20]	Loss: nan	Train AUC: 0.4619	Val AUC: 0.5522
