# Index

1. [Imports](#Imports)
2. [ResNet 50](#ResNet-50)
    - [Optuna Optimization](#Optuna-optimization)
    - [Optuna Optimization - Intensive search for the best optimizer](#Intensive-search-for-the-Best-Optimizer)
3. [Train ResNet50 (best hyperparameters)](#Train-ResNet50-with-the-hyperparameters-found)

# Imports

In [12]:
# Make the project's ``src`` directory importable from this notebook.
import os
import sys

# Assumes the notebook is launched from a directory two levels below the
# repository root (the root is resolved relative to the current working dir).
project_root = os.path.abspath(os.path.join(os.getcwd(), os.pardir, os.pardir))
src_path = os.path.join(project_root, "src")

sys.path.append(src_path)

# Standard Libraries
import json
import glob
import csv
import random

# Scientific stack
import numpy as np
import matplotlib.pyplot as plt

# Hyperparameter optimization
import optuna

# PyTorch & Torchvision
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset
from torchvision import transforms, models
from torchvision.models import ResNet50_Weights

# sklearn metrics
from sklearn.metrics import (
    accuracy_score,
    precision_recall_fscore_support,
    classification_report,
    confusion_matrix
)

from tqdm import tqdm

# Reproducibility
from optimization.utils.reproducibility import set_seed, seed_worker

# Data Loading
from optimization.loaders.data_loader import LungCancerDataset, create_dataloader

# Objective Functions
from optimization.objectives.objective_generic import objective_generic
from optimization.objectives.objective_sgd import objective_sgd
from optimization.objectives.objective_sgd_steplr import objective_sgd_steplr
from optimization.objectives.objective_sgd_steplr_intensive_search import (
    objective_sgd_steplr_intensive_search
)

# Optuna training orchestrator
from modeling.resnet50.train_resnet50 import train_ResNet50_optuna_hyperparams

# Evaluation script
from modeling.resnet50.eval_resnet50 import evaluate_resnet50

## Setting seed (for reproducibility)

In [2]:
# Seed via the project helper imported from optimization.utils.reproducibility;
# the same value (42) is also passed to the Optuna samplers in this notebook.
set_seed(42)

# Dataset (ResNet50)

In [3]:
# Transformations applied to every image before it reaches the network
transform = transforms.Compose([
    transforms.Resize((224, 224)),  # ResNet expects 224x224 input
    transforms.ToTensor(),  # Convert to tensor (ToTensor scales pixel values to 0-1)
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])  # Normalize to ImageNet's scale
])

# File Paths
# The sub-path is deliberately relative (no leading "/"): os.path.join throws
# away every earlier component when a later one is absolute, which would
# silently drop project_root and point at "/data/..." on the filesystem root.
train_files = os.path.join(project_root, 'data/yolo/LUNG_PET_CT/patient_split_augmented/train')
val_files = os.path.join(project_root, 'data/yolo/LUNG_PET_CT/patient_split_augmented/val')

# Datasets built from the project loader, sharing the same preprocessing
train_dataset = LungCancerDataset(train_files, transform=transform)
val_dataset = LungCancerDataset(val_files, transform=transform)

# Optuna optimization 

In [None]:
# Start worker processes with 'spawn'; force=True replaces any start method
# that was already set in this session.
torch.multiprocessing.set_start_method('spawn', force=True)

In [None]:
# Folder holding all Optuna outputs. The sub-path must be relative: a leading
# "/" makes os.path.join discard project_root.
optuna_results_dir = os.path.join(project_root, 'src/optimization/optuna_results')

Optuna is used to search for the best hyperparameters for ResNet-50.

## Optuna - Search for best optimizer (Adam, SGD, RMSprop)

In [4]:
# Directory to store results of the generic optimizer search.
# Relative sub-path on purpose: a leading "/" would make os.path.join drop
# project_root and create the folder at the filesystem root instead.
RESULTS_DIR = os.path.join(project_root, 'src/optimization/optuna_results/trial_results')
os.makedirs(RESULTS_DIR, exist_ok=True)

In [None]:
# Study over optimizer families (Adam, SGD, RMSprop); the objective value is maximized.
study = optuna.create_study(
    direction='maximize',
    sampler=optuna.samplers.TPESampler(seed=42)  # Set seed for Optuna
)

# `objective` was never defined in this notebook; the objective imported for
# this search is `objective_generic` (its results are saved under that name).
# NOTE(review): `progress_callback` is not defined or imported in this
# notebook — confirm where it lives before running this cell.
study.optimize(objective_generic,
               n_trials=50,
               n_jobs=1,
               gc_after_trial=True,
               callbacks=[progress_callback]
              )

#### Save Results

In [None]:
# Persist the best trial's hyperparameters of the generic search as JSON
best_params_path_objective_generic = os.path.join(
    optuna_results_dir, "best_hyperparameters_objective_generic.json"
)
with open(best_params_path_objective_generic, "w") as out_file:
    out_file.write(json.dumps(study.best_params, indent=4))

print(f"Best hyperparameters for ResNet50: {best_params_path_objective_generic}")

In [39]:
# Best objective value reached by the generic study (created with direction='maximize')
print(study.best_value)

0.39654512852208934


#### Load Results

In [7]:
# Read the saved hyperparameters back into their own variable. Rebinding
# best_params_path_objective_generic to the loaded dict would clobber the path
# and make this cell fail on re-run (open() would receive a dict).
with open(best_params_path_objective_generic, "r") as f:
    best_params_objective_generic = json.load(f)

print("Loaded Hyperparameters:", best_params_objective_generic)

Loaded Hyperparameters: {'learning_rate': 1.3788939487801654e-05, 'batch_size': 16, 'weight_decay': 0.0022593474031394286, 'trainable_layers': 3, 'dropout_rate': 0.15066989041838022, 'optimizer': 'SGD', 'scheduler': 'StepLR', 'label_smoothing': 0.1165395758341762, 'momentum': 0.7961977213986026, 'step_size': 4, 'gamma': 0.6177769059141647}


## Optuna - Search for SGD optimizer

In [34]:
# Directory to store results of the SGD-only search.
# The string literal now uses matching quotes (the original was a SyntaxError)
# and a relative sub-path so os.path.join keeps project_root.
RESULTS_DIR = os.path.join(project_root, 'src/optimization/optuna_results/trial_results_SGD')
os.makedirs(RESULTS_DIR, exist_ok=True)

In [None]:
# SGD-only search: maximize the objective with a seeded TPE sampler
sgd_sampler = optuna.samplers.TPESampler(seed=42)
study_SGD = optuna.create_study(direction='maximize', sampler=sgd_sampler)

# NOTE(review): `progress_callback` is not defined or imported in this
# notebook — confirm where it comes from.
study_SGD.optimize(
    objective_sgd,
    n_trials=50,
    n_jobs=1,
    gc_after_trial=True,
    callbacks=[progress_callback],
)

#### Save Results

In [None]:
# Persist the best trial's hyperparameters of the SGD search as JSON
best_params_path_SGD = os.path.join(
    optuna_results_dir, "best_hyperparameters_SGD.json"
)
with open(best_params_path_SGD, "w") as out_file:
    out_file.write(json.dumps(study_SGD.best_params, indent=4))

print(f"Best hyperparameters for ResNet50: {best_params_path_SGD}")

In [40]:
# Best objective value reached by the SGD study (created with direction='maximize')
print(study_SGD.best_value)

0.39430431372284513


0.39430431372284513

#### Load Results

In [38]:
# Read the SGD-search hyperparameters back from the JSON file
with open(best_params_path_SGD, "r") as in_file:
    best_params_SGD = json.loads(in_file.read())

print("Loaded Hyperparameters:", best_params_SGD)

Loaded Hyperparameters: {'learning_rate': 4.8733423774825025e-06, 'batch_size': 16, 'weight_decay': 0.004493403189321242, 'trainable_layers': 4, 'dropout_rate': 0.29000000000000004, 'scheduler': 'StepLR', 'label_smoothing': 0.04811257676075818, 'momentum': 0.9433103253272241, 'step_size': 5, 'gamma': 0.5417209163329839}


## Optuna - Search for SGD optimizer + StepLR

| **Parameter**       | **Higher Value Effect**                            | **Lower Value Effect**                           | **Common Practical Range** |
|--------------------|---------------------------------------------------|------------------------------------------------|-----------------------------|
| **Momentum**        | Faster convergence, but risk of overshooting     | Slower convergence, more stable                | 0.85 - 0.95                 |
| **Step Size**       | Slower decay, longer exploration                 | Faster decay, quicker convergence              | 3 - 10 epochs               |
| **Gamma**           | Gradual LR decay, stable learning                | Faster decay, risk of premature convergence    | 0.5 - 0.9                   |
| **Label Smoothing** | Better generalization, less overfitting          | More confidence, risk of overfitting           | 0.05 - 0.2                  |


In [58]:
# Directory to store results of the SGD + StepLR search.
# The string literal now uses matching quotes (the original was a SyntaxError)
# and a relative sub-path so os.path.join keeps project_root.
RESULTS_DIR = os.path.join(project_root, 'src/optimization/optuna_results/trial_results_SGD_StepLR')
os.makedirs(RESULTS_DIR, exist_ok=True)

In [None]:
# SGD + StepLR search; the objective value is maximized.
study_SGD_StepLR = optuna.create_study(
    direction='maximize',
    sampler=optuna.samplers.TPESampler(seed=42)  # Set seed for Optuna
)

# `objective` was never defined in this notebook; the objective imported for
# the SGD + StepLR search is `objective_sgd_steplr`.
# NOTE(review): `progress_callback` is not defined or imported in this
# notebook — confirm where it lives before running this cell.
study_SGD_StepLR.optimize(objective_sgd_steplr,
                          n_trials=70,
                          n_jobs=1,
                          gc_after_trial=True,
                          callbacks=[progress_callback]
                         )

#### Save Results

In [None]:
# Persist the best trial's hyperparameters of the SGD + StepLR search as JSON
best_params_path_SGD_StepLR = os.path.join(
    optuna_results_dir, "best_hyperparameters_SGD_StepLR.json"
)
with open(best_params_path_SGD_StepLR, "w") as out_file:
    out_file.write(json.dumps(study_SGD_StepLR.best_params, indent=4))

print(f"Best hyperparameters for ResNet50: {best_params_path_SGD_StepLR}")

In [61]:
# Best objective value reached by the SGD + StepLR study (created with direction='maximize')
print(study_SGD_StepLR.best_value)

0.399760811078408


0.399760811078408

#### Load Results

In [10]:
# Read the saved hyperparameters back into their own variable. Rebinding
# best_params_path_SGD_StepLR to the loaded dict would clobber the path and
# make this cell fail on re-run (open() would receive a dict).
with open(best_params_path_SGD_StepLR, "r") as f:
    best_params_SGD_StepLR = json.load(f)

print("Loaded Hyperparameters:", best_params_SGD_StepLR)

Loaded Hyperparameters: {'learning_rate': 4.422338615410478e-06, 'batch_size': 16, 'weight_decay': 0.005336806545024176, 'trainable_layers': 4, 'dropout_rate': 0.20876095379041198, 'label_smoothing': 0.01587980220249954, 'momentum': 0.9397323702926935, 'step_size': 4, 'gamma': 0.6498315263050394}


## Optuna - Search for SGD optimizer + StepLR (Intensive search)

In [58]:
# Directory to store results of the intensive SGD + StepLR search.
# The string literal now uses matching quotes (the original was a SyntaxError)
# and a relative sub-path so os.path.join keeps project_root.
RESULTS_DIR = os.path.join(project_root, 'src/optimization/optuna_results/trial_results_SGD_StepLR_50epochs_2')
os.makedirs(RESULTS_DIR, exist_ok=True)

In [None]:
# Intensive SGD + StepLR search; the objective value is maximized.
study_SGD_StepLR_50_2 = optuna.create_study(
    direction='maximize',
    sampler=optuna.samplers.TPESampler(seed=42)  # Set seed for Optuna
)

# `objective` was never defined in this notebook; the objective imported for
# the intensive search is `objective_sgd_steplr_intensive_search`.
# NOTE(review): `progress_callback` is not defined or imported in this
# notebook — confirm where it lives before running this cell.
study_SGD_StepLR_50_2.optimize(objective_sgd_steplr_intensive_search,
                               n_trials=30,
                               n_jobs=1,
                               gc_after_trial=True,
                               callbacks=[progress_callback]
                              )

#### Save Results

In [None]:
# Persist the best trial's hyperparameters of the intensive search as JSON
best_params_path_SGD_StepLR_50_2 = os.path.join(
    optuna_results_dir, "best_hyperparameters_SGD_StepLR_50epochs_2.json"
)
with open(best_params_path_SGD_StepLR_50_2, "w") as out_file:
    out_file.write(json.dumps(study_SGD_StepLR_50_2.best_params, indent=4))

print(f"Best hyperparameters for ResNet50: {best_params_path_SGD_StepLR_50_2}")

In [9]:
# Best objective value reached by the intensive study (created with direction='maximize')
print(study_SGD_StepLR_50_2.best_value)

0.4212451693489931


#### Load Results

In [10]:
# Read the saved hyperparameters back into their own variable. Rebinding
# best_params_path_SGD_StepLR_50_2 to the loaded dict would clobber the path
# and make this cell fail on re-run (open() would receive a dict).
with open(best_params_path_SGD_StepLR_50_2, "r") as f:
    best_params_SGD_StepLR_50_2 = json.load(f)

print("Loaded Hyperparameters:", best_params_SGD_StepLR_50_2)

Loaded Hyperparameters: {'learning_rate': 1.035046459447574e-06, 'batch_size': 32, 'weight_decay': 0.0006775499840802566, 'trainable_layers': 4, 'dropout_rate': 0.20144072044874892, 'label_smoothing': 0.07199073214843173, 'momentum': 0.9474227002735129, 'step_size': 16, 'gamma': 0.5763511540167814}


# Train ResNet50 with the hyperparameters found

## Trial trial_results_SGD_StepLR_50epochs -> Trial 3

In [None]:
# Get the best parameters recorded for the selected trial.
# All joined sub-paths in this cell are relative: a leading "/" makes
# os.path.join discard the base directory and resolve against the filesystem root.
path_1 = os.path.join(optuna_results_dir, "trial_results_SGD_StepLR_50epochs/trial_3_GOOD.json")
with open(path_1, "r") as f:
    best_params_1 = json.load(f)

best_params_1 = best_params_1['hyperparameters']

# Lift the nested optimizer/scheduler settings to top-level keys
best_params_1['momentum'] = best_params_1['optimizer']['parameters']['momentum']
best_params_1['gamma'] = best_params_1['scheduler']['parameters']['gamma']
best_params_1['step_size'] = best_params_1['scheduler']['parameters']['step_size']

# NOTE(review): this nests the whole optimizer/scheduler entry under 'type'
# rather than just its name — confirm this is the shape the trainer expects.
best_params_1['optimizer'] = {'type': best_params_1['optimizer']}
best_params_1['scheduler'] = {'type': best_params_1['scheduler']}


# Print all hyperparameters dynamically
print("Best Hyperparameters:")
for key, value in best_params_1.items():
    if isinstance(value, dict):  # If the value is a dictionary (e.g., optimizer, scheduler)
        for sub_key, sub_value in value.items():
            print(f"{key}.{sub_key}: {sub_value}")
    else:
        print(f"{key}: {value}")


# Path to save metrics and model (kept under src_path)
# NOTE(review): these paths are not passed to the trainer below — confirm it
# writes to the same locations.
metrics_file = os.path.join(src_path, "training/ResNet50/final_training/ResNet50_history_1.csv")
model_save_path = os.path.join(src_path, "training/ResNet50/final_training/ResNet50_final_model_1.pth")

# Ensure directories exist
os.makedirs(os.path.dirname(metrics_file), exist_ok=True)
os.makedirs(os.path.dirname(model_save_path), exist_ok=True)

# Initialize CSV file with headers if it doesn't exist
if not os.path.exists(metrics_file):
    with open(metrics_file, mode="w", newline="") as file:
        writer = csv.writer(file)
        writer.writerow([
            "Epoch", "Train Loss", "Train Accuracy", "Train Precision",
            "Train Recall", "Train F1", "Validation Loss",
            "Validation Accuracy", "Validation Precision",
            "Validation Recall", "Validation F1"
        ])


train_ResNet50_optuna_hyperparams(best_params_1, train_dataset, val_dataset)

## Trial trial_results_SGD_StepLR_50epochs_2 -> Trial 5

In [None]:
# Get the best parameters recorded for the selected trial.
# All joined sub-paths in this cell are relative: a leading "/" makes
# os.path.join discard the base directory and resolve against the filesystem root.
path_2 = os.path.join(optuna_results_dir, "trial_results_SGD_StepLR_50epochs_2/trial_5_GOOD.json")
with open(path_2, "r") as f:
    best_params_2 = json.load(f)

best_params_2 = best_params_2['hyperparameters']

# Lift the nested optimizer/scheduler settings to top-level keys
best_params_2['momentum'] = best_params_2['optimizer']['parameters']['momentum']
best_params_2['gamma'] = best_params_2['scheduler']['parameters']['gamma']
best_params_2['step_size'] = best_params_2['scheduler']['parameters']['step_size']

# NOTE(review): this nests the whole optimizer/scheduler entry under 'type'
# rather than just its name — confirm this is the shape the trainer expects.
best_params_2['optimizer'] = {'type': best_params_2['optimizer']}
best_params_2['scheduler'] = {'type': best_params_2['scheduler']}

# Print all hyperparameters dynamically
print("Best Hyperparameters:")
for key, value in best_params_2.items():
    if isinstance(value, dict):  # If the value is a dictionary (e.g., optimizer, scheduler)
        for sub_key, sub_value in value.items():
            print(f"{key}.{sub_key}: {sub_value}")
    else:
        print(f"{key}: {value}")


# Path to save metrics and model (kept under src_path)
# NOTE(review): these paths are not passed to the trainer below — confirm it
# writes to the same locations.
metrics_file = os.path.join(src_path, "training/ResNet50/final_training/ResNet50_history_2.csv")
model_save_path = os.path.join(src_path, "training/ResNet50/final_training/ResNet50_final_model_2.pth")


# Ensure directories exist
os.makedirs(os.path.dirname(metrics_file), exist_ok=True)
os.makedirs(os.path.dirname(model_save_path), exist_ok=True)

# Initialize CSV file with headers if it doesn't exist
if not os.path.exists(metrics_file):
    with open(metrics_file, mode="w", newline="") as file:
        writer = csv.writer(file)
        writer.writerow([
            "Epoch", "Train Loss", "Train Accuracy", "Train Precision",
            "Train Recall", "Train F1", "Validation Loss",
            "Validation Accuracy", "Validation Precision",
            "Validation Recall", "Validation F1"
        ])


train_ResNet50_optuna_hyperparams(best_params_2, train_dataset, val_dataset)

## Trial trial_results_SGD_StepLR_50epochs_2 -> Trial 27

In [None]:
# Get the best parameters recorded for the selected trial.
# All joined sub-paths in this cell are relative: a leading "/" makes
# os.path.join discard the base directory and resolve against the filesystem root.
path_3 = os.path.join(optuna_results_dir, "trial_results_SGD_StepLR_50epochs_2/trial_27_GOOD.json")
with open(path_3, "r") as f:
    best_params_4 = json.load(f)

best_params_4 = best_params_4['hyperparameters']

# Lift the nested optimizer/scheduler settings to top-level keys
best_params_4['momentum'] = best_params_4['optimizer']['parameters']['momentum']
best_params_4['gamma'] = best_params_4['scheduler']['parameters']['gamma']
best_params_4['step_size'] = best_params_4['scheduler']['parameters']['step_size']

# NOTE(review): this nests the whole optimizer/scheduler entry under 'type'
# rather than just its name — confirm this is the shape the trainer expects.
best_params_4['optimizer'] = {'type': best_params_4['optimizer']}
best_params_4['scheduler'] = {'type': best_params_4['scheduler']}

# Print all hyperparameters dynamically
print("Best Hyperparameters:")
for key, value in best_params_4.items():
    if isinstance(value, dict):  # If the value is a dictionary (e.g., optimizer, scheduler)
        for sub_key, sub_value in value.items():
            print(f"{key}.{sub_key}: {sub_value}")
    else:
        print(f"{key}: {value}")


# Path to save metrics and model (kept under src_path)
# NOTE(review): these paths are not passed to the trainer below — confirm it
# writes to the same locations.
metrics_file = os.path.join(src_path, "training/ResNet50/final_training/ResNet50_history_4.csv")
model_save_path = os.path.join(src_path, "training/ResNet50/final_training/ResNet50_final_model_4.pth")

# Ensure directories exist
os.makedirs(os.path.dirname(metrics_file), exist_ok=True)
os.makedirs(os.path.dirname(model_save_path), exist_ok=True)

# Initialize CSV file with headers if it doesn't exist
if not os.path.exists(metrics_file):
    with open(metrics_file, mode="w", newline="") as file:
        writer = csv.writer(file)
        writer.writerow([
            "Epoch", "Train Loss", "Train Accuracy", "Train Precision",
            "Train Recall", "Train F1", "Validation Loss",
            "Validation Accuracy", "Validation Precision",
            "Validation Recall", "Validation F1"
        ])


train_ResNet50_optuna_hyperparams(best_params_4, train_dataset, val_dataset)

# Evaluate ResNet50 - Test set

In [17]:
# Evaluate the first trained checkpoint on the held-out test images.
# Each os.path.join call is now closed properly (the original had unbalanced
# parentheses, with one ")" inside the string literal) and the sub-paths are
# relative so the base directories are not discarded.
evaluate_resnet50(
    checkpoint_path=os.path.join(src_path, "training/ResNet50/final_training/ResNet50_final_model_1.pth"),
    test_data_path=os.path.join(project_root, "data/yolo/LUNG_PET_CT/patient_split/test/images"),
    save_path="test_metrics_model_1_weighted.csv"
)

Evaluating Test Set: 100%|███████████████████| 43/43 [00:26<00:00,  1.63batch/s]


Test Loss: 1.2777
Test Accuracy: 0.4219
Test Precision: 0.4154
Test Recall: 0.4219
Test F1-Score: 0.4166

Classification Report:

              precision    recall  f1-score   support

           A       0.48      0.50      0.49       475
           B       0.33      0.25      0.28       354
           E       0.36      0.40      0.38        43
           G       0.42      0.48      0.45       486

    accuracy                           0.42      1358
   macro avg       0.40      0.40      0.40      1358
weighted avg       0.42      0.42      0.42      1358

Test metrics saved to /Users/catarinasilva/Desktop/Master Thesis/lung_cancer/ResNet50/final_training/test_metrics_model_1_weighted.csv

Confusion Matrix:

[[237  75  22 141]
 [ 87  88   6 173]
 [ 20   4  17   2]
 [150 103   2 231]]





In [25]:
# Evaluate the second trained checkpoint on the held-out test images.
# Each os.path.join call is now closed properly (the original had unbalanced
# parentheses, with one ")" inside the string literal) and the sub-paths are
# relative so the base directories are not discarded.
evaluate_resnet50(
    checkpoint_path=os.path.join(src_path, "training/ResNet50/final_training/ResNet50_final_model_2.pth"),
    test_data_path=os.path.join(project_root, "data/yolo/LUNG_PET_CT/patient_split/test/images"),
    save_path="test_metrics_model_2_weighted.csv"
)

Evaluating Test Set: 100%|███████████████████| 85/85 [00:26<00:00,  3.17batch/s]


Test Loss: 1.2797
Test Accuracy: 0.4116
Test Precision: 0.4085
Test Recall: 0.4116
Test F1-Score: 0.4048

Classification Report:

              precision    recall  f1-score   support

           A       0.51      0.48      0.50       475
           B       0.30      0.20      0.24       354
           E       0.29      0.47      0.35        43
           G       0.39      0.49      0.44       486

    accuracy                           0.41      1358
   macro avg       0.37      0.41      0.38      1358
weighted avg       0.41      0.41      0.40      1358

Test metrics saved to /Users/catarinasilva/Desktop/Master Thesis/lung_cancer/ResNet50/final_training/test_metrics_model_2_.csv





In [8]:
# Evaluate the checkpoint saved as model 4 on the held-out test images.
# Each os.path.join call is now closed properly (the original had unbalanced
# parentheses, with one ")" inside the string literal) and the sub-paths are
# relative so the base directories are not discarded.
evaluate_resnet50(
    checkpoint_path=os.path.join(src_path, "training/ResNet50/final_training/ResNet50_final_model_4.pth"),
    test_data_path=os.path.join(project_root, "data/yolo/LUNG_PET_CT/patient_split/test/images"),
    save_path="test_metrics_model_4_weighted.csv"
)

Evaluating Test Set: 100%|███████████████████| 43/43 [00:20<00:00,  2.10batch/s]


Test Loss: 1.2239
Test Accuracy: 0.4286
Test Precision: 0.4226
Test Recall: 0.4286
Test F1-Score: 0.4199

Classification Report:

              precision    recall  f1-score   support

           A       0.51      0.51      0.51       475
           B       0.34      0.26      0.30       354
           E       0.30      0.07      0.11        43
           G       0.40      0.50      0.45       486

    accuracy                           0.43      1358
   macro avg       0.39      0.34      0.34      1358
weighted avg       0.42      0.43      0.42      1358

Test metrics saved to /Users/catarinasilva/Desktop/Master Thesis/lung_cancer/ResNet50/final_training/test_metrics_model_4_weighted.csv



