# Fine Tuning

This notebook demonstrates how to run the entire experimental pipeline (Hyperparameter Optimization, Final Training, and Evaluation) for the model variations selected in a later cell. We achieve this by calling the core functions defined in `hpo.py`, `final_train.py`, and `evaluate.py`.

### Selecting a model
Due to time constraints, the model setups are grouped into lists that are run as batches. This will be changed later. For now, please run only one of the following cells.

In [9]:
# One batch of setup ids. Run only one of the selection cells so that a
# single batch is active for the rest of the notebook.
SETUP_IDS_TO_RUN = [
    'resnet_head_aug',
    'resnet_mid_noaug',
    'mobilenet_head_noaug',
]


In [1]:
# One batch of setup ids. Run only one of the selection cells so that a
# single batch is active for the rest of the notebook.
SETUP_IDS_TO_RUN = [
    'resnet_head_noaug',
    'mobilenet_mid_aug',
]


In [None]:
# One batch of setup ids. Run only one of the selection cells so that a
# single batch is active for the rest of the notebook.
SETUP_IDS_TO_RUN = [
    'resnet_mid_aug',
    'mobilenet_head_aug',
    'mobilenet_mid_noaug',
]


In [2]:
# Echo the batch of setup ids currently bound in the kernel, so the saved
# output records which selection cell was actually run.
print(SETUP_IDS_TO_RUN)

['resnet_head_noaug', 'mobilenet_mid_aug']


In [4]:
import torch
import os
import time # For potential timing if desired, though scripts handle internal timing
import sys
sys.path.append('../scripts') 

# Import shared utilities and configurations 
import utils
import config # Contains HPO_CONFIG_LIST, UNFREEZE_MAPS, output dirs, defaults

# Import the core processing functions from our scripts
from hpo import perform_hpo_for_setup
from final_train import perform_final_training_for_setup
from evaluate import perform_evaluation_for_setup

# --- Setups Selected for This Run ---
# SETUP_IDS_TO_RUN is not defined in this cell: it must already have been set
# by running one of the selection cells above.
print(f"### Will run pipeline for Setups ID: {SETUP_IDS_TO_RUN} ###")

# --- Global Parameters for this Run (can override config defaults if needed) ---
# The values are taken straight from the defaults in config; assign a
# different value here to override one for this notebook run only.
HPO_EPOCHS = config.DEFAULT_HPO_EPOCHS
FINAL_TRAIN_EPOCHS = config.DEFAULT_FINAL_TRAIN_EPOCHS
PATIENCE = config.DEFAULT_PATIENCE
BATCH_SIZE = config.DEFAULT_BATCH_SIZE  # One batch size is shared by every stage in this notebook

# --- Device Setup ---
# Preference order: MPS, then CUDA, then CPU.
# `torch.backends.mps` is absent on PyTorch builds that predate the MPS
# backend, so look the module up defensively instead of assuming it exists
# (a direct attribute access would raise AttributeError there).
_mps_backend = getattr(torch.backends, "mps", None)
if _mps_backend is not None and _mps_backend.is_available() and _mps_backend.is_built():
    device = torch.device("mps")
elif torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print(f"Using device: {device}")

# --- Ensure Output Directories Exist ---
config.create_output_dirs()

### Will run pipeline for Setups ID: ['resnet_head_noaug', 'mobilenet_mid_aug'] ###
Using device: cuda
Output directories ensured.


## Stage 1: Hyperparameter Optimization

In [7]:
print(f"\n{'='*5} STAGE 1: Hyperparameter Optimization {'='*5}")

# Best hyperparameters keyed by setup id. The loop below handles several
# setups, so a single variable would only remember the last one. The mapping
# is rebuilt on every run of this cell so entries from a previous batch cannot
# linger in the kernel.
best_hpo_params_by_setup = {}

for setup_id in SETUP_IDS_TO_RUN:
    print(f"\nRunning HPO for setup: {setup_id}")
    hpo_start_time = time.time()

    # perform_hpo_for_setup is imported from the `hpo` module.
    # Per the original note, the function also saves its outputs to files
    # within config.HPO_DIR.
    best_hpo_params_found = perform_hpo_for_setup(
        setup_id=setup_id,
        hpo_grid=config.HPO_CONFIG_LIST,
        num_epochs_hpo=HPO_EPOCHS,
        batch_size=BATCH_SIZE,
        device=device
    )
    # Keep this setup's result. `best_hpo_params_found` is still assigned
    # above so that cells reading that name keep working; it holds only the
    # most recent setup's result.
    best_hpo_params_by_setup[setup_id] = best_hpo_params_found

    hpo_end_time = time.time()
    print(f"\n{'*'*10} HPO Stage for {setup_id} finished in {(hpo_end_time - hpo_start_time)/60:.2f} minutes {'*'*10}")

    if best_hpo_params_found:
        print(f"Best HPO parameters found for {setup_id}: {best_hpo_params_found}")
        # Per the original note these are also saved to
        # results/hpo/best_params_{setup_id}.json
    else:
        # A falsy result is treated as "HPO failed for this setup". The loop
        # still continues with the remaining setups.
        print(f"HPO did not complete successfully or find best parameters for {setup_id}. Cannot proceed.")
        # Consider raising an error or stopping the notebook here if HPO is critical for next steps



===== STAGE 1: Hyperparameter Optimization =====

Running HPO for setup: resnet_head_noaug
--- Starting HPO for Setup ID: resnet_head_noaug ---
  Trial 1/12 with params: {'lr_backbone': 0.0001, 'lr_head': 0.005, 'weight_decay': 0.01}
Downloading https://thor.robots.ox.ac.uk/pets/images.tar.gz to data/oxford-iiit-pet/images.tar.gz


  2%|▏         | 16.8M/792M [00:03<02:45, 4.67MB/s]


KeyboardInterrupt: 

## Stage 2: Final Model Training

In [None]:
# HPO outcome per setup id. Each setup must be gated on its own HPO result,
# not on whichever setup happened to run last. Use the per-setup mapping
# `best_hpo_params_by_setup` when the kernel holds one; otherwise only the
# single `best_hpo_params_found` value is available and it is applied to every
# setup.
_hpo_outcome_by_setup = globals().get("best_hpo_params_by_setup")
if _hpo_outcome_by_setup is None:
    _hpo_outcome_by_setup = {sid: best_hpo_params_found for sid in SETUP_IDS_TO_RUN}

# Checkpoint path per setup id (None when training was skipped or produced no
# model). Rebuilt on every run of this cell.
saved_model_filepaths = {}

for setup_id in SETUP_IDS_TO_RUN:
    if not _hpo_outcome_by_setup.get(setup_id):
        # No usable HPO result for this setup: skip training and record that
        # there is no model for it.
        print(f"\nSkipping Stage 2: Final Model Training for {setup_id} due to HPO failure or no best parameters found.")
        saved_model_filepath = None  # Ensure it's None if HPO failed
        saved_model_filepaths[setup_id] = None
        continue

    print(f"\n{'='*20} STAGE 2: Final Model Training for {setup_id} {'='*20}")
    final_train_start_time = time.time()

    # perform_final_training_for_setup is imported from the `final_train`
    # module. Per the original notes it loads the best-params file for the
    # setup itself and saves its outputs within config.FINAL_TRAINING_DIR.
    saved_model_filepath = perform_final_training_for_setup(
        setup_id=setup_id,
        max_epochs=FINAL_TRAIN_EPOCHS,
        patience=PATIENCE,
        batch_size=BATCH_SIZE,
        device=device
    )
    # `saved_model_filepath` is still assigned above so that cells reading
    # that name keep working; it holds only the most recent setup's path.
    saved_model_filepaths[setup_id] = saved_model_filepath

    final_train_end_time = time.time()
    print(f"\n{'*'*10} Final Training Stage for {setup_id} finished in {(final_train_end_time - final_train_start_time)/60:.2f} minutes {'*'*10}")

    if saved_model_filepath:
        print(f"Best model saved to: {saved_model_filepath}")
        # Per the original note this path is also saved internally by the
        # function to results/final_training/best_model_{setup_id}.pth
    else:
        print(f"Final training did not complete successfully or model was not saved for {setup_id}. Cannot proceed to evaluation.")

## Stage 3: Evaluation

In [None]:
# Checkpoint path per setup id. Each setup must be gated on its own trained
# model, not on whichever setup was trained last. Use the per-setup mapping
# `saved_model_filepaths` when the kernel holds one; otherwise only the single
# `saved_model_filepath` value is available and it is applied to every setup.
_model_path_by_setup = globals().get("saved_model_filepaths")
if _model_path_by_setup is None:
    _model_path_by_setup = {sid: saved_model_filepath for sid in SETUP_IDS_TO_RUN}

for setup_id in SETUP_IDS_TO_RUN:
    model_path = _model_path_by_setup.get(setup_id)

    # Proceed only if final training produced a model file that still exists.
    if not (model_path and os.path.exists(model_path)):
        print(f"\nSkipping Stage 3: Evaluation for {setup_id} due to final training failure or model not found.")
        continue

    print(f"\n{'='*20} STAGE 3: Evaluation for {setup_id} {'='*20}")
    eval_start_time = time.time()

    # perform_evaluation_for_setup is imported from the `evaluate` module.
    # Per the original notes it loads the model file itself based on setup_id
    # and config.FINAL_TRAINING_DIR, and saves its outputs within
    # config.EVALUATION_DIR. The path check above is only a gate; the path is
    # not passed to the function.
    perform_evaluation_for_setup(
        setup_id=setup_id,
        device=device
    )

    eval_end_time = time.time()
    print(f"\n{'*'*10} Evaluation Stage for {setup_id} finished in {(eval_end_time - eval_start_time)/60:.2f} minutes {'*'*10}")
    print(f"\nEvaluation results (predictions and metrics CSVs) are saved in: {config.EVALUATION_DIR}")