<div style="display: flex; align-items: center; justify-content: center; flex-wrap: wrap;">
    <div style="flex: 1; max-width: 400px; display: flex; justify-content: center;">
        <img src="https://i.ibb.co/JBPWVYR/Logo-Nova-IMS-Black.png" style="max-width: 50%; height: auto; margin-top: 50px; margin-bottom: 50px;margin-left: 6rem;">
    </div>
    <div style="flex: 2; text-align: center; margin-top: 20px;margin-left: 8rem;">
        <div style="font-size: 28px; font-weight: bold; line-height: 1.2;">
            <span style="color: #22c1c3;">DL Project |</span> <span style="color: #08529C;">Predicting Rare Species from Images using Deep Learning</span>
        </div>
        <div style="font-size: 17px; font-weight: bold; margin-top: 10px;">
            Spring Semester | 2024 - 2025
        </div>
        <div style="font-size: 17px; font-weight: bold;">
            Master in Data Science and Advanced Analytics
        </div>
        <div style="margin-top: 20px;">
            <div>André Silvestre, 20240502</div>
            <div>Diogo Duarte, 20240525</div>
            <div>Filipa Pereira, 20240509</div>
            <div>Maria Cruz, 20230760</div>
            <div>Umeima Mahomed, 20240543</div>
        </div>
        <div style="margin-top: 20px; font-weight: bold;">
            Group 37
        </div>
    </div>
</div>

<div style="background: linear-gradient(to right, #22c1c3, #27b1dd, #2d9cfd, #090979); 
            padding: 1px; color: white; border-radius: 500px; text-align: center;">
</div>

## **📚 Libraries Import**

In [None]:
# System imports
import os
import sys
import time
import datetime
from tqdm import tqdm
from typing_extensions import Self, Any      # For Python 3.10
# from typing import Self, Any               # For Python >3.11

from pathlib import Path

# Data manipulation imports
import numpy as np
import pandas as pd  
import warnings
warnings.filterwarnings("ignore")

# Data visualization imports
import matplotlib.pyplot as plt
import seaborn as sns

# Deep learning imports
import tensorflow as tf
from keras.ops import add
from keras.losses import CategoricalCrossentropy
from tensorflow.keras.optimizers import Adam, SGD, RMSprop
from tensorflow.keras import Model, Sequential, Input
from tensorflow.keras.callbacks import ModelCheckpoint, CSVLogger, LearningRateScheduler, EarlyStopping, ReduceLROnPlateau
from tensorflow.keras.layers import Input, Conv2D, MaxPooling2D, Flatten, Dense, Dropout, Rescaling, Lambda, BatchNormalization, Activation, GlobalAveragePooling2D
# import visualkeras

# Evaluation imports
from keras.metrics import CategoricalAccuracy, AUC, F1Score, Precision, Recall

# Other imports
from itertools import product

# pandas options (these configure DataFrame behaviour and display, not plot styling)
pd.set_option('future.no_silent_downcasting', True)   # opt in to pandas' future "no silent downcasting" behaviour
pd.set_option("display.max_columns", None)            # display all columns

# Silence FutureWarning and UserWarning messages
# NOTE(review): the blanket warnings.filterwarnings("ignore") issued earlier in this cell already
#               hides every warning category, so this repeated import and the two category
#               filters look redundant - confirm before removing.
import warnings
warnings.filterwarnings('ignore', category=FutureWarning)
warnings.filterwarnings('ignore', category=UserWarning)

# Seed NumPy's global random number generator for reproducibility
np.random.seed(2025)

# Seed Keras / TensorFlow through the helper recommended in the Keras reproducibility FAQ
# Source: https://keras.io/getting_started/faq/#how-can-i-obtain-reproducible-results-using-keras-during-development
# NOTE(review): per that FAQ this helper presumably seeds Python's `random` and NumPy as well,
#               which would make the np.random.seed call above redundant - verify.
tf.keras.utils.set_random_seed(2025)

In [None]:
# Replaces the ssl module's default HTTPS context factory with one that does NOT verify the
# server's certificate - needed (in the authors' environment) for downloading pretrained models.
# SECURITY NOTE(review): the replacement is assigned on the ssl module itself, so it stays in effect
#   for the rest of the kernel session and presumably affects every later HTTPS download that relies
#   on the default context. Prefer fixing the local CA certificates and removing this cell - confirm
#   whether it is still required.
# Source: https://precli.readthedocs.io/0.3.4/rules/python/stdlib/ssl_create_unverified_context.html
import ssl
ssl._create_default_https_context = ssl._create_unverified_context

In [None]:
# Report the TensorFlow build and whether a GPU is visible to it
print(f"TensorFlow Version: {tf.__version__}")
print(f"Is TensorFlow built with CUDA? {tf.test.is_built_with_cuda()}")
print(f"GPU Available: {tf.config.list_physical_devices('GPU')}")
# If the next line errors in Google Colab, make sure the hardware accelerator is set to GPU:
# Runtime > Change runtime type > Hardware Accelerator
print(f"GPU Device Name: {tf.test.gpu_device_name()}")

In [None]:
# Look up the CUDA / cuDNN versions recorded in TensorFlow's build information
build_info = tf.sysconfig.get_build_info()
cuda_version = build_info.get("cuda_version", "Not available")
cudnn_version = build_info.get("cudnn_version", "Not available")

# Print them next to the TensorFlow and Python interpreter versions
print(f"TensorFlow version: {tf.__version__}")
print(f"Python version: {sys.version}")
print(f"CUDA version: {cuda_version}")
print(f"cuDNN version: {cudnn_version}")

In [None]:
# Import custom module for importing data, visualization, and utilities
import utilities

## **🧮 Import Databases**

#### **🟨 Google Colab**

In [None]:
# # Run in Google Colab to download the dataset already split
# # Source: https://stackoverflow.com/questions/25010369/wget-curl-large-file-from-google-drive
# # Download the file from Google Drive using wget
# !wget --quiet --save-cookies /tmp/cookies.txt --keep-session-cookies --no-check-certificate \
#   "https://drive.usercontent.google.com/download?id=1dmr2cGxgM-kp1aXlmd9cQzVCkcl4JTFo&export=download" -O- | \
#   sed -rn 's/.*confirm=([0-9A-Za-z_]+).*/\1\n/p' > /tmp/confirm.txt

# # Read the confirmation token from the temporary file
# with open('/tmp/confirm.txt', 'r') as f:
#     confirm_token = f.read().strip()

# # Download the file using the confirmation token and cookies
# !wget --load-cookies /tmp/cookies.txt \
#   "https://drive.usercontent.google.com/download?id=1dmr2cGxgM-kp1aXlmd9cQzVCkcl4JTFo&export=download&confirm={confirm_token}" \
#   -O /content/RareSpecies_Split.zip

# # Clean up temporary files
# !rm /tmp/cookies.txt /tmp/confirm.txt

# # List files in the /content directory to verify the download
# !ls -lh /content/

# # Unzip the downloaded file
# !unzip /content/RareSpecies_Split.zip -d /content/

# # List the unzipped files to verify
# !ls -lh /content/

### **🖌️ SMOTE (Data Augmentation)**


In [None]:
# Shared configuration for the image datasets and the model input
image_size = (224, 224)                             # Target image size (224x224)
img_height, img_width = image_size                  # Image dimensions, unpacked as (height, width)
input_shape = (*image_size, 3)                      # Model input shape: height, width, 3 colour channels
n_classes = 202                                     # Number of classes (already known from the previous notebook)
batch_size = 64                                     # Batch size (kept consistent with previous training)
value_range = (0.0, 1.0)                            # Range of pixel values

In [None]:
# Locations of the SMOTE-augmented training split and of the validation / test splits
train_DataAugmentationSMOTE_dir = Path("data/RareSpecies_Split/train_DataAugmentationSMOTE")
val_dir = Path("data/RareSpecies_Split/val")
test_dir = Path("data/RareSpecies_Split/test")

# Google Colab equivalents
# train_DataAugmentationSMOTE_dir = Path("/content/RareSpecies_Split/train_DataAugmentationSMOTE")
# val_dir = Path("/content/RareSpecies_Split/val")
# test_dir = Path("/content/RareSpecies_Split/test")

# Get class names from the training directory: one class per sub-directory.
# Plain files and hidden entries (e.g. .DS_Store, .ipynb_checkpoints) are skipped so they can
# never be mistaken for a class and shift the label indices.
class_names = sorted(
    entry.name
    for entry in train_DataAugmentationSMOTE_dir.iterdir()
    if entry.is_dir() and not entry.name.startswith(".")
)
class_indices = {name: i for i, name in enumerate(class_names)}

# Fail early with a readable message instead of a shape mismatch deep inside model.fit:
# the labels are one-hot over `class_names`, while the model output layer has `n_classes` units.
if len(class_names) != n_classes:
    raise ValueError(
        f"Found {len(class_names)} class folders in {train_DataAugmentationSMOTE_dir}, "
        f"but n_classes is {n_classes}."
    )

# Import the image datasets from the directories
from utilities import load_images_from_directory
train_DataAugmentationSMOTE_datagen, val_datagen, test_datagen = load_images_from_directory(
    train_DataAugmentationSMOTE_dir, val_dir, test_dir,
    labels='inferred', label_mode='categorical',
    class_names=class_names, color_mode='rgb',
    batch_size=batch_size, image_size=image_size, seed=2025,
    interpolation='bilinear', crop_to_aspect_ratio=False, pad_to_aspect_ratio=False
)

# Check the shape of one batch per split - expected (batch_size, img_height, img_width, 3) for the images
for split_name, dataset in (("Train", train_DataAugmentationSMOTE_datagen),
                            ("Val", val_datagen),
                            ("Test", test_datagen)):
    for x, y in dataset.take(1):
        print(f"{split_name} batch shape:", x.shape, y.shape)

# <a class='anchor' id='3'></a>
<br>
<style>
@import url('https://fonts.cdnfonts.com/css/avenir-next-lt-pro?styles=29974');
</style>

<div style="background: linear-gradient(to right, #22c1c3, #27b1dd, #2d9cfd, #090979); 
            padding: 10px; color: white; border-radius: 300px; text-align: center;">
    <center><h1 style="margin-left: 140px;margin-top: 10px; margin-bottom: 4px; color: white;
                       font-size: 32px; font-family: 'Avenir Next LT Pro', sans-serif;">
        <b>Tuning Best Model</b></h1></center>
</div>

<br><br>

## **💡 Best Model Building Function for Keras Tuner** (ConvNeXtBase)

In [None]:
# Function to tune best combination model
# Source: https://keras.io/examples/vision/image_classification_efficientnet_fine_tuning/
#         https://keras.io/guides/transfer_learning/
#         https://keras.io/keras_tuner/getting_started/

In [None]:
# --- Keras Tuner ---
# Source: https://keras.io/keras_tuner/
# Make sure it's installed: 

# !pip install keras-tuner -q
import keras_tuner as kt

In [None]:
from tensorflow.keras.applications import ConvNeXtBase               # Specific model
from tensorflow.keras.applications.convnext import preprocess_input  # Specific preprocessing

def build_model(hp):
    """
    Builds and compiles a ConvNeXtBase transfer-learning classifier for Keras Tuner.

    The tuned hyperparameters are: whether the pretrained base is unfrozen, the learning
    rate, the optimizer and the dropout rate of the classification head. The input shape
    and the number of output classes are read from the notebook-level `input_shape` and
    `n_classes` variables.

    Args:
        hp (keras_tuner.HyperParameters): Hyperparameters object from Keras Tuner.

    Returns:
        keras.Model: Compiled Keras model.

    Raises:
        ValueError: If the selected optimizer is not one of 'adam', 'sgd' or 'rmsprop'.
    """

    # Hyperparameters
    # Source: https://keras.io/keras_tuner/api/hyperparameters/
    hp_unfreeze = hp.Boolean("unfreeze_base", default=False)
    # Note: If unfreezing, use a low learning rate -> We search LR <= 1e-3 (1e-3 was used in all previous models)
    # Source: https://keras.io/guides/transfer_learning/#do-a-round-of-finetuning-of-the-entire-model
    hp_learning_rate = hp.Choice("learning_rate", values=[1e-3, 1e-4, 1e-5])
    hp_optimizer_choice = hp.Choice("optimizer", values=['adam', 'sgd', 'rmsprop'])
    hp_dropout_rate = hp.Float("dropout_rate", min_value=0.3, max_value=0.7, step=0.1)

    # ----------------------------------------------------------------------------------------------------------------
    # Base Model (ConvNeXtBase)
    # Source: https://keras.io/api/applications/convnext/#convnextbase-function
    base_model = ConvNeXtBase(
        include_top=False,
        weights='imagenet',
        input_shape=input_shape
    )

    # Set Trainability: the base stays frozen unless the hyperparameter asks to unfreeze it
    # Source: https://keras.io/guides/transfer_learning/
    base_model.trainable = bool(hp_unfreeze)
    if base_model.trainable:
        print("NOTE: Unfreezing the base ConvNeXtBase model for this trial.")
        # Optional: Fine-tune only the top layers instead of all
        # fine_tune_at = 100 # Example: Unfreeze layers from this index onwards
        # for layer in base_model.layers[:fine_tune_at]:
        #     layer.trainable = False

    # Model Construction
    inputs = Input(shape=input_shape, name="Input_Layer")

    # Apply the specific preprocessing for ConvNeXtBase
    # Source: https://keras.io/api/applications/convnext/#preprocessinput-function
    # NOTE(review): the Keras docs describe this preprocess_input as a pass-through placeholder
    #               (normalization lives inside the ConvNeXt model, which expects pixels in [0, 255]);
    #               confirm that the data pipeline does not rescale the images to [0, 1].
    x = Lambda(lambda img: preprocess_input(img), name='ConvNeXtBase_Preprocess')(inputs)

    # Pass through the base model
    x = base_model(x, training=base_model.trainable) # training=False if frozen, True if unfrozen

    # Classification Head (Same as class RareSpeciesCNN_ConvNeXtBase - 7th notebook)
    x = GlobalAveragePooling2D(name="Global_Average_Pooling")(x)
    x = Dense(128, name="Dense_Layer1")(x)
    # Use the hyperparameter for dropout rate
    x = Dropout(hp_dropout_rate, name="Dropout_Layer")(x)
    outputs = Dense(n_classes, activation='softmax', name="Output_Layer")(x)

    model = Model(inputs, outputs)

    # Optimizer Selection
    # Source: https://keras.io/api/optimizers/
    if hp_optimizer_choice == 'adam':
        optimizer = Adam(learning_rate=hp_learning_rate)               # No weight decay, for simplicity in tuning
    elif hp_optimizer_choice == 'sgd':
        optimizer = SGD(learning_rate=hp_learning_rate, momentum=0.9)  # Momentum is common for SGD
    elif hp_optimizer_choice == 'rmsprop':
        optimizer = RMSprop(learning_rate=hp_learning_rate)
    else:
        # Without this branch an unexpected value would only surface later as an
        # UnboundLocalError on `optimizer`, hiding the real cause.
        raise ValueError(f"Unsupported optimizer choice: {hp_optimizer_choice!r}")

    # --- Compile Model ---
    # Source: https://keras.io/api/models/model_training_apis/
    loss = CategoricalCrossentropy(name="Loss")
    metrics = [
        CategoricalAccuracy(name="accuracy"),
        F1Score(average="macro", name="f1_score"), # Using F1 Macro as the primary metric
        Precision(name="precision"),
        Recall(name="recall"),
        AUC(name="auc")
    ]

    model.compile(optimizer=optimizer, loss=loss, metrics=metrics)

    return model

In [None]:
# Test the build function once before handing it to the tuner, and inspect the architecture.
# NOTE(review): with a freshly created HyperParameters object, each hp.* call inside build_model
#               presumably returns its default value - confirm in the KerasTuner documentation.
hp = kt.HyperParameters()
test_model = build_model(hp)
test_model.summary()

### **⚙️ Keras Tuner Setup (`Hyperband`)**

In [None]:
# Source: https://keras.io/keras_tuner/tuners/hyperband/
tuner = kt.Hyperband(
    hypermodel=build_model,
    objective=kt.Objective("val_f1_score", direction="max"), # Primary objective: maximize validation F1 macro score
    max_epochs=15,                                           # Max epochs *per trial execution* within Hyperband brackets
    factor=3,                                                # Reduction factor for epochs and number of models per bracket
    hyperband_iterations=1,                                  # Number of times to iterate over the full Hyperband algorithm
    seed=2025,                                               # Seed for reproducibility within the tuner
    directory="keras_tuner_dir",
    project_name=f"ConvNeXtBase_FineTune_{datetime.datetime.now().strftime('%Y%m%d_%H%M')}",
    overwrite=True                                           # Set to False to resume previous tuning runs
)

# Print a summary of the search space
tuner.search_space_summary()

### **🚀 Run the Hyperparameter Search**

In [None]:
# Callbacks used while searching (the same list is passed to every trial).
# Early stopping is what ends non-promising trials quickly.
trial_early_stopping = EarlyStopping(
    monitor='val_loss',          # A trial is judged on its validation loss
    patience=5,                  # Stop after 5 epochs of no improvement in val_loss
    restore_best_weights=True,   # Restore weights from the epoch with the best val_loss
    verbose=1
)

# Note: ReduceLROnPlateau is usually NOT used during the search itself,
#       as the tuner is already exploring different learning rates.
# It is used later, when retraining the final best model.
search_callbacks = [trial_early_stopping]

In [None]:
# Start the search
# No `epochs` argument is passed here: Hyperband assigns the number of epochs of every trial
# itself (bounded by `max_epochs` in the tuner definition), so a value given to `search` would
# be overridden and would not act as an overall budget for the search.
search_start_time = time.time()
tuner.search(
    train_DataAugmentationSMOTE_datagen,
    validation_data=val_datagen,
    callbacks=search_callbacks,
    verbose=1                               # Set to 2 for more detailed logs per epoch, 1 for progress bar per epoch
)
search_time = round(time.time() - search_start_time, 2)
print(f"\n\033[1mHyperparameter Search Completed in\033[0m {search_time} seconds ({str(datetime.timedelta(seconds=search_time))} h)")

#### **📊 Get and Display Best Hyperparameters**

In [None]:
# Retrieve the best hyperparameter set found by the tuner
# Source: https://keras.io/keras_tuner/api/tuners/base_tuner/
best_hps = tuner.get_best_hyperparameters(num_trials=1)[0]

# Bold label -> hyperparameter name, in the order they are reported
reported_hyperparameters = (
    ("Optimal Learning Rate", 'learning_rate'),
    ("Optimal Optimizer", 'optimizer'),
    ("Optimal Dropout Rate", 'dropout_rate'),
    ("Unfreeze Base Model", 'unfreeze_base'),
)

# The empty first/last entries produce the blank lines around the report
summary_lines = ["", "The hyperparameter search is complete."]
for label, hp_name in reported_hyperparameters:
    summary_lines.append(f"- \033[1m{label}:\033[0m {best_hps.get(hp_name)}")
summary_lines.append("")
print("\n".join(summary_lines))

# Show summary of top results
tuner.results_summary(num_trials=5)

### **🚂 Retrain the Best Model**

In [None]:
# Make sure the folders for model checkpoints/logs and for evaluation outputs exist
for output_dir in ("./ModelCallbacks/8_ConvNeXtBaseFinetuned",
                   "./ModelsEvaluation/8_ConvNeXtBaseFinetuned"):
    os.makedirs(output_dir, exist_ok=True)      # exist_ok=True | no error if the directory is already there

In [None]:
# Rebuild the network from the best hyperparameters found by the tuner
final_model = build_model(best_hps)
final_model.summary()

# Timestamped identifier shared by the checkpoint and metrics-log file names
model_name = f"ConvNeXtBase_BestFineTuned_{datetime.datetime.now().strftime('%Y%m%d_%H%M')}"

# --- Callbacks for the final training phase ---
# Checkpoint on disk: only overwritten when the validation F1 score improves
best_f1_checkpoint = ModelCheckpoint(
    f"./ModelCallbacks/8_ConvNeXtBaseFinetuned/BestModel_checkpoint_{model_name}.keras",
    monitor="val_f1_score",
    mode="max",
    save_best_only=True,
    verbose=1
)

# Per-epoch metrics written to CSV
metrics_logger = CSVLogger(f"./ModelCallbacks/8_ConvNeXtBaseFinetuned/BestModel_metrics_{model_name}.csv")

# Learning-rate decay on a validation-loss plateau (not used during the search, used now)
lr_on_plateau = ReduceLROnPlateau(
    monitor='val_loss',
    factor=0.2,
    patience=5,
    min_lr=1e-7,                # Lower bound for the learning rate
    verbose=1
)

# Early stopping with more patience than during the search; the weights restored into
# `final_model` are selected by val_loss, whereas the checkpoint above is selected by val_f1_score
final_early_stopping = EarlyStopping(
    monitor='val_loss',
    patience=10,
    restore_best_weights=True,
    verbose=1
)

final_callbacks = [best_f1_checkpoint, metrics_logger, lr_on_plateau, final_early_stopping]

# --- Final training ---
# The epoch count is deliberately high; EarlyStopping decides when training actually ends
print(f"\nStarting Final Training for {model_name}")
start_time = time.time()
history = final_model.fit(
    train_DataAugmentationSMOTE_datagen,
    validation_data=val_datagen,
    epochs=100,
    callbacks=final_callbacks,
    verbose=1
)
train_time = round(time.time() - start_time, 2)
print(f"\nFinal Training completed in {train_time} seconds ({str(datetime.timedelta(seconds=train_time))} h)")

#### **🧪 Final Model Evaluation**

In [None]:
# Plot the training / validation metric curves of the final run
from utilities import plot_metrics

# Output path handed to plot_metrics through its file_path argument
metrics_plot_path = f"./ModelsEvaluation/8_ConvNeXtBaseFinetuned/8_BestModel_TrainingValidationMetrics_{model_name}.png"
plot_metrics(history, file_path=metrics_plot_path)

In [None]:
# Training metrics are read from the epoch whose weights the model actually holds.
# The final training uses EarlyStopping(monitor='val_loss', restore_best_weights=True), which puts
# back the weights of the epoch with the lowest validation loss; the last history entry can belong
# to a later (worse) epoch and would not match the validation / test numbers computed below.
val_loss_history = history.history['val_loss']
best_epoch = val_loss_history.index(min(val_loss_history))   # first epoch reaching the minimum val_loss
train_results = {
    metric_name: history.history[metric_name][best_epoch]
    for metric_name in ('accuracy', 'precision', 'recall', 'f1_score', 'auc')
}

# Evaluate on validation and test sets
# `batch_size` is not passed: both inputs are already-batched datasets, and the Keras documentation
# states that batch_size must not be specified when the data is a dataset.
val_results = final_model.evaluate(val_datagen, return_dict=True, verbose=1)
test_results = final_model.evaluate(test_datagen, return_dict=True, verbose=1)

In [None]:
# Gather train / validation / test metrics into one table and display it
from utilities import display_side_by_side, create_evaluation_dataframe

# Compact description of the tuned configuration
variation_label = (
    f"LR={best_hps.get('learning_rate')}, "
    f"Optim={best_hps.get('optimizer')}, "
    f"Dropout={best_hps.get('dropout_rate'):.2f}, "
    f"Unfreeze={best_hps.get('unfreeze_base')}"
)
results_csv_path = f"./ModelsEvaluation/8_ConvNeXtBaseFinetuned/8_BestModel_EvaluationResults_{model_name}.csv"

results_df = create_evaluation_dataframe(
    model_name="ConvNeXtBase_FineTuned",
    variation=variation_label,
    train_metrics=train_results,
    val_metrics=val_results,
    test_metrics=test_results,
    train_time=train_time,              # Duration of the final retraining only (the search is not included)
    csv_save_path=results_csv_path
)
display_side_by_side(results_df, super_title="Final Fine-Tuned Model Evaluation Results")

In [None]:
# Show a sample of right and wrong predictions on the test set
from utilities import plot_predictions

predictions_plot_path = f"./ModelsEvaluation/8_ConvNeXtBaseFinetuned/8_TestPredictions_{model_name}.png"
plot_predictions(
    model=final_model,
    class_names=class_names,
    train_dir=train_DataAugmentationSMOTE_dir,
    test_data=test_datagen,
    num_images=10,
    file_path=predictions_plot_path,
)

---

# **🔗 Bibliography/References**

**[[1]](https://keras.io/keras_tuner/api/)** Team, K. (2025). Keras documentation: KerasTuner API documentation. Keras.io. https://keras.io/keras_tuner/api/