In [2]:
from utils import *

2025-07-17 06:14:16.055651: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:467] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1752707656.069290   47311 cuda_dnn.cc:8579] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1752707656.072990   47311 cuda_blas.cc:1407] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
W0000 00:00:1752707656.084074   47311 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.
W0000 00:00:1752707656.084101   47311 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.
W0000 00:00:1752707656.084103   47311 computation_placer.cc:177] computation placer alr

In [None]:
from typing import *
import random as rd
import os
import pandas as pd
import shutil
from tqdm.notebook import tqdm
import tensorflow as tf
from tensorflow import keras
import numpy as np
from IPython.display import clear_output
import optuna
import time
from sklearn.utils.class_weight import compute_class_weight
from pynvml import *
import re
import sys
import wandb
wandb.login()

# Report interpreter, OS and platform so every run can be traced back to its environment.
print(sys.version)
print(f"Operating System: {os.name}")
# os.uname() does not exist on Windows. Skip just that line there instead of
# hiding every possible error behind a bare `except: pass`.
if hasattr(os, "uname"):
    print(f"Platform: {os.uname()}")

# Check GPU availability. An empty device list must not crash the cell with an IndexError.
gpu_devices = tf.config.list_physical_devices('GPU')
if gpu_devices:
    # 'device_name' is not guaranteed to be present in the details dict.
    gpu_name = tf.config.experimental.get_device_details(gpu_devices[0]).get('device_name', 'unknown')
else:
    gpu_name = "No GPU detected (running on CPU)"
print(f"GPU Name: {gpu_name}")


# Checkpointing AKA savepoint: one full model file per epoch inside RUN/.
# The {epoch} and {val_f1score} placeholders are filled in by Keras when it saves,
# so this is a plain template string (no f-string needed).
checkpoint_filepath = 'RUN/epoch_{epoch:03d}-val_f1score{val_f1score:.4f}.keras'
model_checkpoint_callback = keras.callbacks.ModelCheckpoint(
    filepath=checkpoint_filepath,
    monitor='val_f1score',
    verbose=0,
    save_best_only=False,
    save_weights_only=False,
    save_freq='epoch',
)

# Early stopping: stop if the validation F1 score does not improve for 20 epochs.
# mode='max' is stated explicitly because an F1 score must be maximised and the
# 'auto' inference is not guaranteed to recognise a metric named "f1score".
# NOTE: no trailing comma after the call - it would silently turn this into a 1-tuple.
kerasmodel_earlystopping_callbackls = keras.callbacks.EarlyStopping(
    monitor='val_f1score',
    mode='max',
    patience=20,
)

# WANDB logger: forwards the per-epoch accuracy, F1 score, loss and learning rate to WANDB.
class simpleWANDBLogger(keras.callbacks.Callback):
    """Keras callback that mirrors each epoch's train/validation metrics to WANDB."""

    def __init__(self):
        super().__init__()

    def on_epoch_end(self, epoch, logs=None):
        """
        Send this epoch's metrics and the optimizer's current learning rate to WANDB.

        Reads "accuracy", "f1score" and "loss" plus their "val_" counterparts from `logs`.
        """
        payload = {
            "train_accuracy": logs["accuracy"],
            "val_accuracy": logs["val_accuracy"],
            "train_f1score": logs["f1score"],
            "val_f1score": logs["val_f1score"],
            "train_loss": logs["loss"],
            "val_loss": logs["val_loss"],
        }
        payload["learning_rate"] = self.model.optimizer.learning_rate.numpy()
        wandb.log(payload)

# Halve the learning rate whenever the validation loss has stalled for 5 epochs,
# never going below 1e-6.
reduce_lr = keras.callbacks.ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=5, min_lr=1e-6, verbose=1)

# Every callback handed to model.fit, collected in one list.
callbacks = [model_checkpoint_callback, kerasmodel_earlystopping_callbackls, simpleWANDBLogger(), reduce_lr]

def simplePipeline_CNN(dataset_name: str, batch_size: int = 32, target_sample = 400):
    '''
    THIS IS JUST FOR TESTING PURPOSES, NOT FOR PRODUCTION USE! :c

    A simple pipeline that trains a CNN on one train/validation split, evaluates the
    best checkpoint on the test set and logs everything to WANDB.

    Parameters
    ------------
    - dataset_name:
        The name of the temporary dataset used for training, located in
        `../DATA/!TEMP/<dataset_name>` with the following structure
        (presumably created by `split_train_eval` - verify against utils):
        ```
        ../DATA/!TEMP/<dataset_name>
            ├── train
            │   ├── class1
            │   ├── class2
            │   └── ...
            ├── val
            │   ├── class1
            │   ├── class2
            │   └── ...
        ```
        The name is also used as the WANDB run name.
    - batch_size:
        The batch size to be used for training the model. Default is 32.
    - target_sample:
        The target number of samples per class for balancing the dataset. Default is 400.

    Returns
    ------------
    A one-row DataFrame with the columns "Epoch", "Balanced Accuracy", "Precision",
    "Recall" and "F1 Score", measured on the test generator.

    Side effects
    ------------
    Deletes the `RUN` checkpoint directory and the temporary dataset directory.
    '''
    # Same root and capitalisation as the rest of the notebook ("!TEMP", not "!Temp").
    # On a case-sensitive filesystem the old spelling made the final rmtree a silent no-op.
    dataset_path = os.path.join("../DATA/!TEMP/", dataset_name)

    # Split the datasets into train and validation
    split_train_eval(
        dataset_name,
        composition=[0.85, 0.15],  # 85% train, 15% validation
    )

    # Adjust the class balance to have a target number of samples per class
    adjustClassBalance(
        dataset_name,
        target_sample_class=target_sample
    )

    # Gets generators for all datasets
    train_gen, eval_gen = get_train_eval_Generator(
        dataset_name,
        batch_size=batch_size,
    )
    test_gen = get_test_generator(batch_size=batch_size)

    model = get_model(num_classes=len(train_gen.class_indices))

    # Initialize WANDB
    wandb.init(project="OPTUNA_CNN_NEW", entity="rheyhanfahry", name=dataset_name)

    # Checkpoints left behind by a crashed run would be mixed up with this run's
    # files when the best epoch is looked up below, so start from a clean directory.
    shutil.rmtree("RUN", ignore_errors=True)

    # Train the model
    st = time.time()
    history = model.fit(
        train_gen,
        validation_data=eval_gen,
        epochs=1000,
        callbacks=callbacks
    )
    training_time = time.time() - st

    # Find the epoch with the best validation F1 score and load its checkpoint.
    # The file is matched by its "epoch_NNN-" prefix instead of by position in a
    # sorted listing: lexicographic order breaks once epoch 1000 is reached.
    val_f1_history = history.history['val_f1score']
    best_f1score_val = max(val_f1_history)
    best_epoch = val_f1_history.index(best_f1score_val) + 1
    checkpoint_prefix = f"epoch_{best_epoch:03d}-"
    candidates = sorted(f for f in os.listdir("RUN") if f.startswith(checkpoint_prefix))
    if not candidates:
        raise FileNotFoundError(f"No checkpoint starting with '{checkpoint_prefix}' found in RUN")
    fileName = candidates[-1]
    model = keras.models.load_model(f'RUN/{fileName}')

    # gets metrics from the model
    st = time.time()
    result = checkMetrics(model, test_gen, plot_confusion_matrix=False, mode="detailed")
    inference_time = time.time() - st

    # Logs the (1-based) epoch whose checkpoint was selected
    result["Epoch"] = best_epoch

    # Logs time taken for training and inference
    class_table = wandb.Table(dataframe=pd.DataFrame({
        "Training Time (seconds)": [training_time],
        "Inference Time (seconds)": [inference_time]
    }))
    wandb.log({
        "TIME" : class_table
    })

    # log each class metrics to WANDB
    count_columns = ["True Positives", "False Positives", "False Negatives"]
    log_class = pd.DataFrame(result["Class Metrics"]).T
    log_class[count_columns] = log_class[count_columns].astype(int)
    log_class["Class"] = log_class.index
    log_class = log_class[["Class", "True Positives", "False Positives", "False Negatives", "Precision", "Recall", "F1 Score"]]
    class_table = wandb.Table(dataframe=log_class)
    wandb.log({"each class metrics": class_table})

    # logs overall metrics to WANDB
    result.pop("Class Metrics")
    log_overall = pd.DataFrame(result, index=[0])
    log_overall = log_overall[["Epoch", "Balanced Accuracy", "Precision", "Recall", "F1 Score"]]
    overall_table = wandb.Table(dataframe=log_overall)
    wandb.log({"overall metrics": overall_table})

    # Finish the WANDB run
    wandb.finish()

    # Clean up temporary directories
    shutil.rmtree("RUN", ignore_errors=True)
    shutil.rmtree(dataset_path, ignore_errors=True)

    return log_overall

class getBestHyperparamms_optuna_CNN:
    '''
    A class to perform hyperparameter optimization for a CNN model using Optuna.

    THE HYPERPARAMETERS TO BE OPTIMIZED ARE:
    - num_of_balancing:
        The number of samples to balance each class to. This is used in the `adjustClassBalance` function.
    - dropout_rate:
        The dropout rate to be used in the model. This is used in the `get_model` function.
    - batch_size:
        The batch size to be used in the model. This is used in the `get_train_eval_Generator` function.
    - learning_rate:
        The starting learning rate to be used in the model. This is used in the `get_model` function.
    - optimizer:
        The optimizer to be used in the model. This is used in the `get_model` function.
    - num_conv_blocks:
        The number of convolutional blocks to be used in the model. This is used in the `get_model` function.
    - conv2d_filters:
        The number of filters in each convolutional layer. This is used in the `get_model` function.

    HYPERPARAMETERS ARE DEFINED IN THE `objective` FUNCTION.

    Pipeline:
    - Initialize the class with the number of folds (k) and the directory used for autosaving.
    - Call the `start` function to begin the optimization process.
    - The `objective` function is called by Optuna for each trial and suggests one hyperparameter set.
        - `stratified_k_fold_cross_validation` runs the k folds and returns the mean F1 score,
          which is the value Optuna maximises.
            - `stratified_k_fold_split` splits the dataset into training and validation sets,
              with a validation set that is unique to each fold.
            - `adjustClassBalance` balances the classes of the fold.
            - `getBestEpochsandMetrics` trains the model and gets the best epoch and its metrics.
            - The metrics and hyperparameters are logged to WANDB.
            - `autosave` writes all metrics collected so far to a CSV file.
    - After all trials are completed, the overall metrics are saved to a CSV file.
    - The metrics averaged per hyperparameter set are saved to a separate CSV file.
    '''
    def __init__(self, k: int = 4, savedir: str = ""):
        """
        Parameters
        ------------
        - k:
            Number of folds per trial.
        - savedir:
            Directory for the autosave CSV files. It must not exist yet.

        Raises
        ------------
        FileExistsError if `savedir` already exists.
        """
        self.K = k

        # LOGS FOR ALL TRIALS
        self.overallMetrics = pd.DataFrame(columns=[
            "fold", "num_of_balancing", "dropout_rate", "batch_size", "learning_rate", "optimizer", "num_conv_blocks",
            "conv2d_filters", "Epoch", "Balanced Accuracy", "Precision", "Recall", "F1 Score"])

        # Dir of autosave
        self.savedir = savedir
        self.index = 0
        if os.path.exists(self.savedir):
            raise FileExistsError("Please provide a new directory name, the directory already exists.")
        os.makedirs(self.savedir)

    def autosave(self):
        '''
        Better be safe than sorry.

        Saves every metric collected so far to `<savedir>/<index>.csv` and increments the
        index, so the highest-numbered file is always the most complete one.
        '''
        self.overallMetrics.to_csv(os.path.join(self.savedir, f'{self.index}.csv'), index=False)
        self.index += 1

    def start(self, trial: int = 50, ):
        '''
        Starts the hyperparameter optimization process using Optuna.

        Parameters
        ------------
        - trial:
            The number of trials to run for hyperparameter optimization. Default is 50.

        Writes `CNN_overallMetrics.csv` (one row per fold) and `CNN_averageMetrics.csv`
        (one row per hyperparameter set, best F1 score first) to the working directory.
        '''
        # Initialize Optuna; the study is resumed if it already exists in the SQLite file.
        storage = optuna.storages.RDBStorage(url="sqlite:///optuna_study_CNN.db")
        study = optuna.create_study(direction='maximize', study_name='CNN_Optuna', storage=storage, load_if_exists=True)

        # START THE OPTIMIZATION PROCESS
        study.optimize(self.objective, n_trials=trial)

        # Save the overall metrics to a CSV file
        self.overallMetrics.to_csv("CNN_overallMetrics.csv", index=False)

        # Average the folds that share the same hyperparameter set and save them as well
        self._average_metrics().to_csv("CNN_averageMetrics.csv", index=False)

    def _average_metrics(self):
        '''
        Averages the fold metrics per hyperparameter set, sorted by F1 score (best first).

        The hyperparameter columns of `self.overallMetrics` are converted to strings in
        place so they can be used as group keys. "fold" and "Epoch" are not part of the
        result. (The previous code dropped "Epoch" and then tried to cast it to int,
        which raised a KeyError only after the whole study had finished.)
        '''
        metric_columns = ["Balanced Accuracy", "Precision", "Recall", "F1 Score"]
        filters = [i for i in list(self.overallMetrics.columns) if i not in ["fold", "Epoch"] + metric_columns]
        self.overallMetrics[filters] = self.overallMetrics[filters].astype(str)
        avg_metrics = (
            self.overallMetrics
            .drop(columns=["fold", "Epoch"])
            .groupby(filters)
            .mean()
            .reset_index()
            .sort_values(by="F1 Score", ascending=False)
        )
        for column in metric_columns:
            avg_metrics[column] = avg_metrics[column].round(4)
        return avg_metrics

    def objective(self, trial):
        '''
        The objective function for Optuna to optimize the hyperparameters of the CNN model.

        Suggests one hyperparameter set, stores it on the instance, runs the k-fold
        cross validation and returns the mean validation F1 score (to be maximised).
        '''
        self.num_of_balancing = trial.suggest_int('target_sample_class', 250, 500, step=25)
        self.dropout_rate = trial.suggest_float('dropout_rate', 0.2, 0.5)
        self.batch_size = trial.suggest_categorical('batch_size', [8, 16, 32])
        self.learning_rate = trial.suggest_float('learning_rate', 1e-5, 1e-3, log=True)
        self.optimizer = trial.suggest_categorical('optimizer', ["adam", "rmsprop"])
        self.num_conv_blocks = trial.suggest_int('num_conv_blocks', 3, 6)

        # ik this is somewhat ugly, but I want to make sure that the filters are increasing in size:
        # each layer's lower bound is the value picked for the previous layer.
        filters_layer1 = trial.suggest_int('filters_layer1', 16, 64, step=16)
        filters_layer2 = trial.suggest_int('filters_layer2', filters_layer1, 128, step=16)
        filters_layer3 = trial.suggest_int('filters_layer3', filters_layer2, 256, step=32)
        filters_layer4 = trial.suggest_int('filters_layer4', filters_layer3, 256, step=32)
        filters_layer5 = trial.suggest_int('filters_layer5', filters_layer4, 512, step=32)
        filters_layer6 = trial.suggest_int('filters_layer6', filters_layer5, 512, step=32)
        self.conv2d_filters = [filters_layer1, filters_layer2, filters_layer3, filters_layer4, filters_layer5, filters_layer6]

        # TEMP DIR name for the dataset. Will be used to create a new dataset for each trial.
        self.dataset_name = f"{self.num_of_balancing}, {self.dropout_rate}, {self.batch_size}, {self.learning_rate}, {self.optimizer}, {self.num_conv_blocks}, {str(self.conv2d_filters)}"
        # Start the K-Fold Cross Validation and get the average F1 score of the validation set.
        avg_f1_val = self.stratified_k_fold_cross_validation()
        trial.set_user_attr('avg_f1_val', avg_f1_val)   # For logging purposes, just in case!

        # Search the next hyperparameter set by focusing on the average F1 score of the validation set
        return avg_f1_val

    def stratified_k_fold_split(self, dst_name, composition = [0.85, 0.15], seed=rd.randint(0, 100000)):
        """
        Splits the dataset into training and validation sets based on the specified composition.

        The dataset is read from `../DATA/!FINAL/train_val` (one sub-directory per class) and
        copied into `../DATA/!TEMP/<dst_name>/train` and `../DATA/!TEMP/<dst_name>/val`.

        The split is done per class. Images that already served as validation images
        (tracked in `self.blacklist`) are never picked again, SO EACH VALIDATION SET IS
        UNIQUE TO ITS FOLD.

        Parameters
        ------------
        - dst_name:
            The name of the destination directory where the split dataset will be saved.
        - composition:
            A list of two floats representing the proportion of training and validation data.
        - seed:
            Seed for the shuffle. The default is drawn once, when the class is defined, so
            every call in a kernel session that omits `seed` shares the same value.
            Note that the global `random` generator is re-seeded for every class.
        """
        # Tolerant comparison: sums of float fractions are not always exactly 1.
        assert abs(sum(composition) - 1) < 1e-9, "Composition must sum to 1"

        source_root = '../DATA/!FINAL/train_val'

        # Split dataset into train and val equally on each class
        for class_name in tqdm(os.listdir(source_root), desc="Splitting data into train and eval"):
            class_path = os.path.join(source_root, class_name)

            # Create paths for train and val directories
            for split in ["train", "val"]:
                os.makedirs(os.path.join("../DATA/!TEMP/", dst_name, split, class_name), exist_ok=True)

            # os.listdir returns entries in arbitrary order; sort first so the seeded
            # shuffle yields the same split regardless of the filesystem.
            list_of_images = sorted(os.listdir(class_path))
            eval_target = int(len(list_of_images) * composition[1])

            # Shuffle the images and take validation images that no earlier fold has used
            rd.seed(seed)
            rd.shuffle(list_of_images)
            already_used = set(self.blacklist.setdefault(class_name, []))
            eval_image = [i for i in list_of_images if i not in already_used][:eval_target]
            eval_lookup = set(eval_image)
            train_image = [i for i in list_of_images if i not in eval_lookup]

            # Copy images to the respective directories
            for image in train_image:
                shutil.copy(os.path.join(class_path, image), os.path.join("../DATA/!TEMP", dst_name, "train", class_name, image))
            for image in eval_image:
                shutil.copy(os.path.join(class_path, image), os.path.join("../DATA/!TEMP", dst_name, "val", class_name, image))

            # Update the blacklist for the next fold
            self.blacklist[class_name].extend(eval_image)

    def stratified_k_fold_cross_validation(self):
        '''
        Performs K-Fold Cross Validation with the hyperparameters currently stored on the instance.

        For every fold a temporary dataset is created, balanced, trained on and deleted
        again. The fold metrics are appended to `self.overallMetrics` and autosaved.

        Returns the mean validation F1 score over all folds.
        '''

        # The blacklist keeps track of the images that have been used in a validation set so
        # they won't be used again in the next fold! It is filled per class by the split itself.
        self.blacklist = {}

        f1_fold = []

        for i in tqdm(range(self.K), desc="K-Fold Cross Validation"):
            # Create a unique temporary directory for each fold
            fold_name = f"{i}_fold_{self.dataset_name}"
            dataset_path = os.path.join("../DATA/!TEMP", fold_name)

            # Create the directory for the fold where it composes the train and validation sets
            self.stratified_k_fold_split(fold_name, composition=[0.85, 0.15])
            # Adjust the class balance for the training set
            adjustClassBalance(fold_name, target_sample_class=self.num_of_balancing)

            # Get the train and validation generators
            self.train_generator, self.validation_generator = get_train_eval_Generator(fold_name, batch_size=self.batch_size, target_size=(224, 224))

            # Initialize WANDB
            wandb.init(project="OPTUNA_CNN_NEW", entity="rheyhanfahry", name=fold_name)

            # Train and get the best epochs and its metrics
            result = self.getBestEpochsandMetrics()

            # Clean up the temporary directories
            shutil.rmtree(dataset_path, ignore_errors=True)

            # logs the hyperparameters used
            result["num_of_balancing"] = self.num_of_balancing
            result["dropout_rate"] = self.dropout_rate
            result["batch_size"] = self.batch_size
            result["learning_rate"] = self.learning_rate
            result["optimizer"] = self.optimizer
            result["num_conv_blocks"] = self.num_conv_blocks
            result["conv2d_filters"] = str(self.conv2d_filters)
            result["fold"] = i + 1
            self.overallMetrics = pd.concat([self.overallMetrics, result], ignore_index=True)

            clear_output(wait=True)

            self.autosave()

            f1_fold.append(result["F1 Score"].values[0])

        return np.mean(f1_fold)

    @staticmethod
    def _checkpoint_for_epoch(epoch, run_dir="RUN"):
        '''
        Returns the path of the checkpoint written for the given 1-based epoch.

        The file is matched by its "epoch_NNN-" prefix. Indexing into a sorted directory
        listing is fragile: leftovers from a crashed run shift the positions and
        lexicographic order puts "epoch_1000-" between "epoch_100-" and "epoch_101-".

        Raises FileNotFoundError if no such checkpoint exists.
        '''
        prefix = f"epoch_{epoch:03d}-"
        matches = sorted(f for f in os.listdir(run_dir) if f.startswith(prefix))
        if not matches:
            raise FileNotFoundError(f"No checkpoint starting with '{prefix}' found in {run_dir}")
        return os.path.join(run_dir, matches[-1])

    def _log_results_to_wandb(self, result, training_time, inference_time):
        '''
        Logs the timing table, the per-class table and the overall table to the active WANDB run.

        `result` is the metrics dict returned by `checkMetrics` with "Epoch" already set.
        Its "Class Metrics" entry is removed. Returns the one-row overall metrics DataFrame.
        '''
        # Logs time taken for training and inference
        time_table = wandb.Table(dataframe=pd.DataFrame({
            "Training Time (seconds)": [training_time],
            "Inference Time (seconds)": [inference_time]
        }))
        wandb.log({"TIME" : time_table})

        # Logs each class metrics
        count_columns = ["True Positives", "False Positives", "False Negatives"]
        log_class = pd.DataFrame(result["Class Metrics"]).T
        log_class[count_columns] = log_class[count_columns].astype(int)
        log_class["Class"] = log_class.index
        log_class = log_class[["Class", "True Positives", "False Positives", "False Negatives", "Precision", "Recall", "F1 Score"]]
        wandb.log({"each class metrics": wandb.Table(dataframe=log_class)})

        # Logs overall metrics
        result.pop("Class Metrics")
        log_overall = pd.DataFrame(result, index=[0])
        log_overall = log_overall[["Epoch", "Balanced Accuracy", "Precision", "Recall", "F1 Score"]]
        wandb.log({"overall metrics": wandb.Table(dataframe=log_overall)})

        return log_overall

    def getBestEpochsandMetrics(self):
        """
        Trains the model and gets the best epoch and its metrics.

        The model is trained on `self.train_generator` and validated on
        `self.validation_generator` with the module-level callbacks (per-epoch checkpoints,
        early stopping, WANDB logging, learning-rate reduction). The checkpoint of the epoch
        with the best validation F1 score is then evaluated on the validation generator, the
        metrics are logged to WANDB and returned as a one-row DataFrame.

        On any error an email is sent, the WANDB run is closed as failed and an Exception
        (chained to the original error) is raised so the study stops.
        """
        try:
            # Checkpoints left behind by a crashed run would be mixed up with this run's files.
            shutil.rmtree("RUN", ignore_errors=True)

            model = get_model(num_classes=len(self.train_generator.class_indices),
                            conv2d_filters=self.conv2d_filters,
                            num_conv_blocks=self.num_conv_blocks,
                            dropout_rate=self.dropout_rate,
                            optimizer=self.optimizer,
                            learning_rate=self.learning_rate)

            # Compute class weights to handle class imbalance
            class_weights = compute_class_weight('balanced', classes=np.unique(self.train_generator.classes), y=self.train_generator.classes)
            class_weight_dict = dict(enumerate(class_weights))

            # Start training the model
            st = time.time()
            history = model.fit(
                self.train_generator,
                validation_data=self.validation_generator,
                epochs=1000,
                callbacks=callbacks,
                verbose=1,
                class_weight=class_weight_dict,)
            training_time = time.time() - st

            # Find the epoch with the best validation F1 score and load its checkpoint
            val_f1_history = history.history['val_f1score']
            best_epoch = val_f1_history.index(max(val_f1_history)) + 1
            model = keras.models.load_model(self._checkpoint_for_epoch(best_epoch))

            # gets metrics from the model
            st = time.time()
            result = checkMetrics(model, self.validation_generator, plot_confusion_matrix=False, mode="detailed")
            inference_time = time.time() - st

            # Logs the (1-based) epoch whose checkpoint was selected
            result["Epoch"] = best_epoch

            log_overall = self._log_results_to_wandb(result, training_time, inference_time)

            del model

            # Clean up temporary directories
            shutil.rmtree("RUN", ignore_errors=True)

            # Finish the WANDB run
            wandb.finish()

        except Exception as err:
            # `except Exception` (not a bare except) lets KeyboardInterrupt/SystemExit pass through.
            # If there is an error, send an email and raise an exception so it'll stop!.
            send_email(
                text=f"Error in fold {self.index} with parameters: num_of_balancing={self.num_of_balancing}, dropout_rate={self.dropout_rate}, batch_size={self.batch_size}, learning_rate={self.learning_rate}, optimizer={self.optimizer}, num_conv_blocks={self.num_conv_blocks}, conv2d_filters={self.conv2d_filters}")
            # Close the run as failed so it does not stay open in WANDB.
            wandb.finish(exit_code=1)
            raise Exception(f"FOUND ERROR, PLEASE RESTART!") from err

        return log_overall

class get_test_score(getBestHyperparamms_optuna_CNN):
    """
    A class to get the test score of a CNN model using the best hyperparameters found by Optuna.
    It inherits from `getBestHyperparamms_optuna_CNN` and reuses its `stratified_k_fold_split`
    and `autosave` methods.

    Pipeline:
    - Initialize the class with the number of hyperparameter sets to evaluate (n) and the
      number of folds (k). CONSTRUCTING THE OBJECT IMMEDIATELY RUNS `start`.
    - `start` reads the hyperparameter sets from the first n rows of `CNN_averageMetrics.csv`.
    - For each set of hyperparameters, K-Fold Cross Validation is performed:
        - `stratified_k_fold_split` splits the dataset into training and validation sets.
        - `adjustClassBalance` balances the classes of the fold.
        - `RESULT_getBestEpochsandMetrics` trains the model, picks the best epoch based on
          the validation set and measures that checkpoint on the test set.
        - The metrics and hyperparameters are logged to WANDB.
        - `autosave` writes all metrics collected so far to a CSV file.
    - After all runs are completed, the overall metrics are saved to `CNN_TEST_RESULTS_OVERALL.csv`.
    - The metrics averaged per hyperparameter set are saved to `CNN_TEST_RESULTS.csv`.

    """
    def __init__(self, n=10, k=4):
        """
        Parameters
        ------------
        - n:
            Number of rows (hyperparameter sets) to take from the top of `CNN_averageMetrics.csv`.
        - k:
            Number of folds per hyperparameter set.

        Raises
        ------------
        FileExistsError if the `TESTRESULTS_CHECKPOINTS` autosave directory already exists.
        """
        # Validate input parameters
        assert n > 0, "n must be greater than 0"
        assert k > 0, "k must be greater than 0"

        self.n = n
        self.K = k

        # the logs for all trials
        self.overallMetrics = pd.DataFrame(columns=[
            "fold", "num_of_balancing", "dropout_rate", "batch_size", "learning_rate", "optimizer", "num_conv_blocks",
            "conv2d_filters", "Epoch", "Balanced Accuracy", "Precision", "Recall", "F1 Score"])

        # Directory for autosaving. Hope this syntax will only be run once.
        self.savedir = "TESTRESULTS_CHECKPOINTS"
        self.index = 0
        if os.path.exists(self.savedir):
            raise FileExistsError("Please provide a new directory name, the directory already exists.")
        os.makedirs(self.savedir)

        self.start()

    def start(self):
        """
        Gets the test score of the CNN model for the best hyperparameter sets found by Optuna.

        Reads the first n rows of `CNN_averageMetrics.csv` (the first seven columns are
        expected to be the hyperparameters, in the order they are unpacked below) and runs
        K-Fold Cross Validation for each of them, evaluating on the test set.
        """
        df = pd.read_csv("CNN_averageMetrics.csv")

        # Uses the first n rows of the DataFrame to get the best hyperparameters.
        for params in df.loc[:self.n-1].values:
            # Unpack the parameters
            self.num_of_balancing = params[0]
            self.dropout_rate = params[1]
            self.batch_size = params[2]
            self.learning_rate = params[3]
            self.optimizer = params[4]
            self.num_conv_blocks = params[5]
            # The filters were stored as the string form of a list; pull the integers back
            # out and keep only as many as there are convolutional blocks.
            filters = re.findall(r'\d+', params[6])
            self.conv2d_filters = [int(filters[i]) for i in range(self.num_conv_blocks)]

            self.test_generator = get_test_generator(batch_size=self.batch_size)

            self.blacklist = {"Darna_trima": [], "Parasa_lepida": [], "Setora_nitens": [], "Setothosea_asigna": []}

            # The name of the run with this hyperparameter sets, will be used for naming temporary dirs
            self.name = f"RESULT {self.num_of_balancing}_{self.dropout_rate}_{self.batch_size}_{self.learning_rate}_{self.optimizer}_{self.num_conv_blocks}_{str(self.conv2d_filters)}"

            # Starts of K-fold cross validation
            for i in tqdm(range(self.K), desc="K-Fold Cross Validation for Test Score"):
                fold_name = f"{i}_fold_{self.name}"

                # Create train and validation dataset
                self.stratified_k_fold_split(fold_name, composition=[0.85, 0.15])
                # Adjust the class balance for the training set
                adjustClassBalance(fold_name, target_sample_class=self.num_of_balancing)

                self.train_generator, self.validation_generator = get_train_eval_Generator(fold_name, batch_size=self.batch_size, target_size=(224, 224))

                # Initialize WANDB for this fold
                wandb.init(project="OPTUNA_CNN_NEW", entity="rheyhanfahry", name=fold_name)

                # Train the model and get the best epoch based on validation sets
                result = self.RESULT_getBestEpochsandMetrics()

                # Clean up the temporary directories
                dataset_path = os.path.join("../DATA/!TEMP", fold_name)
                shutil.rmtree(dataset_path, ignore_errors=True)

                # logs the hyperparameters used
                result["num_of_balancing"] = self.num_of_balancing
                result["dropout_rate"] = self.dropout_rate
                result["batch_size"] = self.batch_size
                result["learning_rate"] = self.learning_rate
                result["optimizer"] = self.optimizer
                result["num_conv_blocks"] = self.num_conv_blocks
                result["conv2d_filters"] = str(self.conv2d_filters)
                result["fold"] = i + 1
                self.overallMetrics = pd.concat([self.overallMetrics, result], ignore_index=True)

                clear_output(wait=True)

                self.autosave()

        # Save the overall metrics to a CSV file
        self.overallMetrics.to_csv("CNN_TEST_RESULTS_OVERALL.csv", index=False)

        # Average the folds that share the same hyperparameter set, best F1 score first.
        # (A second sort_values call whose result was discarded has been removed; the
        # frame is already sorted here.)
        metric_columns = ["Balanced Accuracy", "Precision", "Recall", "F1 Score"]
        filters = [i for i in list(self.overallMetrics.columns) if i not in ["fold", "Epoch"] + metric_columns]
        self.overallMetrics[filters] = self.overallMetrics[filters].astype(str)
        avg_metrics = self.overallMetrics.drop(columns=["fold", "Epoch"]).groupby(filters).mean().reset_index().sort_values(by="F1 Score", ascending=False)
        for column in metric_columns:
            avg_metrics[column] = avg_metrics[column].round(4)
        avg_metrics.to_csv("CNN_TEST_RESULTS.csv", index=False)

    def RESULT_getBestEpochsandMetrics(self):
        '''
        Trains the model and gets the best epoch based on the validation set.

        The checkpoint of that epoch is then evaluated on the test set, the metrics are
        logged to WANDB and returned as a one-row DataFrame.

        On any error an email is sent, the WANDB run is closed as failed and an Exception
        (chained to the original error) is raised so the run stops.
        '''
        try:
            # Checkpoints left behind by a crashed run would be mixed up with this run's files.
            shutil.rmtree("RUN", ignore_errors=True)

            model = get_model(num_classes=len(self.train_generator.class_indices),
                            conv2d_filters=self.conv2d_filters,
                            num_conv_blocks=self.num_conv_blocks,
                            dropout_rate=self.dropout_rate,
                            optimizer=self.optimizer,
                            learning_rate=self.learning_rate)
            # Compute class weights to handle class imbalance
            class_weights = compute_class_weight('balanced', classes=np.unique(self.train_generator.classes), y=self.train_generator.classes)
            class_weight_dict = dict(enumerate(class_weights))

            # Start training the model
            st = time.time()
            history = model.fit(
                self.train_generator,
                validation_data=self.validation_generator,
                epochs=1000,
                callbacks=callbacks,
                verbose=1,
                class_weight=class_weight_dict)
            training_time = time.time() - st

            # Find the epoch with the best validation F1 score and load its checkpoint.
            # The file is matched by its "epoch_NNN-" prefix instead of by position in a
            # sorted listing: lexicographic order breaks once epoch 1000 is reached.
            val_f1_history = history.history['val_f1score']
            best_epoch = val_f1_history.index(max(val_f1_history)) + 1
            checkpoint_prefix = f"epoch_{best_epoch:03d}-"
            candidates = sorted(f for f in os.listdir("RUN") if f.startswith(checkpoint_prefix))
            if not candidates:
                raise FileNotFoundError(f"No checkpoint starting with '{checkpoint_prefix}' found in RUN")
            model = keras.models.load_model(f'RUN/{candidates[-1]}')

            # gets test sets metrics from the model
            st = time.time()
            result = checkMetrics(model, self.test_generator, plot_confusion_matrix=False, mode="detailed")
            inference_time = time.time() - st

            # Logs the (1-based) epoch whose checkpoint was selected
            result["Epoch"] = best_epoch

            # Logs time taken for training and inference
            class_table = wandb.Table(dataframe=pd.DataFrame({
                "Training Time (seconds)": [training_time],
                "Inference Time (seconds)": [inference_time]
            }))
            wandb.log({"TIME" : class_table})

            # each class metrics
            count_columns = ["True Positives", "False Positives", "False Negatives"]
            log_class = pd.DataFrame(result["Class Metrics"]).T
            log_class[count_columns] = log_class[count_columns].astype(int)
            log_class["Class"] = log_class.index
            log_class = log_class[["Class", "True Positives", "False Positives", "False Negatives", "Precision", "Recall", "F1 Score"]]
            class_table = wandb.Table(dataframe=log_class)
            wandb.log({"each class metrics": class_table})

            # overall metrics
            result.pop("Class Metrics")
            log_overall = pd.DataFrame(result, index=[0])
            log_overall = log_overall[["Epoch", "Balanced Accuracy", "Precision", "Recall", "F1 Score"]]
            overall_table = wandb.Table(dataframe=log_overall)
            wandb.log({"overall metrics": overall_table})

            # Remove the temp RUN directory
            shutil.rmtree("RUN", ignore_errors=True)

            # Finish the WANDB run
            wandb.finish()

        except Exception as err:
            # `except Exception` (not a bare except) lets KeyboardInterrupt/SystemExit pass through.
            # If there is an error, send an email and raise an exception so it'll stop!
            send_email(
                text=f"Error in TEST RESULT fold {self.index} with parameters: num_of_balancing={self.num_of_balancing}, dropout_rate={self.dropout_rate}, batch_size={self.batch_size}, learning_rate={self.learning_rate}, optimizer={self.optimizer}, num_conv_blocks={self.num_conv_blocks}, conv2d_filters={self.conv2d_filters}")
            # Close the run as failed so it does not stay open in WANDB.
            wandb.finish(exit_code=1)
            raise Exception(f"FOUND ERROR, PLEASE RESTART!") from err

        return log_overall

3.11.11 (main, Dec 11 2024, 16:28:39) [GCC 11.2.0]
Operating System: posix
Platform: posix.uname_result(sysname='Linux', nodename='DESKTOP-PTROTCH', release='5.15.167.4-microsoft-standard-WSL2', version='#1 SMP Tue Nov 5 00:21:55 UTC 2024', machine='x86_64')
GPU Name: NVIDIA GeForce RTX 3080


In [None]:
# If an error occurs (primarily VRAM issues), restart the notebook and re-run the code!
# NOTE: `savedir` must not exist yet - the constructor raises FileExistsError otherwise -
# so pick a fresh directory name on every restart.
a = getBestHyperparamms_optuna_CNN(k=4, savedir="CNN_12")
a.start(trial=100)

In [112]:
import pandas as pd
import re
import os
# This code collects the results from all CNN checkpoints and creates all the metrics in a single
# CSV file while also creating an average metrics CSV file.
#
# Outputs (written to the current working directory):
#   CNN_overallMetrics.csv          - every kept per-fold row
#   UNSORTED_AVERAGE_CNN_TUNING.csv - per-configuration averages, ordered by trial
#   CNN_averageMetrics.csv          - per-configuration averages, ordered by F1 Score (best first)

CNN_RUN_DIR = "CNN_RUN"   # root folder scanned for sub-directories whose name starts with "CNN"
FOLDS_PER_TRIAL = 4       # a configuration is kept only if it has at least this many distinct folds
MAX_TRIALS = 100          # keep at most MAX_TRIALS * FOLDS_PER_TRIAL rows
MIN_EPOCH = 10            # rows must have Epoch strictly greater than this to be kept
METRIC_COLUMNS = ["Balanced Accuracy", "Precision", "Recall", "F1 Score"]

CNN_checkpoint_dirs = [i for i in os.listdir(CNN_RUN_DIR) if i.startswith("CNN") and os.path.isdir(os.path.join(CNN_RUN_DIR, i))]

# Read the highest-numbered CSV of every checkpoint directory. The tables are gathered in a list
# and concatenated once, instead of growing a DataFrame inside the loop.
tables = []
for DIR in CNN_checkpoint_dirs:
    csv_indices = []
    for file_name in os.listdir(os.path.join(CNN_RUN_DIR, DIR)):
        if file_name.startswith("ERROR"):
            continue
        number = re.search(r"\d+", file_name)
        # A name without any digit cannot be a numbered result file; skip it instead of
        # crashing on `None[0]`.
        if number is not None:
            csv_indices.append(int(number[0]))
    if not csv_indices:
        # Nothing usable in this directory (empty, or only ERROR files).
        continue
    latest_csv_index = max(csv_indices)
    latest_csv_path = os.path.join(CNN_RUN_DIR, DIR, f'{latest_csv_index}.csv')
    tables.append(pd.read_csv(latest_csv_path))

if not tables:
    raise FileNotFoundError(f"No numbered result CSV found in any CNN* directory under {CNN_RUN_DIR!r}")

temp = pd.concat(tables, ignore_index=True)

# Just in case if it fails to converge. Remove them bcz bad accuracies.
# `.copy()` so the column assignment below works on an independent frame, not on a slice.
temp = temp[temp['Epoch'] > MIN_EPOCH].copy()

# Every column that is neither a metric nor fold/Epoch identifies the configuration.
filters = [i for i in list(temp.columns) if i not in ["fold", "Epoch"] + METRIC_COLUMNS]
# Cast to str so that every configuration column can be used as a plain group key.
temp[filters] = temp[filters].astype(str)

# Remove Config with less than FOLDS_PER_TRIAL folds
folds_per_config = temp.groupby(filters)["fold"].transform("nunique")
temp = temp[folds_per_config >= FOLDS_PER_TRIAL]
temp = temp.iloc[: MAX_TRIALS * FOLDS_PER_TRIAL]

temp = temp.reset_index(drop=True)
temp.to_csv("CNN_overallMetrics.csv", index=False)

# Number the configurations 1, 2, 3, ... in order of first appearance. With exactly
# FOLDS_PER_TRIAL consecutive rows per configuration this gives 1,1,1,1,2,2,2,2,...; unlike a
# fixed-length list it also works when fewer than MAX_TRIALS configurations survive the
# filters above, and it always agrees with the grouping used for the averages below.
temp["trial"] = temp.groupby(filters, sort=False).ngroup() + 1

avg_metrics = (
    temp.drop(columns=["fold", "Epoch"])
    .groupby(filters)
    .mean()
    .reset_index()
    .sort_values(by="trial", ascending=True)
)

avg_metrics[METRIC_COLUMNS] = avg_metrics[METRIC_COLUMNS].round(4)
avg_metrics.to_csv("UNSORTED_AVERAGE_CNN_TUNING.csv", index=False)

avg_metrics = avg_metrics.sort_values(by="F1 Score", ascending=False)

avg_metrics.to_csv("CNN_averageMetrics.csv", index=False)

In [20]:
avg_metrics

Unnamed: 0,num_of_balancing,dropout_rate,batch_size,learning_rate,optimizer,num_conv_blocks,conv2d_filters,Balanced Accuracy,Precision,Recall,F1 Score
1,250,0.2096187982506599,16,0.0001320860146001,adam,6,"[64, 64, 224, 256, 352, 448]",0.9298,0.9212,0.9298,0.9237
2,250,0.2106727953816496,8,7.845771933593588e-05,rmsprop,4,"[64, 96, 192, 192, 256, 320]",0.8818,0.8636,0.8818,0.8697
3,250,0.238016296900337,16,0.0007062700618815,rmsprop,4,"[48, 128, 160, 192, 320, 416]",0.9206,0.9044,0.9206,0.9105
4,250,0.2556817738581848,8,0.0005022153460409,rmsprop,6,"[32, 32, 192, 224, 320, 480]",0.9266,0.9172,0.9266,0.9204
5,250,0.2699098581523874,16,0.0001238216661377,rmsprop,6,"[16, 96, 160, 160, 192, 224]",0.9353,0.9217,0.9353,0.9265
...,...,...,...,...,...,...,...,...,...,...,...
96,500,0.3316622070043765,8,8.995545138853484e-05,rmsprop,5,"[48, 112, 112, 112, 240, 272]",0.9436,0.9408,0.9436,0.9414
97,500,0.4107515906788274,32,0.0004081973850847,rmsprop,5,"[64, 96, 192, 256, 256, 320]",0.9504,0.9566,0.9504,0.9530
98,500,0.4178215658946088,16,0.0004476545411722,adam,4,"[64, 112, 240, 240, 336, 368]",0.9231,0.9136,0.9232,0.9172
99,500,0.428617501730154,16,0.0003490759393051,rmsprop,6,"[48, 80, 144, 208, 464, 464]",0.9525,0.9551,0.9525,0.9532


In [None]:
# NOTE(review): get_test_score is defined outside this cell; n=10 and k=4 are presumably the
# number of top configurations to evaluate and the number of folds — confirm against its definition.
get_test_score(n=10, k=4)

## DEBUG, SINGLE RUN

In [4]:
# Delete the scratch directories; ignore_errors=True means a missing directory is not an error.
for _scratch in ("../DATA/!Temp/", "RUN", "wandb"):
    shutil.rmtree(_scratch, ignore_errors=True)

In [None]:
# simplePipeline_CNN(
#     dataset_name="dataset_1",
#     batch_size=32,
#     target_sample=400
# )