In [None]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D
from tensorflow.keras.applications.inception_v3 import InceptionV3, preprocess_input
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, classification_report
import os
import cv2 # Using OpenCV for image loading/resizing
from tqdm.notebook import tqdm # For progress bars

# Seed both NumPy and TensorFlow so repeated runs draw the same random numbers.
for seed_fn in (np.random.seed, tf.random.set_seed):
    seed_fn(42)

# Query the GPU list once and reuse it for both the count and the warning.
gpu_devices = tf.config.experimental.list_physical_devices('GPU')
print("Num GPUs Available: ", len(gpu_devices))
if len(gpu_devices) == 0:
    print("WARNING: No GPU found. Training will be extremely slow on CPU. Please enable GPU runtime: Runtime -> Change runtime type -> GPU.")

# ----------------------------------------------------------------------
# Step 1: Mount Google Drive (if you haven't already in this session)
# ----------------------------------------------------------------------
# Colab-only import: this cell is meant to run inside Google Colab.
from google.colab import drive
# Mount Google Drive at /content/drive; `data_dir` below is a path under this mount point.
drive.mount('/content/drive')

# ----------------------------------------------------------------------
# Step 2: Load and Prepare Your Full Dataset
# ----------------------------------------------------------------------
# --- IMPORTANT: Adjust this path to your combined dataset folder ---
data_dir = '/content/drive/My Drive/galapagos_seals_annotated_data/my_galapagos_seals_dataset/combined_dataset_for_kfold'

# Only sub-directories of data_dir are classes. A stray file in data_dir must
# not receive a class index: it would inflate NUM_CLASSES (and therefore the
# one-hot width and the model's output layer) with a class that has no samples.
class_names = []
for entry in sorted(os.listdir(data_dir)):
    entry_path = os.path.join(data_dir, entry)
    if os.path.isdir(entry_path):
        class_names.append(entry)
    else:
        print(f"Warning: '{entry_path}' is not a directory. Skipping.")
class_to_idx = {name: i for i, name in enumerate(class_names)}

print(f"Detected classes: {class_names}")

image_paths = []
labels = []
for class_name in class_names:
    class_path = os.path.join(data_dir, class_name)
    # os.listdir returns entries in arbitrary order; sorting fixes the sample
    # order so the seeded StratifiedKFold split is the same on every run.
    for img_name in sorted(os.listdir(class_path)):
        img_path = os.path.join(class_path, img_name)
        if os.path.isfile(img_path):  # Ignore nested directories
            image_paths.append(img_path)
            labels.append(class_to_idx[class_name])

image_paths = np.array(image_paths)
# Explicit integer dtype: an empty list would otherwise become a float array,
# which np.bincount rejects.
labels = np.array(labels, dtype=np.int64)

print(f"\nTotal number of samples loaded for K-fold: {len(image_paths)}")
# minlength makes empty class folders show up as a 0 count instead of being dropped.
print(f"Initial Class distribution: {np.bincount(labels, minlength=len(class_names))}")

# Define input shape for InceptionV3
IMG_HEIGHT, IMG_WIDTH = 299, 299  # InceptionV3 typically uses 299x299
NUM_CLASSES = len(class_names)
BATCH_SIZE = 32  # Adjust as needed based on GPU memory

# Data preprocessing function (loading, resizing, normalization)
# This will be mapped to the tf.data.Dataset
def load_and_preprocess_image(image_path, label, img_size=(IMG_HEIGHT, IMG_WIDTH)):
    """Turn one (path, integer label) pair into a model-ready (image, one-hot) pair.

    Intended to be mapped over a tf.data.Dataset of file paths and labels.

    Args:
        image_path: Path of the image file to read.
        label: Integer class index of the image.
        img_size: Target (height, width) passed to tf.image.resize.

    Returns:
        A tuple (image, one_hot_label): the decoded 3-channel image, resized to
        `img_size` and run through the InceptionV3 `preprocess_input`, and the
        label one-hot encoded with depth NUM_CLASSES.
    """
    raw_bytes = tf.io.read_file(image_path)
    resized = tf.image.resize(tf.image.decode_jpeg(raw_bytes, channels=3), img_size)
    return preprocess_input(resized), tf.one_hot(label, NUM_CLASSES)

# Data augmentation function (to be used in tf.data.Dataset map for training)
def augment_image(image, label):
    """Apply light random augmentation to a single training sample.

    This function is mapped over the training dataset *after*
    `load_and_preprocess_image`, i.e. on images that already went through the
    InceptionV3 `preprocess_input`.

    Args:
        image: Preprocessed image tensor (expected value range [-1, 1]).
        label: Label tensor; passed through unchanged.

    Returns:
        A tuple (augmented_image, label).
    """
    image = tf.image.random_flip_left_right(image)
    image = tf.image.random_brightness(image, max_delta=0.2)
    image = tf.image.random_contrast(image, lower=0.8, upper=1.2)

    # Brightness/contrast jitter can push values outside the [-1, 1] range that
    # InceptionV3's preprocessing produces and the ImageNet weights were
    # trained on, so clamp back into that range.
    image = tf.clip_by_value(image, -1.0, 1.0)

    # NOTE: tf.image has no random_zoom / random_rotation functions. If zoom or
    # rotation is wanted, use the tf.keras.layers.RandomZoom / RandomRotation
    # preprocessing layers instead.
    return image, label

# ----------------------------------------------------------------------
# Step 3: Define Your Model Building Function
# ----------------------------------------------------------------------
def build_inceptionv3_model(input_shape=(IMG_HEIGHT, IMG_WIDTH, 3), num_classes=NUM_CLASSES):
    """Build and compile a transfer-learning classifier on top of InceptionV3.

    Args:
        input_shape: Shape of one input image, (height, width, channels).
        num_classes: Number of units in the softmax output layer.

    Returns:
        A compiled Keras Model: an ImageNet-initialised InceptionV3 backbone
        with every layer frozen, followed by global average pooling, a
        512-unit ReLU layer and a softmax output. Compiled with the Adam
        optimizer, categorical cross-entropy loss and the accuracy metric.
    """
    backbone = InceptionV3(weights='imagenet', include_top=False, input_shape=input_shape)

    # Freeze the backbone so that only the new classification head is trained.
    for backbone_layer in backbone.layers:
        backbone_layer.trainable = False

    # Classification head stacked on the backbone's feature maps.
    pooled = GlobalAveragePooling2D()(backbone.output)
    hidden = Dense(512, activation='relu')(pooled)
    class_probs = Dense(num_classes, activation='softmax')(hidden)

    classifier = Model(inputs=backbone.input, outputs=class_probs)
    classifier.compile(
        optimizer='adam',
        loss='categorical_crossentropy',
        metrics=['accuracy'],
    )
    return classifier

# ----------------------------------------------------------------------
# Step 4: Implement Your Hybrid Balancing Strategy (within the fold)
# ----------------------------------------------------------------------
from imblearn.under_sampling import RandomUnderSampler
from sklearn.utils import class_weight

def apply_hybrid_balancing_to_fold(X_train_paths, y_train_labels, class_names):
    """Balance one fold's training data by undersampling plus class weights.

    Args:
        X_train_paths: NumPy array of image paths for the fold's training split.
        y_train_labels: NumPy array of integer class indices, aligned with
            `X_train_paths`.
        class_names: Sequence of all class names; only its length is used, to
            enumerate the class indices 0..len(class_names)-1.

    Returns:
        A tuple (X_resampled_paths, y_resampled_labels, class_weights_dict):
        the paths and labels kept after randomly undersampling the majority
        class, and a {class_index: weight} dict of 'balanced' class weights
        computed on the resampled labels, with plain Python int keys and float
        values.

    Raises:
        ValueError: propagated from scikit-learn's compute_class_weight when a
            class index has no sample in the resampled labels.
    """
    # imblearn resamples a feature matrix, so feed it the row indices as a
    # single dummy feature and use the surviving indices to select the paths.
    row_ids = np.arange(len(X_train_paths)).reshape(-1, 1)

    rus = RandomUnderSampler(sampling_strategy='majority', random_state=42)
    kept_row_ids, y_resampled_labels = rus.fit_resample(row_ids, y_train_labels)
    X_resampled_paths = X_train_paths[kept_row_ids.ravel()]

    # Class weights make the loss pay more attention to whichever classes are
    # still under-represented after undersampling.
    all_class_indices = np.arange(len(class_names))
    weights = class_weight.compute_class_weight(class_weight='balanced',
                                                classes=all_class_indices,
                                                y=y_resampled_labels)
    # Plain int/float instead of numpy scalars: keeps logs readable
    # ({0: 1.0, ...} rather than {0: np.float64(1.0), ...}).
    class_weights_dict = {
        int(class_idx): float(weight)
        for class_idx, weight in zip(all_class_indices, weights)
    }

    return X_resampled_paths, y_resampled_labels, class_weights_dict

# ----------------------------------------------------------------------
# Step 5: K-Fold Cross-Validation Loop
# ----------------------------------------------------------------------
N_SPLITS = 5  # Using 5 folds is a good balance between computational cost and statistical robustness
skf = StratifiedKFold(n_splits=N_SPLITS, shuffle=True, random_state=42)

fold_results = []  # To store metrics for each fold

# Fixed label order for every report: without `labels=`, scikit-learn infers
# the labels from the fold's data, and a class missing from both truth and
# predictions would no longer line up with `target_names`.
all_class_indices = list(range(len(class_names)))

# The minority class is a property of the whole dataset, so determine it once
# instead of recomputing it in every fold.
minority_class_label_idx = np.bincount(labels).argmin()
minority_class_name = class_names[minority_class_label_idx]

for fold_idx, (train_index, val_index) in enumerate(skf.split(image_paths, labels)):
    print(f"\n--- Starting Fold {fold_idx + 1}/{N_SPLITS} ---")

    # Split data for the current fold
    X_train_fold_paths, X_val_fold_paths = image_paths[train_index], image_paths[val_index]
    y_train_fold_labels, y_val_fold_labels = labels[train_index], labels[val_index]

    print(f"  Train samples in fold: {len(X_train_fold_paths)}")
    print(f"  Validation samples in fold: {len(X_val_fold_paths)}")
    print(f"  Train labels distribution (before balancing): {np.bincount(y_train_fold_labels)}")
    print(f"  Val labels distribution: {np.bincount(y_val_fold_labels)}")

    # Apply Hybrid Balancing ONLY to the training data of the current fold,
    # so the validation split keeps the original class distribution.
    X_train_balanced_paths, y_train_balanced_labels, class_weights = \
        apply_hybrid_balancing_to_fold(X_train_fold_paths, y_train_fold_labels, class_names)

    print(f"  Train samples AFTER balancing (undersampled): {len(X_train_balanced_paths)}")
    print(f"  Balanced Train labels distribution: {np.bincount(y_train_balanced_labels)}")
    print(f"  Class Weights for training: {class_weights}")

    # Training dataset: Shuffle, Load, Augment, Batch, Prefetch.
    # Shuffling happens on the (path, label) pairs *before* decoding, so the
    # shuffle buffer holds short strings rather than decoded images, and a
    # buffer covering the whole split gives a uniform shuffle every epoch.
    train_ds = tf.data.Dataset.from_tensor_slices((X_train_balanced_paths, y_train_balanced_labels))
    train_ds = train_ds.shuffle(buffer_size=len(X_train_balanced_paths), reshuffle_each_iteration=True)
    train_ds = train_ds.map(load_and_preprocess_image, num_parallel_calls=tf.data.AUTOTUNE)
    train_ds = train_ds.map(augment_image, num_parallel_calls=tf.data.AUTOTUNE)
    train_ds = train_ds.batch(BATCH_SIZE).prefetch(tf.data.AUTOTUNE)

    # Validation dataset: Load, Batch, Prefetch (no augmentation, no shuffling)
    val_ds = tf.data.Dataset.from_tensor_slices((X_val_fold_paths, y_val_fold_labels))
    val_ds = val_ds.map(load_and_preprocess_image, num_parallel_calls=tf.data.AUTOTUNE)
    val_ds = val_ds.batch(BATCH_SIZE).prefetch(tf.data.AUTOTUNE)

    # Build and compile a fresh model for each fold so no weights leak between folds
    model = build_inceptionv3_model()

    # Callbacks
    callbacks = [
        tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=15, restore_best_weights=True),
        tf.keras.callbacks.ReduceLROnPlateau(monitor='val_loss', factor=0.2, patience=7, min_lr=0.00001, verbose=1)
    ]

    # Train the model
    print("  Training model...")
    history = model.fit(train_ds,
                        epochs=100,  # Max epochs, EarlyStopping will stop it
                        validation_data=val_ds,
                        class_weight=class_weights,  # Apply class weights during training
                        callbacks=callbacks,
                        verbose=1)

    # Evaluate the model on the validation data for the current fold
    print("  Evaluating model on validation set...")
    y_true_val = []
    y_pred_proba_val = []

    # Collect labels and predictions batch by batch so they stay paired.
    for images, labels_one_hot in tqdm(val_ds, desc="Predicting on validation set"):
        y_true_val.extend(np.argmax(labels_one_hot.numpy(), axis=1))
        # predict_on_batch runs a single forward pass; calling model.predict()
        # inside a loop prints a progress bar per batch and is not what
        # predict() is designed for.
        y_pred_proba_val.extend(model.predict_on_batch(images))

    y_pred_val = np.argmax(np.array(y_pred_proba_val), axis=1)

    # Calculate overall metrics for the current fold
    fold_accuracy = accuracy_score(y_true_val, y_pred_val)

    # Get detailed per-class metrics using classification_report
    report = classification_report(y_true_val, y_pred_val, labels=all_class_indices,
                                   target_names=class_names, output_dict=True, zero_division=0)

    minority_precision = report[minority_class_name]['precision']
    minority_recall = report[minority_class_name]['recall']
    minority_f1 = report[minority_class_name]['f1-score']

    print(f"Fold {fold_idx + 1} Metrics:")
    print(f"  Overall Accuracy: {fold_accuracy:.4f}")
    print(f"  Minority Class ({minority_class_name}):")
    print(f"    Precision: {minority_precision:.4f}")
    print(f"    Recall: {minority_recall:.4f}")
    print(f"    F1-score: {minority_f1:.4f}")
    print("  Full Classification Report:\n",
          classification_report(y_true_val, y_pred_val, labels=all_class_indices,
                                target_names=class_names, zero_division=0))

    fold_results.append({
        'overall_accuracy': fold_accuracy,
        'minority_precision': minority_precision,
        'minority_recall': minority_recall,
        'minority_f1': minority_f1
    })

    # Clear session to free up memory before next fold
    tf.keras.backend.clear_session()
    del model  # Delete model instance to release resources

# ----------------------------------------------------------------------
# Step 6: Aggregate and Report Final Results
# ----------------------------------------------------------------------
print("\n--- K-Fold Cross-Validation Final Results ---")

# Gather each metric across folds once, then take mean and (population) std.
accuracy_per_fold = [res['overall_accuracy'] for res in fold_results]
precision_per_fold = [res['minority_precision'] for res in fold_results]
recall_per_fold = [res['minority_recall'] for res in fold_results]
f1_per_fold = [res['minority_f1'] for res in fold_results]

avg_overall_accuracy, std_overall_accuracy = np.mean(accuracy_per_fold), np.std(accuracy_per_fold)
avg_minority_precision, std_minority_precision = np.mean(precision_per_fold), np.std(precision_per_fold)
avg_minority_recall, std_minority_recall = np.mean(recall_per_fold), np.std(recall_per_fold)
avg_minority_f1, std_minority_f1 = np.mean(f1_per_fold), np.std(f1_per_fold)

print(f"Average Overall Accuracy: {avg_overall_accuracy:.4f} +/- {std_overall_accuracy:.4f}")
print(f"Average Minority Precision: {avg_minority_precision:.4f} +/- {std_minority_precision:.4f}")
print(f"Average Minority Recall: {avg_minority_recall:.4f} +/- {std_minority_recall:.4f}")
print(f"Average Minority F1-score: {avg_minority_f1:.4f} +/- {std_minority_f1:.4f}")

Num GPUs Available:  1
Mounted at /content/drive
Detected classes: ['Arctocephalus_galapagoensis', 'Zalophus_wollebaeki']

Total number of samples loaded for K-fold: 336
Initial Class distribution: [ 45 291]

--- Starting Fold 1/5 ---
  Train samples in fold: 268
  Validation samples in fold: 68
  Train labels distribution (before balancing): [ 36 232]
  Val labels distribution: [ 9 59]
  Train samples AFTER balancing (undersampled): 72
  Balanced Train labels distribution: [36 36]
  Class Weights for training: {0: np.float64(1.0), 1: np.float64(1.0)}
Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/inception_v3/inception_v3_weights_tf_dim_ordering_tf_kernels_notop.h5
[1m87910968/87910968[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 0us/step
  Training model...
Epoch 1/100
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m209s[0m 15s/step - accuracy: 0.4106 - loss: 1.0694 - val_accuracy: 0.1471 - val_loss: 2.6856 - learning_rate: 0.0010

Predicting on validation set:   0%|          | 0/3 [00:00<?, ?it/s]

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 5s/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 157ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 6s/step
Fold 1 Metrics:
  Overall Accuracy: 0.9706
  Minority Class (Arctocephalus_galapagoensis):
    Precision: 0.8182
    Recall: 1.0000
    F1-score: 0.9000
  Full Classification Report:
                              precision    recall  f1-score   support

Arctocephalus_galapagoensis       0.82      1.00      0.90         9
        Zalophus_wollebaeki       1.00      0.97      0.98        59

                   accuracy                           0.97        68
                  macro avg       0.91      0.98      0.94        68
               weighted avg       0.98      0.97      0.97        68


--- Starting Fold 2/5 ---
  Train samples in fold: 269
  Validation samples in fold: 67
  Train labels distribution (before balancing): [ 36 233]
  Val labels distribution: [ 9 58]
  Tr

Predicting on validation set:   0%|          | 0/3 [00:00<?, ?it/s]

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 5s/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 199ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 5s/step
Fold 2 Metrics:
  Overall Accuracy: 0.9552
  Minority Class (Arctocephalus_galapagoensis):
    Precision: 0.8000
    Recall: 0.8889
    F1-score: 0.8421
  Full Classification Report:
                              precision    recall  f1-score   support

Arctocephalus_galapagoensis       0.80      0.89      0.84         9
        Zalophus_wollebaeki       0.98      0.97      0.97        58

                   accuracy                           0.96        67
                  macro avg       0.89      0.93      0.91        67
               weighted avg       0.96      0.96      0.96        67


--- Starting Fold 3/5 ---
  Train samples in fold: 269
  Validation samples in fold: 67
  Train labels distribution (before balancing): [ 36 233]
  Val labels distribution: [ 9 58]
  Tr

Predicting on validation set:   0%|          | 0/3 [00:00<?, ?it/s]



[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 6s/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 156ms/step




[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 5s/step
Fold 3 Metrics:
  Overall Accuracy: 0.9552
  Minority Class (Arctocephalus_galapagoensis):
    Precision: 0.8000
    Recall: 0.8889
    F1-score: 0.8421
  Full Classification Report:
                              precision    recall  f1-score   support

Arctocephalus_galapagoensis       0.80      0.89      0.84         9
        Zalophus_wollebaeki       0.98      0.97      0.97        58

                   accuracy                           0.96        67
                  macro avg       0.89      0.93      0.91        67
               weighted avg       0.96      0.96      0.96        67


--- Starting Fold 4/5 ---
  Train samples in fold: 269
  Validation samples in fold: 67
  Train labels distribution (before balancing): [ 36 233]
  Val labels distribution: [ 9 58]
  Train samples AFTER balancing (undersampled): 72
  Balanced Train labels distribution: [36 36]
  Class Weights for training: {0: np.float64(1.0),

Predicting on validation set:   0%|          | 0/3 [00:00<?, ?it/s]

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 6s/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 204ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 6s/step
Fold 4 Metrics:
  Overall Accuracy: 0.9403
  Minority Class (Arctocephalus_galapagoensis):
    Precision: 0.7778
    Recall: 0.7778
    F1-score: 0.7778
  Full Classification Report:
                              precision    recall  f1-score   support

Arctocephalus_galapagoensis       0.78      0.78      0.78         9
        Zalophus_wollebaeki       0.97      0.97      0.97        58

                   accuracy                           0.94        67
                  macro avg       0.87      0.87      0.87        67
               weighted avg       0.94      0.94      0.94        67


--- Starting Fold 5/5 ---
  Train samples in fold: 269
  Validation samples in fold: 67
  Train labels distribution (before balancing): [ 36 233]
  Val labels distribution: [ 9 58]
  Tr

Predicting on validation set:   0%|          | 0/3 [00:00<?, ?it/s]

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 5s/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 155ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 5s/step
Fold 5 Metrics:
  Overall Accuracy: 0.9254
  Minority Class (Arctocephalus_galapagoensis):
    Precision: 0.7500
    Recall: 0.6667
    F1-score: 0.7059
  Full Classification Report:
                              precision    recall  f1-score   support

Arctocephalus_galapagoensis       0.75      0.67      0.71         9
        Zalophus_wollebaeki       0.95      0.97      0.96        58

                   accuracy                           0.93        67
                  macro avg       0.85      0.82      0.83        67
               weighted avg       0.92      0.93      0.92        67


--- K-Fold Cross-Validation Final Results ---
Average Overall Accuracy: 0.9493 +/- 0.0153
Average Minority Precision: 0.7892 +/- 0.0234
Average Minority Recall: 0.8444 +/- 0.1133
Averag

In [None]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D
from tensorflow.keras.applications.inception_v3 import InceptionV3, preprocess_input
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, classification_report, confusion_matrix # Added confusion_matrix
import os
import cv2
from tqdm.notebook import tqdm
import pandas as pd
import json

# Seed both NumPy and TensorFlow so repeated runs draw the same random numbers.
for seed_fn in (np.random.seed, tf.random.set_seed):
    seed_fn(42)

# Query the GPU list once and reuse it for both the count and the warning.
gpu_devices = tf.config.experimental.list_physical_devices('GPU')
print("Num GPUs Available: ", len(gpu_devices))
if len(gpu_devices) == 0:
    print("WARNING: No GPU found. Training will be extremely slow on CPU. Please enable GPU runtime: Runtime -> Change runtime type -> GPU.")

# ----------------------------------------------------------------------
# Step 1: Mount Google Drive (if not already mounted)
# ----------------------------------------------------------------------
# Colab-only import: this cell is meant to run inside Google Colab.
from google.colab import drive
# Mount Google Drive at /content/drive; `data_dir` below is a path under this mount point.
drive.mount('/content/drive')

# ----------------------------------------------------------------------
# Step 2: Load and Prepare Your Full Dataset
# ----------------------------------------------------------------------
# --- IMPORTANT: This path should point to your combined dataset folder ---
data_dir = '/content/drive/My Drive/galapagos_seals_annotated_data/my_galapagos_seals_dataset/combined_dataset_for_kfold'

# Only sub-directories of data_dir are classes. A stray file in data_dir must
# not receive a class index: it would inflate NUM_CLASSES (and therefore the
# one-hot width and the model's output layer) with a class that has no samples.
class_names = []
for entry in sorted(os.listdir(data_dir)):
    entry_path = os.path.join(data_dir, entry)
    if os.path.isdir(entry_path):
        class_names.append(entry)
    else:
        print(f"Warning: '{entry_path}' is not a directory. Skipping.")
class_to_idx = {name: i for i, name in enumerate(class_names)}

print(f"Detected classes: {class_names}")

image_paths = []
labels = []
for class_name in class_names:
    class_path = os.path.join(data_dir, class_name)
    # os.listdir returns entries in arbitrary order; sorting fixes the sample
    # order so the seeded StratifiedKFold split is the same on every run.
    for img_name in sorted(os.listdir(class_path)):
        img_path = os.path.join(class_path, img_name)
        if os.path.isfile(img_path):  # Ignore nested directories
            image_paths.append(img_path)
            labels.append(class_to_idx[class_name])

image_paths = np.array(image_paths)
# Explicit integer dtype: an empty list would otherwise become a float array,
# which np.bincount rejects.
labels = np.array(labels, dtype=np.int64)

print(f"\nTotal number of samples loaded for K-fold: {len(image_paths)}")
# minlength makes empty class folders show up as a 0 count instead of being dropped.
print(f"Initial Class distribution: {np.bincount(labels, minlength=len(class_names))}")

# Define input shape for InceptionV3
IMG_HEIGHT, IMG_WIDTH = 299, 299
NUM_CLASSES = len(class_names)
BATCH_SIZE = 32

# Data preprocessing function
def load_and_preprocess_image(image_path, label, img_size=(IMG_HEIGHT, IMG_WIDTH)):
    """Turn one (path, integer label) pair into a model-ready (image, one-hot) pair.

    Intended to be mapped over a tf.data.Dataset of file paths and labels.

    Args:
        image_path: Path of the image file to read.
        label: Integer class index of the image.
        img_size: Target (height, width) passed to tf.image.resize.

    Returns:
        A tuple (image, one_hot_label): the decoded 3-channel image, resized to
        `img_size` and run through the InceptionV3 `preprocess_input`, and the
        label one-hot encoded with depth NUM_CLASSES.
    """
    raw_bytes = tf.io.read_file(image_path)
    resized = tf.image.resize(tf.image.decode_jpeg(raw_bytes, channels=3), img_size)
    return preprocess_input(resized), tf.one_hot(label, NUM_CLASSES)

# Data augmentation function
def augment_image(image, label):
    """Apply light random augmentation to a single training sample.

    This function is mapped over the training dataset *after*
    `load_and_preprocess_image`, i.e. on images that already went through the
    InceptionV3 `preprocess_input`.

    Args:
        image: Preprocessed image tensor (expected value range [-1, 1]).
        label: Label tensor; passed through unchanged.

    Returns:
        A tuple (augmented_image, label).
    """
    image = tf.image.random_flip_left_right(image)
    image = tf.image.random_brightness(image, max_delta=0.2)
    image = tf.image.random_contrast(image, lower=0.8, upper=1.2)
    # Brightness/contrast jitter can push values outside the [-1, 1] range that
    # InceptionV3's preprocessing produces and the ImageNet weights were
    # trained on, so clamp back into that range.
    image = tf.clip_by_value(image, -1.0, 1.0)
    return image, label

# ----------------------------------------------------------------------
# Step 3: Define Your Model Building Function
# ----------------------------------------------------------------------
def build_inceptionv3_model(input_shape=(IMG_HEIGHT, IMG_WIDTH, 3), num_classes=NUM_CLASSES):
    """Build and compile a transfer-learning classifier on top of InceptionV3.

    Args:
        input_shape: Shape of one input image, (height, width, channels).
        num_classes: Number of units in the softmax output layer.

    Returns:
        A compiled Keras Model: an ImageNet-initialised InceptionV3 backbone
        with every layer frozen, followed by global average pooling, a
        512-unit ReLU layer and a softmax output. Compiled with the Adam
        optimizer, categorical cross-entropy loss and the accuracy metric.
    """
    backbone = InceptionV3(weights='imagenet', include_top=False, input_shape=input_shape)

    # Freeze the backbone so that only the new classification head is trained.
    for backbone_layer in backbone.layers:
        backbone_layer.trainable = False

    # Classification head stacked on the backbone's feature maps.
    pooled = GlobalAveragePooling2D()(backbone.output)
    hidden = Dense(512, activation='relu')(pooled)
    class_probs = Dense(num_classes, activation='softmax')(hidden)

    classifier = Model(inputs=backbone.input, outputs=class_probs)
    classifier.compile(
        optimizer='adam',
        loss='categorical_crossentropy',
        metrics=['accuracy'],
    )
    return classifier

# ----------------------------------------------------------------------
# Step 4: Implement Your Hybrid Balancing Strategy (within the fold)
# ----------------------------------------------------------------------
from imblearn.under_sampling import RandomUnderSampler
from sklearn.utils import class_weight

def apply_hybrid_balancing_to_fold(X_train_paths, y_train_labels, class_names):
    """Balance one fold's training data by undersampling plus class weights.

    Args:
        X_train_paths: NumPy array of image paths for the fold's training split.
        y_train_labels: NumPy array of integer class indices, aligned with
            `X_train_paths`.
        class_names: Sequence of all class names; only its length is used, to
            enumerate the class indices 0..len(class_names)-1.

    Returns:
        A tuple (X_resampled_paths, y_resampled_labels, class_weights_dict):
        the paths and labels kept after randomly undersampling the majority
        class, and a {class_index: weight} dict of 'balanced' class weights
        computed on the resampled labels, with plain Python int keys and float
        values.

    Raises:
        ValueError: propagated from scikit-learn's compute_class_weight when a
            class index has no sample in the resampled labels.
    """
    # imblearn resamples a feature matrix, so feed it the row indices as a
    # single dummy feature and use the surviving indices to select the paths.
    row_ids = np.arange(len(X_train_paths)).reshape(-1, 1)
    rus = RandomUnderSampler(sampling_strategy='majority', random_state=42)
    kept_row_ids, y_resampled_labels = rus.fit_resample(row_ids, y_train_labels)
    X_resampled_paths = X_train_paths[kept_row_ids.ravel()]

    # Class weights make the loss pay more attention to whichever classes are
    # still under-represented after undersampling.
    all_class_indices = np.arange(len(class_names))
    weights = class_weight.compute_class_weight(class_weight='balanced',
                                                classes=all_class_indices,
                                                y=y_resampled_labels)
    # Plain int/float instead of numpy scalars: keeps logs readable
    # ({0: 1.0, ...} rather than {0: np.float64(1.0), ...}).
    class_weights_dict = {
        int(class_idx): float(weight)
        for class_idx, weight in zip(all_class_indices, weights)
    }

    return X_resampled_paths, y_resampled_labels, class_weights_dict

# ----------------------------------------------------------------------
# Define where to save K-fold results
# This will create a 'kfold_results' folder inside 'my_galapagos_seals_dataset'
# ----------------------------------------------------------------------
# Results go into a 'kfold_results' folder located in the parent directory of
# data_dir, i.e. next to the dataset folder rather than inside it.
kfold_results_base_dir = os.path.join(os.path.dirname(data_dir), 'kfold_results')
# exist_ok=True: re-running this cell reuses an existing folder instead of raising.
os.makedirs(kfold_results_base_dir, exist_ok=True)
print(f"\nK-fold results will be saved in: {kfold_results_base_dir}")

# ----------------------------------------------------------------------
# Step 5: K-Fold Cross-Validation Loop (MODIFIED for saving results & CM)
# ----------------------------------------------------------------------
N_SPLITS = 5
skf = StratifiedKFold(n_splits=N_SPLITS, shuffle=True, random_state=42)

fold_results = []
all_confusion_matrices = []  # One confusion matrix per fold, averaged after the loop

# Fixed label order for every report and confusion matrix: without `labels=`,
# scikit-learn infers the labels from the fold's data, so a class missing from
# both truth and predictions would break the `target_names` mapping and give a
# smaller confusion matrix that cannot be averaged with the other folds.
all_class_indices = list(range(len(class_names)))

# The minority class is a property of the whole dataset, so determine it once
# instead of recomputing it in every fold.
minority_class_label_idx = np.bincount(labels).argmin()
minority_class_name = class_names[minority_class_label_idx]

for fold_idx, (train_index, val_index) in enumerate(skf.split(image_paths, labels)):
    print(f"\n--- Starting Fold {fold_idx + 1}/{N_SPLITS} ---")

    # Define and create directory for current fold's results.
    # NOTE: folder names are zero-based (fold_0 ...) while log messages are one-based.
    current_fold_dir = os.path.join(kfold_results_base_dir, f'fold_{fold_idx}')
    os.makedirs(current_fold_dir, exist_ok=True)
    print(f"  Results for Fold {fold_idx + 1} will be saved in: {current_fold_dir}")

    # Split data for the current fold
    X_train_fold_paths, X_val_fold_paths = image_paths[train_index], image_paths[val_index]
    y_train_fold_labels, y_val_fold_labels = labels[train_index], labels[val_index]

    print(f"  Train samples in fold: {len(X_train_fold_paths)}")
    print(f"  Validation samples in fold: {len(X_val_fold_paths)}")
    print(f"  Train labels distribution (before balancing): {np.bincount(y_train_fold_labels)}")
    print(f"  Val labels distribution: {np.bincount(y_val_fold_labels)}")

    # Apply Hybrid Balancing ONLY to the training data of the current fold,
    # so the validation split keeps the original class distribution.
    X_train_balanced_paths, y_train_balanced_labels, class_weights = \
        apply_hybrid_balancing_to_fold(X_train_fold_paths, y_train_fold_labels, class_names)

    print(f"  Train samples AFTER balancing (undersampled): {len(X_train_balanced_paths)}")
    print(f"  Balanced Train labels distribution: {np.bincount(y_train_balanced_labels)}")
    print(f"  Class Weights for training: {class_weights}")

    # Training dataset: Shuffle, Load, Augment, Batch, Prefetch.
    # Shuffling happens on the (path, label) pairs *before* decoding, so the
    # shuffle buffer holds short strings rather than decoded images, and a
    # buffer covering the whole split gives a uniform shuffle every epoch.
    train_ds = tf.data.Dataset.from_tensor_slices((X_train_balanced_paths, y_train_balanced_labels))
    train_ds = train_ds.shuffle(buffer_size=len(X_train_balanced_paths), reshuffle_each_iteration=True)
    train_ds = train_ds.map(load_and_preprocess_image, num_parallel_calls=tf.data.AUTOTUNE)
    train_ds = train_ds.map(augment_image, num_parallel_calls=tf.data.AUTOTUNE)
    train_ds = train_ds.batch(BATCH_SIZE).prefetch(tf.data.AUTOTUNE)

    # Validation dataset: Load, Batch, Prefetch (no augmentation, no shuffling)
    val_ds = tf.data.Dataset.from_tensor_slices((X_val_fold_paths, y_val_fold_labels))
    val_ds = val_ds.map(load_and_preprocess_image, num_parallel_calls=tf.data.AUTOTUNE)
    val_ds = val_ds.batch(BATCH_SIZE).prefetch(tf.data.AUTOTUNE)

    # Build and compile a fresh model for each fold so no weights leak between folds
    model = build_inceptionv3_model()

    # Callbacks: the checkpoint keeps the lowest-val_loss model of this fold on disk
    model_checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(
        filepath=os.path.join(current_fold_dir, 'best_model.h5'),
        monitor='val_loss',
        save_best_only=True,
        mode='min',
        verbose=1
    )
    callbacks = [
        tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=15, restore_best_weights=True),
        tf.keras.callbacks.ReduceLROnPlateau(monitor='val_loss', factor=0.2, patience=7, min_lr=0.00001, verbose=1),
        model_checkpoint_callback
    ]

    # Train the model
    print("  Training model...")
    history = model.fit(train_ds,
                        epochs=100,  # Upper bound; EarlyStopping ends training sooner
                        validation_data=val_ds,
                        class_weight=class_weights,
                        callbacks=callbacks,
                        verbose=1)

    # Save training history for the current fold
    history_df = pd.DataFrame(history.history)
    history_df.to_csv(os.path.join(current_fold_dir, 'training_history.csv'), index=False)
    print(f"  Saved training history for Fold {fold_idx + 1}.")

    # Evaluate the model on the validation data for the current fold
    print("  Evaluating model on validation set...")
    y_true_val = []
    y_pred_proba_val = []

    # Collect labels and predictions batch by batch so they stay paired.
    for images, labels_one_hot in tqdm(val_ds, desc="Predicting on validation set"):
        y_true_val.extend(np.argmax(labels_one_hot.numpy(), axis=1))
        # predict_on_batch runs a single forward pass; calling model.predict()
        # inside a loop prints a progress bar per batch and is not what
        # predict() is designed for.
        y_pred_proba_val.extend(model.predict_on_batch(images))

    y_pred_val = np.argmax(np.array(y_pred_proba_val), axis=1)

    # Calculate metrics for the current fold
    fold_accuracy = accuracy_score(y_true_val, y_pred_val)
    report = classification_report(y_true_val, y_pred_val, labels=all_class_indices,
                                   target_names=class_names, output_dict=True, zero_division=0)

    # Confusion matrix with a fixed (n_classes x n_classes) shape in every fold
    cm = confusion_matrix(y_true_val, y_pred_val, labels=all_class_indices)
    all_confusion_matrices.append(cm)  # Store for averaging later

    print(f"\nFold {fold_idx + 1} Metrics:")
    print(f"  Overall Accuracy: {fold_accuracy:.4f}")

    minority_precision = report[minority_class_name]['precision']
    minority_recall = report[minority_class_name]['recall']
    minority_f1 = report[minority_class_name]['f1-score']

    print(f"  Minority Class ({minority_class_name}):")
    print(f"    Precision: {minority_precision:.4f}")
    print(f"    Recall: {minority_recall:.4f}")
    print(f"    F1-score: {minority_f1:.4f}")
    print("  Full Classification Report:\n",
          classification_report(y_true_val, y_pred_val, labels=all_class_indices,
                                target_names=class_names, zero_division=0))

    # Print Confusion Matrix for the current fold (rows are true, columns are predicted)
    print("\n  Confusion Matrix for Fold {}:".format(fold_idx + 1))
    cm_df = pd.DataFrame(cm, index=class_names, columns=class_names)
    print(cm_df)

    # Save Confusion Matrix for the current fold
    cm_filepath = os.path.join(current_fold_dir, 'confusion_matrix.csv')
    cm_df.to_csv(cm_filepath)
    print(f"  Confusion Matrix saved to: {cm_filepath}")

    fold_results.append({
        'overall_accuracy': fold_accuracy,
        'minority_precision': minority_precision,
        'minority_recall': minority_recall,
        'minority_f1': minority_f1,
        'confusion_matrix': cm.tolist()  # Convert numpy array to list for JSON compatibility
    })

    # Free the Keras graph/session state and the model before the next fold
    tf.keras.backend.clear_session()
    del model

# ----------------------------------------------------------------------
# Step 6: Aggregate and Report Final Results (MODIFIED to include Average CM)
# ----------------------------------------------------------------------
print("\n--- K-Fold Cross-Validation Final Results ---")

# Gather each metric across folds once, then take mean and (population) std.
accuracy_per_fold = [res['overall_accuracy'] for res in fold_results]
precision_per_fold = [res['minority_precision'] for res in fold_results]
recall_per_fold = [res['minority_recall'] for res in fold_results]
f1_per_fold = [res['minority_f1'] for res in fold_results]

avg_overall_accuracy, std_overall_accuracy = np.mean(accuracy_per_fold), np.std(accuracy_per_fold)
avg_minority_precision, std_minority_precision = np.mean(precision_per_fold), np.std(precision_per_fold)
avg_minority_recall, std_minority_recall = np.mean(recall_per_fold), np.std(recall_per_fold)
avg_minority_f1, std_minority_f1 = np.mean(f1_per_fold), np.std(f1_per_fold)

# Stack the per-fold matrices into one (folds, classes, classes) array and
# average element-wise over the fold axis.
stacked_cms = np.array(all_confusion_matrices)
average_cm = np.mean(stacked_cms, axis=0)
print("\n--- Average Confusion Matrix across all Folds (Rounded to 2 decimal places) ---")
average_cm_df = pd.DataFrame(average_cm, index=class_names, columns=class_names)
print(average_cm_df.round(2))  # Rounded only for display

# Human-readable summary; metric values are pre-formatted "mean +/- std" strings.
final_summary = {}
final_summary["Average Overall Accuracy"] = f"{avg_overall_accuracy:.4f} +/- {std_overall_accuracy:.4f}"
final_summary["Average Minority Precision"] = f"{avg_minority_precision:.4f} +/- {std_minority_precision:.4f}"
final_summary["Average Minority Recall"] = f"{avg_minority_recall:.4f} +/- {std_minority_recall:.4f}"
final_summary["Average Minority F1-score"] = f"{avg_minority_f1:.4f} +/- {std_minority_f1:.4f}"
final_summary["Average Confusion Matrix (rows are true, columns are predicted)"] = average_cm.round(2).tolist()

print(json.dumps(final_summary, indent=4))

# Persist the same summary as JSON next to the per-fold result folders.
summary_filepath = os.path.join(kfold_results_base_dir, 'kfold_summary_metrics.json')
with open(summary_filepath, 'w') as summary_file:
    json.dump(final_summary, summary_file, indent=4)
print(f"\nFinal K-fold summary metrics and average CM saved to: {summary_filepath}")

Num GPUs Available:  1
Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
Detected classes: ['Arctocephalus_galapagoensis', 'Zalophus_wollebaeki']

Total number of samples loaded for K-fold: 336
Initial Class distribution: [ 45 291]

K-fold results will be saved in: /content/drive/My Drive/galapagos_seals_annotated_data/my_galapagos_seals_dataset/kfold_results

--- Starting Fold 1/5 ---
  Results for Fold 1 will be saved in: /content/drive/My Drive/galapagos_seals_annotated_data/my_galapagos_seals_dataset/kfold_results/fold_0
  Train samples in fold: 268
  Validation samples in fold: 68
  Train labels distribution (before balancing): [ 36 232]
  Val labels distribution: [ 9 59]
  Train samples AFTER balancing (undersampled): 72
  Balanced Train labels distribution: [36 36]
  Class Weights for training: {0: np.float64(1.0), 1: np.float64(1.0)}
  Training model...
Epoch 1/100
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━



[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m31s[0m 9s/step - accuracy: 0.5482 - loss: 1.2630 - val_accuracy: 0.1618 - val_loss: 2.5557 - learning_rate: 0.0010
Epoch 2/100
[1m2/3[0m [32m━━━━━━━━━━━━━[0m[37m━━━━━━━[0m [1m0s[0m 145ms/step - accuracy: 0.4766 - loss: 1.3043
Epoch 2: val_loss improved from 2.55570 to 0.22197, saving model to /content/drive/My Drive/galapagos_seals_annotated_data/my_galapagos_seals_dataset/kfold_results/fold_0/best_model.h5




[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 858ms/step - accuracy: 0.5438 - loss: 1.1186 - val_accuracy: 0.9412 - val_loss: 0.2220 - learning_rate: 0.0010
Epoch 3/100
[1m2/3[0m [32m━━━━━━━━━━━━━[0m[37m━━━━━━━[0m [1m0s[0m 112ms/step - accuracy: 0.7734 - loss: 0.3818
Epoch 3: val_loss did not improve from 0.22197
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 411ms/step - accuracy: 0.7478 - loss: 0.4058 - val_accuracy: 0.7941 - val_loss: 0.3833 - learning_rate: 0.0010
Epoch 4/100
[1m2/3[0m [32m━━━━━━━━━━━━━[0m[37m━━━━━━━[0m [1m0s[0m 146ms/step - accuracy: 0.8984 - loss: 0.2296
Epoch 4: val_loss did not improve from 0.22197
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 467ms/step - accuracy: 0.8867 - loss: 0.2436 - val_accuracy: 0.5294 - val_loss: 1.0411 - learning_rate: 0.0010
Epoch 5/100
[1m2/3[0m [32m━━━━━━━━━━━━━[0m[37m━━━━━━━[0m [1m0s[0m 11



[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 2s/step - accuracy: 0.9813 - loss: 0.1340 - val_accuracy: 0.9412 - val_loss: 0.1496 - learning_rate: 0.0010
Epoch 7/100
[1m2/3[0m [32m━━━━━━━━━━━━━[0m[37m━━━━━━━[0m [1m0s[0m 141ms/step - accuracy: 0.9766 - loss: 0.1302
Epoch 7: val_loss did not improve from 0.14960
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 414ms/step - accuracy: 0.9813 - loss: 0.1322 - val_accuracy: 0.8382 - val_loss: 0.2558 - learning_rate: 0.0010
Epoch 8/100
[1m2/3[0m [32m━━━━━━━━━━━━━[0m[37m━━━━━━━[0m [1m0s[0m 109ms/step - accuracy: 0.9922 - loss: 0.0836
Epoch 8: val_loss did not improve from 0.14960
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 398ms/step - accuracy: 0.9891 - loss: 0.0808 - val_accuracy: 0.8088 - val_loss: 0.4343 - learning_rate: 0.0010
Epoch 9/100
[1m2/3[0m [32m━━━━━━━━━━━━━[0m[37m━━━━━━━[0m [1m0s[0m 133ms

Predicting on validation set:   0%|          | 0/3 [00:00<?, ?it/s]

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 5s/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 174ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 6s/step

Fold 1 Metrics:
  Overall Accuracy: 0.9412
  Minority Class (Arctocephalus_galapagoensis):
    Precision: 0.7273
    Recall: 0.8889
    F1-score: 0.8000
  Full Classification Report:
                              precision    recall  f1-score   support

Arctocephalus_galapagoensis       0.73      0.89      0.80         9
        Zalophus_wollebaeki       0.98      0.95      0.97        59

                   accuracy                           0.94        68
                  macro avg       0.85      0.92      0.88        68
               weighted avg       0.95      0.94      0.94        68


  Confusion Matrix for Fold 1:
                             Arctocephalus_galapagoensis  Zalophus_wollebaeki
Arctocephalus_galapagoensis                            8                   



[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m30s[0m 9s/step - accuracy: 0.5078 - loss: 0.9672 - val_accuracy: 0.2836 - val_loss: 1.5948 - learning_rate: 0.0010
Epoch 2/100
[1m2/3[0m [32m━━━━━━━━━━━━━[0m[37m━━━━━━━[0m [1m0s[0m 138ms/step - accuracy: 0.6172 - loss: 0.7953
Epoch 2: val_loss improved from 1.59485 to 0.21916, saving model to /content/drive/My Drive/galapagos_seals_annotated_data/my_galapagos_seals_dataset/kfold_results/fold_1/best_model.h5




[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m23s[0m 2s/step - accuracy: 0.6419 - loss: 0.7307 - val_accuracy: 0.8806 - val_loss: 0.2192 - learning_rate: 0.0010
Epoch 3/100
[1m2/3[0m [32m━━━━━━━━━━━━━[0m[37m━━━━━━━[0m [1m0s[0m 151ms/step - accuracy: 0.8125 - loss: 0.3523
Epoch 3: val_loss did not improve from 0.21916
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 419ms/step - accuracy: 0.8368 - loss: 0.3302 - val_accuracy: 0.7164 - val_loss: 0.6257 - learning_rate: 0.0010
Epoch 4/100
[1m2/3[0m [32m━━━━━━━━━━━━━[0m[37m━━━━━━━[0m [1m0s[0m 115ms/step - accuracy: 0.8828 - loss: 0.2765
Epoch 4: val_loss did not improve from 0.21916
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 361ms/step - accuracy: 0.8581 - loss: 0.3306 - val_accuracy: 0.8507 - val_loss: 0.3391 - learning_rate: 0.0010
Epoch 5/100
[1m2/3[0m [32m━━━━━━━━━━━━━[0m[37m━━━━━━━[0m [1m0s[0m 116m



[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 864ms/step - accuracy: 0.9705 - loss: 0.1198 - val_accuracy: 0.9403 - val_loss: 0.1590 - learning_rate: 0.0010
Epoch 6/100
[1m2/3[0m [32m━━━━━━━━━━━━━[0m[37m━━━━━━━[0m [1m0s[0m 114ms/step - accuracy: 0.8438 - loss: 0.3545
Epoch 6: val_loss improved from 0.15904 to 0.11521, saving model to /content/drive/My Drive/galapagos_seals_annotated_data/my_galapagos_seals_dataset/kfold_results/fold_1/best_model.h5




[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 993ms/step - accuracy: 0.8594 - loss: 0.3198 - val_accuracy: 0.9552 - val_loss: 0.1152 - learning_rate: 0.0010
Epoch 7/100
[1m2/3[0m [32m━━━━━━━━━━━━━[0m[37m━━━━━━━[0m [1m0s[0m 127ms/step - accuracy: 0.9531 - loss: 0.1274
Epoch 7: val_loss did not improve from 0.11521
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 405ms/step - accuracy: 0.9627 - loss: 0.1180 - val_accuracy: 0.8657 - val_loss: 0.2711 - learning_rate: 0.0010
Epoch 8/100
[1m2/3[0m [32m━━━━━━━━━━━━━[0m[37m━━━━━━━[0m [1m0s[0m 115ms/step - accuracy: 0.9766 - loss: 0.1066
Epoch 8: val_loss did not improve from 0.11521
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 400ms/step - accuracy: 0.9744 - loss: 0.1036 - val_accuracy: 0.9104 - val_loss: 0.2538 - learning_rate: 0.0010
Epoch 9/100
[1m2/3[0m [32m━━━━━━━━━━━━━[0m[37m━━━━━━━[0m [1m0s[0m 11



[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 1s/step - accuracy: 0.9813 - loss: 0.0530 - val_accuracy: 0.9552 - val_loss: 0.0973 - learning_rate: 0.0010
Epoch 11/100
[1m2/3[0m [32m━━━━━━━━━━━━━[0m[37m━━━━━━━[0m [1m0s[0m 141ms/step - accuracy: 1.0000 - loss: 0.0534
Epoch 11: val_loss did not improve from 0.09735
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 741ms/step - accuracy: 1.0000 - loss: 0.0524 - val_accuracy: 0.9552 - val_loss: 0.1008 - learning_rate: 0.0010
Epoch 12/100
[1m2/3[0m [32m━━━━━━━━━━━━━[0m[37m━━━━━━━[0m [1m0s[0m 112ms/step - accuracy: 1.0000 - loss: 0.0374
Epoch 12: val_loss did not improve from 0.09735
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 354ms/step - accuracy: 1.0000 - loss: 0.0361 - val_accuracy: 0.9552 - val_loss: 0.1298 - learning_rate: 0.0010
Epoch 13/100
[1m2/3[0m [32m━━━━━━━━━━━━━[0m[37m━━━━━━━[0m [1m0s[0m 

Predicting on validation set:   0%|          | 0/3 [00:00<?, ?it/s]

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 6s/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 198ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 5s/step

Fold 2 Metrics:
  Overall Accuracy: 0.9552
  Minority Class (Arctocephalus_galapagoensis):
    Precision: 0.8000
    Recall: 0.8889
    F1-score: 0.8421
  Full Classification Report:
                              precision    recall  f1-score   support

Arctocephalus_galapagoensis       0.80      0.89      0.84         9
        Zalophus_wollebaeki       0.98      0.97      0.97        58

                   accuracy                           0.96        67
                  macro avg       0.89      0.93      0.91        67
               weighted avg       0.96      0.96      0.96        67


  Confusion Matrix for Fold 2:
                             Arctocephalus_galapagoensis  Zalophus_wollebaeki
Arctocephalus_galapagoensis                            8                   



[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m30s[0m 9s/step - accuracy: 0.6389 - loss: 0.6436 - val_accuracy: 0.8657 - val_loss: 0.3685 - learning_rate: 0.0010
Epoch 2/100
[1m2/3[0m [32m━━━━━━━━━━━━━[0m[37m━━━━━━━[0m [1m0s[0m 137ms/step - accuracy: 0.8438 - loss: 0.3154
Epoch 2: val_loss improved from 0.36854 to 0.11223, saving model to /content/drive/My Drive/galapagos_seals_annotated_data/my_galapagos_seals_dataset/kfold_results/fold_2/best_model.h5




[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 846ms/step - accuracy: 0.8177 - loss: 0.3577 - val_accuracy: 0.9701 - val_loss: 0.1122 - learning_rate: 0.0010
Epoch 3/100
[1m2/3[0m [32m━━━━━━━━━━━━━[0m[37m━━━━━━━[0m [1m0s[0m 113ms/step - accuracy: 0.9297 - loss: 0.1950
Epoch 3: val_loss did not improve from 0.11223
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 400ms/step - accuracy: 0.9162 - loss: 0.2186 - val_accuracy: 0.8209 - val_loss: 0.4457 - learning_rate: 0.0010
Epoch 4/100
[1m2/3[0m [32m━━━━━━━━━━━━━[0m[37m━━━━━━━[0m [1m0s[0m 108ms/step - accuracy: 0.9688 - loss: 0.0999
Epoch 4: val_loss improved from 0.11223 to 0.08379, saving model to /content/drive/My Drive/galapagos_seals_annotated_data/my_galapagos_seals_dataset/kfold_results/fold_2/best_model.h5




[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 879ms/step - accuracy: 0.9635 - loss: 0.1033 - val_accuracy: 0.9701 - val_loss: 0.0838 - learning_rate: 0.0010
Epoch 5/100
[1m2/3[0m [32m━━━━━━━━━━━━━[0m[37m━━━━━━━[0m [1m0s[0m 131ms/step - accuracy: 0.9844 - loss: 0.0718
Epoch 5: val_loss did not improve from 0.08379
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 740ms/step - accuracy: 0.9783 - loss: 0.0801 - val_accuracy: 0.9403 - val_loss: 0.1015 - learning_rate: 0.0010
Epoch 6/100
[1m2/3[0m [32m━━━━━━━━━━━━━[0m[37m━━━━━━━[0m [1m0s[0m 114ms/step - accuracy: 1.0000 - loss: 0.0333
Epoch 6: val_loss did not improve from 0.08379
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 401ms/step - accuracy: 1.0000 - loss: 0.0314 - val_accuracy: 0.9254 - val_loss: 0.1782 - learning_rate: 0.0010
Epoch 7/100
[1m2/3[0m [32m━━━━━━━━━━━━━[0m[37m━━━━━━━[0m [1m0s[0m 11

Predicting on validation set:   0%|          | 0/3 [00:00<?, ?it/s]

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 5s/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 156ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 6s/step

Fold 3 Metrics:
  Overall Accuracy: 0.9701
  Minority Class (Arctocephalus_galapagoensis):
    Precision: 0.8889
    Recall: 0.8889
    F1-score: 0.8889
  Full Classification Report:
                              precision    recall  f1-score   support

Arctocephalus_galapagoensis       0.89      0.89      0.89         9
        Zalophus_wollebaeki       0.98      0.98      0.98        58

                   accuracy                           0.97        67
                  macro avg       0.94      0.94      0.94        67
               weighted avg       0.97      0.97      0.97        67


  Confusion Matrix for Fold 3:
                             Arctocephalus_galapagoensis  Zalophus_wollebaeki
Arctocephalus_galapagoensis                            8                   



[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m30s[0m 9s/step - accuracy: 0.4839 - loss: 2.3457 - val_accuracy: 0.2537 - val_loss: 0.9421 - learning_rate: 0.0010
Epoch 2/100
[1m2/3[0m [32m━━━━━━━━━━━━━[0m[37m━━━━━━━[0m [1m0s[0m 123ms/step - accuracy: 0.5625 - loss: 0.8349
Epoch 2: val_loss did not improve from 0.94207
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 729ms/step - accuracy: 0.5590 - loss: 0.9584 - val_accuracy: 0.1343 - val_loss: 1.9355 - learning_rate: 0.0010
Epoch 3/100
[1m2/3[0m [32m━━━━━━━━━━━━━[0m[37m━━━━━━━[0m [1m0s[0m 121ms/step - accuracy: 0.5078 - loss: 0.9656
Epoch 3: val_loss improved from 0.94207 to 0.39183, saving model to /content/drive/My Drive/galapagos_seals_annotated_data/my_galapagos_seals_dataset/kfold_results/fold_3/best_model.h5




[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 1s/step - accuracy: 0.5247 - loss: 0.9111 - val_accuracy: 0.8806 - val_loss: 0.3918 - learning_rate: 0.0010
Epoch 4/100
[1m2/3[0m [32m━━━━━━━━━━━━━[0m[37m━━━━━━━[0m [1m0s[0m 114ms/step - accuracy: 0.8672 - loss: 0.4560
Epoch 4: val_loss improved from 0.39183 to 0.31095, saving model to /content/drive/My Drive/galapagos_seals_annotated_data/my_galapagos_seals_dataset/kfold_results/fold_3/best_model.h5




[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 840ms/step - accuracy: 0.7878 - loss: 0.5006 - val_accuracy: 0.8657 - val_loss: 0.3109 - learning_rate: 0.0010
Epoch 5/100
[1m2/3[0m [32m━━━━━━━━━━━━━[0m[37m━━━━━━━[0m [1m0s[0m 109ms/step - accuracy: 0.5703 - loss: 0.5375
Epoch 5: val_loss did not improve from 0.31095
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 398ms/step - accuracy: 0.6254 - loss: 0.5126 - val_accuracy: 0.8507 - val_loss: 0.5043 - learning_rate: 0.0010
Epoch 6/100
[1m2/3[0m [32m━━━━━━━━━━━━━[0m[37m━━━━━━━[0m [1m0s[0m 109ms/step - accuracy: 0.8750 - loss: 0.3467
Epoch 6: val_loss did not improve from 0.31095
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 397ms/step - accuracy: 0.8542 - loss: 0.3672 - val_accuracy: 0.7761 - val_loss: 0.5538 - learning_rate: 0.0010
Epoch 7/100
[1m2/3[0m [32m━━━━━━━━━━━━━[0m[37m━━━━━━━[0m [1m0s[0m 11



[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 1s/step - accuracy: 0.9115 - loss: 0.2976 - val_accuracy: 0.9104 - val_loss: 0.2751 - learning_rate: 0.0010
Epoch 8/100
[1m2/3[0m [32m━━━━━━━━━━━━━[0m[37m━━━━━━━[0m [1m0s[0m 113ms/step - accuracy: 0.9688 - loss: 0.2363
Epoch 8: val_loss improved from 0.27513 to 0.24555, saving model to /content/drive/My Drive/galapagos_seals_annotated_data/my_galapagos_seals_dataset/kfold_results/fold_3/best_model.h5




[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 815ms/step - accuracy: 0.9705 - loss: 0.2372 - val_accuracy: 0.9403 - val_loss: 0.2455 - learning_rate: 0.0010
Epoch 9/100
[1m2/3[0m [32m━━━━━━━━━━━━━[0m[37m━━━━━━━[0m [1m0s[0m 146ms/step - accuracy: 0.9688 - loss: 0.1858
Epoch 9: val_loss did not improve from 0.24555
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 370ms/step - accuracy: 0.9705 - loss: 0.1886 - val_accuracy: 0.9104 - val_loss: 0.3042 - learning_rate: 0.0010
Epoch 10/100
[1m2/3[0m [32m━━━━━━━━━━━━━[0m[37m━━━━━━━[0m [1m0s[0m 114ms/step - accuracy: 0.9609 - loss: 0.1609
Epoch 10: val_loss did not improve from 0.24555
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 357ms/step - accuracy: 0.9596 - loss: 0.1617 - val_accuracy: 0.9104 - val_loss: 0.2749 - learning_rate: 0.0010
Epoch 11/100
[1m2/3[0m [32m━━━━━━━━━━━━━[0m[37m━━━━━━━[0m [1m0s[0m



[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 853ms/step - accuracy: 0.9705 - loss: 0.1358 - val_accuracy: 0.9403 - val_loss: 0.2008 - learning_rate: 0.0010
Epoch 12/100
[1m2/3[0m [32m━━━━━━━━━━━━━[0m[37m━━━━━━━[0m [1m0s[0m 118ms/step - accuracy: 1.0000 - loss: 0.1048
Epoch 12: val_loss improved from 0.20085 to 0.19890, saving model to /content/drive/My Drive/galapagos_seals_annotated_data/my_galapagos_seals_dataset/kfold_results/fold_3/best_model.h5




[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 1s/step - accuracy: 1.0000 - loss: 0.1055 - val_accuracy: 0.9403 - val_loss: 0.1989 - learning_rate: 0.0010
Epoch 13/100
[1m2/3[0m [32m━━━━━━━━━━━━━[0m[37m━━━━━━━[0m [1m0s[0m 114ms/step - accuracy: 1.0000 - loss: 0.0849
Epoch 13: val_loss did not improve from 0.19890
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 401ms/step - accuracy: 1.0000 - loss: 0.0844 - val_accuracy: 0.9254 - val_loss: 0.2252 - learning_rate: 0.0010
Epoch 14/100
[1m2/3[0m [32m━━━━━━━━━━━━━[0m[37m━━━━━━━[0m [1m0s[0m 113ms/step - accuracy: 1.0000 - loss: 0.0662
Epoch 14: val_loss did not improve from 0.19890
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 360ms/step - accuracy: 1.0000 - loss: 0.0661 - val_accuracy: 0.9254 - val_loss: 0.2225 - learning_rate: 0.0010
Epoch 15/100
[1m2/3[0m [32m━━━━━━━━━━━━━[0m[37m━━━━━━━[0m [1m0s[0m 



[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 909ms/step - accuracy: 0.9813 - loss: 0.0655 - val_accuracy: 0.9403 - val_loss: 0.1940 - learning_rate: 0.0010
Epoch 16/100
[1m2/3[0m [32m━━━━━━━━━━━━━[0m[37m━━━━━━━[0m [1m0s[0m 115ms/step - accuracy: 1.0000 - loss: 0.0587
Epoch 16: val_loss improved from 0.19396 to 0.18519, saving model to /content/drive/My Drive/galapagos_seals_annotated_data/my_galapagos_seals_dataset/kfold_results/fold_3/best_model.h5




[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 1s/step - accuracy: 1.0000 - loss: 0.0544 - val_accuracy: 0.9403 - val_loss: 0.1852 - learning_rate: 0.0010
Epoch 17/100
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 92ms/step - accuracy: 1.0000 - loss: 0.0429 
Epoch 17: val_loss did not improve from 0.18519
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 360ms/step - accuracy: 1.0000 - loss: 0.0436 - val_accuracy: 0.9403 - val_loss: 0.1868 - learning_rate: 0.0010
Epoch 18/100
[1m2/3[0m [32m━━━━━━━━━━━━━[0m[37m━━━━━━━[0m [1m0s[0m 113ms/step - accuracy: 1.0000 - loss: 0.0359
Epoch 18: val_loss improved from 0.18519 to 0.18006, saving model to /content/drive/My Drive/galapagos_seals_annotated_data/my_galapagos_seals_dataset/kfold_results/fold_3/best_model.h5




[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 840ms/step - accuracy: 1.0000 - loss: 0.0352 - val_accuracy: 0.9403 - val_loss: 0.1801 - learning_rate: 0.0010
Epoch 19/100
[1m2/3[0m [32m━━━━━━━━━━━━━[0m[37m━━━━━━━[0m [1m0s[0m 115ms/step - accuracy: 1.0000 - loss: 0.0289
Epoch 19: val_loss did not improve from 0.18006
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 357ms/step - accuracy: 1.0000 - loss: 0.0289 - val_accuracy: 0.9403 - val_loss: 0.1832 - learning_rate: 0.0010
Epoch 20/100
[1m2/3[0m [32m━━━━━━━━━━━━━[0m[37m━━━━━━━[0m [1m0s[0m 111ms/step - accuracy: 1.0000 - loss: 0.0199
Epoch 20: val_loss did not improve from 0.18006
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 397ms/step - accuracy: 1.0000 - loss: 0.0214 - val_accuracy: 0.9104 - val_loss: 0.2012 - learning_rate: 0.0010
Epoch 21/100
[1m2/3[0m [32m━━━━━━━━━━━━━[0m[37m━━━━━━━[0m [1m0s[

Predicting on validation set:   0%|          | 0/3 [00:00<?, ?it/s]

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 5s/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 203ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 5s/step

Fold 4 Metrics:
  Overall Accuracy: 0.9403
  Minority Class (Arctocephalus_galapagoensis):
    Precision: 0.7778
    Recall: 0.7778
    F1-score: 0.7778
  Full Classification Report:
                              precision    recall  f1-score   support

Arctocephalus_galapagoensis       0.78      0.78      0.78         9
        Zalophus_wollebaeki       0.97      0.97      0.97        58

                   accuracy                           0.94        67
                  macro avg       0.87      0.87      0.87        67
               weighted avg       0.94      0.94      0.94        67


  Confusion Matrix for Fold 4:
                             Arctocephalus_galapagoensis  Zalophus_wollebaeki
Arctocephalus_galapagoensis                            7                   



[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m31s[0m 9s/step - accuracy: 0.5109 - loss: 1.0083 - val_accuracy: 0.8657 - val_loss: 0.2705 - learning_rate: 0.0010
Epoch 2/100
[1m2/3[0m [32m━━━━━━━━━━━━━[0m[37m━━━━━━━[0m [1m0s[0m 137ms/step - accuracy: 0.7344 - loss: 0.4065
Epoch 2: val_loss did not improve from 0.27046
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m19s[0m 365ms/step - accuracy: 0.7561 - loss: 0.3917 - val_accuracy: 0.4776 - val_loss: 0.9888 - learning_rate: 0.0010
Epoch 3/100
[1m2/3[0m [32m━━━━━━━━━━━━━[0m[37m━━━━━━━[0m [1m0s[0m 117ms/step - accuracy: 0.8438 - loss: 0.3604
Epoch 3: val_loss improved from 0.27046 to 0.21815, saving model to /content/drive/My Drive/galapagos_seals_annotated_data/my_galapagos_seals_dataset/kfold_results/fold_4/best_model.h5




[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 2s/step - accuracy: 0.8524 - loss: 0.3425 - val_accuracy: 0.8806 - val_loss: 0.2181 - learning_rate: 0.0010
Epoch 4/100
[1m2/3[0m [32m━━━━━━━━━━━━━[0m[37m━━━━━━━[0m [1m0s[0m 129ms/step - accuracy: 0.7578 - loss: 0.3346
Epoch 4: val_loss improved from 0.21815 to 0.21009, saving model to /content/drive/My Drive/galapagos_seals_annotated_data/my_galapagos_seals_dataset/kfold_results/fold_4/best_model.h5




[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 903ms/step - accuracy: 0.7886 - loss: 0.3092 - val_accuracy: 0.8806 - val_loss: 0.2101 - learning_rate: 0.0010
Epoch 5/100
[1m2/3[0m [32m━━━━━━━━━━━━━[0m[37m━━━━━━━[0m [1m0s[0m 111ms/step - accuracy: 0.9688 - loss: 0.1257
Epoch 5: val_loss did not improve from 0.21009
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 398ms/step - accuracy: 0.9705 - loss: 0.1203 - val_accuracy: 0.7910 - val_loss: 0.4872 - learning_rate: 0.0010
Epoch 6/100
[1m2/3[0m [32m━━━━━━━━━━━━━[0m[37m━━━━━━━[0m [1m0s[0m 113ms/step - accuracy: 0.9688 - loss: 0.1194
Epoch 6: val_loss did not improve from 0.21009
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 400ms/step - accuracy: 0.9635 - loss: 0.1321 - val_accuracy: 0.8657 - val_loss: 0.2230 - learning_rate: 0.0010
Epoch 7/100
[1m2/3[0m [32m━━━━━━━━━━━━━[0m[37m━━━━━━━[0m [1m0s[0m 11



[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 881ms/step - accuracy: 0.9705 - loss: 0.0813 - val_accuracy: 0.9254 - val_loss: 0.1624 - learning_rate: 0.0010
Epoch 8/100
[1m2/3[0m [32m━━━━━━━━━━━━━[0m[37m━━━━━━━[0m [1m0s[0m 118ms/step - accuracy: 1.0000 - loss: 0.0650
Epoch 8: val_loss did not improve from 0.16237
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 729ms/step - accuracy: 1.0000 - loss: 0.0646 - val_accuracy: 0.8955 - val_loss: 0.2010 - learning_rate: 0.0010
Epoch 9/100
[1m2/3[0m [32m━━━━━━━━━━━━━[0m[37m━━━━━━━[0m [1m0s[0m 117ms/step - accuracy: 1.0000 - loss: 0.0298
Epoch 9: val_loss did not improve from 0.16237
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 389ms/step - accuracy: 1.0000 - loss: 0.0334 - val_accuracy: 0.8955 - val_loss: 0.3164 - learning_rate: 0.0010
Epoch 10/100
[1m2/3[0m [32m━━━━━━━━━━━━━[0m[37m━━━━━━━[0m [1m0s[0m 1



[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 834ms/step - accuracy: 1.0000 - loss: 0.0177 - val_accuracy: 0.8955 - val_loss: 0.1571 - learning_rate: 0.0010
Epoch 12/100
[1m2/3[0m [32m━━━━━━━━━━━━━[0m[37m━━━━━━━[0m [1m0s[0m 116ms/step - accuracy: 1.0000 - loss: 0.0194
Epoch 12: val_loss improved from 0.15707 to 0.15260, saving model to /content/drive/My Drive/galapagos_seals_annotated_data/my_galapagos_seals_dataset/kfold_results/fold_4/best_model.h5




[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 862ms/step - accuracy: 1.0000 - loss: 0.0189 - val_accuracy: 0.9104 - val_loss: 0.1526 - learning_rate: 0.0010
Epoch 13/100
[1m2/3[0m [32m━━━━━━━━━━━━━[0m[37m━━━━━━━[0m [1m0s[0m 113ms/step - accuracy: 1.0000 - loss: 0.0128
Epoch 13: val_loss did not improve from 0.15260
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 488ms/step - accuracy: 1.0000 - loss: 0.0137 - val_accuracy: 0.8955 - val_loss: 0.1727 - learning_rate: 0.0010
Epoch 14/100
[1m2/3[0m [32m━━━━━━━━━━━━━[0m[37m━━━━━━━[0m [1m0s[0m 115ms/step - accuracy: 1.0000 - loss: 0.0134
Epoch 14: val_loss did not improve from 0.15260
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 726ms/step - accuracy: 1.0000 - loss: 0.0119 - val_accuracy: 0.9104 - val_loss: 0.2307 - learning_rate: 0.0010
Epoch 15/100
[1m2/3[0m [32m━━━━━━━━━━━━━[0m[37m━━━━━━━[0m [1m0s[

Predicting on validation set:   0%|          | 0/3 [00:00<?, ?it/s]

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 5s/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 176ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 6s/step

Fold 5 Metrics:
  Overall Accuracy: 0.9104
  Minority Class (Arctocephalus_galapagoensis):
    Precision: 0.6364
    Recall: 0.7778
    F1-score: 0.7000
  Full Classification Report:
                              precision    recall  f1-score   support

Arctocephalus_galapagoensis       0.64      0.78      0.70         9
        Zalophus_wollebaeki       0.96      0.93      0.95        58

                   accuracy                           0.91        67
                  macro avg       0.80      0.85      0.82        67
               weighted avg       0.92      0.91      0.91        67


  Confusion Matrix for Fold 5:
                             Arctocephalus_galapagoensis  Zalophus_wollebaeki
Arctocephalus_galapagoensis                            7                   