In [1]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D
from tensorflow.keras.applications.inception_v3 import InceptionV3, preprocess_input
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, classification_report
import os
import cv2 # or PIL for image loading
from tqdm.notebook import tqdm # For progress bars

# Set random seed for reproducibility
np.random.seed(42)
tf.random.set_seed(42)

In [2]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [5]:
import os
import shutil

# --- IMPORTANT: Confirm this base path matches your Google Drive ---
# This is the path to your main 'my_galapagos_seals_dataset' folder
base_dataset_path = '/content/drive/My Drive/galapagos_seals_annotated_data/my_galapagos_seals_dataset'

# Define the subfolders that contain your class data (train, test, validation)
data_splits = ['train', 'test', 'validation']

# Define the exact names of your class folders within each split
class_names = ['Arctocephalus_galapagoensis', 'Zalophus_wollebaeki']

# Define the path for your NEW combined dataset folder for K-fold
# This will be created inside your 'my_galapagos_seals_dataset' folder
combined_kfold = os.path.join(base_dataset_path, 'combined_dataset_for_kfold')

print(f"Preparing to create combined dataset in: {combined_kfold}")

# Create the top-level directory for the combined dataset if it doesn't exist
os.makedirs(combined_kfold, exist_ok=True)
print(f"Combined dataset directory ensured: {combined_kfold}")

# Create class-specific subfolders within the combined directory
# All images for 'Arctocephalus_galapagoensis' from train/test/val will go here, and similarly for 'Zalophus_wolwbaeki'
for class_name in class_names:
    os.makedirs(os.path.join(combined_kfold, class_name), exist_ok=True)
    print(f"Ensured class subfolder exists: {os.path.join(combined_kfold, class_name)}")

print("\n--- Starting image consolidation process ---")
total_images_copied = 0

# Iterate through each data split (train, test, validation)
for split_folder in data_splits:
    current_split_path = os.path.join(base_dataset_path, split_folder)

    if not os.path.exists(current_split_path):
        print(f"Warning: Data split folder '{current_split_path}' not found. Skipping this split.")
        continue

    # Iterate through each class within the current split
    for class_name in class_names:
        source_class_path = os.path.join(current_split_path, class_name)
        destination_class_path = os.path.join(combined_kfold, class_name)

        if not os.path.exists(source_class_path):
            print(f"Warning: Class folder '{source_class_path}' not found. Skipping this class in this split.")
            continue

        # Copy each individual image from the source class path to the combined destination class path
        for image_name in os.listdir(source_class_path):
            source_image_filepath = os.path.join(source_class_path, image_name)
            destination_image_filepath = os.path.join(destination_class_path, image_name)

            # Ensure we're only copying files (not other directories that might exist)
            if os.path.isfile(source_image_filepath):
                # Use shutil.copy2 to preserve metadata like modification times, which can be useful
                shutil.copy2(source_image_filepath, destination_image_filepath)
                total_images_copied += 1

print("\n--- Image Consolidation Complete! ---")
print(f"All images from 'train', 'test', and 'validation' splits have been combined.")
print(f"Total images copied to '{combined_kfold}': {total_images_copied}")
print(f"\nYou can now use '{combined_kfold}' as your 'data_dir' in Step 2 of your K-fold cross-validation notebook.")

Preparing to create combined dataset in: /content/drive/My Drive/galapagos_seals_annotated_data/my_galapagos_seals_dataset/combined_dataset_for_kfold
Combined dataset directory ensured: /content/drive/My Drive/galapagos_seals_annotated_data/my_galapagos_seals_dataset/combined_dataset_for_kfold
Ensured class subfolder exists: /content/drive/My Drive/galapagos_seals_annotated_data/my_galapagos_seals_dataset/combined_dataset_for_kfold/Arctocephalus_galapagoensis
Ensured class subfolder exists: /content/drive/My Drive/galapagos_seals_annotated_data/my_galapagos_seals_dataset/combined_dataset_for_kfold/Zalophus_wollebaeki

--- Starting image consolidation process ---

--- Image Consolidation Complete! ---
All images from 'train', 'test', and 'validation' splits have been combined.
Total images copied to '/content/drive/My Drive/galapagos_seals_annotated_data/my_galapagos_seals_dataset/combined_dataset_for_kfold': 336

You can now use '/content/drive/My Drive/galapagos_seals_annotated_data/m