<a href="https://colab.research.google.com/github/Auvarmf/PKCV/blob/main/Model3CV3_Dataset_Augmented_Process.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [15]:
import os
import cv2
import numpy as np
import shutil
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tqdm import tqdm
from google.colab import drive

In [16]:
# Attach Google Drive under /content/PKCV so the dataset folders used by the
# following cells are reachable through the local filesystem.
drive.mount(mountpoint='/content/PKCV')

Drive already mounted at /content/PKCV; to attempt to forcibly remount, call drive.mount("/content/PKCV", force_remount=True).


In [17]:
# Root folders on the mounted Drive: the original dataset and the augmented copy
BASE_DIR = '/content/PKCV/MyDrive/PROSUSCV/dataset_butterfly'
AUG_DIR = '/content/PKCV/MyDrive/PROSUSCV/dataset_butterfly_augmented'

# Both roots use the same split layout: train / validation / test
train_dir, val_dir, test_dir = (
    os.path.join(BASE_DIR, split) for split in ('train', 'validation', 'test')
)
aug_train_dir, aug_val_dir, aug_test_dir = (
    os.path.join(AUG_DIR, split) for split in ('train', 'validation', 'test')
)

In [18]:
# Size every image is resized to before augmentation
IMG_SIZE = (299, 299)

# Make sure each augmented split folder exists (no error if it is already there)
for _split_dir in (aug_train_dir, aug_val_dir, aug_test_dir):
    os.makedirs(_split_dir, exist_ok=True)

In [19]:
# Random transforms used to synthesise extra images from each source image.
_augmentation_options = {
    'rotation_range': 20,        # rotate by up to +/- 20 degrees
    'width_shift_range': 0.2,    # horizontal shift, as a fraction of image width
    'height_shift_range': 0.2,   # vertical shift, as a fraction of image height
    'shear_range': 0.2,          # shear intensity
    'zoom_range': 0.2,           # zoom in or out by up to 20 %
    'horizontal_flip': True,     # mirror left/right at random
    'fill_mode': 'nearest',      # fill uncovered pixels with the nearest valid pixel
}
data_gen = ImageDataGenerator(**_augmentation_options)

In [20]:
def copy_and_augment_data(src_dir, dest_dir, img_size, data_gen, save_prefix='aug', num_augmented=5):
    """Copy a class-per-folder image dataset and add augmented variants of each image.

    For every sub-directory (class) of ``src_dir`` a matching folder is created
    in ``dest_dir``. Each image OpenCV can decode is copied unchanged and
    then ``num_augmented`` randomly transformed JPEG variants are written next
    to it.

    Args:
        src_dir: Directory whose sub-directories are the class folders.
        dest_dir: Output root; class folders are created as needed.
        img_size: Target size handed to ``cv2.resize``, i.e. ``(width, height)``.
        data_gen: Object exposing a Keras-style ``flow(...)`` method
            (an ``ImageDataGenerator`` in this notebook).
        save_prefix: Leading part of every augmented file name.
        num_augmented: Number of augmented variants per source image; values
            of 0 or less only copy the originals.

    Returns:
        None. All results are written to ``dest_dir``.

    Notes:
        Files that OpenCV cannot decode (``cv2.imread`` returns ``None``) are
        skipped without being copied. Running the function again adds a new
        set of augmented files, because Keras appends a random suffix to each
        saved name.
    """
    image_extensions = ('.png', '.jpg', '.jpeg', '.gif', '.bmp', '.webp')

    # Only sub-directories are classes. A stray file in src_dir would otherwise
    # get a bogus class folder and then crash the os.listdir call below.
    class_names = sorted(
        entry for entry in os.listdir(src_dir)
        if os.path.isdir(os.path.join(src_dir, entry))
    )

    for class_name in class_names:
        class_src_dir = os.path.join(src_dir, class_name)
        class_dest_dir = os.path.join(dest_dir, class_name)
        os.makedirs(class_dest_dir, exist_ok=True)

        for file in tqdm(sorted(os.listdir(class_src_dir)), desc=f"Processing {class_name}"):
            if not file.lower().endswith(image_extensions):
                continue

            img_path = os.path.join(class_src_dir, file)
            image = cv2.imread(img_path)
            if image is None:
                # Not decodable by OpenCV: skip it entirely.
                continue

            # Copy original file to destination
            shutil.copy(img_path, class_dest_dir)

            if num_augmented <= 0:
                continue

            # OpenCV decodes to BGR, but the generator saves through PIL, which
            # expects RGB. Without this conversion every augmented file is
            # written with red and blue swapped.
            image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
            image = cv2.resize(image, img_size)
            batch = np.expand_dims(image, axis=0)  # flow() expects a batch axis

            stem = os.path.splitext(file)[0]
            for copy_idx in range(num_augmented):
                # Keras names saved files "<prefix>_<index>_<random int>". With a
                # single-image batch the index is always 0, so a shared prefix
                # leaves only the random suffix to keep files apart and they can
                # overwrite each other. A prefix unique per source image and
                # per copy avoids that.
                image_gen = data_gen.flow(
                    batch,
                    batch_size=1,
                    save_to_dir=class_dest_dir,
                    save_prefix=f"{save_prefix}_{stem}_{copy_idx}",
                    save_format='jpg',
                )
                # Drawing one batch is what triggers the save to disk.
                next(image_gen)

In [21]:
# Run the copy + augmentation step for every split, in train -> validation -> test order.
# NOTE(review): validation and test images are augmented as well, so those splits
# contain synthetic variants of their own images - confirm this is intended.
_split_pairs = (
    (train_dir, aug_train_dir),
    (val_dir, aug_val_dir),
    (test_dir, aug_test_dir),
)
for _source_dir, _target_dir in _split_pairs:
    copy_and_augment_data(_source_dir, _target_dir, IMG_SIZE, data_gen)

print("Data copy and augmentation completed.")

Processing kupu_raja_limau: 100%|██████████| 70/70 [00:54<00:00,  1.28it/s]
Processing kupu_batik_cap: 100%|██████████| 70/70 [00:46<00:00,  1.50it/s]
Processing kupu_hijau_biru: 100%|██████████| 70/70 [00:45<00:00,  1.54it/s]
Processing kupu_jarak: 100%|██████████| 70/70 [00:17<00:00,  3.91it/s]
Processing kupu_harimau_kuning_hijau: 100%|██████████| 70/70 [00:19<00:00,  3.56it/s]
Processing kupu_raja_helena: 100%|██████████| 70/70 [01:43<00:00,  1.47s/it]
Processing kupu_jojo: 100%|██████████| 70/70 [01:53<00:00,  1.63s/it]
Processing kupu_pantat_merah: 100%|██████████| 70/70 [01:28<00:00,  1.27s/it]
Processing kupu_raja_limau: 100%|██████████| 10/10 [00:12<00:00,  1.27s/it]
Processing kupu_batik_cap: 100%|██████████| 10/10 [00:13<00:00,  1.40s/it]
Processing kupu_hijau_biru: 100%|██████████| 10/10 [00:12<00:00,  1.22s/it]
Processing kupu_jarak: 100%|██████████| 10/10 [00:05<00:00,  1.80it/s]
Processing kupu_harimau_kuning_hijau: 100%|██████████| 10/10 [00:06<00:00,  1.58it/s]
Process

Data copy and augmentation completed.



