In [9]:
# Mount Google Drive into the Colab filesystem so the project folder
# under /content/drive is reachable from the cells below.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


Set Root Project Folder

In [10]:
# Root folder of the project on the mounted Drive; the dataset paths used
# by later cells are built relative to this location.
PROJECT_ROOT = "/content/drive/MyDrive/Thoriq/UAP_Birds_Classification"

Validasi Struktur Folder

In [11]:
import os

# Sub-folders (relative to PROJECT_ROOT) that the rest of the notebook expects.
required_dirs = [
    "dataset/raw",
    "dataset/processed/train",
    "dataset/processed/val",
    "dataset/processed/test"
]

# Print one status line per expected folder; nothing is created or modified.
for rel_dir in required_dirs:
    abs_dir = os.path.join(PROJECT_ROOT, rel_dir)
    if os.path.exists(abs_dir):
        status = "OK"
    else:
        status = "TIDAK ADA"
    print(rel_dir, "=>", status)

dataset/raw => OK
dataset/processed/train => OK
dataset/processed/val => OK
dataset/processed/test => OK


Set Path Dataset Asli

In [12]:
# Folder holding the original dataset (read by the split cell).
SOURCE_DIR = PROJECT_ROOT + "/dataset/raw/Birds_dataset"
# Folder that receives the train/val/test splits.
TARGET_DIR = PROJECT_ROOT + "/dataset/processed"

Import Library

In [13]:
import shutil
import random
from tensorflow.keras.preprocessing.image import (
    ImageDataGenerator,
    load_img,
    img_to_array,
    array_to_img
)

Parameter Split & Augmentasi

In [14]:
# Size (width/height in pixels) passed to load_img when augmenting.
IMG_SIZE = (224, 224)

# Fractions of each class assigned to every split.
TRAIN_RATIO = 0.6
VAL_RATIO   = 0.2
TEST_RATIO  = 0.2

AUGMENT_TARGET = 5000   # target total number of TRAIN images after augmentation
SEED = 42
random.seed(SEED)

# Compare with a tolerance instead of `== 1.0`: float sums of perfectly valid
# ratios are often not exactly 1.0 (e.g. 0.7 + 0.2 + 0.1 == 0.9999999999999999).
_ratio_sum = TRAIN_RATIO + VAL_RATIO + TEST_RATIO
assert abs(_ratio_sum - 1.0) < 1e-9, (
    f"TRAIN_RATIO + VAL_RATIO + TEST_RATIO must sum to 1.0, got {_ratio_sum}"
)

Bersihkan Folder processed

In [15]:
# Empty every split folder under TARGET_DIR so a fresh split does not mix
# with files left over from a previous run. The split folders themselves
# are kept; only their contents are removed.
print("\nMembersihkan folder processed...")
for split in ["train", "val", "test"]:
    split_path = os.path.join(TARGET_DIR, split)
    if not os.path.isdir(split_path):
        continue
    for entry in os.listdir(split_path):
        entry_path = os.path.join(split_path, entry)
        # shutil.rmtree only works on real directories; stray files
        # (e.g. .DS_Store) and symlinks must be removed with os.remove.
        if os.path.isdir(entry_path) and not os.path.islink(entry_path):
            shutil.rmtree(entry_path)
        else:
            os.remove(entry_path)
print("Folder processed bersih ✅")


Membersihkan folder processed...
Folder processed bersih ✅


Split Dataset (Train / Val / Test)

In [16]:
# Copy every class folder of SOURCE_DIR into TARGET_DIR/{train,val,test}/<class>.
# Train and val sizes come from TRAIN_RATIO / VAL_RATIO (rounded down);
# the test split receives whatever remains.
print("\nSplit dataset...")

# Re-seed here so that re-running this cell on its own reproduces the same split.
random.seed(SEED)

# os.listdir returns entries in arbitrary order, so sort before shuffling;
# otherwise the seeded shuffle is not reproducible across runs/machines.
classes = sorted(os.listdir(SOURCE_DIR))

for cls in classes:
    cls_path = os.path.join(SOURCE_DIR, cls)
    if not os.path.isdir(cls_path):
        continue

    # Keep regular files only: shutil.copy would fail on a nested directory.
    images = sorted(
        name for name in os.listdir(cls_path)
        if os.path.isfile(os.path.join(cls_path, name))
    )
    random.shuffle(images)

    n_total = len(images)
    n_train = int(n_total * TRAIN_RATIO)
    n_val   = int(n_total * VAL_RATIO)

    splits = {
        "train": images[:n_train],
        "val":   images[n_train:n_train + n_val],
        "test":  images[n_train + n_val:]
    }

    for split, imgs in splits.items():
        out_dir = os.path.join(TARGET_DIR, split, cls)
        os.makedirs(out_dir, exist_ok=True)

        for img in imgs:
            shutil.copy(
                os.path.join(cls_path, img),
                os.path.join(out_dir, img)
            )

print("Split dataset selesai ✅")


Split dataset...
Split dataset selesai ✅


Hitung Data Augmentasi

In [17]:
def count_images(path):
    """Return the total number of files found under `path`, recursively.

    Every file is counted regardless of its extension.
    """
    total = 0
    for _root, _dirs, filenames in os.walk(path):
        total += len(filenames)
    return total


def count_train_images():
    """Return the number of files under the train split of TARGET_DIR."""
    train_path = os.path.join(TARGET_DIR, "train")
    return count_images(train_path)


# Report how many files each split holds before any augmentation is applied.
print("\nJumlah sebelum augmentasi:")
print("Train:", count_train_images())
for label, split_name in (("Val", "val"), ("Test", "test")):
    split_dir = os.path.join(TARGET_DIR, split_name)
    print(f"{label}:", count_images(split_dir))
print("Total:", count_images(TARGET_DIR))


Jumlah sebelum augmentasi:
Train: 121
Val: 39
Test: 45
Total: 205


Augmentasi (TRAIN SAJA)

In [18]:
# Grow every class folder of the TRAIN split to TARGET_PER_CLASS files by
# saving randomly transformed copies of its original images as aug_<n>.jpg.
# The val and test splits are not touched.
print("\nMulai augmentasi TRAIN...")

train_dir = os.path.join(TARGET_DIR, "train")
# Only sub-directories are classes; stray files must not inflate num_classes.
# Sorted so the processing order does not depend on os.listdir's ordering.
classes = sorted(
    name for name in os.listdir(train_dir)
    if os.path.isdir(os.path.join(train_dir, name))
)
num_classes = len(classes)
if num_classes == 0:
    # Without this guard the division below fails with ZeroDivisionError.
    raise RuntimeError(f"Tidak ada folder kelas di {train_dir}")

TARGET_PER_CLASS = AUGMENT_TARGET // num_classes
print("Jumlah kelas:", num_classes)
print("Target per kelas:", TARGET_PER_CLASS)

augmentor = ImageDataGenerator(
    rotation_range=30,
    width_shift_range=0.2,
    height_shift_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode="nearest"
)

# Dedicated RNG: the picked source images and the per-image transform seeds
# are the same on every run, regardless of the global random state.
aug_rng = random.Random(SEED)

for cls in classes:
    cls_path = os.path.join(train_dir, cls)
    existing = set(os.listdir(cls_path))
    # Sample only from original images. Files named aug_* come from a previous
    # (possibly interrupted) run and must not be augmented a second time.
    images = sorted(name for name in existing if not name.startswith("aug_"))

    count = len(existing)
    print(f"{cls}: awal = {count}")

    if count < TARGET_PER_CLASS and not images:
        # Nothing to sample from; random choice on an empty list would raise.
        print(f"{cls}: tidak ada gambar sumber, dilewati")
        continue

    next_index = count
    while count < TARGET_PER_CLASS:
        img_name = aug_rng.choice(images)
        img = load_img(os.path.join(cls_path, img_name), target_size=IMG_SIZE)
        x = img_to_array(img)

        # One random transform of a single 3D image, seeded for reproducibility.
        augmented = augmentor.random_transform(x, seed=aug_rng.randrange(2**32))

        # Never overwrite a file that is already present in the class folder.
        save_name = f"aug_{next_index}.jpg"
        while save_name in existing:
            next_index += 1
            save_name = f"aug_{next_index}.jpg"

        array_to_img(augmented).save(os.path.join(cls_path, save_name))
        existing.add(save_name)
        next_index += 1
        count += 1

    print(f"{cls}: akhir = {count}")

print("Augmentasi selesai ✅")


Mulai augmentasi TRAIN...
Jumlah kelas: 4
Target per kelas: 1250
gray parrot.jpg: awal = 31
gray parrot.jpg: akhir = 1250
amazon green parrot.jpg: awal = 31
amazon green parrot.jpg: akhir = 1250
macaw.jpg: awal = 31
macaw.jpg: akhir = 1250
white parrot.jpg: awal = 28
white parrot.jpg: akhir = 1250
Augmentasi selesai ✅


Validasi Akhir Dataset

In [19]:
# Final file counts per split, followed by the overall total under TARGET_DIR.
for label, split_name in (("Train", "train"), ("Val", "val"), ("Test", "test")):
    print(f"{label}:", count_images(f"{TARGET_DIR}/{split_name}"))
print("Total:", count_images(TARGET_DIR))


Train: 5000
Val: 39
Test: 45
Total: 5084
