# 1. Import Library

In [1]:
import os
import shutil
import random
from torchvision import transforms
from PIL import Image

# 2. Preprocessing

## a. Splitting Data

In [2]:
# --- Train/validation split -------------------------------------------------
# Moves a fixed number of images per class out of the training folder into a
# sibling "val" folder. The remaining images stay in the training folder.
BASE_DIR = r"C:\Users\IKBAR\uap_ML\src\dataset\Coffe_Bean"
TRAIN_SRC = os.path.join(BASE_DIR, "train")
TRAIN_DST = os.path.join(BASE_DIR, "train")
VAL_DIR = os.path.join(BASE_DIR, "val")

VAL_TOTAL = 400
random.seed(42)

os.makedirs(TRAIN_DST, exist_ok=True)
os.makedirs(VAL_DIR, exist_ok=True)

# Only sub-directories are classes; stray files at the split root are ignored.
class_names = sorted(
    entry for entry in os.listdir(TRAIN_SRC)
    if os.path.isdir(os.path.join(TRAIN_SRC, entry))
)
if not class_names:
    raise FileNotFoundError(f"No class folders found in {TRAIN_SRC}")
val_per_class = VAL_TOTAL // len(class_names)

# With identical source and destination folders the "train" move is a
# self-move, so it is skipped entirely in that case.
same_train_dir = os.path.abspath(TRAIN_SRC) == os.path.abspath(TRAIN_DST)

for cls in class_names:
    src_cls = os.path.join(TRAIN_SRC, cls)
    train_cls = os.path.join(TRAIN_DST, cls)
    val_cls = os.path.join(VAL_DIR, cls)

    os.makedirs(train_cls, exist_ok=True)
    os.makedirs(val_cls, exist_ok=True)

    # os.listdir returns entries in arbitrary order; sorting first makes the
    # seeded shuffle select the same files on every machine.
    imgs = sorted(
        name for name in os.listdir(src_cls)
        if os.path.isfile(os.path.join(src_cls, name))
    )
    random.shuffle(imgs)

    # Top the validation folder up to val_per_class instead of always moving
    # val_per_class files, so re-running this cell does not drain the train set.
    still_needed = max(0, val_per_class - len(os.listdir(val_cls)))
    val_imgs = imgs[:still_needed]
    train_imgs = imgs[still_needed:]

    for img in val_imgs:
        shutil.move(
            os.path.join(src_cls, img),
            os.path.join(val_cls, img)
        )

    if not same_train_dir:
        for img in train_imgs:
            shutil.move(
                os.path.join(src_cls, img),
                os.path.join(train_cls, img)
            )


## b. Resize & Normalisasi

In [3]:
# Side length, in pixels, of the square every image is resized to.
IMG_SIZE = 224

# Resize to IMG_SIZE x IMG_SIZE, then convert the PIL image to a tensor.
preprocess_transform = transforms.Compose(
    [
        transforms.Resize(size=(IMG_SIZE, IMG_SIZE)),
        transforms.ToTensor(),
    ]
)

def preprocess_and_save(src_dir, dst_dir):
    """Apply ``preprocess_transform`` to every image of every class folder.

    Each file found at ``src_dir/<class>/<name>`` is opened, converted to RGB,
    passed through the module-level ``preprocess_transform`` and written to
    ``dst_dir/<class>/<name>``. The tensor is converted straight back to a PIL
    image before saving, so what is persisted is the resized image rather than
    a tensor.

    Args:
        src_dir: Directory whose sub-directories are the class folders to read.
        dst_dir: Directory that receives one sub-directory per class. It may be
            the same as ``src_dir``, in which case files are overwritten in place.

    Entries of ``src_dir`` that are not directories, and entries of a class
    folder that are not regular files, are skipped.
    """
    # Build the converter once instead of once per image.
    to_pil = transforms.ToPILImage()

    for cls in os.listdir(src_dir):
        src_cls = os.path.join(src_dir, cls)
        if not os.path.isdir(src_cls):
            # A stray file at the split root is not a class folder.
            continue

        dst_cls = os.path.join(dst_dir, cls)
        os.makedirs(dst_cls, exist_ok=True)

        for img in os.listdir(src_cls):
            src_path = os.path.join(src_cls, img)
            if not os.path.isfile(src_path):
                continue

            # Close the source file before saving: the destination may be the
            # very same path when src_dir == dst_dir.
            with Image.open(src_path) as raw:
                image = raw.convert("RGB")

            tensor = preprocess_transform(image)
            to_pil(tensor).save(os.path.join(dst_cls, img))

# Resize the train, validation and test splits in place (source == destination).
for split_dir in (TRAIN_DST, VAL_DIR, os.path.join(BASE_DIR, "test")):
    preprocess_and_save(split_dir, split_dir)

# 3. Augmentasi Data

In [6]:
# Augmented images are written into the training split itself. The path is
# derived from BASE_DIR so it cannot drift from the folder the split cell used.
AUG_TRAIN_DIR = os.path.join(BASE_DIR, "train")

# Desired number of files in the training split after augmentation, divided
# evenly (integer division) across the classes.
TARGET_TRAIN_TOTAL = 4200
target_per_class = TARGET_TRAIN_TOTAL // len(class_names)

# Random geometric and photometric perturbations, ending with a tensor.
augment_transform = transforms.Compose([
    transforms.RandomRotation(30),
    transforms.RandomHorizontalFlip(),
    transforms.RandomResizedCrop(IMG_SIZE, scale=(0.8, 1.0)),
    transforms.ColorJitter(brightness=0.2, contrast=0.2),
    transforms.ToTensor()
])

In [9]:
# Generate augmented copies until every class folder holds target_per_class files.
to_pil = transforms.ToPILImage()  # built once, reused for every image

for cls in class_names:
    src = os.path.join(TRAIN_DST, cls)
    dst = os.path.join(AUG_TRAIN_DIR, cls)
    os.makedirs(dst, exist_ok=True)

    # Previously generated "aug_*" files are not used as sources, so a later
    # run with a higher target does not augment already-augmented images.
    imgs = sorted(name for name in os.listdir(src) if not name.startswith("aug_"))

    # Count the destination once and track it locally instead of re-listing
    # the directory on every iteration.
    count = len(os.listdir(dst))
    if count < target_per_class and not imgs:
        raise ValueError(f"No source images to augment in {src}")

    idx = 0
    while count < target_per_class:
        out_path = os.path.join(dst, f"aug_{idx}.jpg")
        if os.path.exists(out_path):
            # Never overwrite an existing augmented file; try the next index.
            idx += 1
            continue

        # Cycle through the source images in order.
        img_name = imgs[idx % len(imgs)]
        with Image.open(os.path.join(src, img_name)) as raw:
            image = raw.convert("RGB")

        aug_img = to_pil(augment_transform(image))
        aug_img.save(out_path)

        count += 1
        idx += 1

In [10]:
import os

def count_images(base_dir):
    """Return the number of entries inside the sub-directories of ``base_dir``.

    Every direct child of ``base_dir`` that is a directory contributes the
    number of entries it contains; children that are not directories are
    ignored. Entries are counted as listed, without checking their type.
    """
    child_paths = (os.path.join(base_dir, entry) for entry in os.listdir(base_dir))
    return sum(len(os.listdir(path)) for path in child_paths if os.path.isdir(path))

# Report how many files each split contains.
print("JUMLAH DATA")
for label, split_path in (
    ("Train Augmented :", r"C:\Users\IKBAR\uap_ML\src\dataset\Coffe_Bean\train"),
    ("Validation      :", r"C:\Users\IKBAR\uap_ML\src\dataset\Coffe_Bean\val"),
    ("Test            :", r"C:\Users\IKBAR\uap_ML\src\dataset\Coffe_Bean\test"),
):
    print(label, count_images(split_path))

JUMLAH DATA
Train Augmented : 4200
Validation      : 400
Test            : 400
