In [7]:
# === MOUNT GOOGLE DRIVE ===
# Mount Google Drive at /content/drive; the dataset paths configured below
# all live under /content/drive/MyDrive.
from google.colab import drive
drive.mount('/content/drive')

import os
import cv2
import pandas as pd
from tqdm import tqdm

# === CONFIGURATION ===
DATASET_DIR = "/content/drive/MyDrive/AksaraOgan_irbah"  # original dataset: one sub-folder per label
OUTPUT_DIR = "/content/drive/MyDrive/AksaraOgan_rename"     # renamed copies, re-saved as PNG

os.makedirs(OUTPUT_DIR, exist_ok=True)

summary = []  # one {"Huruf", "Jumlah Gambar"} record per label folder

# Sort the labels so the processing order does not depend on the order
# in which the filesystem happens to list the folders.
for label in sorted(os.listdir(DATASET_DIR)):
    label_path = os.path.join(DATASET_DIR, label)
    if not os.path.isdir(label_path):
        continue  # only sub-folders are treated as labels

    out_label_path = os.path.join(OUTPUT_DIR, label)
    os.makedirs(out_label_path, exist_ok=True)

    files = sorted(os.listdir(label_path))
    count = 0      # images successfully written for this label
    skipped = []   # source files that could not be read or saved

    for filename in tqdm(files, desc=f"Processing {label}"):
        img_path = os.path.join(label_path, filename)
        img = cv2.imread(img_path, cv2.IMREAD_UNCHANGED)
        if img is None:
            # Not a readable image (cv2.imread signals failure with None).
            skipped.append(filename)
            continue

        # Number by the count of images actually written, not by the position
        # in the directory listing, so a skipped file never leaves a gap in
        # the <label>_NNN.png sequence.
        new_name = f"{label}_{str(count + 1).zfill(3)}.png"
        out_path = os.path.join(out_label_path, new_name)
        # The .png extension makes OpenCV re-encode the image as PNG.
        # Only count the image when the write is reported as successful.
        if not cv2.imwrite(out_path, img):
            skipped.append(filename)
            continue
        count += 1

    if skipped:
        print(f"[{label}] {len(skipped)} file dilewati (tidak terbaca / gagal disimpan): {skipped}")

    summary.append({"Huruf": label, "Jumlah Gambar": count})

# Explicit columns keep sort_values() working even when no label folder was found.
df = pd.DataFrame(summary, columns=["Huruf", "Jumlah Gambar"]).sort_values(by="Huruf")
print("Semua file sudah diubah ke PNG & direname:")
print(df)


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


Processing Ke: 100%|██████████| 40/40 [00:00<00:00, 63.13it/s]
Processing Ge: 100%|██████████| 40/40 [00:00<00:00, 69.61it/s]
Processing De: 100%|██████████| 40/40 [00:00<00:00, 66.05it/s]
Processing Ne: 100%|██████████| 40/40 [00:00<00:00, 67.78it/s]
Processing Pe: 100%|██████████| 40/40 [00:00<00:00, 67.52it/s]
Processing Be: 100%|██████████| 40/40 [00:00<00:00, 61.96it/s]
Processing Me: 100%|██████████| 40/40 [00:00<00:00, 58.16it/s]
Processing Ce: 100%|██████████| 40/40 [00:00<00:00, 58.96it/s]
Processing Je: 100%|██████████| 40/40 [00:00<00:00, 52.52it/s]
Processing Nye: 100%|██████████| 40/40 [00:00<00:00, 55.15it/s]
Processing Se: 100%|██████████| 40/40 [00:00<00:00, 66.70it/s]
Processing Re: 100%|██████████| 40/40 [00:00<00:00, 68.97it/s]
Processing Le: 100%|██████████| 40/40 [00:00<00:00, 72.27it/s]
Processing We: 100%|██████████| 40/40 [00:00<00:00, 69.05it/s]
Processing He: 100%|██████████| 40/40 [00:00<00:00, 60.71it/s]
Processing Ye: 100%|██████████| 40/40 [00:00<00:00, 61

Semua file sudah diubah ke PNG & direname:
   Huruf  Jumlah Gambar
5     Be             40
7     Ce             40
2     De             40
16     E             40
20  Embe             40
17  Empe             40
19  Ence             40
18  Ende             40
1     Ge             40
14    He             40
8     Je             40
0     Ke             40
12    Le             40
6     Me             40
3     Ne             40
21   Nge             40
9    Nye             40
4     Pe             40
11    Re             40
10    Se             40
22    Te             40
13    We             40
15    Ye             40





In [8]:
import cv2
import os
import numpy as np
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tqdm import tqdm
import pandas as pd

# === CONFIGURATION ===
INPUT_DIR = "/content/drive/MyDrive/AksaraOgan_rename"   # output of the rename/convert step
OUTPUT_DIR = "/content/drive/MyDrive/AksaraOgan_1301220232"  # final augmented dataset
IMG_SIZE = 512          # every image is resized to IMG_SIZE x IMG_SIZE
ROTATE_DEG = 15         # rotation angle is drawn uniformly from [-ROTATE_DEG, ROTATE_DEG]
BLUR_KERNEL = (3, 3)    # Gaussian blur kernel size
SCALE_RANGE = 0.3   # zoom (passed as ImageDataGenerator zoom_range)
SHEAR_RANGE = 25    # shear in degrees (passed as ImageDataGenerator shear_range)
SEED = 42               # fixed seed so the random augmentations can be reproduced

os.makedirs(OUTPUT_DIR, exist_ok=True)

# Seed NumPy's global RNG, which np.random.uniform below draws from.
# NOTE(review): ImageDataGenerator is understood to sample its random
# transforms from the same global NumPy RNG -- confirm for the installed
# TensorFlow/Keras version.
np.random.seed(SEED)

# The generator's configuration never changes, so build it once instead of
# once per image.
datagen = ImageDataGenerator(
    shear_range=SHEAR_RANGE,
    zoom_range=SCALE_RANGE,
    fill_mode='constant',
    cval=255  # fill newly exposed pixels with 255 (white)
)

summary = []  # final report: one record per label

# Sorted label order keeps the sequence of random draws (and therefore the
# generated images) stable across runs.
for label in sorted(os.listdir(INPUT_DIR)):
    label_path = os.path.join(INPUT_DIR, label)
    if not os.path.isdir(label_path):
        continue

    out_label_path = os.path.join(OUTPUT_DIR, label)
    os.makedirs(out_label_path, exist_ok=True)

    files = sorted(os.listdir(label_path))
    count_label = 0    # images written for this label; also drives file numbering
    unreadable = []    # .png files that cv2.imread could not decode
    failed_writes = 0  # cv2.imwrite calls that reported failure

    for filename in tqdm(files, desc=f"Augmenting {label}"):
        if not filename.lower().endswith(".png"):
            continue

        img_path = os.path.join(label_path, filename)
        img = cv2.imread(img_path, cv2.IMREAD_COLOR)
        if img is None:
            # cv2.imread returns None on failure; passing that to cv2.resize
            # would abort the whole run, so skip the file and report it.
            unreadable.append(filename)
            continue
        img = cv2.resize(img, (IMG_SIZE, IMG_SIZE), interpolation=cv2.INTER_AREA)

        # Rotation augmentation: random angle around the image centre,
        # uncovered corners filled with white.
        h, w = img.shape[:2]
        M = cv2.getRotationMatrix2D((w//2, h//2), np.random.uniform(-ROTATE_DEG, ROTATE_DEG), 1.0)
        rotated = cv2.warpAffine(img, M, (w, h), borderValue=(255, 255, 255))

        # Gaussian blur augmentation
        blurred = cv2.GaussianBlur(img, BLUR_KERNEL, 0)

        # Zoom + shear augmentation: run the single image through the
        # generator as a batch of one and take the first (only) result.
        x = np.expand_dims(img, axis=0)
        aug_iter = datagen.flow(x, batch_size=1)
        aug_img = next(aug_iter)[0].astype(np.uint8)

        # Save the resized original followed by its three augmented variants,
        # numbered consecutively within the label folder.
        for variant in (img, rotated, blurred, aug_img):
            out_name = f"{label}_{str(count_label + 1).zfill(3)}.png"
            if cv2.imwrite(os.path.join(out_label_path, out_name), variant):
                count_label += 1
            else:
                failed_writes += 1

    if unreadable or failed_writes:
        print(f"[{label}] {len(unreadable)} file tidak terbaca: {unreadable}; {failed_writes} gagal disimpan")

    # per-label summary record
    summary.append({"Huruf": label, "Jumlah Gambar": count_label})

# === FINAL SUMMARY ===
# Explicit columns keep sort_values() working even when no label folder was found.
df = pd.DataFrame(summary, columns=["Huruf", "Jumlah Gambar"]).sort_values(by="Huruf")
total = df["Jumlah Gambar"].sum()

print("\n=== RINGKASAN DATASET ===")
print(df.to_string(index=False))
print(f"\nTOTAL SEMUA GAMBAR: {total}")


Augmenting Ke: 100%|██████████| 40/40 [00:03<00:00, 10.23it/s]
Augmenting Ge: 100%|██████████| 40/40 [00:05<00:00,  7.39it/s]
Augmenting De: 100%|██████████| 40/40 [00:04<00:00,  9.18it/s]
Augmenting Ne: 100%|██████████| 40/40 [00:03<00:00, 11.10it/s]
Augmenting Pe: 100%|██████████| 40/40 [00:05<00:00,  7.24it/s]
Augmenting Be: 100%|██████████| 40/40 [00:04<00:00,  8.57it/s]
Augmenting Me: 100%|██████████| 40/40 [00:03<00:00, 11.12it/s]
Augmenting Ce: 100%|██████████| 40/40 [00:04<00:00,  8.04it/s]
Augmenting Je: 100%|██████████| 40/40 [00:05<00:00,  7.26it/s]
Augmenting Nye: 100%|██████████| 40/40 [00:03<00:00, 10.78it/s]
Augmenting Se: 100%|██████████| 40/40 [00:04<00:00,  9.29it/s]
Augmenting Re: 100%|██████████| 40/40 [00:06<00:00,  6.65it/s]
Augmenting Le: 100%|██████████| 40/40 [00:03<00:00, 10.53it/s]
Augmenting We: 100%|██████████| 40/40 [00:04<00:00,  9.98it/s]
Augmenting He: 100%|██████████| 40/40 [00:06<00:00,  6.15it/s]
Augmenting Ye: 100%|██████████| 40/40 [00:03<00:00, 11


=== RINGKASAN DATASET ===
Huruf  Jumlah Gambar
   Be            160
   Ce            160
   De            160
    E            160
 Embe            160
 Empe            160
 Ence            160
 Ende            160
   Ge            160
   He            160
   Je            160
   Ke            160
   Le            160
   Me            160
   Ne            160
  Nge            160
  Nye            160
   Pe            160
   Re            160
   Se            160
   Te            160
   We            160
   Ye            160

TOTAL SEMUA GAMBAR: 3680



