In [4]:
import os
from collections import defaultdict
from sklearn.model_selection import train_test_split

# Root directory under which the dataset folders live
base_path_aksarali = r'D:\Proyek Aksara\Datasets'

# Dataset key -> directory that contains that dataset's class subfolders
dataset_paths_aksarali = dict(
    aksara_bali=os.path.join(base_path_aksarali, 'aksarali'),
)

# Dataset key -> names of the class subfolders to load.
# The list order matters: it is later used to derive the integer label of each class.
relevant_subfolders_aksarali = dict(
    aksara_bali=[
        'ha', 'na', 'ca', 'ra', 'ka', 'da',
        'ta', 'sa', 'wa', 'la', 'ma', 'ga',
        'ba', 'nga', 'pa', 'ja', 'ya', 'nya',
    ],
)

# Collect image file paths together with their class labels
def load_image_paths_labels(base_path_aksarali, relevant_subfolders):
    """Return ``(image_path, class_name)`` pairs for every image in the listed class folders.

    Args:
        base_path_aksarali: Mapping of dataset key -> dataset directory.
            Despite the name this is a dict; ``.items()`` is called on it.
        relevant_subfolders: Mapping of dataset key -> iterable of class
            subfolder names to scan inside that dataset directory.

    Returns:
        A list of ``(image_path, subfolder_name)`` tuples. Classes appear in
        the order given by ``relevant_subfolders`` and files within a class
        are sorted by name. Subfolders that do not exist (or are not
        directories) contribute nothing.

    Raises:
        KeyError: If a dataset key has no entry in ``relevant_subfolders``.
    """
    image_extensions = ('.png', '.jpg', '.jpeg', '.bmp', '.gif')
    image_paths_labels = []
    for label, folder_path in base_path_aksarali.items():
        for subfolder in relevant_subfolders[label]:
            subfolder_path = os.path.join(folder_path, subfolder)
            # isdir (not exists) so a stray regular file with a class name
            # is skipped instead of making os.listdir raise.
            if not os.path.isdir(subfolder_path):
                continue
            # os.listdir returns entries in arbitrary order; sorting keeps the
            # result, and any seeded split built on it, reproducible.
            for filename in sorted(os.listdir(subfolder_path)):
                if filename.lower().endswith(image_extensions):
                    image_path = os.path.join(subfolder_path, filename)
                    image_paths_labels.append((image_path, subfolder))
    return image_paths_labels

# Load (image path, class name) pairs for the configured dataset
image_paths_labels = load_image_paths_labels(dataset_paths_aksarali, relevant_subfolders_aksarali)

# The loader silently skips missing folders, so a wrong dataset path yields an
# empty list; zip(*[]) would then fail with an opaque "not enough values to
# unpack" error. Report the actual cause instead.
if not image_paths_labels:
    raise ValueError(
        "No images found under {!r}; check the dataset path and class subfolders.".format(
            dataset_paths_aksarali
        )
    )

# Separate the pairs into two parallel tuples: paths and class names
image_paths, labels = zip(*image_paths_labels)

# Numeric labels: each class name maps to its position in the configured class list.
# NOTE(review): Keras flow_from_directory assigns class indices in alphanumeric
# folder order unless `classes=` is passed, so these indices may not match the
# generators' `class_indices` - confirm before mixing the two.
label_map = {label: idx for idx, label in enumerate(relevant_subfolders_aksarali['aksara_bali'])}
numeric_labels = [label_map[label] for label in labels]

# Stratified 70:30 train/test split with a fixed seed
train_paths, test_paths, train_labels, test_labels = train_test_split(
    image_paths, numeric_labels, test_size=0.3, stratify=numeric_labels, random_state=42
)


In [5]:
import shutil

# Utility function to split data
def split_data(base_path_aksarali, output_base_path_aksarali, split_ratio=0.7,
               relevant_subfolders=None):
    """Copy each class folder's images into ``train/<class>`` and ``test/<class>`` trees.

    Args:
        base_path_aksarali: Mapping of dataset key -> dataset directory
            (a dict; ``.items()`` is called on it).
        output_base_path_aksarali: Directory under which the ``train`` and
            ``test`` trees are created.
        split_ratio: Fraction of each class's images that goes to ``train``.
        relevant_subfolders: Mapping of dataset key -> class subfolder names.
            Defaults to the notebook-level ``relevant_subfolders_aksarali``.

    Each class is split independently with ``random_state=42``. Source files
    are copied, never moved. Class folders that do not exist are skipped.
    """
    if relevant_subfolders is None:
        # Backward-compatible default: the notebook-level configuration.
        relevant_subfolders = relevant_subfolders_aksarali

    image_extensions = ('.png', '.jpg', '.jpeg', '.bmp', '.gif')
    # 1 - 0.7 evaluates to 0.30000000000000004; scikit-learn takes
    # ceil(test_size * n), so that stray bit puts one extra image in the test
    # set for class sizes such as 10 or 250. Rounding restores exactly 0.3.
    test_fraction = round(1 - split_ratio, 10)

    for label, folder_path in base_path_aksarali.items():
        for subfolder in relevant_subfolders[label]:
            subfolder_path = os.path.join(folder_path, subfolder)
            if not os.path.isdir(subfolder_path):
                continue

            # os.listdir order is arbitrary; sort so the seeded split assigns
            # the same files to the same side on every run and machine.
            files = sorted(
                f for f in os.listdir(subfolder_path)
                if f.lower().endswith(image_extensions)
            )
            if not files:
                # train_test_split raises on empty input; nothing to copy here.
                print("Skipping '{}': no image files found".format(subfolder_path))
                continue

            if len(files) < 2:
                # A single image cannot be split; keep it for training.
                train_files, test_files = files, []
            else:
                train_files, test_files = train_test_split(
                    files, test_size=test_fraction, random_state=42
                )

            # Create train and test directories
            train_output_dir = os.path.join(output_base_path_aksarali, 'train', subfolder)
            test_output_dir = os.path.join(output_base_path_aksarali, 'test', subfolder)
            os.makedirs(train_output_dir, exist_ok=True)
            os.makedirs(test_output_dir, exist_ok=True)

            for file in train_files:
                shutil.copy(os.path.join(subfolder_path, file), os.path.join(train_output_dir, file))
            for file in test_files:
                shutil.copy(os.path.join(subfolder_path, file), os.path.join(test_output_dir, file))

# Destination root; split_data creates the train/ and test/ trees beneath it
output_base_path_aksarali = r'D:\Proyek Aksara\Datasets_split_aksarali'

# Copy the dataset into the split layout using the default 70:30 ratio
split_data(
    base_path_aksarali=dataset_paths_aksarali,
    output_base_path_aksarali=output_base_path_aksarali,
)


In [6]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout

# Directories produced by the split step
train_dir = os.path.join(output_base_path_aksarali, 'train')
test_dir = os.path.join(output_base_path_aksarali, 'test')

# Training generator: rescale pixels to [0, 1] and apply random augmentation.
# NOTE(review): horizontal_flip mirrors the glyphs; for handwritten characters
# a mirrored image may no longer be a valid sample of its class - confirm that
# this augmentation is intended.
train_datagen = ImageDataGenerator(
    rescale=1./255,
    rotation_range=20,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True
)

# Test generator: rescaling only, no augmentation
test_datagen = ImageDataGenerator(rescale=1./255)

# Build the directory iterators; class_mode='sparse' yields integer labels
train_generator = train_datagen.flow_from_directory(
    train_dir,
    target_size=(128, 128),
    batch_size=32,
    class_mode='sparse'
)

# shuffle=False keeps batches in directory order so that predict() output
# lines up with test_generator.classes / .filenames during evaluation.
test_generator = test_datagen.flow_from_directory(
    test_dir,
    target_size=(128, 128),
    batch_size=32,
    class_mode='sparse',
    shuffle=False
)


Found 3133 images belonging to 18 classes.
Found 1360 images belonging to 18 classes.
