In [1]:
import os
from collections import defaultdict
from sklearn.model_selection import train_test_split

# Project root (raw string so the Windows backslash is kept literally)
base_path_aksarawi = r'D:\Proyek Aksara 2'

# Dataset key -> directory that holds the class subfolders for that dataset
dataset_paths_aksarawi = dict(
    aksara_jawi=os.path.join(base_path_aksarawi, 'Jawi'),
)

# Dataset key -> class subfolders to use. The folder name is used as the class
# label, and the position in this list becomes the numeric label downstream.
relevant_subfolders_aksarawi = dict(
    aksara_jawi=[
        'A (alif)',
        'B (ba)',
        'C (ca)',
        'D (dad [dOd])',
        'D (dal)',
        'F (fa)',
        'G (ga)',
        'GH (gain)',
        'H (ha)',
        'J (jim)',
        'K (kaf)',
        'KH (kha)',
        'L (lam)',
        'M (mim)',
        'N (nun)',
        'NG (nga)',
        'NY (nya)',
        'P (pa)',
        'R (ra,rO)',
        'S (sAD [sOD])',
        'S (sin)',
        'S, th (tsa) [tha]',
        'SY (syin)',
        'T (ta)',
        'T,H (ta marbutah)',
        'V (va)',
        'W, U, O (wau)',
        'Y akhir (ye)',
        'Y,I,E (ya)',
        'Z (dah) [dhal]',
    ],
)

# Load image paths together with their labels
def load_image_paths_labels(base_path_aksarawi, relevant_subfolders):
    """Collect ``(image_path, label)`` pairs from the configured class folders.

    Args:
        base_path_aksarawi: Despite the name, a mapping from dataset key to the
            directory that contains that dataset's class subfolders.
        relevant_subfolders: Mapping from the same dataset keys to the list of
            subfolder names to scan. Each subfolder name is used as the label.

    Returns:
        A list of ``(image_path, subfolder_name)`` tuples. Subfolders are
        visited in the order given and files within a subfolder are sorted by
        name, so the result does not depend on the file system's listing order.
        Subfolders that do not exist (or are not directories) are skipped.

    Raises:
        KeyError: If a dataset key has no entry in ``relevant_subfolders``.
    """
    image_extensions = ('.png', '.jpg', '.jpeg', '.bmp', '.gif')
    image_paths_labels = []
    for label, folder_path in base_path_aksarawi.items():
        for subfolder in relevant_subfolders[label]:
            subfolder_path = os.path.join(folder_path, subfolder)
            # isdir (not exists): a plain file with the class name would make
            # os.listdir raise NotADirectoryError.
            if not os.path.isdir(subfolder_path):
                continue
            # os.listdir returns entries in arbitrary order; sort so that a
            # seeded split downstream is reproducible across machines.
            for filename in sorted(os.listdir(subfolder_path)):
                if not filename.lower().endswith(image_extensions):
                    continue
                image_path = os.path.join(subfolder_path, filename)
                # Ignore directories that merely look like images by name.
                if os.path.isfile(image_path):
                    image_paths_labels.append((image_path, subfolder))
    return image_paths_labels

# Load every (image path, class folder name) pair for the configured dataset
image_paths_labels = load_image_paths_labels(dataset_paths_aksarawi, relevant_subfolders_aksarawi)

# The loader skips missing folders silently, so a wrong base path yields an
# empty list. Fail here with a clear message instead of the cryptic
# "not enough values to unpack" that zip(*[]) would produce below.
if not image_paths_labels:
    raise ValueError(
        f"No images found under {dataset_paths_aksarawi}; "
        "check the base path and the class subfolder names."
    )

# Separate paths and labels
image_paths, labels = zip(*image_paths_labels)

# Numeric labels: index of the class folder in the configured list
label_map = {label: idx for idx, label in enumerate(relevant_subfolders_aksarawi['aksara_jawi'])}
numeric_labels = [label_map[label] for label in labels]

# Stratified 80/20 split of the path list.
# NOTE(review): this in-memory split is computed independently of the on-disk
# split written by split_data (different ratio, not the same assignment) --
# confirm which of the two downstream code is meant to use.
train_paths, test_paths, train_labels, test_labels = train_test_split(
    image_paths, numeric_labels, test_size=0.2, stratify=numeric_labels, random_state=42
)


In [2]:
import shutil

# Utility function to split data
def split_data(base_path_aksarawi, output_base_path_aksarawi, split_ratio=0.7, relevant_subfolders=None):
    """Copy each class folder's images into ``train/`` and ``test/`` folders.

    For every configured class subfolder, the image files are split with
    ``train_test_split`` (seeded with ``random_state=42``) and copied to
    ``<output>/train/<subfolder>`` and ``<output>/test/<subfolder>``.

    Args:
        base_path_aksarawi: Despite the name, a mapping from dataset key to the
            directory that contains that dataset's class subfolders.
        output_base_path_aksarawi: Directory under which ``train`` and ``test``
            are created.
        split_ratio: Fraction of each class's images that goes to ``train``.
        relevant_subfolders: Mapping from dataset key to the list of class
            subfolder names. Defaults to the module-level
            ``relevant_subfolders_aksarawi``.

    Notes:
        Existing files in the output folders are left in place (directories are
        created with ``exist_ok=True`` and never cleared), so re-running after
        the source folders change can leave a file in both ``train`` and
        ``test``.
    """
    import warnings

    if relevant_subfolders is None:
        relevant_subfolders = relevant_subfolders_aksarawi
    image_extensions = ('.png', '.jpg', '.jpeg', '.bmp', '.gif')

    for label, folder_path in base_path_aksarawi.items():
        for subfolder in relevant_subfolders[label]:
            subfolder_path = os.path.join(folder_path, subfolder)
            if not os.path.isdir(subfolder_path):
                warnings.warn(f"Skipping missing class folder: {subfolder_path}")
                continue

            # os.listdir order is arbitrary; sort so the seeded split assigns
            # the same files to train/test on every run and every machine.
            files = sorted(
                f for f in os.listdir(subfolder_path)
                if f.lower().endswith(image_extensions)
            )

            if len(files) < 2:
                # train_test_split cannot produce two non-empty parts here and
                # would abort the whole run; keep what exists in train.
                warnings.warn(
                    f"Class folder {subfolder_path} has {len(files)} image(s); "
                    "nothing is placed in test."
                )
                train_files, test_files = files, []
            else:
                # Pass the train fraction directly. `1 - split_ratio` is not
                # exact in floating point (1 - 0.7 == 0.30000000000000004) and
                # scikit-learn rounds a fractional test size up, which can move
                # one extra file into test (e.g. 4 of 10 instead of 3).
                train_files, test_files = train_test_split(
                    files, train_size=split_ratio, random_state=42
                )

            # Create train and test directories
            train_output_dir = os.path.join(output_base_path_aksarawi, 'train', subfolder)
            test_output_dir = os.path.join(output_base_path_aksarawi, 'test', subfolder)
            os.makedirs(train_output_dir, exist_ok=True)
            os.makedirs(test_output_dir, exist_ok=True)

            for file in train_files:
                shutil.copy(os.path.join(subfolder_path, file), os.path.join(train_output_dir, file))
            for file in test_files:
                shutil.copy(os.path.join(subfolder_path, file), os.path.join(test_output_dir, file))

# Apply the split.
# The output folder is derived from base_path_aksarawi so the project root is
# defined in one place; on Windows this evaluates to the same path as the
# literal r'D:\Proyek Aksara 2\Datasets_split_aksarawi'.
output_base_path_aksarawi = os.path.join(base_path_aksarawi, 'Datasets_split_aksarawi')
split_data(dataset_paths_aksarawi, output_base_path_aksarawi)


In [None]:
import os

# Location of the split dataset on disk
output_base_path_aksarawi = r'D:\Proyek Aksara 2\Datasets_split_aksarawi'
train_dir = os.path.join(output_base_path_aksarawi, 'train')

# The subfolder names inside the train directory are the class names/labels.
# Keep directories only, so stray files in train_dir (e.g. Thumbs.db or
# desktop.ini on Windows) are not counted as classes.
classes = sorted(
    entry for entry in os.listdir(train_dir)
    if os.path.isdir(os.path.join(train_dir, entry))
)

# Show the classes, numbered from 1
print("Kelas yang ada dalam dataset:")
for idx, cls in enumerate(classes, start=1):
    print(f"{idx}. {cls}")
