In [1]:
import os
from collections import defaultdict
from sklearn.model_selection import train_test_split

# Root directory of the project; the dataset folder lives underneath it.
base_path_aksarantara = r'D:\Proyek Aksara 2'

# Dataset name -> directory that holds that dataset's per-class image folders.
dataset_paths_aksarantara = {}
dataset_paths_aksarantara['aksara_lontara'] = os.path.join(base_path_aksarantara, 'lontara')

# Dataset name -> class subfolders to use. The position of a name in this list
# is later used as that class's numeric label, so the order matters.
relevant_subfolders_aksarantara = {
    'aksara_lontara': (
        'a ba ca da ga ha ja ka la ma mpa na '
        'nca nga ngka nra nya pa ra sa ta wa ya'
    ).split(),
}

# Collect (image path, class label) pairs from the dataset folders
def load_image_paths_labels(base_path_aksarantara, relevant_subfolders):
    """Collect ``(image_path, label)`` pairs for every image in the selected class folders.

    Args:
        base_path_aksarantara: Mapping of dataset name -> dataset directory.
            Despite the parameter name, this is a dict, not a single path.
        relevant_subfolders: Mapping of dataset name -> iterable of class
            subfolder names to scan inside that dataset's directory.

    Returns:
        List of ``(image_path, subfolder_name)`` tuples. Class subfolders are
        visited in the order given; files inside a subfolder are returned in
        sorted filename order. Subfolders that do not exist (or are not
        directories) are skipped.

    Raises:
        KeyError: If a dataset name has no entry in ``relevant_subfolders``.
    """
    image_extensions = ('.png', '.jpg', '.jpeg', '.bmp', '.gif')
    image_paths_labels = []
    for label, folder_path in base_path_aksarantara.items():
        for subfolder in relevant_subfolders[label]:
            subfolder_path = os.path.join(folder_path, subfolder)
            # isdir rather than exists: a plain file named like a class folder
            # would pass an existence check and make os.listdir raise.
            if not os.path.isdir(subfolder_path):
                continue
            # os.listdir returns entries in arbitrary order; sorting keeps the
            # result (and any seeded split built on it) reproducible.
            for filename in sorted(os.listdir(subfolder_path)):
                # Extension match is case-insensitive.
                if filename.lower().endswith(image_extensions):
                    image_path = os.path.join(subfolder_path, filename)
                    image_paths_labels.append((image_path, subfolder))
    return image_paths_labels

# Load every (image path, class name) pair
image_paths_labels = load_image_paths_labels(dataset_paths_aksarantara, relevant_subfolders_aksarantara)

# Fail early with a clear message: zip(*[]) below would otherwise raise an
# opaque "not enough values to unpack" ValueError when no image was found.
if not image_paths_labels:
    raise ValueError(
        f"No images found under {dataset_paths_aksarantara}; check the dataset paths."
    )

# Separate the paths from their labels
image_paths, labels = zip(*image_paths_labels)

# Numeric labels: each class name maps to its position in the configured class list
label_map = {label: idx for idx, label in enumerate(relevant_subfolders_aksarantara['aksara_lontara'])}
numeric_labels = [label_map[label] for label in labels]

# Stratified train/test split (70:30).
# test_size=0.3 matches the 70:30 ratio stated here and the default
# split_ratio=0.7 used by split_data for the on-disk split.
train_paths, test_paths, train_labels, test_labels = train_test_split(
    image_paths, numeric_labels, test_size=0.3, stratify=numeric_labels, random_state=42
)

In [2]:
import shutil

# Utility function to split data
def split_data(base_path_aksarantara, output_base_path_aksarantara, split_ratio=0.7,
               relevant_subfolders=None):
    """Copy each class's images into ``train/<class>`` and ``test/<class>`` folders.

    For every dataset in ``base_path_aksarantara`` and every configured class
    subfolder, the image files are split with ``train_test_split`` (fixed
    ``random_state=42``) and copied below ``output_base_path_aksarantara``.
    Source files are left in place. Files already present in the output
    folders are not removed, so re-running with different inputs can leave
    stale copies behind.

    Args:
        base_path_aksarantara: Mapping of dataset name -> dataset directory.
            Despite the parameter name, this is a dict, not a single path.
        output_base_path_aksarantara: Directory under which the ``train`` and
            ``test`` trees are created.
        split_ratio: Fraction of each class assigned to the train set; the
            test fraction passed to ``train_test_split`` is ``1 - split_ratio``.
        relevant_subfolders: Optional mapping of dataset name -> class
            subfolder names. Defaults to the module-level
            ``relevant_subfolders_aksarantara``.

    Raises:
        KeyError: If a dataset name has no entry in the subfolder mapping.
    """
    if relevant_subfolders is None:
        # Resolved at call time so the notebook-level configuration is used by default.
        relevant_subfolders = relevant_subfolders_aksarantara

    image_extensions = ('.png', '.jpg', '.jpeg', '.bmp', '.gif')

    for label, folder_path in base_path_aksarantara.items():
        for subfolder in relevant_subfolders[label]:
            subfolder_path = os.path.join(folder_path, subfolder)
            if not os.path.isdir(subfolder_path):
                continue

            # os.listdir order is arbitrary; sort so the seeded split assigns
            # the same files to train/test on every machine and run.
            files = sorted(
                f for f in os.listdir(subfolder_path)
                if f.lower().endswith(image_extensions)
            )

            if len(files) < 2:
                # train_test_split raises when a class has 0 or 1 samples
                # (one side of the split would be empty); keep what exists
                # in train instead of aborting the whole run.
                print(f"Warning: class '{subfolder}' has {len(files)} image(s); "
                      f"not enough to split, assigning all to train.")
                train_files, test_files = files, []
            else:
                train_files, test_files = train_test_split(
                    files, test_size=1 - split_ratio, random_state=42
                )

            # Create train and test directories
            train_output_dir = os.path.join(output_base_path_aksarantara, 'train', subfolder)
            test_output_dir = os.path.join(output_base_path_aksarantara, 'test', subfolder)
            os.makedirs(train_output_dir, exist_ok=True)
            os.makedirs(test_output_dir, exist_ok=True)

            for file in train_files:
                shutil.copy(os.path.join(subfolder_path, file), os.path.join(train_output_dir, file))
            for file in test_files:
                shutil.copy(os.path.join(subfolder_path, file), os.path.join(test_output_dir, file))

# Destination root; split_data creates the 'train' and 'test' trees beneath it
output_base_path_aksarantara = r'D:\Proyek Aksara 2\Datasets_split_aksarantara'

# Run the on-disk split for every configured dataset
split_data(
    base_path_aksarantara=dataset_paths_aksarantara,
    output_base_path_aksarantara=output_base_path_aksarantara,
)


In [3]:
import os

# Location of the split dataset
output_base_path_aksarantara = r'D:\Proyek Aksara 2\Datasets_split_aksarantara'
train_dir = os.path.join(output_base_path_aksarantara, 'train')

# The subfolder names inside the train directory are the class names/labels.
# Keep directories only: stray files in train_dir (e.g. Thumbs.db) are not classes.
classes = sorted(
    entry for entry in os.listdir(train_dir)
    if os.path.isdir(os.path.join(train_dir, entry))
)

# Print the classes as a 1-based numbered list
print("Kelas yang ada dalam dataset:")
for idx, cls in enumerate(classes):
    print(f"{idx + 1}. {cls}")


Kelas yang ada dalam dataset:
1. a
2. ba
3. ca
4. da
5. ga
6. ha
7. ja
8. ka
9. la
10. ma
11. mpa
12. na
13. nca
14. nga
15. ngka
16. nra
17. nya
18. pa
19. ra
20. sa
21. ta
22. wa
23. ya
