In [1]:
import os
from collections import defaultdict
from sklearn.model_selection import train_test_split

# Base path; the dataset folder for each script is joined onto it below
base_path_aksarala = r'D:\Proyek Aksara 2'
dataset_paths_aksarala = dict(
    aksara_pallawa=os.path.join(base_path_aksarala, 'pallawa'),
)

# Class sub-folders to read for each dataset.
# The order of this list matters: numeric labels are assigned by position
# when label_map is built from it later in this cell.
relevant_subfolders_aksarala = {
    'aksara_pallawa': [
        'ba', 'bha', 'ca', 'cha', 'da', 'ḍa', 'dha', 'ḍha',
        'ga', 'gha', 'ha', 'ja', 'jha', 'ka', 'kha', 'la',
        'ma', 'na', 'ṅa', 'ña', 'ṇa', 'pa', 'pha', 'ra',
        'sa', 'śa', 'ṣa', 'ta', 'ṭa', 'tha', 'ṭha', 'va', 'ya',
    ],
}

# Function that collects image paths together with their class labels
def load_image_paths_labels(base_path_aksarala, relevant_subfolders):
    """Collect ``(image_path, class_name)`` pairs from the dataset folders.

    Parameters
    ----------
    base_path_aksarala : dict
        Maps a dataset key to the folder that holds its class sub-folders.
        (Despite the name, this is a mapping, not a single path: the body
        iterates over ``.items()``.)
    relevant_subfolders : dict
        Maps the same dataset key to the list of class sub-folder names to read.

    Returns
    -------
    list of tuple
        ``(image_path, subfolder)`` pairs, where ``subfolder`` is the class
        folder name. Classes appear in the order given by
        ``relevant_subfolders``; files within a class are sorted by name.

    Raises
    ------
    KeyError
        If a dataset key has no entry in ``relevant_subfolders``.
    """
    image_extensions = ('.png', '.jpg', '.jpeg', '.bmp', '.gif')
    image_paths_labels = []
    for label, folder_path in base_path_aksarala.items():
        for subfolder in relevant_subfolders[label]:
            subfolder_path = os.path.join(folder_path, subfolder)
            # isdir (not exists): a regular file that happens to share a class
            # name must be skipped, otherwise os.listdir would raise.
            if not os.path.isdir(subfolder_path):
                continue
            # os.listdir returns entries in arbitrary order; sort so that the
            # result (and any seeded split built on it) is reproducible.
            for filename in sorted(os.listdir(subfolder_path)):
                if filename.lower().endswith(image_extensions):
                    image_path = os.path.join(subfolder_path, filename)
                    image_paths_labels.append((image_path, subfolder))
    return image_paths_labels

# Load the (image path, class name) pairs for every relevant class folder
image_paths_labels = load_image_paths_labels(dataset_paths_aksarala, relevant_subfolders_aksarala)

# zip(*[]) cannot be unpacked into two names; fail with a readable message
# instead of "not enough values to unpack" when no image was found.
if not image_paths_labels:
    raise ValueError(f"No images found under {dataset_paths_aksarala}")

# Separate the paths from the labels
image_paths, labels = zip(*image_paths_labels)

# Numeric labels: a class's index is its position in the configured class list
label_map = {label: idx for idx, label in enumerate(relevant_subfolders_aksarala['aksara_pallawa'])}
numeric_labels = [label_map[label] for label in labels]

# Split the data into train and test (70:30), stratified by class.
# test_size is 0.3 so the in-memory split agrees with the 70:30 ratio stated
# here and with the default split_ratio=0.7 used by split_data.
train_paths, test_paths, train_labels, test_labels = train_test_split(
    image_paths, numeric_labels, test_size=0.3, stratify=numeric_labels, random_state=42
)

import shutil


In [2]:
# Utility function to split data
def split_data(base_path_aksarala, output_base_path_aksarala, split_ratio=0.7, relevant_subfolders=None):
    """Copy each class folder's images into ``train/<class>`` and ``test/<class>``.

    Parameters
    ----------
    base_path_aksarala : dict
        Maps a dataset key to the folder that holds its class sub-folders.
    output_base_path_aksarala : str
        Folder under which ``train/`` and ``test/`` are created.
    split_ratio : float, default 0.7
        Fraction of each class's images copied to ``train``; the remainder
        goes to ``test``.
    relevant_subfolders : dict, optional
        Maps a dataset key to the class sub-folder names to process. When
        omitted, the module-level ``relevant_subfolders_aksarala`` is used,
        which is what the original implementation always did.

    Notes
    -----
    * Files are sorted before splitting because ``os.listdir`` returns them
      in arbitrary order; without sorting, ``random_state=42`` alone does not
      make the split reproducible.
    * A class folder with no images is skipped, and a folder with a single
      image is copied to ``train`` only, since ``train_test_split`` cannot
      split fewer than two samples.
    * Existing output folders are not cleared (``exist_ok=True`` and plain
      copies), so delete the output folder before re-running with a different
      ratio or file set to avoid files ending up in both train and test.
    """
    if relevant_subfolders is None:
        relevant_subfolders = relevant_subfolders_aksarala

    image_extensions = ('.png', '.jpg', '.jpeg', '.bmp', '.gif')

    for label, folder_path in base_path_aksarala.items():
        for subfolder in relevant_subfolders[label]:
            subfolder_path = os.path.join(folder_path, subfolder)
            if not os.path.isdir(subfolder_path):
                continue

            files = sorted(
                f for f in os.listdir(subfolder_path)
                if f.lower().endswith(image_extensions)
            )
            if not files:
                print(f"Skipping '{subfolder}': no images found in {subfolder_path}")
                continue

            if len(files) < 2:
                # train_test_split raises on a single sample; keep it for training.
                print(f"Only one image in '{subfolder}': copied to train, test left empty")
                train_files, test_files = files, []
            else:
                train_files, test_files = train_test_split(
                    files, test_size=1 - split_ratio, random_state=42
                )

            # Create train and test directories
            train_output_dir = os.path.join(output_base_path_aksarala, 'train', subfolder)
            test_output_dir = os.path.join(output_base_path_aksarala, 'test', subfolder)
            os.makedirs(train_output_dir, exist_ok=True)
            os.makedirs(test_output_dir, exist_ok=True)

            for file in train_files:
                shutil.copy(os.path.join(subfolder_path, file), os.path.join(train_output_dir, file))
            for file in test_files:
                shutil.copy(os.path.join(subfolder_path, file), os.path.join(test_output_dir, file))

# Destination root: split_data creates train/ and test/ beneath this folder
output_base_path_aksarala = r'D:\Proyek Aksara 2\Datasets_split_aksarala'

# Run the split with the default ratio
split_data(
    base_path_aksarala=dataset_paths_aksarala,
    output_base_path_aksarala=output_base_path_aksarala,
)


In [4]:
import os

# Path to the split dataset
output_base_path_aksarala = r'D:\Proyek Aksara 2\Datasets_split_aksarala'
train_dir = os.path.join(output_base_path_aksarala, 'train')

# The sub-folder names inside the train directory are the class names/labels.
# Keep directories only, so stray files in train/ (e.g. Thumbs.db, .DS_Store)
# are not reported as classes.
# NOTE: sorted() orders names by code point, so names starting with a
# diacritic letter (ña, śa, ḍa, ...) come after 'ya'. This is NOT the order
# used to build label_map in the first cell, so an index in this list must not
# be treated as that cell's numeric label.
classes = sorted(
    entry for entry in os.listdir(train_dir)
    if os.path.isdir(os.path.join(train_dir, entry))
)

# Show the classes
print("Kelas yang ada dalam dataset:")
for idx, cls in enumerate(classes):
    print(f"{idx + 1}. {cls}")


Kelas yang ada dalam dataset:
1. ba
2. bha
3. ca
4. cha
5. da
6. dha
7. ga
8. gha
9. ha
10. ja
11. jha
12. ka
13. kha
14. la
15. ma
16. na
17. pa
18. pha
19. ra
20. sa
21. ta
22. tha
23. va
24. ya
25. ña
26. śa
27. ḍa
28. ḍha
29. ṅa
30. ṇa
31. ṣa
32. ṭa
33. ṭha
