In [1]:
import os
from collections import defaultdict
from sklearn.model_selection import train_test_split

# Project root folder, plus the root folder of each dataset keyed by dataset name.
base_path_aksaracong = r'D:\Proyek Aksara 2'
dataset_paths_aksaracong = dict(
    aksara_rencong=os.path.join(base_path_aksaracong, 'rencong'),
)

# Class subfolders to read for each dataset. The subfolder names double as the
# class labels, and their order here fixes the numeric label indices.
relevant_subfolders_aksaracong = dict(
    aksara_rencong=[
        'a', 'ba', 'ca', 'da', 'ga',
        'gha', 'ha', 'ja', 'ka', 'la',
        'ma', 'na', 'nga', 'nya', 'pa',
        'ra', 'sa', 'ta', 'wa', 'ya',
    ],
)

# Load image paths together with their class labels
def load_image_paths_labels(base_path_aksaracong, relevant_subfolders):
    """Collect ``(image_path, label)`` pairs from the dataset class folders.

    Args:
        base_path_aksaracong: Mapping of dataset name -> dataset root folder.
            Despite the parameter name this is a dict, not a single path.
        relevant_subfolders: Mapping of dataset name -> list of class
            subfolder names to scan inside that dataset's root folder.

    Returns:
        A list of ``(image_path, subfolder_name)`` tuples, where the
        subfolder name serves as the label. Class subfolders that do not
        exist (or are not directories) are skipped. Within each subfolder
        the entries are ordered by filename.

    Raises:
        KeyError: If a dataset name has no entry in ``relevant_subfolders``.
    """
    image_extensions = ('.png', '.jpg', '.jpeg', '.bmp', '.gif')
    image_paths_labels = []
    for label, folder_path in base_path_aksaracong.items():
        for subfolder in relevant_subfolders[label]:
            subfolder_path = os.path.join(folder_path, subfolder)
            # isdir (not exists): a plain file with the class name would make
            # os.listdir raise instead of being skipped.
            if not os.path.isdir(subfolder_path):
                continue
            # os.listdir returns entries in arbitrary order; sorting keeps the
            # sample order (and any seeded split built on it) reproducible.
            for filename in sorted(os.listdir(subfolder_path)):
                if filename.lower().endswith(image_extensions):
                    image_path = os.path.join(subfolder_path, filename)
                    image_paths_labels.append((image_path, subfolder))
    return image_paths_labels

# Load every (image_path, label) pair found under the configured class folders.
image_paths_labels = load_image_paths_labels(dataset_paths_aksaracong, relevant_subfolders_aksaracong)

# Fail early with a clear message: with an empty list, the zip(*...) unpacking
# below would otherwise die with an opaque "not enough values to unpack" error.
if not image_paths_labels:
    raise FileNotFoundError(
        f"No images found under {dataset_paths_aksaracong}; "
        "check the dataset paths and the class subfolder names."
    )

# Separate the pairs into two parallel tuples: paths and string labels.
image_paths, labels = zip(*image_paths_labels)

# Numeric labels: each class name maps to its position in the configured subfolder list.
label_map = {label: idx for idx, label in enumerate(relevant_subfolders_aksaracong['aksara_rencong'])}
numeric_labels = [label_map[label] for label in labels]

# Stratified train/test split (70:30) with a fixed seed.
# NOTE: this in-memory split is computed independently of the on-disk split
# that split_data() writes; the two do not share the same file assignment.
train_paths, test_paths, train_labels, test_labels = train_test_split(
    image_paths, numeric_labels, test_size=0.3, stratify=numeric_labels, random_state=42
)


In [2]:
import shutil

# Utility function to write a per-class train/test split to disk
def split_data(base_path_aksaracong, output_base_path_aksaracong, split_ratio=0.7):
    """Copy each class folder's images into ``train/`` and ``test/`` folders.

    For every dataset in ``base_path_aksaracong`` and every class subfolder
    listed for it in the module-level ``relevant_subfolders_aksaracong``, the
    image files are split with a fixed seed and copied to
    ``<output>/train/<class>`` and ``<output>/test/<class>``. The source files
    are left in place. Class subfolders that do not exist are skipped.

    Args:
        base_path_aksaracong: Mapping of dataset name -> dataset root folder.
            Despite the parameter name this is a dict, not a single path.
        output_base_path_aksaracong: Folder under which ``train`` and ``test``
            are created.
        split_ratio: Fraction of each class's images that goes to ``train``.

    Notes:
        Files copied by an earlier run are not removed, so if the source
        folders changed between runs the output may contain stale files.
    """
    image_extensions = ('.png', '.jpg', '.jpeg', '.bmp', '.gif')
    for label, folder_path in base_path_aksaracong.items():
        for subfolder in relevant_subfolders_aksaracong[label]:
            subfolder_path = os.path.join(folder_path, subfolder)
            if not os.path.isdir(subfolder_path):
                continue

            # os.listdir returns entries in arbitrary order; sort so the
            # seeded split assigns the same files on every run and machine.
            files = sorted(
                f for f in os.listdir(subfolder_path)
                if f.lower().endswith(image_extensions)
            )

            if len(files) < 2:
                # train_test_split cannot split 0 or 1 samples (it raises
                # ValueError); keep whatever exists in train.
                train_files, test_files = files, []
            else:
                # Pass an integer test count instead of the float 1 - split_ratio:
                # 1 - 0.7 evaluates to 0.30000000000000004, and scikit-learn rounds
                # the float test share up, so e.g. 10 files would yield 4 test
                # files instead of 3. round(..., 9) strips float noise before
                # truncating to the train count.
                n_train = int(round(len(files) * split_ratio, 9))
                train_files, test_files = train_test_split(
                    files, test_size=len(files) - n_train, random_state=42
                )

            # Create train and test directories for this class
            train_output_dir = os.path.join(output_base_path_aksaracong, 'train', subfolder)
            test_output_dir = os.path.join(output_base_path_aksaracong, 'test', subfolder)
            os.makedirs(train_output_dir, exist_ok=True)
            os.makedirs(test_output_dir, exist_ok=True)

            for file in train_files:
                shutil.copy(os.path.join(subfolder_path, file), os.path.join(train_output_dir, file))
            for file in test_files:
                shutil.copy(os.path.join(subfolder_path, file), os.path.join(test_output_dir, file))

# Run the on-disk split; train/ and test/ folders are written below this output directory.
output_base_path_aksaracong = r'D:\Proyek Aksara 2\Datasets_split_aksaracong'
split_data(
    base_path_aksaracong=dataset_paths_aksaracong,
    output_base_path_aksaracong=output_base_path_aksaracong,
)


In [3]:
import os

# Location of the train/test split written to disk
output_base_path_aksaracong = r'D:\Proyek Aksara 2\Datasets_split_aksaracong'
train_dir = os.path.join(output_base_path_aksaracong, 'train')

# Class names are the subfolder names inside the train directory.
# Keep directories only, so stray files in train_dir (e.g. Thumbs.db)
# are not reported as classes.
classes = sorted(
    entry for entry in os.listdir(train_dir)
    if os.path.isdir(os.path.join(train_dir, entry))
)

# Print the classes, numbered from 1
print("Kelas yang ada dalam dataset:")
for idx, cls in enumerate(classes, start=1):
    print(f"{idx}. {cls}")


Kelas yang ada dalam dataset:
1. a
2. ba
3. ca
4. da
5. ga
6. gha
7. ha
8. ja
9. ka
10. la
11. ma
12. na
13. nga
14. nya
15. pa
16. ra
17. sa
18. ta
19. wa
20. ya
