In [2]:
import os
from collections import defaultdict
from sklearn.model_selection import train_test_split

# Path dasar
base_path_aksarunda = r'D:\Proyek Aksara\Datasets'
dataset_paths_aksarunda = {
    'aksara_sunda': os.path.join(base_path_aksarunda, 'aksarunda'),
}

# Subfolder yang relevan
relevant_subfolders_aksarunda = {
    'aksara_sunda': ['ka', 'ga', 'nga', 'ca', 'ja', 'nya',
                     'ta', 'da', 'na', 'pa', 'ba', 'ma',
                     'ya', 'ra', 'la', 'wa', 'sa', 'ha',
                     'fa', 'va', 'qa', 'xa', 'za',
                     'kha', 'sya',
                     'a', 'e', 'eu', 'i', 'o', 'u', 'é']
}

# Fungsi untuk memuat path citra dan labelnya
def load_image_paths_labels(base_path_aksarunda, relevant_subfolders_aksarunda):
    image_paths_labels = []
    for label, folder_path in base_path_aksarunda.items():
        for subfolder in relevant_subfolders_aksarunda[label]:
            subfolder_path = os.path.join(folder_path, subfolder)
            if os.path.exists(subfolder_path):
                for filename in os.listdir(subfolder_path):
                    if filename.lower().endswith(('.png', '.jpg', '.jpeg', '.bmp', '.gif')):
                        image_path = os.path.join(subfolder_path, filename)
                        image_paths_labels.append((image_path, subfolder))
    return image_paths_labels

# Muat data
image_paths_labels = load_image_paths_labels(dataset_paths_aksarunda, relevant_subfolders_aksarunda)

# Pisahkan path dan label
image_paths, labels = zip(*image_paths_labels)

# Label numeric
label_map = {label: idx for idx, label in enumerate(relevant_subfolders_aksarunda['aksara_sunda'])}
numeric_labels = [label_map[label] for label in labels]

# Pisahkan data menjadi train dan test (70:30)
train_paths, test_paths, train_labels, test_labels = train_test_split(
    image_paths, numeric_labels, test_size=0.3, stratify=numeric_labels, random_state=42
)


In [3]:
import shutil

# Utility function to split data
def split_data(base_path_aksarunda, output_base_path_aksarunda, split_ratio=0.7):
    for label, folder_path in base_path_aksarunda.items():
        for subfolder in relevant_subfolders_aksarunda[label]:
            subfolder_path = os.path.join(folder_path, subfolder)
            if os.path.isdir(subfolder_path):
                files = [f for f in os.listdir(subfolder_path) if f.lower().endswith(('.png', '.jpg', '.jpeg', '.bmp', '.gif'))]
                train_files, test_files = train_test_split(files, test_size=1-split_ratio, random_state=42)

                # Create train and test directories
                train_output_dir = os.path.join(output_base_path_aksarunda, 'train', subfolder)
                test_output_dir = os.path.join(output_base_path_aksarunda, 'test', subfolder)
                os.makedirs(train_output_dir, exist_ok=True)
                os.makedirs(test_output_dir, exist_ok=True)

                for file in train_files:
                    shutil.copy(os.path.join(subfolder_path, file), os.path.join(train_output_dir, file))
                for file in test_files:
                    shutil.copy(os.path.join(subfolder_path, file), os.path.join(test_output_dir, file))

# Apply the split
output_base_path_aksarunda = r'D:\Proyek Aksara\Datasets_split_aksarunda'
split_data(dataset_paths_aksarunda, output_base_path_aksarunda)


In [4]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout

# Path dataset split
train_dir = os.path.join(output_base_path_aksarunda, 'train')
test_dir = os.path.join(output_base_path_aksarunda, 'test')

# ImageDataGenerator untuk augmentasi dan preprocessing
train_datagen = ImageDataGenerator(
    rescale=1./255,
    rotation_range=20,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True
)

test_datagen = ImageDataGenerator(rescale=1./255)

# Buat generator
train_generator = train_datagen.flow_from_directory(
    train_dir,
    target_size=(128, 128),
    batch_size=32,
    class_mode='sparse'
)

test_generator = test_datagen.flow_from_directory(
    test_dir,
    target_size=(128, 128),
    batch_size=32,
    class_mode='sparse'
)


Found 14140 images belonging to 32 classes.
Found 6084 images belonging to 32 classes.


In [5]:
classes = sorted(os.listdir(train_dir))

# Tampilkan kelas
print("Kelas yang ada dalam dataset:")
for idx, cls in enumerate(classes):
    print(f"{idx + 1}. {cls}")


Kelas yang ada dalam dataset:
1. a
2. ba
3. ca
4. da
5. e
6. eu
7. fa
8. ga
9. ha
10. i
11. ja
12. ka
13. kha
14. la
15. ma
16. na
17. nga
18. nya
19. o
20. pa
21. qa
22. ra
23. sa
24. sya
25. ta
26. u
27. va
28. wa
29. xa
30. ya
31. za
32. é


In [6]:
import os

output_base_path_aksarunda = r'D:\Proyek Aksara\Datasets_split_aksarunda'

# Mengumpulkan label dari struktur output
def get_labels_from_output(output_base_path):
    labels = {}
    for split_folder in ['train', 'test']:  # Iterasi melalui folder train dan test
        split_path = os.path.join(output_base_path, split_folder)
        for label in os.listdir(split_path):
            label_path = os.path.join(split_path, label)
            if os.path.isdir(label_path):
                labels[label] = split_folder  # Menyimpan label split (train/test) untuk setiap folder
    return labels

# Memanggil fungsi untuk mendapatkan label dari struktur output
output_labels = get_labels_from_output(output_base_path_aksarunda)

# Menampilkan label untuk setiap subfolder di dalam direktori train atau test
for label, split in output_labels.items():
    print(f"Folder '{label}' di dalam direktori '{split}' dilabeli sebagai: {label}")


Folder 'a' di dalam direktori 'test' dilabeli sebagai: a
Folder 'ba' di dalam direktori 'test' dilabeli sebagai: ba
Folder 'ca' di dalam direktori 'test' dilabeli sebagai: ca
Folder 'da' di dalam direktori 'test' dilabeli sebagai: da
Folder 'e' di dalam direktori 'test' dilabeli sebagai: e
Folder 'eu' di dalam direktori 'test' dilabeli sebagai: eu
Folder 'fa' di dalam direktori 'test' dilabeli sebagai: fa
Folder 'ga' di dalam direktori 'test' dilabeli sebagai: ga
Folder 'ha' di dalam direktori 'test' dilabeli sebagai: ha
Folder 'i' di dalam direktori 'test' dilabeli sebagai: i
Folder 'ja' di dalam direktori 'test' dilabeli sebagai: ja
Folder 'ka' di dalam direktori 'test' dilabeli sebagai: ka
Folder 'kha' di dalam direktori 'test' dilabeli sebagai: kha
Folder 'la' di dalam direktori 'test' dilabeli sebagai: la
Folder 'ma' di dalam direktori 'test' dilabeli sebagai: ma
Folder 'na' di dalam direktori 'test' dilabeli sebagai: na
Folder 'nga' di dalam direktori 'test' dilabeli sebagai: nga

In [7]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout
from tensorflow.keras.callbacks import EarlyStopping


# Path dataset split
train_dir = os.path.join(output_base_path_aksarunda, 'train')
test_dir = os.path.join(output_base_path_aksarunda, 'test')

# ImageDataGenerator untuk augmentasi dan preprocessing
train_datagen = ImageDataGenerator(
    rescale=1./255,
    rotation_range=20,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True
)

test_datagen = ImageDataGenerator(rescale=1./255)

# Buat generator
train_generator = train_datagen.flow_from_directory(
    train_dir,
    target_size=(128, 128),
    batch_size=32,
    class_mode='sparse'
)

test_generator = test_datagen.flow_from_directory(
    test_dir,
    target_size=(128, 128),
    batch_size=32,
    class_mode='sparse'
)

model = Sequential([
    Conv2D(32, (3, 3), activation='relu', input_shape=(128, 128, 3)),
    MaxPooling2D(pool_size=(2, 2)),
    Conv2D(64, (3, 3), activation='relu'),
    MaxPooling2D(pool_size=(2, 2)),
    Conv2D(128, (3, 3), activation='relu'),
    MaxPooling2D(pool_size=(2, 2)),
    Conv2D(256, (3, 3), activation='relu'),
    MaxPooling2D(pool_size=(2, 2)),
    Conv2D(512, (3, 3), activation='relu'),
    MaxPooling2D(pool_size=(2, 2)),
    Flatten(),
    Dense(256, activation='relu'),
    Dropout(0.5),
    Dense(len(relevant_subfolders_aksarunda['aksara_sunda']), activation='softmax')  
])

model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

#early stopping for avoid overfitting
early_stopping = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)


# Train model
steps_per_epoch = train_generator.samples // train_generator.batch_size
validation_steps = test_generator.samples // test_generator.batch_size

history = model.fit(
    train_generator,
    epochs=50,
    validation_data=test_generator,
    steps_per_epoch=steps_per_epoch,
    validation_steps=validation_steps
)

# Evaluasi model
test_loss, test_acc = model.evaluate(test_generator)
print(f'Akurasi pada data pengujian: {test_acc:.2f}')

Found 14140 images belonging to 32 classes.
Found 6084 images belonging to 32 classes.


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/50


  self._warn_if_super_not_called()


[1m441/441[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m272s[0m 600ms/step - accuracy: 0.0296 - loss: 3.4684 - val_accuracy: 0.0359 - val_loss: 3.4652
Epoch 2/50
[1m441/441[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 322us/step - accuracy: 0.0625 - loss: 3.4592 - val_accuracy: 0.0000e+00 - val_loss: 3.4710
Epoch 3/50


  self.gen.throw(value)


[1m441/441[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m172s[0m 386ms/step - accuracy: 0.0367 - loss: 3.4652 - val_accuracy: 0.0359 - val_loss: 3.4651
Epoch 4/50
[1m441/441[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 119us/step - accuracy: 0.0938 - loss: 3.4634 - val_accuracy: 0.0000e+00 - val_loss: 3.4763
Epoch 5/50
[1m441/441[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m150s[0m 338ms/step - accuracy: 0.0358 - loss: 3.4656 - val_accuracy: 0.0359 - val_loss: 3.4651
Epoch 6/50
[1m441/441[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 98us/step - accuracy: 0.0625 - loss: 3.4689 - val_accuracy: 0.0000e+00 - val_loss: 3.4760
Epoch 7/50
[1m441/441[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m149s[0m 336ms/step - accuracy: 0.0347 - loss: 3.4656 - val_accuracy: 0.0359 - val_loss: 3.4651
Epoch 8/50
[1m441/441[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 96us/step - accuracy: 0.0625 - loss: 3.4613 - val_accuracy: 0.0000e+00 - val_loss: 3.4694
Epoch 9/50
