In [1]:
import os
import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from PIL import Image

# Path to your data folder
data_dir = "data"
img_size = (32, 32)

images, labels = [], []

for fname in os.listdir(data_dir):
    if fname.lower().endswith((".png", ".jpg", ".jpeg")):   # accept jpg & png
        label = fname.split(".")[0]   # "001he"
        filepath = os.path.join(data_dir, fname)

        # Load & preprocess
        img = Image.open(filepath).convert("L")  # grayscale
        img = img.resize(img_size)
        img = np.array(img).astype("float32") / 255.0

        images.append(img)
        labels.append(label)

# Convert to arrays
images = np.array(images)[..., None]  # add channel dim
labels = np.array(labels)

# Encode string labels -> integers
le = LabelEncoder()
labels_encoded = le.fit_transform(labels)

print("Total images:", len(images))
print("Unique labels:", len(le.classes_))
print("Sample labels:", le.classes_[:10])  # check first 10

Total images: 37652
Unique labels: 238
Sample labels: ['001he' '002hu' '003hi' '004ha' '005hy' '006hh' '007ho' '008le' '009lu'
 '010li']


In [2]:
le.classes_

array(['001he', '002hu', '003hi', '004ha', '005hy', '006hh', '007ho',
       '008le', '009lu', '010li', '011la', '012ly', '013ll', '014lo',
       '015_h', '016_h', '017_h', '018_h', '019_h', '020_h', '021_h',
       '022me', '023mu', '024mi', '025ma', '026my', '027mm', '028mo',
       '029_s', '030_s', '031_s', '032_s', '033_s', '034_s', '035_s',
       '036re', '037ru', '038ri', '039ra', '040ry', '041rr', '042ro',
       '043se', '044su', '045si', '046sa', '047sy', '048ss', '049so',
       '050_S', '051_S', '052_S', '053_S', '054_S', '055_S', '056_S',
       '057qe', '058qu', '059qi', '060qa', '061qy', '062qq', '063qo',
       '064be', '065bu', '066bi', '067ba', '068by', '069bb', '070bo',
       '071ve', '072vu', '073vi', '074va', '075vy', '076vv', '077vo',
       '078te', '079tu', '080ti', '081ta', '082ty', '083tt', '084to',
       '085Ce', '086Cu', '087Ci', '088Ca', '089Cy', '090CC', '091Co',
       '092He', '093Hu', '094Hi', '095Ha', '096Hy', '097HH', '098Ho',
       '099ne', '100

In [7]:
X_train, X_test, y_train, y_test = train_test_split(
    images, labels_encoded, test_size=0.2, random_state=42, stratify=labels_encoded
)

print("Train shape:", X_train.shape, y_train.shape)
print("Test shape:", X_test.shape, y_test.shape)

Train shape: (30121, 32, 32, 1) (30121,)
Test shape: (7531, 32, 32, 1) (7531,)


In [6]:
import os

# Path to your dataset
data_dir = "data"

# Get class names from the dataset folder
class_names = sorted(os.listdir(data_dir))

# Number of classes
num_classes = len(class_names)

print("Number of classes:", num_classes)
print("Sample classes:", class_names[:10])


Number of classes: 37652
Sample classes: ['001he.1.jpg', '001he.10.jpg', '001he.100.jpg', '001he.101.jpg', '001he.102.jpg', '001he.103.jpg', '001he.104.jpg', '001he.105.jpg', '001he.106.jpg', '001he.107.jpg']


In [8]:
num_classes = len(le.classes_)

model = keras.Sequential([
    layers.Input(shape=(32, 32, 1)),
    layers.Conv2D(32, 3, activation='relu'),
    layers.MaxPooling2D(),
    layers.Conv2D(64, 3, activation='relu'),
    layers.MaxPooling2D(),
    layers.Conv2D(128, 3, activation='relu'),
    layers.Flatten(),
    layers.Dropout(0.4),
    layers.Dense(256, activation='relu'),
    layers.Dense(num_classes, activation='softmax')
])

model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy']
)

model.summary()

In [11]:
history = model.fit(
    X_train, y_train,
    validation_data=(X_test, y_test),
    epochs=25,
    batch_size=64
)


Epoch 1/25
[1m471/471[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 30ms/step - accuracy: 0.9456 - loss: 0.1589 - val_accuracy: 0.8428 - val_loss: 0.6004
Epoch 2/25
[1m471/471[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 28ms/step - accuracy: 0.9493 - loss: 0.1472 - val_accuracy: 0.8501 - val_loss: 0.5982
Epoch 3/25
[1m471/471[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 29ms/step - accuracy: 0.9529 - loss: 0.1351 - val_accuracy: 0.8518 - val_loss: 0.5784
Epoch 4/25
[1m471/471[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 30ms/step - accuracy: 0.9576 - loss: 0.1232 - val_accuracy: 0.8466 - val_loss: 0.6164
Epoch 5/25
[1m471/471[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 34ms/step - accuracy: 0.9588 - loss: 0.1215 - val_accuracy: 0.8511 - val_loss: 0.6050
Epoch 6/25
[1m471/471[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 34ms/step - accuracy: 0.9584 - loss: 0.1145 - val_accuracy: 0.8559 - val_loss: 0.6014
Epoch 7/25
[1m4

In [13]:
model.save("amharic_cnn.keras")

# Save label encoder classes
np.save("class_names.npy", le.classes_)