#### load the dataset


In [2]:
import os

import cv2
import numpy as np
from PIL import Image
from sklearn.model_selection import train_test_split
from tensorflow.keras.layers import Conv2D, Dense, Dropout, Flatten, Input, MaxPooling2D
from tensorflow.keras.models import Sequential
from tensorflow.keras.utils import to_categorical

### Methods To Extract Data From Images 

In [3]:
def convert_img_to_binary_array(img):
    """Turn an image into a flat list of 0/1 values, one per pixel.

    The image is resized to 40x40 and converted to RGBA. A pixel maps to 1
    when its red, green and blue components are all at least 128, otherwise
    to 0. The alpha component is ignored.

    Args:
        img: An image object exposing ``resize``, ``convert`` and ``getdata``
            (a PIL image in this notebook).

    Returns:
        list[int]: The 0/1 values in the order yielded by ``getdata()``.
    """
    rgba = img.resize((40, 40)).convert('RGBA')

    # min() of the three colour components is >= 128 exactly when each one is
    return [
        1 if min(px[0], px[1], px[2]) >= 128 else 0
        for px in rgba.getdata()
    ]

def add_image_to_X_train(image, X_train):
    """Preprocess one image and append it to a stack of samples.

    The image is resized to 40x40, reduced to a single grayscale channel,
    thresholded to pure black/white and scaled to 0.0/1.0, giving an array
    of shape (1, 40, 40, 1) and dtype float32.

    Args:
        image: NumPy array of shape (H, W) or (H, W, C). Colour input is
            treated as RGB/RGBA, which is what ``np.array(PIL image)``
            produces and what the callers in this notebook pass.
            Assumes 8-bit pixel values (the threshold is 127 out of 255)
            -- TODO confirm for other bit depths.
        X_train: Existing stack of shape (N, 40, 40, 1), or ``None`` when
            this is the first sample.

    Returns:
        The new stack: the processed image alone when ``X_train`` is
        ``None``, otherwise ``X_train`` with the image appended along
        axis 0.
    """
    # Resize the image to 40x40 if it's not already
    if image.shape[:2] != (40, 40):
        image = cv2.resize(image, (40, 40), interpolation=cv2.INTER_AREA)

    if image.ndim == 3 and image.shape[2] == 1:
        # A trailing singleton channel is already grayscale; cvtColor would
        # reject it, so just drop the axis.
        image = image.reshape(image.shape[:2])
    elif image.ndim > 2:
        # Arrays built from PIL images are in RGB(A) order, so use the RGB
        # conversion; the BGR variant would swap the red and blue weights.
        image = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)

    # Convert to black and white (0 or 255)
    _, bw_image = cv2.threshold(image, 127, 255, cv2.THRESH_BINARY)

    # Normalize to 0 or 1 for the neural network
    normalized_image = (bw_image / 255).astype(np.float32)

    # Add the batch and channel dimensions expected by the model
    reshaped_image = normalized_image.reshape(1, 40, 40, 1)

    if X_train is None:
        return reshaped_image
    return np.vstack((X_train, reshaped_image))

### Collect Data From Dataset

In [4]:
# Load every training image as a (1, 40, 40, 1) sample together with its
# integer class label, taken from the name of the folder that contains it.
X_train = None
y_train = []

path = "../dataset/TRAIN/"
train_samples = []  # per-image arrays; stacked once after the loop

# sorted() fixes the sample order so the seeded split later on is reproducible
for folder in sorted(os.listdir(path)):
    folder_path = os.path.join(path, folder)
    if not os.path.isdir(folder_path):
        continue
    # Folder 27 is excluded: the model has 27 outputs, i.e. labels 0-26
    if folder == "27":
        continue
    # Parse the label up front so a non-numeric folder can never add images
    # without matching labels.
    try:
        label = int(folder)
    except ValueError:
        print(f"Skipping non-numeric folder: {folder}")
        continue
    print(f"Processing folder: {folder}")
    for file in sorted(os.listdir(folder_path)):
        file_path = os.path.join(folder_path, file)
        if os.path.isfile(file_path):
            try:
                # The context manager closes the file handle once the pixel
                # data has been copied into the array.
                with Image.open(file_path) as img:
                    sample = add_image_to_X_train(np.array(img), None)
            except Exception as e:
                # Best effort: report unreadable files and keep going
                print(f"Error processing file {file_path}: {e}")
                continue
            # Image and label are recorded together to keep X and y aligned
            train_samples.append(sample)
            y_train.append(label)

# A single concatenation avoids re-copying the whole stack for every image
if train_samples:
    X_train = np.concatenate(train_samples)

assert X_train is None or len(X_train) == len(y_train)

Processing folder: 0
Processing folder: 1
Processing folder: 10
Processing folder: 11
Processing folder: 12
Processing folder: 13
Processing folder: 14
Processing folder: 15
Processing folder: 16
Processing folder: 17
Processing folder: 18
Processing folder: 19
Processing folder: 2
Processing folder: 20
Processing folder: 21
Processing folder: 22
Processing folder: 23
Processing folder: 24
Processing folder: 25
Processing folder: 26
Processing folder: 3
Processing folder: 4
Processing folder: 5
Processing folder: 6
Processing folder: 7
Processing folder: 8
Processing folder: 9


In [5]:
# Load every test image as a (1, 40, 40, 1) sample together with its
# integer class label, taken from the name of the folder that contains it.
X_test = None
y_test = []
path = "../dataset/TEST/"
test_samples = []  # per-image arrays; stacked once after the loop

# sorted() fixes the sample order so the seeded split later on is reproducible
for folder in sorted(os.listdir(path)):
    folder_path = os.path.join(path, folder)
    if not os.path.isdir(folder_path):
        continue
    # Folder 27 is excluded: the model has 27 outputs, i.e. labels 0-26
    if folder == "27":
        continue
    # Parse the label up front so a non-numeric folder can never add images
    # without matching labels.
    try:
        label = int(folder)
    except ValueError:
        print(f"Skipping non-numeric folder: {folder}")
        continue
    print(f"Processing folder: {folder}")
    for file in sorted(os.listdir(folder_path)):
        file_path = os.path.join(folder_path, file)
        if os.path.isfile(file_path):
            try:
                # The context manager closes the file handle once the pixel
                # data has been copied into the array.
                with Image.open(file_path) as img:
                    sample = add_image_to_X_train(np.array(img), None)
            except Exception as e:
                # Best effort: report unreadable files and keep going
                print(f"Error processing file {file_path}: {e}")
                continue
            # Image and label are recorded together to keep X and y aligned
            test_samples.append(sample)
            y_test.append(label)

# A single concatenation avoids re-copying the whole stack for every image
if test_samples:
    X_test = np.concatenate(test_samples)

assert X_test is None or len(X_test) == len(y_test)

Processing folder: 0
Processing folder: 1
Processing folder: 10
Processing folder: 11
Processing folder: 12
Processing folder: 13
Processing folder: 14
Processing folder: 15
Processing folder: 16
Processing folder: 17
Processing folder: 18
Processing folder: 19
Processing folder: 2
Processing folder: 20
Processing folder: 21
Processing folder: 22
Processing folder: 23
Processing folder: 24
Processing folder: 25
Processing folder: 26
Processing folder: 3
Processing folder: 4
Processing folder: 5
Processing folder: 6
Processing folder: 7
Processing folder: 8
Processing folder: 9


## Split Data Into Train & Test Sets

In [6]:
# Pool the samples from both dataset folders, then draw a fresh seeded 80/20 split.
X = np.concatenate([X_train, X_test], axis=0)
y = np.concatenate([y_train, y_test], axis=0)

# NOTE: this rebinds the very names it reads (X_train, X_test, y_train, y_test),
# so re-run the loading cells before re-running this one.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)
print(f"Train size: {len(X_train)}")
print(f"Test size: {len(X_test)}")

Train size: 4058
Test size: 1015


## Define The Model

In [7]:

# Small CNN over 40x40 single-channel images, ending in a 27-way softmax.
# The input shape is declared with an explicit Input layer instead of an
# `input_shape=` argument on the first Conv2D, which Keras warns against.
model = Sequential([
    Input(shape=(40, 40, 1)),
    Conv2D(32, (3, 3), activation='relu'),
    MaxPooling2D((2, 2)),
    Conv2D(64, (3, 3), activation='relu'),
    MaxPooling2D((2, 2)),
    Conv2D(64, (3, 3), activation='relu'),
    Flatten(),
    Dense(64, activation='relu'),
    Dropout(0.5),
    Dense(27, activation='softmax')  # 27 for 22 regular + 5 final forms
])

# categorical_crossentropy expects one-hot encoded labels
model.compile(optimizer='adam',
              loss='categorical_crossentropy',
              metrics=['accuracy'])

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


## Convert Labels To One-Hot Encoding

In [8]:
# One-hot encode the integer labels; 27 matches the width of the model's softmax layer
y_train_one_hot = to_categorical(y_train, num_classes=27)
y_test_one_hot = to_categorical(y_test, num_classes=27)

## Train The Model

In [13]:
# Train for 20 epochs in mini-batches of 32; `history` keeps the per-epoch metrics.
# NOTE(review): the recorded log starts at ~0.87 accuracy in epoch 1, which suggests
# this cell was re-run on an already-trained model -- re-run the model-definition
# cell first when a fresh training run is wanted.
history = model.fit(
    X_train,
    y_train_one_hot,
    batch_size=32,
    epochs=20,
)

Epoch 1/20
[1m127/127[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 10ms/step - accuracy: 0.8701 - loss: 0.3566
Epoch 2/20
[1m127/127[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 10ms/step - accuracy: 0.8805 - loss: 0.3413
Epoch 3/20
[1m127/127[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 10ms/step - accuracy: 0.8746 - loss: 0.3504
Epoch 4/20
[1m127/127[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 9ms/step - accuracy: 0.8869 - loss: 0.3290
Epoch 5/20
[1m127/127[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 9ms/step - accuracy: 0.8763 - loss: 0.3413
Epoch 6/20
[1m127/127[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 9ms/step - accuracy: 0.8850 - loss: 0.3107
Epoch 7/20
[1m127/127[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 9ms/step - accuracy: 0.8749 - loss: 0.3465
Epoch 8/20
[1m127/127[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 9ms/step - accuracy: 0.8844 - loss: 0.3160
Epoch 9/20
[1m127/127[0m [32m━━━━━

In [10]:
# Score the model on the held-out split; the result unpacks to the loss
# followed by the single compiled metric (accuracy).
evaluation = model.evaluate(x=X_test, y=y_test_one_hot)
test_loss, test_acc = evaluation
print(f'Test accuracy: {test_acc}')

[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.7464 - loss: 0.9936
Test accuracy: 0.7605911493301392


In [11]:
# Save the trained model in the native Keras format.
model_path = '../models/keras_model.keras'
# Create the target directory first: saving may fail if it does not exist yet
os.makedirs(os.path.dirname(model_path), exist_ok=True)
model.save(model_path)