In [49]:
import cv2
import mediapipe as mp
import numpy as np
import os
import time
import tensorflow as tf
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv1D, MaxPooling1D, Flatten, Dense, Dropout
from tensorflow.keras.models import save_model, load_model
from tensorflow.keras.callbacks import EarlyStopping
import matplotlib.pyplot as plt


In [88]:
# Read the saved feature array and its matching label array from the working directory.
landmark_data, labels = np.load("landmark_data.npy"), np.load("labels.npy")

In [90]:
# Inspect the dimensions of the loaded feature array (rows x columns).
landmark_data.shape

(65282, 63)

In [40]:
# Scale every coordinate by the dataset-wide maximum value.
# The factor is kept in `landmark_scale` because `landmark_data` is overwritten
# on the next line; without it the training-time scaling cannot be reproduced
# for new samples (np.max(landmark_data) is 1.0 once this cell has run).
# NOTE: re-running this cell without reloading the raw array first would reset
# `landmark_scale` to 1.0 -- re-run the load cell before this one.
landmark_scale = np.max(landmark_data)
landmark_data = landmark_data / landmark_scale

# Encode labels as integers and convert to categorical.
# `num_classes` is passed explicitly so the one-hot width is tied to the
# encoder's class list rather than inferred from the largest index present.
label_encoder = LabelEncoder()
labels_encoded = label_encoder.fit_transform(labels)
labels_categorical = to_categorical(
    labels_encoded, num_classes=len(label_encoder.classes_)
)

In [63]:
# Hold out 20% of the samples; the fixed random_state keeps the partition repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    landmark_data,
    labels_categorical,
    test_size=0.2,
    random_state=42,
)

In [65]:
# Build 1D CNN model: four Conv1D + MaxPooling1D stages followed by a dense
# softmax classifier with one output per encoded label.
# The input shape is declared once with an explicit Input object. Passing
# `input_shape` to a layer triggers a Keras warning, and on a non-first layer
# the argument has no effect, so it is removed from the third Conv1D.
model = Sequential([
    tf.keras.Input(shape=(X_train.shape[1], 1)),  # (features per sample, 1 channel)
    Conv1D(64, 3, activation='relu'),
    MaxPooling1D(2),
    Dropout(0.2),
    Conv1D(128, 3, activation='relu'),
    MaxPooling1D(2),
    Dropout(0.3),
    Conv1D(128, 3, activation='relu'),
    MaxPooling1D(2),
    Conv1D(128, 3, activation='relu'),
    MaxPooling1D(2),
    Dropout(0.4),
    Flatten(),
    Dense(128, activation='relu'),
    Dropout(0.5),
    Dense(len(label_encoder.classes_), activation='softmax')
])

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [70]:
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# Stop once validation loss has not improved for 5 consecutive epochs and roll
# back to the best weights seen. 'val_loss' is the default monitor; it is
# spelled out here so the stopping criterion is visible.
es = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)

# Train the model
# `callbacks` is documented as a list of callbacks, so the single callback is wrapped.
# NOTE: X_test/y_test drive early stopping here, so they act as a validation
# set; metrics on them are not an unbiased estimate of generalisation.
model.fit(
    X_train[..., np.newaxis],  # add the trailing channel axis expected by Conv1D
    y_train,
    epochs=100,
    batch_size=32,
    validation_data=(X_test[..., np.newaxis], y_test),
    callbacks=[es],
)
# Saving is disabled; uncomment one of the lines below to persist this run.
#model.save("asl_sign_language_model.h5")
#save_model(model, 'asl_sign_language_model_tf_2.18.keras')

Epoch 1/100
[1m1633/1633[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 5ms/step - accuracy: 0.9556 - loss: 0.1471 - val_accuracy: 0.9828 - val_loss: 0.0564
Epoch 2/100
[1m1633/1633[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 6ms/step - accuracy: 0.9545 - loss: 0.1436 - val_accuracy: 0.9875 - val_loss: 0.0467
Epoch 3/100
[1m1633/1633[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 6ms/step - accuracy: 0.9542 - loss: 0.1428 - val_accuracy: 0.9889 - val_loss: 0.0451
Epoch 4/100
[1m1633/1633[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 6ms/step - accuracy: 0.9541 - loss: 0.1425 - val_accuracy: 0.9897 - val_loss: 0.0409
Epoch 5/100
[1m1633/1633[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 6ms/step - accuracy: 0.9566 - loss: 0.1418 - val_accuracy: 0.9878 - val_loss: 0.0481
Epoch 6/100
[1m1633/1633[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 6ms/step - accuracy: 0.9515 - loss: 0.1503 - val_accuracy: 0.9894 - val_loss: 0.0418
Epoch 7/1

<keras.src.callbacks.history.History at 0x2e37c7f10>

In [71]:
# Display the model object's repr.
model

<Sequential name=sequential_2, built=True>

In [91]:
# Replace the in-memory `model` with the network stored in the .keras file on disk.
model = load_model("asl_sign_language_model_tf_2.18.keras")

In [51]:
def predict_image(directory):
    """Predict the sign shown in a single image file.

    Reads the image, extracts the first detected hand's landmarks with
    MediaPipe Hands, and classifies them with the global ``model``; the class
    index is decoded with the global ``label_encoder``.

    Parameters
    ----------
    directory : str
        Path to the image file (despite the name, a file path is expected,
        since it is passed straight to ``cv2.imread``).

    Returns
    -------
    numpy.ndarray or None
        One-element array holding the decoded label, as returned by
        ``label_encoder.inverse_transform``; ``None`` when no hand is detected.

    Raises
    ------
    FileNotFoundError
        If the image cannot be read from ``directory``.
    """
    img = cv2.imread(directory)
    if img is None:
        # cv2.imread signals failure by returning None rather than raising.
        raise FileNotFoundError(f"Could not read image: {directory}")
    img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

    # static_image_mode=True because the input is one independent still image,
    # not a video stream. The context manager releases the MediaPipe graph.
    with mp.solutions.hands.Hands(static_image_mode=True, max_num_hands=1) as hands:
        result = hands.process(img_rgb)

        if not result.multi_hand_landmarks:
            return None

        # Flatten the hand's landmarks into [x0, y0, z0, x1, y1, z1, ...].
        landmarks = []
        for lm in result.multi_hand_landmarks[0].landmark:
            landmarks.extend([lm.x, lm.y, lm.z])

    # NOTE(review): the training features were divided by np.max(landmark_data);
    # as before, the raw coordinates are fed to the model unscaled here --
    # confirm whether the same scaling should be applied.
    model_input = np.array(landmarks)[np.newaxis, :, np.newaxis]  # (1, n_features, 1)
    prediction = model.predict(model_input)
    predicted_label_index = np.argmax(prediction)
    return label_encoder.inverse_transform([predicted_label_index])


In [53]:
# Spot-check the classifier on one image from the "A" test folder.
predict_image("raw_data/test_set_pics/A/test_A_1.jpg")

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 50ms/step


I0000 00:00:1730370193.918184 3483198 gl_context.cc:357] GL version: 2.1 (2.1 Metal - 88), renderer: Apple M1 Pro
W0000 00:00:1730370193.929000 3651311 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1730370193.953626 3651314 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.


array(['A'], dtype='<U5')

In [54]:
# Spot-check the classifier on one image from the "P" test folder.
predict_image("raw_data/test_set_pics/P/test_P_4.jpg")

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step


I0000 00:00:1730370201.007991 3483198 gl_context.cc:357] GL version: 2.1 (2.1 Metal - 88), renderer: Apple M1 Pro
W0000 00:00:1730370201.016433 3651400 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1730370201.023376 3651400 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.


array(['P'], dtype='<U5')

In [55]:
def adjust_brightness_contrast(image, brightness=40, contrast=1.0):
    """Return a copy of ``image`` with a linear brightness/contrast adjustment.

    Each pixel value ``p`` becomes ``p * contrast + brightness``, computed in
    float32 so intermediate values are not limited to the uint8 range. The
    result is then limited to [0, 255] and cast to uint8 (the cast drops any
    fractional part). The input array is not modified.
    """
    adjusted = image.astype(np.float32) * contrast + brightness
    return np.clip(adjusted, 0, 255).astype(np.uint8)

In [87]:
def evaluate_model(test_data_dir, scale=None):
    """Evaluate the global ``model`` on a folder of labelled test images.

    ``test_data_dir`` is expected to contain one sub-folder per label, each
    holding images of that sign. Every readable image is brightened with
    ``adjust_brightness_contrast``, passed through MediaPipe Hands, and the
    first detected hand's landmarks are collected. Images with no detected
    hand are skipped.

    Parameters
    ----------
    test_data_dir : str
        Directory whose sub-folder names are the class labels.
    scale : float, optional
        Divisor applied to the landmark coordinates. Defaults to
        ``np.max(landmark_data)`` computed from the global array, which is
        only the training-time factor while ``landmark_data`` still holds the
        raw (un-normalised) values; pass the factor explicitly otherwise.

    Returns
    -------
    tuple
        ``(landmark_data1, labels_encoded1, evaluation)``: the scaled features
        reshaped to ``(-1, 63, 1)``, the integer-encoded labels, and the
        return value of ``model.evaluate``.

    Raises
    ------
    ValueError
        If no hand is detected in any image.
    """
    if scale is None:
        scale = np.max(landmark_data)

    labels1 = []
    landmark_data1 = []

    # The context manager releases the MediaPipe graph when the loop is done.
    with mp.solutions.hands.Hands(
        static_image_mode=True, max_num_hands=1, min_detection_confidence=0.7
    ) as hands:
        # Sorted so the order of the returned samples is deterministic.
        for letter in sorted(os.listdir(test_data_dir)):
            letter_dir = os.path.join(test_data_dir, letter)
            if not os.path.isdir(letter_dir):
                continue  # stray files (e.g. .DS_Store) are not class folders

            for img_name in sorted(os.listdir(letter_dir)):
                img = cv2.imread(os.path.join(letter_dir, img_name))
                if img is None:
                    continue  # not a readable image
                img = adjust_brightness_contrast(img, 40, 1)

                img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
                results = hands.process(img_rgb)
                if not results.multi_hand_landmarks:
                    continue

                # Flatten the hand's landmarks into [x0, y0, z0, x1, y1, z1, ...].
                landmarks = []
                for lm in results.multi_hand_landmarks[0].landmark:
                    landmarks.extend([lm.x, lm.y, lm.z])
                landmark_data1.append(landmarks)
                labels1.append(letter)

    if not landmark_data1:
        raise ValueError(
            f"No hand landmarks detected in any image under {test_data_dir!r}"
        )

    # Apply the scale factor, then add the channel axis the Conv1D model expects.
    landmark_data1 = np.array(landmark_data1) / scale
    landmark_data1 = np.reshape(landmark_data1, (-1, 63, 1))
    labels1 = np.array(labels1)

    # Encode labels with the already-fitted encoder. num_classes pins the
    # one-hot width to the model's output size even if the test folder lacks
    # the highest-indexed class.
    labels_encoded1 = label_encoder.transform(labels1)
    labels_categorical1 = to_categorical(
        labels_encoded1, num_classes=len(label_encoder.classes_)
    )

    evaluation = model.evaluate(landmark_data1, labels_categorical1)

    return landmark_data1, labels_encoded1, evaluation


In [69]:
#evaluate_model("raw_data/test_set_pics")

I0000 00:00:1730370668.082739 3483198 gl_context.cc:357] GL version: 2.1 (2.1 Metal - 88), renderer: Apple M1 Pro
W0000 00:00:1730370668.133411 3657120 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1730370668.143360 3657120 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.


[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.7292 - loss: 3.1481 


[2.709430456161499, 0.7709923386573792]

In [93]:
# Unpack evaluate_model's return tuple: a = landmark array, b = integer-encoded
# labels, c = the value returned by model.evaluate.
a,b,c = evaluate_model("raw_data/test_set_pics")

Context leak detected, msgtracer returned -1
I0000 00:00:1730373657.356748 3483198 gl_context.cc:357] GL version: 2.1 (2.1 Metal - 88), renderer: Apple M1 Pro
W0000 00:00:1730373657.388402 3700820 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1730373657.405851 3700820 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.


[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.7591 - loss: 5.9417


In [85]:
# Shape of the landmark array, which evaluate_model reshapes to (-1, 63, 1).
a.shape

(131, 63, 1)

In [95]:
# One integer-encoded label per image in which a hand was detected.
b.shape

(131,)

In [94]:
# Value returned by model.evaluate on the test images
# (presumably [loss, accuracy], matching the progress-bar output -- confirm).
c

[5.064549922943115, 0.7786259651184082]