1. collecting keypoints for training and testing 
2. preprocessing data 
3. build a model and train
4. make predictions 
5. evaluation using confusion matrix and accuracy
6. test in real-time 
7. Tuning 

In [2]:
import mediapipe as mp
import cv2
import numpy as np
import os
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score 
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.preprocessing.image import img_to_array
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Flatten, Conv2D, MaxPooling2D, BatchNormalization, Activation
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.models import load_model



In [2]:

mp_drawing = mp.solutions.drawing_utils
mp_hands = mp.solutions.hands
hands = mp_hands.Hands(min_detection_confidence=0.5, min_tracking_confidence=0.5, max_num_hands=2)


I0000 00:00:1701050992.748238       1 gl_context.cc:344] GL version: 2.1 (2.1 ATI-5.1.35), renderer: AMD Radeon Pro 555X OpenGL Engine


INFO: Created TensorFlow Lite XNNPACK delegate for CPU.


In [3]:

alphabet = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"

data_path = "/Users/reagan/desktop/AI/AI_ASL/"

In [4]:


cap = cv2.VideoCapture(0)
cap.set(cv2.CAP_PROP_FRAME_WIDTH, 450)
cap.set(cv2.CAP_PROP_FRAME_HEIGHT, 450)

index = 0

while index < len(alphabet):
    letter = alphabet[index]
    letter_path = os.path.join(data_path, letter)
    os.makedirs(letter_path, exist_ok=True)
    print(letter, letter_path)

    count = 0 

    while True:
        ret, frame = cap.read()
        rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        results = hands.process(rgb_frame)
        print(results.multi_hand_landmarks)

        if results.multi_hand_landmarks:
            for hand_landmarks in results.multi_hand_landmarks:
                mp_drawing.draw_landmarks(frame, hand_landmarks, mp_hands.HAND_CONNECTIONS,
                                          mp_drawing.DrawingSpec(color=(0, 117, 128), thickness=2, circle_radius=4),
                                          mp_drawing.DrawingSpec(color=(53, 101, 77), thickness=2, circle_radius=2)
                                          )

        cv2.imshow('frame', frame)

        key = cv2.waitKey(1)

        if key == ord('q'):
            break
        elif key == ord(' '):
            count = 0
            break  

        elif key == ord('c'):
            image_name = letter + str(count) + '.png'
            image_path = os.path.join(letter_path, image_name)
            cv2.imwrite(image_path, frame)
            print('image_name: ', image_name)
            count += 1

    index += 1

cap.release()
cv2.destroyAllWindows()




A /Users/reagan/desktop/AI/AI_ASL/A
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
B /Users/reagan/desktop/AI/AI_ASL/B
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
C /Users/reagan/desktop/AI/AI_ASL/C
None
None
D /Users/reagan/desktop/AI/AI_ASL/D
None
None
E /Users/reagan/desktop/AI/AI_ASL/E
None
None
F /Users/reagan/desktop/AI/AI_ASL/F
None
None
None
G /Users/reagan/desktop/AI/AI_ASL/G
None
Non

In [44]:
data_path = "/Users/reagan/desktop/AI/archive/ASL_alphabet_train/ASL_alphabet_train/"

In [45]:
from sklearn.preprocessing import LabelEncoder

images = [] 
labels = []

label_encoder = LabelEncoder()

for letter in alphabet:
    letter_path = os.path.join(data_path, letter) #path to each letter
    for file_name in os.listdir(letter_path):
        image_path = os.path.join(letter_path, file_name) #path to each image 
        image = cv2.imread(image_path) #read image
        
        if image is not None:  # check if the image is not empty
            image = cv2.resize(image, (64, 64))  # resize image to 64x64    
            image = image.astype(float) / 255.0  # normalize image
            image = img_to_array(image)          # convert image to numpy array
            images.append(image)
            labels.append(letter)

images = np.array(images) 
labels = np.array(labels)

# Encode the labels
labels = label_encoder.fit_transform(labels)
labels = to_categorical(labels, num_classes=len(alphabet))

print(images.shape)
print(labels.shape)

X_train, X_test, y_train, y_test = train_test_split(images, labels, test_size=0.2, random_state=42)

print(X_train.shape)
print(X_test.shape)
print(y_train.shape)
print(y_test.shape)


print(y_train.shape)
print (y_test.shape)


(78000, 64, 64, 3)
(78000, 26)
(62400, 64, 64, 3)
(15600, 64, 64, 3)
(62400, 26)
(15600, 26)
(62400, 26)
(15600, 26)


In [47]:
model = Sequential()

model.add(Conv2D(32, (3, 3), padding='same', input_shape=(64, 64, 3)))  
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Conv2D(64, (3, 3), activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Flatten())

model.add(Dense(128, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(len(alphabet), activation='softmax'))

model.compile(loss='categorical_crossentropy', optimizer=Adam(), metrics=['accuracy'])

model.fit(X_train, y_train, batch_size=32, epochs=10, validation_data=(X_test, y_test))
accuracy = model.evaluate(X_test, y_test)[1]
print('Accuracy:', accuracy)

model.save('asl_model.h20')


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Accuracy: 0.9964102506637573
INFO:tensorflow:Assets written to: asl_model.h20/assets


INFO:tensorflow:Assets written to: asl_model.h20/assets


In [66]:


model = load_model('asl_model.h20')


def preprocess_image(image_path):
    image = cv2.imread(image_path)
    if image is not None:
        image = cv2.resize(image, (64, 64))
        image = image.astype(float) / 255.0
        image = img_to_array(image)
        image = np.expand_dims(image, axis=0)  
        return image
    else:
        return None


new_image_path = '/Users/reagan/desktop/ai/archive/asl_alphabet_test/asl_alphabet_test/H/H_test.jpg'

# Preprocess the image
new_image = preprocess_image(new_image_path)

if new_image is not None:
    
    predictions = model.predict(new_image)
    
    
    predicted_class = np.argmax(predictions)

    
    predicted_letter = label_encoder.inverse_transform([predicted_class])[0]

    print(f"Predicted Class: {predicted_letter}")
else:
    print("Error: Unable to read the new image.")


Predicted Class: H


In [64]:
model  = load_model('asl_model.h20')

cap = cv2.VideoCapture(0)
cap.set(cv2.CAP_PROP_FRAME_WIDTH, 450)
cap.set(cv2.CAP_PROP_FRAME_HEIGHT, 450)

while True:
    ret, frame = cap.read()
    rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    results = hands.process(rgb_frame)
    landmarks = []
    
    if results.multi_hand_landmarks:
        for hand_landmarks in results.multi_hand_landmarks:
            mp_drawing.draw_landmarks(frame, hand_landmarks, mp_hands.HAND_CONNECTIONS,
                                      mp_drawing.DrawingSpec(color=(0, 117, 128), thickness=2, circle_radius=4),
                                      mp_drawing.DrawingSpec(color=(53, 101, 77), thickness=2, circle_radius=2)
                                      )
    
    preprocess_frame = cv2.resize(frame, (64, 64))
    preprocess_frame = preprocess_frame.astype(float) / 255.0
    preprocess_frame = np.expand_dims(preprocess_frame, axis=0)

    predictions = model.predict(preprocess_frame)
    predicted_class = np.argmax(predictions)

    cv2.putText(frame, alphabet[predicted_class], (50, 50), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2, cv2.LINE_AA)

    cv2.imshow('frame', frame)

    key = cv2.waitKey(1)

    if key == ord('q'):
        break
    elif key == ord(' '):
        break
    elif key == ord('c'):
        image_name = 'test.png'
        image_path = os.path.join(data_path, image_name)
        cv2.imwrite(image_path, frame)
        print('image_name: ', image_name)
        break

cap.release()
cv2.destroyAllWindows()

    




KeyboardInterrupt: 