In [2]:
import cv2
import mediapipe as mp
import numpy as np
import tensorflow as tf
import skimage



In [3]:
# Load the saved Keras model from the HDF5 file in the notebook's working
# directory. Every later cell uses this module-level `model` for prediction.
# Per the summary printed below, the first Conv2D has 2432 parameters and a
# 60x60x32 output, which corresponds to a 5x5 kernel over a 64x64x3 input.
model = tf.keras.models.load_model('./ASL.h5')
# Print the layer-by-layer architecture as a sanity check that the file loaded.
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d (Conv2D)             (None, 60, 60, 32)        2432      
                                                                 
 activation (Activation)     (None, 60, 60, 32)        0         
                                                                 
 max_pooling2d (MaxPooling2  (None, 30, 30, 32)        0         
 D)                                                              
                                                                 
 conv2d_1 (Conv2D)           (None, 28, 28, 64)        18496     
                                                                 
 activation_1 (Activation)   (None, 28, 28, 64)        0         
                                                                 
 max_pooling2d_1 (MaxPoolin  (None, 14, 14, 64)        0         
 g2D)                                                   

In [4]:
# Lookup table from class index to label: indices 0-25 are the letters
# A-Z in order, followed by the three non-letter classes.
labels_dict = {idx: chr(ord('A') + idx) for idx in range(26)}
labels_dict.update({26: 'del', 27: 'nothing', 28: 'space'})

# Shared sizing constants: square RGB images of side `imageSize`.
batch_size = 64
imageSize = 64
target_dims = (imageSize,) * 2 + (3,)
num_classes = 29

def imagePredTest(imageFile):
    """Classify a single image file and print the predicted label.

    The image is read with OpenCV, resized to
    ``(imageSize, imageSize, 3)`` with ``skimage.transform.resize``,
    wrapped into a batch of one, and passed to the module-level ``model``.
    The index of the highest score is looked up in ``labels_dict``.

    Args:
        imageFile: Path of the image to classify.

    Raises:
        FileNotFoundError: If OpenCV cannot read ``imageFile``.
    """
    img_file = cv2.imread(imageFile)

    # cv2.imread signals failure by returning None instead of raising, so
    # fail loudly here rather than hitting an unbound `img_arr` further down.
    if img_file is None:
        raise FileNotFoundError(f"Could not read image file: {imageFile!r}")

    img_file = skimage.transform.resize(img_file, (imageSize, imageSize, 3))
    # Leading -1 adds the batch axis that model.predict expects.
    img_arr = np.asarray(img_file).reshape((-1, imageSize, imageSize, 3))

    pred = model.predict(img_arr)
    # Cast to a plain int so the dictionary lookup does not depend on the
    # NumPy integer type returned by argmax.
    y = int(np.argmax(pred))
    character = labels_dict[y]
    print("Pred:", character)


# imagePredTest(imageFile='./asl_alphabet_random/C.jpeg')

In [10]:
# Define constants
batch_size = 1  # We process one frame at a time
image_size = 64
target_dims = (image_size, image_size, 3)
num_classes = 29

# Initialize Mediapipe Hand module
mp_hands = mp.solutions.hands
hands = mp_hands.Hands(max_num_hands=1)

# Open the camera
cap = cv2.VideoCapture(0)  # 0 for the default camera

# Real-time loop: grab a frame, locate the hand, classify the cropped hand
# region with `model`, and overlay the bounding box and predicted label.
# Press 'q' in the preview window to stop.
try:
    if not cap.isOpened():
        print("Could not open the default camera (index 0)")

    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break

        frame_h, frame_w = frame.shape[:2]

        # Mediapipe is given RGB input; OpenCV delivers frames as BGR.
        results = hands.process(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))

        # multi_hand_landmarks is None when no hand is detected.
        for hand_landmarks in results.multi_hand_landmarks or []:
            xs = [landmark.x for landmark in hand_landmarks.landmark]
            ys = [landmark.y for landmark in hand_landmarks.landmark]

            # Landmarks are normalized to the frame size and can fall outside
            # the visible frame, so clamp the pixel box to the frame. An
            # unclamped negative index would wrap around in the slice below.
            x0 = min(max(int(min(xs) * frame_w), 0), frame_w)
            x1 = min(max(int(max(xs) * frame_w), 0), frame_w)
            y0 = min(max(int(min(ys) * frame_h), 0), frame_h)
            y1 = min(max(int(max(ys) * frame_h), 0), frame_h)

            # A degenerate box yields an empty crop, which cv2.resize rejects.
            if x1 <= x0 or y1 <= y0:
                continue

            # Crop and preprocess BEFORE drawing any overlay so the green
            # rectangle never ends up in the pixels fed to the model.
            hand_roi = frame[y0:y1, x0:x1]
            hand_roi = cv2.resize(hand_roi, target_dims[:2])
            # skimage's resize also converts the uint8 crop to floats.
            hand_roi = skimage.transform.resize(hand_roi, target_dims)
            hand_arr = np.asarray(hand_roi).reshape((-1, *target_dims))

            # Predict using the model; verbose=0 avoids printing a progress
            # bar for every single frame.
            predictions = model.predict(hand_arr, verbose=0)
            predicted_label = labels_dict.get(int(np.argmax(predictions)), "?")

            # Draw bounding box and the predicted label
            cv2.rectangle(frame, (x0, y0), (x1, y1), (0, 255, 0), 2)
            cv2.putText(frame, predicted_label, (x0, y0),
                        cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)

        # Display the frame
        cv2.imshow('Real-time Gesture Recognition', frame)

        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

except Exception as e:
    print(e)

finally:
    # Always release the camera and close the window, including when the
    # kernel is interrupted (KeyboardInterrupt is not an Exception subclass).
    cap.release()
    cv2.destroyAllWindows()
    hands.close()


