In [1]:
import cv2
import mediapipe as mp
import time
import numpy as np
from tensorflow.keras.models import load_model
def preprocess_hand_region(hand_region):
    """Turn a cropped hand image into a single-item batch for the classifier.

    The crop is resized to 28x28, converted from BGR to grayscale unless it
    already has a single channel, scaled by 1/255, and reshaped.

    Args:
        hand_region: image array, either 2-D (H, W), or 3-D with the channel
            count in the last axis. Multi-channel input is treated as BGR.

    Returns:
        Array of shape (1, 28, 28, 1) holding the scaled grayscale image.
    """
    # Decide up front whether a colour conversion is needed.
    dims = hand_region.shape
    single_channel = len(dims) == 2 or dims[2] == 1

    small = cv2.resize(hand_region, (28, 28))
    gray = small if single_channel else cv2.cvtColor(small, cv2.COLOR_BGR2GRAY)

    # Divide by 255 (assumes pixel values are in [0, 255]).
    scaled = gray / 255.0

    # Append the channel axis, then prepend the batch axis.
    with_channel = scaled.reshape(28, 28, 1)
    return with_channel[np.newaxis, ...]




In [3]:
# Live ASL gesture recognition from the default webcam.
# Each frame: track one hand with MediaPipe, crop a padded box around its
# landmarks, classify the crop with the Keras model, and overlay the result.
# Press "q" or Esc in the preview window (or interrupt the kernel) to stop;
# the camera, the MediaPipe graph and the window are released either way.

BOX_SCALE = 1.3               # enlarge the landmark bounding box by this factor
QUIT_KEYS = (ord('q'), 27)    # 27 == Esc
FRAME_DELAY_S = 0.1           # pause between frames

# Load the model before opening the camera so that a failed load does not
# leave the capture device held open.
ASLModel = load_model('ASLModelV1.h5')

mpHands = mp.solutions.hands
mpDraw = mp.solutions.drawing_utils
hands = mpHands.Hands(static_image_mode=False,
                      max_num_hands=1,
                      min_detection_confidence=0.5,
                      min_tracking_confidence=0.5)
cap = cv2.VideoCapture(0)

pTime = 0
cTime = 0

try:
    if not cap.isOpened():
        # Without this check cap.read() would fail forever and the loop
        # below would spin printing the same message.
        raise RuntimeError("Could not open camera 0")

    while True:
        success, img = cap.read()
        if not success:
            print("empty camera frame!!!!!")
            continue

        imgRGB = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        results = hands.process(imgRGB)

        if results.multi_hand_landmarks:
            h, w, c = img.shape

            # Landmarks are normalised; convert them to pixel coordinates.
            landmarks = results.multi_hand_landmarks[0].landmark
            xs = [int(lm.x * w) for lm in landmarks]
            ys = [int(lm.y * h) for lm in landmarks]
            min_x, max_x = min(xs), max(xs)
            min_y, max_y = min(ys), max(ys)

            # Grow the tight box around its centre by BOX_SCALE.
            center_x, center_y = (min_x + max_x) // 2, (min_y + max_y) // 2
            width, height = BOX_SCALE * (max_x - min_x), BOX_SCALE * (max_y - min_y)

            # Clamp to the frame: a negative start index would wrap around in
            # NumPy slicing and produce an empty or wrong crop when the hand
            # is close to an edge.
            new_min_x = max(0, int(center_x - width / 2))
            new_min_y = max(0, int(center_y - height / 2))
            new_max_x = min(w, int(center_x + width / 2))
            new_max_y = min(h, int(center_y + height / 2))

            gesture_name = None
            # After clamping, a strictly positive extent guarantees a
            # non-empty crop.
            if new_min_x < new_max_x and new_min_y < new_max_y:
                hand_region = img[new_min_y:new_max_y, new_min_x:new_max_x]

                # Preprocess and predict BEFORE drawing anything on img:
                # hand_region is a view of img, so overlays drawn first
                # would end up in the model input.
                preprocessed_hand_region = preprocess_hand_region(hand_region)

                # verbose=0 suppresses the per-call progress output that
                # would otherwise be emitted on every frame.
                asl_prediction = ASLModel.predict(preprocessed_hand_region, verbose=0)

                # Display the index of the highest-scoring class.
                gesture_name = "Detected Gesture: " + str(np.argmax(asl_prediction))

            cv2.rectangle(img, (new_min_x, new_min_y), (new_max_x, new_max_y), (255, 255, 25), 2)
            if gesture_name is not None:
                cv2.putText(img, gesture_name, (10, 130), cv2.FONT_HERSHEY_PLAIN, 2, (100, 25, 220), 2)

        cTime = time.time()
        elapsed = cTime - pTime
        # Guard against a zero interval on coarse clocks.
        fps = 1 / elapsed if elapsed > 0 else 0
        pTime = cTime
        cv2.putText(img, str(int(fps)), (10, 70), cv2.FONT_HERSHEY_PLAIN, 3, (255, 0, 255), 3)
        cv2.imshow("Image", img)

        # waitKey pumps the GUI event loop and returns the pressed key, if any.
        if cv2.waitKey(1) & 0xFF in QUIT_KEYS:
            break
        time.sleep(FRAME_DELAY_S)
finally:
    # Runs on normal exit, on errors and on KeyboardInterrupt alike.
    cap.release()
    hands.close()
    cv2.destroyAllWindows()



KeyboardInterrupt: 

: 