In [2]:
import cv2
import os
import numpy as np
from keras.models import load_model

In [3]:
# Load the pre-trained CNN model
model = load_model('C:/Users/ASUS/Documents/Prodigy_Tasks/CNN model_final.keras')
print(model.summary())

None


In [4]:
# Map labels to gestures
gesture_mapping = {
    0: "01_palm", 1: "02_l", 2: "03_fist", 3: "04_fist_moved",
    4: "05_thumb", 5: "06_index", 6: "07_ok", 7: "08_palm_moved",
    8: "09_c", 9: "10_down"
}

# Camera Initialization
cap = cv2.VideoCapture(0)  # 0 --> Default camera

# Background subtractor using KNN to capture the gesture witin the frame
bg_subtractor = cv2.createBackgroundSubtractorKNN(history = 500, dist2Threshold = 400.0, detectShadows = False)

while True:
    # Frame-by-frame capture
    ret, frame = cap.read()

    # Convert frame to the HSV color space
    hsv = cv2.cvtColor(frame, cv2.COLOR_BGR2HSV)

    # Skin color in HSV range
    lower_skin = np.array([0, 20, 70], dtype = np.uint8)
    upper_skin = np.array([20, 255, 255], dtype = np.uint8)

    # Threshold the HSV image to capture the skin color
    mask_skin = cv2.inRange(hsv, lower_skin, upper_skin)

    # Applying background subtraction
    fg_mask = bg_subtractor.apply(frame)

    # Combining the skin mask with the background subtractor mask
    mask_combined = cv2.bitwise_and(mask_skin, mask_skin, mask = fg_mask)

    # Morphological operations to reduce noise
    kernel = np.ones((5, 5), np.uint8)
    mask_combined = cv2.morphologyEx(mask_combined, cv2.MORPH_OPEN, kernel)
    mask_combined = cv2.morphologyEx(mask_combined, cv2.MORPH_CLOSE, kernel)

    # Apply combined mask to the original frame
    segmented_hand = cv2.bitwise_and(frame, frame, mask = mask_combined)

    # Convert the segmented frame to grayscale
    gray = cv2.cvtColor(segmented_hand, cv2.COLOR_BGR2GRAY)

    # Resize the segmented frame to match the model's input size
    resized_frame = cv2.resize(gray, (150, 150))

    # Reshape the frame to match the input shape of the model
    input_data_arr = np.array(resized_frame)
    input_data = input_data_arr.reshape((1, 150, 150, 1))  # Ensure single channel

    # Model predictions
    prediction = model.predict(input_data)
    predicted_label = np.argmax(prediction)

    print("Raw Prediction:", prediction)

    # Map to the gesture
    predicted_gesture = gesture_mapping[predicted_label]

    # Display frame with the predicted gesture
    cv2.putText(frame, f"Predicted Gesture: {predicted_gesture}", (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
    cv2.imshow('Hand Gesture Recognition', frame)
    print("Predicted Probabilities:", prediction)

    # Break the loop when 'q' key is pressed
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break



[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 104ms/step
Raw Prediction: [[2.1890631e-02 9.7644264e-01 1.8498450e-07 3.9072917e-04 6.8292064e-07
  3.4129882e-06 4.8613646e-10 1.1320933e-03 1.3961997e-04 1.6049574e-08]]
Predicted Probabilities: [[2.1890631e-02 9.7644264e-01 1.8498450e-07 3.9072917e-04 6.8292064e-07
  3.4129882e-06 4.8613646e-10 1.1320933e-03 1.3961997e-04 1.6049574e-08]]
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 32ms/step
Raw Prediction: [[2.4982749e-02 9.7373337e-01 6.1436310e-07 6.3667871e-04 8.0224510e-08
  2.8805839e-06 4.8548809e-10 5.7730917e-04 6.6271496e-05 2.9711364e-08]]
Predicted Probabilities: [[2.4982749e-02 9.7373337e-01 6.1436310e-07 6.3667871e-04 8.0224510e-08
  2.8805839e-06 4.8548809e-10 5.7730917e-04 6.6271496e-05 2.9711364e-08]]
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 44ms/step
Raw Prediction: [[2.8089264e-01 7.1426564e-01 4.0898814e-07 2.9812299e-03 2.1214304e-07
  2.6567939e-06 4.3608642e-10

In [11]:
# Release camera and close all windows
cap.release()
cv2.destroyAllWindows()