In [1]:
import cv2
from cvzone.HandTrackingModule import HandDetector
from cvzone.ClassificationModule import Classifier
import numpy as np
import math

# Initialize video capture (device 0)
cap = cv2.VideoCapture(0)
if not cap.isOpened():
    # Raise instead of calling exit(): inside a notebook exit() shuts the
    # kernel down, whereas an exception only stops this cell and leaves the
    # session usable.
    cap.release()
    raise RuntimeError("Error: Unable to open camera.")

# Initialize hand detector and classifier (pre-trained model).
# NOTE(review): these are absolute, machine-specific paths; point them at the
# local copy of the model before running on another machine.
MODEL_PATH = "C:/Users/RAVI/Downloads/Sign language Project/Model/keras_model.h5"
LABELS_PATH = "C:/Users/RAVI/Downloads/Sign language Project/Model/labels.txt"
detector = HandDetector(maxHands=1)
classifier = Classifier(MODEL_PATH, LABELS_PATH)

# Constants
offset = 20    # padding (pixels) added on every side of the detected hand box
imgSize = 300  # side length of the square white canvas fed to the classifier
# Display names, indexed by the class index returned by the classifier.
# NOTE(review): presumably this must match the order in labels.txt - confirm.
labels = ["Hello", "iloveyou", "Okay", "Yes", "No", "Thankyou"]

# Main capture -> detect -> classify -> display loop.
# Runs until the camera stops delivering frames or "q" is pressed in one of
# the OpenCV windows. The try/finally guarantees the camera and the windows
# are released even if an exception (or a kernel interrupt) ends the loop.
try:
    while True:
        # Capture frame
        ret, img = cap.read()
        if not ret:
            print("Error: Failed to capture frame.")
            break

        # Keep a copy taken before findHands() for the annotated output
        # (presumably findHands draws its own overlay on the frame it returns).
        imgOutput = img.copy()

        # Find hands in the frame
        hands, img = detector.findHands(img)
        if hands:
            hand = hands[0]
            x, y, w, h = hand['bbox']

            # Clamp the padded box to the frame. A negative slice start would
            # be interpreted by NumPy as "from the end" and yield an empty or
            # wrong crop when the hand is near the top/left edge.
            frameH, frameW = img.shape[:2]
            x1 = max(0, x - offset)
            y1 = max(0, y - offset)
            x2 = min(frameW, x + w + offset)
            y2 = min(frameH, y + h + offset)
            imgCrop = img[y1:y2, x1:x2]

            if imgCrop.size == 0 or w <= 0 or h <= 0:
                # Skip classification for this frame only; fall through so the
                # main window is still refreshed and the exit key still polled.
                print("Warning: Empty cropped image.")
            else:
                # White square canvas the crop is centred on
                imgWhite = np.ones((imgSize, imgSize, 3), np.uint8) * 255

                aspectRatio = h / w

                if aspectRatio > 1:
                    # Taller than wide: fill the height, centre horizontally.
                    k = imgSize / h
                    # min() guards against float rounding pushing past imgSize
                    wCal = min(imgSize, math.ceil(k * w))
                    imgResize = cv2.resize(imgCrop, (wCal, imgSize))
                    wGap = math.ceil((imgSize - wCal) / 2)
                    imgWhite[:, wGap:wCal + wGap] = imgResize
                else:
                    # Wider than tall (or square): fill the width, centre vertically.
                    k = imgSize / w
                    hCal = min(imgSize, math.ceil(k * h))
                    imgResize = cv2.resize(imgCrop, (imgSize, hCal))
                    hGap = math.ceil((imgSize - hCal) / 2)
                    imgWhite[hGap:hCal + hGap, :] = imgResize

                # Get prediction from the classifier
                prediction, index = classifier.getPrediction(imgWhite, draw=False)
                print(prediction, index)

                # Filled label banner above the hand, the label text, then the box
                cv2.rectangle(imgOutput, (x - offset, y - offset - 70),
                              (x - offset + 400, y - offset + 60 - 50), (0, 255, 0),
                              cv2.FILLED)
                cv2.putText(imgOutput, labels[index], (x, y - 30),
                            cv2.FONT_HERSHEY_COMPLEX, 2, (0, 0, 0), 2)
                cv2.rectangle(imgOutput, (x - offset, y - offset),
                              (x + w + offset, y + h + offset), (0, 255, 0), 4)

                # Display cropped and resized images
                cv2.imshow('ImageCrop', imgCrop)
                cv2.imshow('ImageWhite', imgWhite)

        # Display original image with annotations
        cv2.imshow('Image', imgOutput)

        # Check for exit key; mask to the low byte because some platforms
        # return extra modifier bits from waitKey().
        key = cv2.waitKey(1)
        if key & 0xFF == ord("q"):
            break
finally:
    # Release the camera and close OpenCV windows
    cap.release()
    cv2.destroyAllWindows()

[0.0006650722, 0.009682291, 0.92176926, 0.0008103709, 0.047190994, 0.019882023] 2
[0.0007194752, 0.44635797, 0.46850735, 0.01864517, 0.050002027, 0.015768077] 2
[0.003931088, 0.5416661, 0.38465422, 0.0329595, 0.018909069, 0.017880112] 1
[0.0021860811, 0.4604231, 0.46714428, 0.049942423, 0.007245429, 0.013058692] 2
[0.0005381413, 0.18004508, 0.7590178, 0.030710164, 0.0115731815, 0.018115694] 2
[0.0023120674, 0.76488197, 0.21445483, 0.007602436, 0.0048721386, 0.0058765956] 1
[0.001404506, 0.8571792, 0.13008112, 0.008523323, 0.001608934, 0.0012028354] 1
[0.00043221214, 0.34789613, 0.6427619, 0.0068880133, 0.0007054519, 0.0013163617] 2
[0.004601642, 0.0018977403, 3.994351e-06, 0.019897094, 0.10347575, 0.8701238] 5
[0.007291083, 0.0030547378, 0.8586397, 0.09605674, 0.011371174, 0.02358662] 2
[0.044084903, 0.002098694, 0.9145021, 0.023242338, 0.00074271136, 0.01532926] 2
[0.018398177, 0.010521245, 0.95323414, 0.0147537105, 0.00049139396, 0.0026012717] 2
[0.012152, 0.0023111543, 0.9770736, 0.

# Model

In [4]:
# keras_model.h5

# Architecture

In [7]:
# Convolutional Neural Network (CNN)

# Preprocessing

In [8]:
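# Specifically, the HandDetector module is responsible for finding hands in the frame and returning each hand's bounding box (cvzone's HandDetector is built on MediaPipe Hands, which works on the RGB frame rather than on a grayscale or filtered image). The script then preprocesses the detection itself: it crops the hand region with a 20-pixel margin, resizes it while preserving the aspect ratio, and centres it on a 300x300 white canvas before passing it to the classifier.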

In [None]:
# The first script collects and saves hand-gesture images.
# The second script detects hand gestures in real time, classifies them with a pre-trained model, and displays the results.
# Both scripts use OpenCV for image capture and display, and cvzone for hand detection and gesture classification.
# Classifier: Classifier() loads the pre-trained model and its corresponding labels for gesture classification.