# Using Mediapipe to get hand Landmarks

In [5]:
import cv2
from tensorflow.keras.models import load_model
import numpy as np

In [6]:
def segment(image, threshold=25):
    global bg
    # find the absolute difference between background and current frame
    diff = cv2.absdiff(bg.astype("uint8"), image)

    # threshold the diff image so that we get the foreground
    thresholded = cv2.threshold(diff, threshold, 255, cv2.THRESH_BINARY)[1]

    # get the contours in the thresholded image
    (cnts, _) = cv2.findContours(thresholded.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

    # return None, if no contours detected
    if len(cnts) == 0:
        return
    else:
        # based on contour area, get the maximum contour which is the hand
        segmented = max(cnts, key=cv2.contourArea)
        return (thresholded, segmented)

In [7]:

def _load_weights():
    try:
        model = load_model("hand_gesture_recog_model.h5")
        print(model.summary())
        # print(model.get_weights())
        # print(model.optimizer)
        return model
    except Exception as e:
        return None


    
def getPredictedClass(model):

    image = cv2.imread('Temp.png')
    gray_image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    gray_image = cv2.resize(gray_image, (100, 120))

    gray_image = gray_image.reshape(1, 100, 120, 1)

    prediction = model.predict_on_batch(gray_image)

    predicted_class = np.argmax(prediction)
    if predicted_class == 0:
        return "Blank"
    elif predicted_class == 1:
        return "OK"
    elif predicted_class == 2:
        return "Thumbs Up"
    elif predicted_class == 3:
        return "Thumbs Down"
    elif predicted_class == 4:
        return "Punch"
    elif predicted_class == 5:
        return "High Five"


if __name__ == "__main__":
    # initialize accumulated weight
    accumWeight = 0.5

    # get the reference to the webcam
    camera = cv2.VideoCapture(0)

    fps = int(camera.get(cv2.CAP_PROP_FPS))
    # region of interest (ROI) coordinates
    top, right, bottom, left = 10, 350, 225, 590
    # initialize num of frames
    num_frames = 0
    # calibration indicator
    calibrated = False
    model = _load_weights()
    k = 0
    # keep looping, until interrupted
    while (True):
        # get the current frame
        (grabbed, frame) = camera.read()

        # resize the frame
        frame = cv2.resize(frame, (700,700))
        # flip the frame so that it is not the mirror view
        frame = cv2.flip(frame, 1)

        # clone the frame
        clone = frame.copy()

        # get the height and width of the frame
        (height, width) = frame.shape[:2]

        # get the ROI
        roi = frame[top:bottom, right:left]

        # convert the roi to grayscale and blur it
        gray = cv2.cvtColor(roi, cv2.COLOR_BGR2GRAY)
        gray = cv2.GaussianBlur(gray, (7, 7), 0)

        # to get the background, keep looking till a threshold is reached
        # so that our weighted average model gets calibrated
        if num_frames < 30:
            run_avg(gray, accumWeight)
            if num_frames == 1:
                print("[STATUS] please wait! calibrating...")
            elif num_frames == 29:
                print("[STATUS] calibration successfull...")
        else:
            # segment the hand region
            hand = segment(gray)

            # check whether hand region is segmented
            if hand is not None:
                # if yes, unpack the thresholded image and
                # segmented region
                (thresholded, segmented) = hand

                # draw the segmented region and display the frame
                cv2.drawContours(clone, [segmented + (right, top)], -1, (0, 0, 255))

                # count the number of fingers
                # fingers = count(thresholded, segmented)
                if k % (fps / 6) == 0:
                    cv2.imwrite('Temp.png', thresholded)
                    predictedClass = getPredictedClass(model)
                    cv2.putText(clone, str(predictedClass), (70, 45), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2)

                # show the thresholded image
                cv2.imshow("Thesholded", thresholded)
        k = k + 1
        # draw the segmented hand
        cv2.rectangle(clone, (left, top), (right, bottom), (0, 255, 0), 2)

        # increment the number of frames
        num_frames += 1

        # display the frame with segmented hand
        cv2.imshow("Video Feed", clone)

        # observe the keypress by the user
        keypress = cv2.waitKey(1) & 0xFF

        # if the user pressed "q", then stop looping
        if keypress == ord("q"):
            break

    # free up memory
    camera.release()
    cv2.destroyAllWindows()

NameError: name 'run_avg' is not defined

In [8]:
import cv2
import numpy as np
import time

# Create a VideoCapture object
cap = cv2.VideoCapture(0)

# Variables for background capturing
background = None
capturing_background = True
start_time = time.time()

while True:
    ret, frame = cap.read()
    if not ret:
        break

    # Convert the frame to grayscale
    gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
    gray = cv2.GaussianBlur(gray, (5, 5), 0)

    if capturing_background and time.time() - start_time < 5:
        # Capture the background in the first 5 seconds
        background = gray.copy()
        cv2.putText(frame, 'Capturing Background...', (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2)
    else:
        # Once background is captured, create a mask
        capturing_background = False
        diff = cv2.absdiff(background, gray)
        _, mask = cv2.threshold(diff, 25, 255, cv2.THRESH_BINARY)
        mask = cv2.erode(mask, None, iterations=2)
        mask = cv2.dilate(mask, None, iterations=2)

        # Show the mask
        cv2.imshow('Mask', mask)

        # Apply the mask to the frame
        #res = cv2.bitwise_and(frame, frame, mask=mask)

        # Display the resulting frame
        cv2.imshow('Frame', frame)

    # Check for the 'q' key to exit
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

# Release the VideoCapture object and close all windows
cap.release()
cv2.destroyAllWindows()


KeyboardInterrupt: 

: 

In [None]:
import cv2
import numpy as np
import time

# Create a VideoCapture object
cap = cv2.VideoCapture(0)

# Variables for background capturing
background = None
capturing_background = True
start_time = time.time()

while True:
    ret, frame = cap.read()
    if not ret:
        break

    # Convert the frame to grayscale
    gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
    gray = cv2.GaussianBlur(gray, (5, 5), 0)

    if capturing_background and time.time() - start_time < 5:
        # Capture the background in the first 5 seconds
        background = gray.copy()
        cv2.putText(frame, 'Capturing Background...', (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2)
    else:
        # Once background is captured, create a mask
        capturing_background = False
        diff = cv2.absdiff(background, gray)
        _, mask = cv2.threshold(diff, 25, 255, cv2.THRESH_BINARY)
        mask = cv2.erode(mask, None, iterations=2)
        mask = cv2.dilate(mask, None, iterations=2)

        # Find contours in the mask
        contours, _ = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

        for contour in contours:
            # Get the bounding box of the contour
            x, y, w, h = cv2.boundingRect(contour)

            # Draw a 512x512 rectangle around the contour if it's large enough
            if w > 256 and h > 256:
                cv2.rectangle(frame, (x, y), (x + w, y + h), (255, 0, 0), 2)

        # Display the mask
        cv2.imshow('Mask', mask)

        # Display the resulting frame
        cv2.imshow('Frame', frame)

    # Check for the 'q' key to exit
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

# Release the VideoCapture object and close all windows
cap.release()
cv2.destroyAllWindows()


In [None]:
import cv2
import numpy as np
import time

# Create a VideoCapture object
cap = cv2.VideoCapture(0)

# Variables for background capturing
background = None
capturing_background = True
start_time = time.time()

# Define the size of the fixed rectangle
rect_width, rect_height = 250, 250

while True:
    ret, frame = cap.read()
    if not ret:
        break

    # Get the dimensions of the frame
    height, width = frame.shape[:2]

    # Convert the frame to grayscale
    gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
    gray = cv2.GaussianBlur(gray, (5, 5), 0)

    if capturing_background and time.time() - start_time < 5:
        # Capture the background in the first 5 seconds
        background = gray.copy()
        cv2.putText(frame, 'Capturing Background...', (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2)
    else:
        # Once background is captured, create a mask
        capturing_background = False
        diff = cv2.absdiff(background, gray)
        _, mask = cv2.threshold(diff, 25, 255, cv2.THRESH_BINARY)
        mask = cv2.erode(mask, None, iterations=2)
        mask = cv2.dilate(mask, None, iterations=2)

        # Calculate the coordinates of the rectangle
        rect_x = int((width - rect_width) / 2)
        rect_y = int((height - rect_height) / 2)

        # Draw the fixed rectangle
        cv2.rectangle(frame, (rect_x, rect_y), (rect_x + rect_width, rect_y + rect_height), (255, 0, 0), 2)

        # Display the mask
        cv2.imshow('Mask', mask)

        # Display the resulting frame
        cv2.imshow('Frame', frame)

    # Check for the 'q' key to exit
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

# Release the VideoCapture object and close all windows
cap.release()
cv2.destroyAllWindows()


In [None]:
import mediapipe as mp
import cv2
import numpy as np
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten

In [None]:
camera.release()
cv2.destroyAllWindows()

In [2]:
import cv2
import numpy as np
from tensorflow.keras.models import load_model
import time

# Load the hand gesture recognition model
model = load_model("D:/sign_language_recognition/hand_gesture_recognition.h5")

# Create a VideoCapture object
cap = cv2.VideoCapture(0)

# Variables for background capturing
background = None
capturing_background = True
start_time = time.time()

# Define the size of the fixed rectangle
rect_width, rect_height = 250, 250

while True:
    ret, frame = cap.read()
    if not ret:
        break

    # Get the dimensions of the frame
    height, width = frame.shape[:2]

    # Convert the frame to grayscale
    gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
    gray = cv2.GaussianBlur(gray, (5, 5), 0)

    if capturing_background and time.time() - start_time < 5:
        # Capture the background in the first 5 seconds
        background = gray.copy()
        cv2.putText(frame, 'Capturing Background...', (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2)
    else:
        # Once background is captured, create a mask
        capturing_background = False
        diff = cv2.absdiff(background, gray)
        _, mask = cv2.threshold(diff, 25, 255, cv2.THRESH_BINARY)
        mask = cv2.erode(mask, None, iterations=2)
        mask = cv2.dilate(mask, None, iterations=2)

        # Extract the region of interest (ROI) from the frame
        roi = mask[rect_y:rect_y+rect_height, rect_x:rect_x+rect_width]

        # Preprocess the ROI for prediction
        roi_gray = cv2.cvtColor(roi, cv2.COLOR_BGR2GRAY)
        roi_resized = cv2.resize(roi_gray, (100, 120))
        roi_reshaped = roi_resized.reshape(1, 100, 120, 1) / 255.0

        # Make a prediction using the model
        prediction = model.predict(roi_reshaped)
        gesture = np.argmax(prediction)
        print(gesture)

        # Display the gesture label on the frame
        cv2.putText(frame, f'Gesture: {gesture}', (rect_x, rect_y - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)

        # Draw the fixed rectangle
        cv2.rectangle(frame, (rect_x, rect_y), (rect_x + rect_width, rect_y + rect_height), (255, 0, 0), 2)

        # Display the mask
        cv2.imshow('Mask', mask)

        # Display the resulting frame
        cv2.imshow('Frame', frame)

    # Check for the 'q' key to exit
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

# Release the VideoCapture object and close all windows
cap.release()
cv2.destroyAllWindows()


NameError: name 'rect_y' is not defined

In [3]:
import cv2
import numpy as np
from tensorflow.keras.models import load_model
import time

# Load the hand gesture recognition model
model = load_model("D:/sign_language_recognition/hand_gesture_recognition.h5")
image_path = "D:/sign_language_recognition/data/fist/fistq0.jpg"