In [1]:
import cv2
import numpy as np
import os
import time
import mediapipe as mp

# Language code; not referenced anywhere in the cells visible here.
# NOTE(review): 'id' is presumably the ISO 639-1 code for Indonesian — confirm.
language = 'id'

# Short aliases for the MediaPipe hand-tracking solution and its drawing helpers.
mp_hands = mp.solutions.hands
mp_drawing = mp.solutions.drawing_utils

# Capture resolution in pixels (width x height) requested from the camera.
cam_width = 640
cam_height = 480

def mediapipe_detection(image, model):
    """Run ``model`` on a BGR frame and return the frame with the detection results.

    Args:
        image: Frame in BGR channel order (as delivered by OpenCV).
        model: Object exposing ``process(rgb_image)``.

    Returns:
        Tuple ``(bgr_image, results)``: the frame converted to RGB and back to
        BGR, and whatever ``model.process`` returned.
    """
    rgb_frame = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

    # The array is read-only for the duration of the process() call.
    # NOTE(review): MediaPipe's sample code does this as a performance hint — confirm.
    rgb_frame.flags.writeable = False
    results = model.process(rgb_frame)
    rgb_frame.flags.writeable = True

    return cv2.cvtColor(rgb_frame, cv2.COLOR_RGB2BGR), results

def draw_styled_landmarks(image, results):
    """Draw each detected hand's landmarks and connections onto ``image``.

    Hands whose handedness classification index is 0 use one colour scheme,
    every other hand uses a second one. When ``results`` contains no hands
    nothing is drawn. The function returns ``None``.
    """
    detected_hands = results.multi_hand_landmarks
    if not detected_hands:
        return

    # (landmark colour, connection colour), keyed by "handedness index is 0".
    palette = {
        True: ((121, 22, 76), (121, 44, 250)),
        False: ((245, 117, 66), (245, 66, 230)),
    }

    for position, hand in enumerate(detected_hands):
        handedness_index = results.multi_handedness[position].classification[0].index
        point_colour, line_colour = palette[handedness_index == 0]
        mp_drawing.draw_landmarks(
            image,
            hand,
            mp_hands.HAND_CONNECTIONS,
            mp_drawing.DrawingSpec(color=point_colour, thickness=2, circle_radius=4),
            mp_drawing.DrawingSpec(color=line_colour, thickness=2, circle_radius=2),
        )
                
def draw_squares(image, results, name, acc):
    """Draw one labelled bounding box enclosing all detected hands.

    The box is the padded extent of every landmark of every hand in
    ``results``. A filled label strip showing ``"<name>: <acc>"`` (``acc``
    rounded to two decimals) is drawn directly above the box.

    Args:
        image: Frame to draw on; its ``shape`` supplies the pixel dimensions
            used to scale the normalized landmark coordinates.
        results: Detection results exposing ``multi_hand_landmarks``.
        name: Label text shown before the score.
        acc: Score shown after the label.

    Returns:
        None. If ``results`` holds no landmarks, nothing is drawn.
    """
    if not results.multi_hand_landmarks:
        # Without this guard the "no hands" case would draw a bogus rectangle
        # derived from placeholder extremes.
        return

    landmarks = [
        landmark
        for hand_landmarks in results.multi_hand_landmarks
        for landmark in hand_landmarks.landmark
    ]
    if not landmarks:
        return

    xs = [landmark.x for landmark in landmarks]
    ys = [landmark.y for landmark in landmarks]

    # Landmark x/y are normalized by the width/height of the processed image,
    # so scale by the actual frame size rather than the requested capture
    # resolution (cameras do not always honour the requested size).
    frame_height, frame_width = image.shape[:2]

    padding = 20       # extra pixels around the landmark extent
    label_height = 30  # height of the filled strip that holds the caption
    thickness = 3
    green = (0, 255, 0)

    min_x = round(min(xs) * frame_width) - padding
    min_y = round(min(ys) * frame_height) - padding
    max_x = round(max(xs) * frame_width) + padding
    max_y = round(max(ys) * frame_height) + padding

    # Label strip: widened by the outline thickness so its edges line up with
    # the outer edge of the box outline.
    cv2.rectangle(image, (min_x - thickness + 1, min_y - label_height), (max_x + thickness - 1, min_y), green, -1)
    cv2.rectangle(image, (min_x, min_y), (max_x, max_y), green, thickness)
    cv2.putText(image, name + ": " + str(round(acc, 2)), (min_x + 8, min_y - 6), cv2.FONT_HERSHEY_SIMPLEX, 0.8, (0, 0, 0), 2, cv2.LINE_AA)
    
def extract_keypoints(results):
    """Flatten detected hand landmarks into one feature vector.

    The vector is the concatenation of two slots. Slot 0 holds the
    ``[x, y, z]`` values of the hand whose handedness classification index is
    0; slot 1 holds those of any other hand. A slot with no matching hand
    stays filled with ``21 * 3`` zeros. If several hands map to the same slot,
    the last one wins.

    Returns:
        Tuple ``(keypoints, visible)`` where ``visible`` is ``True`` when at
        least one hand was detected and ``False`` otherwise.
    """
    slots = [np.zeros(21*3), np.zeros(21*3)]
    detected_hands = results.multi_hand_landmarks

    if not detected_hands:
        return np.concatenate(slots), False

    for position, hand in enumerate(detected_hands):
        handedness_index = results.multi_handedness[position].classification[0].index
        slot = 0 if handedness_index == 0 else 1
        slots[slot] = np.array([[point.x, point.y, point.z] for point in hand.landmark]).flatten()

    return np.concatenate(slots), True

# Every entry of the dataset directory is taken as one action label.
# NOTE(review): os.listdir returns entries in arbitrary order and includes
# non-directory entries; the label order here must match the order used when
# the weights were trained — confirm.
DATA_PATH = 'Custom Dataset'
actions = np.array(os.listdir(DATA_PATH))

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense

# Stacked-LSTM classifier over a (1, 126) input with one softmax output per
# action. The layer stack has to mirror the one the weights file was saved from.
model = Sequential([
    LSTM(64, return_sequences=True, activation='relu', input_shape=(1, 126)),
    LSTM(128, return_sequences=True, activation='relu'),
    LSTM(64, return_sequences=False, activation='relu'),
    Dense(64, activation='relu'),
    Dense(32, activation='relu'),
    Dense(actions.shape[0], activation='softmax'),
])

model.load_weights('Custom.h5')

def showText(image, sentence):
    """Draw ``sentence`` as a caption inside a filled banner on ``image``.

    The banner covers the rectangle from (0, 440) to (640, 480). The items of
    ``sentence`` are joined with single spaces and rendered with the text
    origin at (3, 470).
    """
    banner_top_left = (0, 440)
    banner_bottom_right = (640, 480)
    banner_colour = (245, 117, 16)
    text_colour = (255, 255, 255)

    cv2.rectangle(image, banner_top_left, banner_bottom_right, banner_colour, -1)

    caption = ' '.join(sentence)
    cv2.putText(image, caption, (3, 470), cv2.FONT_HERSHEY_SIMPLEX, 1, text_colour, 2, cv2.LINE_AA)

In [2]:
# Live inference loop: grab webcam frames, detect hands, classify the gesture
# and show the annotated frame until 'q' is pressed or the camera stops.
cap = cv2.VideoCapture(0)

cap.set(cv2.CAP_PROP_FRAME_WIDTH, cam_width)
cap.set(cv2.CAP_PROP_FRAME_HEIGHT, cam_height)

try:
    with mp_hands.Hands(min_detection_confidence=0.5, min_tracking_confidence=0.5) as hands:
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                # No frame was delivered (camera unplugged, stream ended);
                # frame is not usable, so stop instead of crashing in cvtColor.
                break

            image, results = mediapipe_detection(frame, hands)

            keypoints, visible = extract_keypoints(results)

            if visible:
                draw_styled_landmarks(image, results)
                # Model input is (batch, 1, 126): one time step of 126 features.
                keypoints = keypoints.reshape(1, keypoints.shape[0])

                # Timestamps are kept so the prediction latency of the last
                # frame can be inspected after the loop.
                startTime = time.time()
                # verbose=0 keeps Keras from printing a progress bar per frame.
                res = model.predict(np.expand_dims(keypoints, axis=0), verbose=0)[0]
                endTime = time.time()
                index = np.argmax(res)
                draw_squares(image, results, actions[index], res[index])

            cv2.imshow('OpenCV Feed', image)

            # Poll the keyboard exactly once per frame; a second waitKey call
            # could consume the 'q' press before it is checked.
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
finally:
    # Always free the camera and close the window, even if a frame fails.
    cap.release()
    cv2.destroyAllWindows()

