In [1]:
import tensorflow as tf

# Turn on memory growth for every visible GPU before any model is built, so
# TensorFlow requests GPU memory as needed instead of reserving it up front.
gpus = tf.config.experimental.list_physical_devices('GPU')
for gpu in gpus:
    tf.config.experimental.set_memory_growth(gpu, True)
# Bare expression: shows the detected GPU list as the cell output.
gpus

[PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]

In [6]:
import cv2
import numpy as np
import os
import mediapipe as mp
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense
from tensorflow.keras.callbacks import TensorBoard
# Short aliases for the MediaPipe hands solution module and its drawing helpers;
# both are used by the helper functions and the detection loop below.
mp_hands = mp.solutions.hands
mp_drawing = mp.solutions.drawing_utils


def mp_process(image_bgr, mp_model):
    """Run a MediaPipe model on a BGR frame and return its raw outputs.

    Args:
        image_bgr: Frame in BGR channel order; it is converted with
            ``cv2.COLOR_BGR2RGB`` before inference and is not modified.
        mp_model: Object exposing ``process(image_rgb)``, e.g. an open
            ``mp_hands.Hands`` instance.

    Returns:
        Whatever ``mp_model.process`` returns for the RGB frame.
    """
    image_rgb = cv2.cvtColor(image_bgr, cv2.COLOR_BGR2RGB)
    # cvtColor returns a new array, so the read-only hint must be set on the
    # RGB copy (the array actually handed to the model), not on the caller's
    # frame. MediaPipe's examples mark the input read-only so it can be
    # passed by reference. The copy is local, so nothing needs restoring.
    image_rgb.flags.writeable = False
    return mp_model.process(image_rgb)


def draw_mp_hands_landmarks(image_bgr, mp_hands_outputs):
    """Overlay the connections of every detected hand onto a frame.

    Drawing happens in place on ``image_bgr``; the very same array object is
    returned whether or not any hand was detected.

    Args:
        image_bgr: Frame to draw on (mutated when hands are present).
        mp_hands_outputs: Object with a ``multi_hand_landmarks`` attribute,
            as returned by ``mp_process``.

    Returns:
        ``image_bgr`` itself.
    """
    detected_hands = mp_hands_outputs.multi_hand_landmarks
    if not detected_hands:
        # Nothing detected: hand the frame back untouched.
        return image_bgr

    white_connections = mp_drawing.DrawingSpec(color=(255, 255, 255))
    for single_hand in detected_hands:
        # Fourth positional argument (landmark drawing spec) is None;
        # connections use the white spec built above.
        mp_drawing.draw_landmarks(
            image_bgr,
            single_hand,
            mp_hands.HAND_CONNECTIONS,
            None,
            white_connections
        )
    return image_bgr


def get_mp_hands_landmarks(mp_hands_outputs):
    """Flatten the first detected hand into a 1-D feature vector.

    Args:
        mp_hands_outputs: Object with a ``multi_hand_landmarks`` attribute,
            as returned by ``mp_process``.

    Returns:
        A 1-D NumPy array laid out as ``x0, y0, z0, x1, y1, z1, ...`` for the
        landmarks of the first hand only, or 63 zeros when no hand was
        detected.
    """
    detected_hands = mp_hands_outputs.multi_hand_landmarks
    if not detected_hands:
        # Placeholder with the same length as 21 landmarks x 3 coordinates.
        return np.zeros(63)

    coordinates = []
    for point in detected_hands[0].landmark:
        coordinates.append([point.x, point.y, point.z])
    return np.array(coordinates).flatten()

# Load the recorded landmark sequences from disk.
# Layout read below: MediaPipeData/<action>/<sequence index>/<frame index>.npy
data_path = os.path.join('MediaPipeData')
actions = np.array(['A', 'B', 'C'])
n_sequences = 30   # recordings per action
sequence_len = 30  # frames per recording
# Map each action label to its integer class index (its position in `actions`).
label_map = {label: num for num, label in enumerate(actions)}
sequences, labels = [], []
for action in actions:
    for sequence in range(n_sequences):
        window = []
        for frame_num in range(sequence_len):
            res = np.load(os.path.join(data_path, action, str(sequence), "{}.npy".format(frame_num)))
            window.append(res)
        sequences.append(window)
        labels.append(label_map[action])
# X stacks every window: first axis is the recording, second the frame.
# NOTE(review): each frame is presumably a 63-value landmark vector — confirm against the saved files.
X = np.array(sequences)
# One-hot encode the integer class indices.
y = to_categorical(labels).astype(int)
# Fixed seed so the train/test split is identical on every Restart & Run All.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.05, random_state=42)

# TensorBoard callback writing to ./Logs. It is only created here; no
# training call in the visible code consumes it.
log_dir = os.path.join('Logs')
tb_callback = TensorBoard(log_dir=log_dir)

# Three stacked LSTMs followed by a dense classifier head. The network takes
# windows of 30 frames x 63 values and emits one softmax score per action.
model = Sequential([
    LSTM(64, return_sequences=True, activation='relu', input_shape=(30, 63)),
    LSTM(128, return_sequences=True, activation='relu'),
    # Last recurrent layer returns only its final step for the dense head.
    LSTM(64, return_sequences=False, activation='relu'),
    Dense(64, activation='relu'),
    Dense(32, activation='relu'),
    Dense(len(actions), activation='softmax'),
])
model.compile(
    optimizer='Adam',
    loss='categorical_crossentropy',
    metrics=['categorical_accuracy'],
)
# Load saved weights from disk instead of training in this notebook.
model.load_weights('abc.h5')

# One fill colour per probability bar drawn by prob_viz, indexed by class position.
# NOTE(review): OpenCV reads these tuples in the frame's channel order, presumably BGR — confirm.
colors = [(245, 117, 16), (117, 245, 16), (16, 117, 245)]


def prob_viz(res, actions, input_frame, colors):
    """Draw one labelled horizontal bar per class score on a copy of a frame.

    Args:
        res: Sequence of per-class scores; bar ``i`` is ``int(res[i] * 100)``
            pixels wide.
        actions: Labels, indexed in parallel with ``res``.
        input_frame: Frame to annotate; it is copied, not modified.
        colors: Fill colours, indexed in parallel with ``res``.

    Returns:
        The annotated copy of ``input_frame``.
    """
    canvas = input_frame.copy()
    for idx, score in enumerate(res):
        # Rows are stacked 40 px apart, starting 60 px from the top.
        row_offset = idx * 40
        bar_start = (0, 60 + row_offset)
        bar_end = (int(score * 100), 90 + row_offset)
        cv2.rectangle(canvas, bar_start, bar_end, colors[idx], -1)
        cv2.putText(
            canvas,
            actions[idx],
            (0, 85 + row_offset),
            cv2.FONT_HERSHEY_SIMPLEX,
            1,
            (255, 255, 255),
            2,
            cv2.LINE_AA,
        )
    return canvas

# Live recognition: read webcam frames, keep a rolling window of landmark
# vectors, classify the window and show the result until 'q' is pressed.
sequence = []    # rolling window of the most recent per-frame landmark vectors
sentence = []    # most recent distinct predictions (at most 5 are kept)
threshold = 0.4  # minimum top score required to accept a prediction
cap = cv2.VideoCapture(0)
# Create and size the window once; resizing on every frame would override
# any manual resize by the user.
cv2.namedWindow('LibrasDetection', cv2.WINDOW_NORMAL)
cv2.resizeWindow('LibrasDetection', 1280, 960)
try:
    with mp_hands.Hands(max_num_hands=1) as hands:
        while cap.isOpened():
            success, image_bgr = cap.read()
            if not success:
                # No frame was delivered (camera unplugged / stream ended);
                # stop instead of passing an invalid frame downstream.
                break
            mp_hands_outputs = mp_process(image_bgr, hands)
            image = draw_mp_hands_landmarks(image_bgr, mp_hands_outputs)
            mp_hands_landmarks = get_mp_hands_landmarks(mp_hands_outputs)
            sequence.append(mp_hands_landmarks)
            # Keep only the newest sequence_len frames; this must match the
            # window length the model was built for.
            sequence = sequence[-sequence_len:]
            # Mirror every frame, so the view does not switch orientation
            # once the window fills up.
            image = cv2.flip(image, 1)
            if len(sequence) == sequence_len:
                # verbose=0 keeps Keras from printing a progress line per frame.
                res = model.predict(np.expand_dims(sequence, axis=0), verbose=0)[0]
                best = int(np.argmax(res))
                if res[best] > threshold:
                    # Append only when the prediction differs from the last one.
                    if not sentence or actions[best] != sentence[-1]:
                        sentence.append(actions[best])
                sentence = sentence[-5:]
                image = prob_viz(res, actions, image, colors)
            # Banner across the full frame width with the recent predictions.
            cv2.rectangle(image, (0, 0), (image.shape[1], 40), (245, 117, 16), -1)
            cv2.putText(image, ' '.join(sentence), (3, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2, cv2.LINE_AA)
            cv2.imshow('LibrasDetection', image)
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
finally:
    # Always free the camera and close the window, even if a frame fails.
    cap.release()
    cv2.destroyAllWindows()


