In [1]:
import numpy as np
import cv2
import mediapipe as mp
from keras.models import Sequential
from keras.layers import LSTM, Dense, GRU
from tensorflow.keras.optimizers import Adam

In [2]:
# Labels the classifiers can emit. Order matters: a prediction is decoded
# further down by indexing this array with np.argmax of the model output.
actions = np.array([
    # multi-word phrases and greetings
    'hello',
    'apa khabar',
    'maafkan saya',
    'terima kasih',
    'selamat tinggal',
    'memandu dengan selamat',
    # single words
    'kiri',
    'kanan',
    'kekalkan perubahan',
    'lurus',
    'nama',
    'saya',
    'kamu',
    # single letters
    'A',
    'B',
    'C',
    'L',
    'O',
])

In [3]:
# Define the GRU & LSTM model architecture
def create_GRU_model(path):
    """Build the single-layer GRU classifier and restore its trained weights.

    The network consumes windows of 30 frames with 1662 features per frame
    and ends in a softmax layer with one output per entry in ``actions``.

    Args:
        path: Weights file handed to ``model.load_weights``.

    Returns:
        The compiled ``Sequential`` model with the weights loaded.
    """
    model = Sequential()
    # 18 here is the recurrent layer width, not the class count; the layer
    # sizes have to match whatever the weights file was saved with.
    model.add(GRU(18, activation="tanh", recurrent_activation="sigmoid", input_shape=(30, 1662)))
    # Size the output from the label list (as create_LSTM_model does) instead
    # of repeating the literal 18, so the two builders cannot drift apart.
    model.add(Dense(actions.shape[0], activation='softmax'))
    custom_optimizer = Adam(learning_rate=0.001)
    model.compile(loss='categorical_crossentropy', optimizer=custom_optimizer, metrics=['categorical_accuracy'])
    model.load_weights(path)
    return model

def create_LSTM_model(path):
    """Build the stacked LSTM classifier and restore its trained weights.

    Three LSTM layers (64, 128, 64 units) feed three dense ReLU layers
    (256, 128, 64 units) and a softmax output with one unit per entry in
    ``actions``. Input windows are 30 frames of 1662 features.

    Args:
        path: Weights file handed to ``model.load_weights``.

    Returns:
        The compiled ``Sequential`` model with the weights loaded.
    """
    layer_stack = [
        # Recurrent part: only the last LSTM collapses the time axis.
        LSTM(64, return_sequences=True, activation='relu', input_shape=(30, 1662)),
        LSTM(128, return_sequences=True, activation='relu'),
        LSTM(64, return_sequences=False, activation='relu'),
        # Dense head.
        Dense(256, activation='relu'),
        Dense(128, activation='relu'),
        Dense(64, activation='relu'),
        Dense(actions.shape[0], activation='softmax'),
    ]

    network = Sequential()
    for layer in layer_stack:
        network.add(layer)

    network.compile(
        optimizer='Adam',
        loss='categorical_crossentropy',
        metrics=['categorical_accuracy'],
    )
    network.load_weights(path)
    return network

In [4]:
# Weight files for the two classifiers
GRU_WEIGHTS_FILE = 'GRU_model_150_epochs_1_tuned.h5'
LSTM_WEIGHTS_FILE = 'LSTM_model_150_epochs_1_test.h5'

# Rebuild each architecture and restore its weights
model1 = create_GRU_model(GRU_WEIGHTS_FILE)
model2 = create_LSTM_model(LSTM_WEIGHTS_FILE)

# Short aliases for the MediaPipe modules used by the helpers and the capture loop
mp_drawing = mp.solutions.drawing_utils
mp_holistic = mp.solutions.holistic

In [1]:
def mediapipe_detection(image, model):
    """Run ``model.process`` on a BGR frame and return the frame with the results.

    Args:
        image: Frame in BGR channel order (it is converted with
            ``cv2.COLOR_BGR2RGB`` before processing).
        model: Object exposing ``process(rgb_image)``.

    Returns:
        A ``(image, results)`` tuple: the frame converted back to BGR and
        whatever ``model.process`` returned.
    """
    rgb_frame = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

    # The RGB copy is read-only only for the duration of the process() call.
    rgb_frame.flags.writeable = False
    results = model.process(rgb_frame)
    rgb_frame.flags.writeable = True

    return cv2.cvtColor(rgb_frame, cv2.COLOR_RGB2BGR), results

def draw_styled_landmarks(image, results):
    """Draw the face, pose and hand landmarks from ``results`` onto ``image``.

    Each landmark group is passed to ``mp_drawing.draw_landmarks`` with its
    own pair of drawing specs (first for the landmark points, second for the
    connections). Groups are drawn in the order face, pose, left hand,
    right hand. Nothing is returned.

    Args:
        image: Frame to draw on.
        results: Object exposing ``face_landmarks``, ``pose_landmarks``,
            ``left_hand_landmarks`` and ``right_hand_landmarks``.
    """
    spec = mp_drawing.DrawingSpec

    # (landmark list, connection set, landmark spec, connection spec)
    layers = (
        (
            results.face_landmarks,
            mp_holistic.FACEMESH_TESSELATION,
            spec(color=(80, 110, 10), thickness=1, circle_radius=1),
            # Previously (80, 256, 121): 256 is outside the 0-255 range of an
            # 8-bit colour channel, so it is written as 255 explicitly.
            spec(color=(80, 255, 121), thickness=1, circle_radius=1),
        ),
        (
            results.pose_landmarks,
            mp_holistic.POSE_CONNECTIONS,
            spec(color=(80, 22, 10), thickness=2, circle_radius=4),
            spec(color=(80, 44, 121), thickness=2, circle_radius=2),
        ),
        (
            results.left_hand_landmarks,
            mp_holistic.HAND_CONNECTIONS,
            spec(color=(121, 22, 76), thickness=2, circle_radius=4),
            spec(color=(121, 44, 250), thickness=2, circle_radius=2),
        ),
        (
            results.right_hand_landmarks,
            mp_holistic.HAND_CONNECTIONS,
            spec(color=(245, 117, 66), thickness=2, circle_radius=4),
            spec(color=(245, 66, 230), thickness=2, circle_radius=2),
        ),
    )

    for landmarks, connections, landmark_spec, connection_spec in layers:
        mp_drawing.draw_landmarks(image, landmarks, connections, landmark_spec, connection_spec)

def extract_keypoints(results):
    """Flatten all landmark groups in ``results`` into one 1-D feature vector.

    Layout of the returned array, in order:
      * pose:       33 landmarks x (x, y, z, visibility)
      * face:       468 landmarks x (x, y, z)
      * left hand:  21 landmarks x (x, y, z)
      * right hand: 21 landmarks x (x, y, z)

    A group that is missing (falsy) on ``results`` is replaced by zeros of
    the size listed above.

    Args:
        results: Object exposing ``pose_landmarks``, ``face_landmarks``,
            ``left_hand_landmarks`` and ``right_hand_landmarks``; each present
            group has a ``landmark`` iterable.

    Returns:
        ``np.ndarray`` holding the four groups concatenated.
    """
    def flatten_or_zeros(group, fields, expected_count):
        # Missing detection -> zero block so the vector layout stays fixed.
        if not group:
            return np.zeros(expected_count * len(fields))
        rows = [[getattr(point, name) for name in fields] for point in group.landmark]
        return np.array(rows).flatten()

    xyz = ('x', 'y', 'z')
    parts = [
        flatten_or_zeros(results.pose_landmarks, xyz + ('visibility',), 33),
        flatten_or_zeros(results.face_landmarks, xyz, 468),
        flatten_or_zeros(results.left_hand_landmarks, xyz, 21),
        flatten_or_zeros(results.right_hand_landmarks, xyz, 21),
    ]
    return np.concatenate(parts)

In [None]:
# Initialize variables
SEQUENCE_LENGTH = 30  # frames per window; both models are built with input_shape=(30, 1662)
sequence = []
sentence = ""
current_action_index = 0
# NOTE(review): nothing in this cell ever sets `looping` to True, so the
# 'Prediction:' overlay below is never drawn and `sentence` stays empty.
# Confirm what was meant to trigger it.
looping = False

cap = cv2.VideoCapture(0)
cap.set(cv2.CAP_PROP_FRAME_WIDTH, 1280)
cap.set(cv2.CAP_PROP_FRAME_HEIGHT, 720)

# try/finally so the camera and the preview window are released even when
# something inside the loop raises.
try:
    with mp_holistic.Holistic(min_detection_confidence=0.5, min_tracking_confidence=0.5) as holistic:
        while cap.isOpened():
            # Read feed; stop when no frame could be grabbed, otherwise
            # `frame` is None and the colour conversion would fail.
            ret, frame = cap.read()
            if not ret:
                break

            # Make Detection
            image, results = mediapipe_detection(frame, holistic)

            # Draw Landmarks
            draw_styled_landmarks(image, results)

            # Prediction Logic: keep the most recent SEQUENCE_LENGTH frames,
            # oldest first, so the recurrent models see them in time order.
            # NOTE(review): assumes training clips were stored oldest-first — confirm.
            keypoints = extract_keypoints(results)
            sequence.append(keypoints)
            sequence = sequence[-SEQUENCE_LENGTH:]

            res1 = np.zeros(len(actions))
            res2 = np.zeros(len(actions))

            if len(sequence) == SEQUENCE_LENGTH and not looping:
                # Build the (1, frames, features) batch once for both models.
                batch = np.expand_dims(sequence, axis=0)
                # verbose=0 keeps Keras from printing a progress bar per frame.
                res1 = model1.predict(batch, verbose=0)[0]
                res2 = model2.predict(batch, verbose=0)[0]
                print(f"Model 1: {actions[np.argmax(res1)]}, Model 2: {actions[np.argmax(res2)]}")

            cv2.rectangle(image, (0, 0), (1280, 60), (245, 117, 16), -1)
            cv2.putText(image, 'Current Action: ' + actions[current_action_index], (3, 25), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2, cv2.LINE_AA)
            if looping:
                cv2.putText(image, 'Prediction: ' + sentence, (3, 55), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2, cv2.LINE_AA)

            # Show to screen
            cv2.imshow('OpenCV Feed', image)

            # Break and toggle looping
            key = cv2.waitKey(10) & 0xFF
            if key == ord('q'):
                break
            elif key == ord('p'):
                # Advance (with wrap-around) before printing; indexing with
                # current_action_index + 1 overran the array on the last action.
                current_action_index = (current_action_index + 1) % len(actions)
                print("++++++++++++++++++++"*5)
                print("Now print " + actions[current_action_index])
                looping = False
                sentence = ""
finally:
    cap.release()
    cv2.destroyAllWindows()