In [1]:
import mediapipe as mp
import tensorflow as tf
import tensorflow_addons as tfa

import torch
import torch.nn.functional as F

import pandas as pd

import numpy as np
import cv2
import h5py
import os
import time


TensorFlow Addons (TFA) has ended development and introduction of new features.
TFA has entered a minimal maintenance and release mode until a planned end of life in May 2024.
Please modify downstream libraries to take dependencies from other repositories in our TensorFlow community (e.g. Keras, Keras-CV, and Keras-NLP). 

For more information see: https://github.com/tensorflow/addons/issues/2807 

 The versions of TensorFlow you are currently using is 2.9.0 and is not supported. 
Some things might work, some things might not.
If you were to encounter a bug, do not file an issue.
If you want to make sure you're using a tested and supported configuration, either change the TensorFlow version or the TensorFlow Addons's version. 
You can find the compatibility matrix in TensorFlow Addon's readme:
https://github.com/tensorflow/addons


In [2]:
# Short alias for MediaPipe's holistic solution module; the inference loop
# instantiates `mp_holistic.Holistic(...)` from it.
mp_holistic = mp.solutions.holistic

# Number of landmark rows per body part. These match the defaults and the
# zero-fill shapes used by `preprocessing_landmark` (468 face, 33 pose, 21 per hand).
_N_FACE_ROWS, _N_POSE_ROWS, _N_HAND_ROWS = 468, 33, 21

# Row offset at which each part starts inside one flattened frame
# (face, then pose, then right hand, then left hand).
FACE_IDX = 0
POSE_IDX = FACE_IDX + _N_FACE_ROWS      # 468
RH_IDX = POSE_IDX + _N_POSE_ROWS        # 501
LH_IDX = RH_IDX + _N_HAND_ROWS          # 522
ROWS_PER_FRAME = LH_IDX + _N_HAND_ROWS  # 543

# Number of frames in one model input window.
FIXED_FRAMES = 34

# Face-mesh landmark indices that outline the lips.
# NOTE(review): the "Inner" group names look swapped relative to MediaPipe's own
# lip index groups -- confirm. Do not reorder: the concatenation order below
# determines the feature order.
lips_UpperOuter = [185, 40, 39, 37, 0, 267, 269, 270, 409]
lips_LowerOuter = [61, 146, 91, 181, 84, 17, 314, 405, 321, 375, 291]
lips_UpperInner = [78, 95, 88, 178, 87, 14, 317, 402, 318, 324, 308]
lips_LowerInner = [191, 80, 81, 82, 13, 312, 311, 310, 415]
LIPS_IDX = np.array(
    [*lips_UpperOuter, *lips_LowerOuter, *lips_UpperInner, *lips_LowerInner]
)

# Pose landmark indices 0..24.
UPPER_BODY_IDX = np.arange(25)

In [3]:
def mp_detection(frame, mp_model):
    """Run a MediaPipe model on a single BGR image and return its result.

    Args:
        frame: Image in BGR channel order (it is converted with
            ``cv2.COLOR_BGR2RGB`` before processing). The caller's array is
            not modified; the conversion produces a new array.
        mp_model: Object exposing ``process(image)``, e.g. an open
            ``mp.solutions.holistic.Holistic`` instance.

    Returns:
        Whatever ``mp_model.process`` returns for the RGB image.
    """
    rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    # Mark the RGB copy read-only before handing it to the model
    # (presumably so MediaPipe can use it without copying -- confirm).
    rgb_frame.flags.writeable = False
    return mp_model.process(rgb_frame)

In [4]:
def zsc(data, mean, std):
    """Standardise ``data`` to z-scores: ``(data - mean) / std``.

    Entries of ``std`` that are exactly zero are treated as 1, so a feature
    with no spread maps to ``data - mean`` instead of producing ``inf``/``nan``
    (which would otherwise propagate into the model input).

    Args:
        data: Array (or scalar) of raw values.
        mean: Mean(s), broadcastable against ``data``.
        std: Standard deviation(s), broadcastable against ``data``.

    Returns:
        The standardised values, broadcast to the common shape of the inputs.
    """
    std = np.asarray(std)
    # Replace zero spread by 1 to avoid division by zero; non-zero entries are untouched.
    safe_std = np.where(std == 0, 1, std)
    return (data - mean) / safe_std

In [5]:
# HDF5 file holding the per-component z-score statistics, read as
# '<component>/mean' and '<component>/std'.
_ZSCORE_PARAM_FILE = 'z-score_parameter.h5'

# component name -> (mean, std) arrays. Filled lazily so the file is read at
# most once per component instead of once per video frame. Clear this dict
# (or restart the kernel) if the parameter file is regenerated.
_ZSCORE_CACHE = {}


def _zscore_params(name):
    """Return the cached ``(mean, std)`` arrays stored under ``name`` in the parameter file."""
    if name not in _ZSCORE_CACHE:
        with h5py.File(_ZSCORE_PARAM_FILE, 'r') as hf:
            _ZSCORE_CACHE[name] = (
                np.array(hf[name + '/mean']),
                np.array(hf[name + '/std']),
            )
    return _ZSCORE_CACHE[name]


def _landmark_rows(landmark_list, attrs, n_rows, indices=None):
    """Return an ``(n_rows, len(attrs))`` array of landmark attributes, or zeros if nothing was detected."""
    if not landmark_list:
        return np.zeros((n_rows, len(attrs)))
    if indices is None:
        points = landmark_list.landmark
    else:
        points = [landmark_list.landmark[idx] for idx in indices]
    return np.array([[getattr(point, attr) for attr in attrs] for point in points])


def _standardised_flat(rows, mean, std):
    """Z-score ``rows`` (landmarks x attributes) and flatten landmark by landmark."""
    return np.array(zsc(rows.T, mean, std)).T.flatten()


def preprocessing_landmark(landmarks, faceIDX = np.arange(0,468), poseIDX = np.arange(0,33), component = ('face', 'pose', 'right_hand', 'left_hand')):
    """Turn one holistic detection result into a flat, z-scored feature vector.

    The requested components are emitted in the fixed order face, pose,
    right hand, left hand. Face and hand landmarks contribute ``x, y, z``;
    pose landmarks contribute ``x, y, z, visibility``. A component that was
    not detected is filled with zeros *before* standardisation, so it ends up
    as ``-mean / std`` rather than zero.

    Args:
        landmarks: Detection result exposing ``face_landmarks``,
            ``pose_landmarks``, ``right_hand_landmarks`` and
            ``left_hand_landmarks`` (each falsy when not detected, otherwise
            holding a ``landmark`` sequence).
        faceIDX: Face landmark indices to keep; also used to select the
            matching columns of the face statistics.
        poseIDX: Pose landmark indices to keep; also used to select the
            matching columns of the pose statistics.
        component: Names of the components to include. Unknown names are ignored.

    Returns:
        1-D float array of length
        ``3*len(faceIDX) + 4*len(poseIDX) + 63 + 63`` (only counting the
        requested components); empty if no known component is requested.

    Raises:
        KeyError: If the parameter file lacks the statistics of a requested component.
        OSError: If the parameter file cannot be opened.
    """
    xyz = ('x', 'y', 'z')
    parts = []

    # Face
    if 'face' in component:
        mean, std = _zscore_params('face')
        rows = _landmark_rows(landmarks.face_landmarks, xyz, len(faceIDX), faceIDX)
        parts.append(_standardised_flat(rows, mean[:, faceIDX], std[:, faceIDX]))

    # Pose
    if 'pose' in component:
        mean, std = _zscore_params('pose')
        rows = _landmark_rows(
            landmarks.pose_landmarks, xyz + ('visibility',), len(poseIDX), poseIDX
        )
        parts.append(_standardised_flat(rows, mean[:, poseIDX], std[:, poseIDX]))

    # Right hand, then left hand: all landmarks of the hand are used (zero-fill is 21 x 3).
    for name in ('right_hand', 'left_hand'):
        if name in component:
            mean, std = _zscore_params(name)
            rows = _landmark_rows(getattr(landmarks, name + '_landmarks'), xyz, 21)
            parts.append(_standardised_flat(rows, mean, std))

    if not parts:
        # np.concatenate rejects an empty list; return an empty feature vector instead.
        return np.empty(0, dtype='float')
    return np.concatenate(parts).astype('float')

In [6]:
# Class index -> word label. The position in this list is the class index
# used as the dictionary key, so the order must not change.
decoder = dict(enumerate([
    'Batuk',
    'Demam',
    'Gigi',
    'Kepala',
    'Minum',
    'Obat',
    'Perut',
    'Resep',
    'Sakit',
]))

In [7]:
# Saved model files, one per dataset variant (dset1..dset5), listed in dataset order.
# NOTE(review): the directory and filename parts presumably encode the training
# run, a size setting (16/32/64) and the learning rate -- confirm.

# LSTM models.
lstm = [
    'models1/64/lstm_dset1_0.0001.h5',       # dset1
    'models3/v2/64/lstm_dset2_0.0001.h5',    # dset2
    'models1/64/lstm_dset3_0.0001.h5',       # dset3
    'models3/v2/16/lstm_dset4_0.0001.h5',    # dset4
    'models2/32/lstm_dset5_0.0001.h5',       # dset5
]

# BiLSTM models.
bilstm = [
    'models1/32/bilstm_dset1_0.0001.h5',     # dset1
    'models2/64/bilstm_dset2_0.0001.h5',     # dset2
    'models3/v2/64/bilstm_dset3_0.0001.h5',  # dset3
    'models2/32/bilstm_dset4_0.0001.h5',     # dset4
    'models3/v2/32/bilstm_dset5_0.001.h5',   # dset5
]

In [33]:
# Live demo: read webcam frames, keep a sliding window of the last
# FIXED_FRAMES feature vectors, classify the window and overlay the predicted
# word on the video. Press 'q' in the video window to stop.

sequence = []   # sliding window of per-frame feature vectors
predicts = []   # arg-max class index of every prediction made so far
words = ''      # label currently drawn on the frame
th = .15        # minimum class probability required to update `words`
model = tf.keras.models.load_model(bilstm[3], custom_objects={'Addons>F1Score': tfa.metrics.F1Score(9)})

cap = cv2.VideoCapture(2)

try:
    with mp_holistic.Holistic(min_detection_confidence=.5, min_tracking_confidence=.5) as holistic_model:

        while cap.isOpened():

            ret, frame = cap.read()
            if not ret:
                # No frame was grabbed (camera unplugged / stream ended);
                # `frame` is not usable, so stop instead of crashing in cvtColor.
                break

            mp_results = mp_detection(frame, holistic_model)
            sequence.append(preprocessing_landmark(mp_results, LIPS_IDX, UPPER_BODY_IDX))
            sequence = sequence[-FIXED_FRAMES:]

            if len(sequence) == FIXED_FRAMES:
                # verbose=0 keeps Keras from printing a progress bar for every frame.
                res = model.predict(np.expand_dims(np.array(sequence), axis=0), verbose=0)[0]
                best = np.argmax(res)
                predicts.append(best)

                # Only replace the displayed word when the model is confident enough.
                if res[best] >= th:
                    words = decoder[best]

            cv2.putText(frame, words, (20,60),
                        cv2.FONT_HERSHEY_SIMPLEX, 2, (255, 0, 0), 3, cv2.LINE_AA)
            cv2.imshow('SIBI Sign Language', frame)

            if cv2.waitKey(10) & 0xFF == ord('q'): break
finally:
    # Always free the camera and close the window, including when the cell is
    # interrupted (KeyboardInterrupt) or an exception is raised mid-loop.
    cap.release()
    cv2.destroyAllWindows()



KeyboardInterrupt: 

: 