In [1]:
import cv2
import mediapipe as mp
import numpy as np
import pandas as pd

In [2]:
def detection_landmark(img, model):
    """Run `model.process` on a BGR frame and hand back a BGR frame plus the results.

    The frame is converted to RGB for the model and converted back to BGR
    afterwards, so the returned image is a new array, not the input object.

    Args:
        img: BGR image array (as produced by `cv2.VideoCapture.read`).
        model: object exposing `process(rgb_image)`.

    Returns:
        Tuple `(bgr_image, results)`.
    """
    rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

    # The array is flagged read-only only for the duration of the model call.
    rgb.flags.writeable = False
    results = model.process(rgb)
    rgb.flags.writeable = True

    return cv2.cvtColor(rgb, cv2.COLOR_RGB2BGR), results

In [3]:
def draw_landmark(results, img):
    """Draw the pose skeleton and both hand skeletons from `results` onto `img`.

    Drawing happens directly on `img`; nothing is returned. Uses the
    module-level `mp_drawing` and `mp_holistic` aliases, which must exist
    before this function is called.
    """
    # (attribute on `results`, connection set, landmark color, connection color)
    overlays = (
        ("pose_landmarks", mp_holistic.POSE_CONNECTIONS, (80, 22, 10), (80, 66, 122)),
        ("left_hand_landmarks", mp_holistic.HAND_CONNECTIONS, (245, 117, 66), (245, 66, 230)),
        ("right_hand_landmarks", mp_holistic.HAND_CONNECTIONS, (121, 22, 76), (121, 44, 250)),
    )
    for attr, connections, point_color, line_color in overlays:
        mp_drawing.draw_landmarks(
            img,
            getattr(results, attr),
            connections,
            mp_drawing.DrawingSpec(color=point_color, thickness=2, circle_radius=2),
            mp_drawing.DrawingSpec(color=line_color, thickness=2, circle_radius=1),
        )

In [25]:
# Set up the MediaPipe holistic solution and its drawing helpers.
# NOTE: this cell calls extract_keypoints(), detection_landmark() and
# draw_landmark(); their defining cells must have been run first.
mp_holistic = mp.solutions.holistic
mp_drawing = mp.solutions.drawing_utils

camera_id = 0
cap = cv2.VideoCapture(camera_id)

# `label` names the gesture being recorded and the output file (<label>.txt).
label = "Swinghand"
lm_lst = []
# Number of frames to record before stopping automatically.
no_frames = 500

try:
    if not cap.isOpened():
        raise RuntimeError(f"Cannot open camera {camera_id}")

    with mp_holistic.Holistic(min_detection_confidence=0.5, min_tracking_confidence=0.5) as holistic:
        while len(lm_lst) < no_frames:
            ret, frame = cap.read()
            if not ret:
                # No frame was delivered (camera unplugged / stream ended);
                # `frame` is unusable, so stop instead of crashing in cvtColor.
                break

            # Detect
            image, results = detection_landmark(frame, holistic)

            # Draw
            draw_landmark(results, image)

            # Extract keypoints: one flat feature row per frame
            extract = extract_keypoints(results)
            lm_lst.append(extract)

            cv2.imshow('image', image)
            # Press 'q' to stop recording early.
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
finally:
    # Always free the camera and close the preview window, even when a
    # frame raised an exception part-way through the recording.
    cap.release()
    cv2.destroyAllWindows()

# Only write when something was recorded, so an aborted run does not
# overwrite an existing recording with an empty file.
if lm_lst:
    df = pd.DataFrame(lm_lst)
    df.to_csv(label + ".txt")

In [4]:
def _flatten_landmarks(landmark_list, count, fields):
    """Return the `fields` of every landmark in `landmark_list` as one flat list.

    When `landmark_list` is falsy (nothing was detected) return
    `count * len(fields)` zeros instead, so the output length never changes.
    """
    if not landmark_list:
        return [0] * (count * len(fields))
    return [getattr(point, name) for point in landmark_list.landmark for name in fields]


def extract_keypoints(results):
    """Flatten one frame of holistic results into a fixed-length feature list.

    Layout of the returned list (258 values):
        * pose:       33 landmarks x (x, y, z, visibility) = 132 values
        * left hand:  21 landmarks x (x, y, z)             =  63 values
        * right hand: 21 landmarks x (x, y, z)             =  63 values

    Any part that was not detected in the frame is filled with zeros. The
    pose part is padded as well: without that, a frame with no detected
    pose raised AttributeError on `None.landmark`.

    Args:
        results: object with `pose_landmarks`, `left_hand_landmarks` and
            `right_hand_landmarks` attributes, each either falsy or exposing
            a `.landmark` iterable.

    Returns:
        list of numbers, pose first, then left hand, then right hand.
    """
    hand_fields = ("x", "y", "z")

    lm = []
    lm.extend(_flatten_landmarks(results.pose_landmarks, 33, ("x", "y", "z", "visibility")))
    lm.extend(_flatten_landmarks(results.left_hand_landmarks, 21, hand_fields))
    lm.extend(_flatten_landmarks(results.right_hand_landmarks, 21, hand_fields))
    return lm

In [25]:
from keras.layers import LSTM, Dense, Dropout
from keras.models import Sequential
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelBinarizer

In [26]:
Iloveu = pd.read_csv("Iloveu.txt")
Thanks = pd.read_csv("Thanks.txt")
Swinghand = pd.read_csv("Swinghand.txt")
X = []
y = []

# Number of consecutive frames that form one training sample.
# (Variable name kept as-is in case other cells reference it.)
no_tiem_steps = 10


def _append_windows(frames, class_id, n_steps, windows, targets):
    """Append sliding windows of `n_steps` rows from `frames` to `windows`.

    One `class_id` is appended to `targets` per window. Windows advance one
    row at a time, so consecutive windows overlap by `n_steps - 1` rows.
    """
    # The first column is dropped; presumably it is the index column that
    # DataFrame.to_csv wrote when the recording was saved.
    dataset = frames.iloc[:, 1:].values
    # NOTE(review): the range stops before len(dataset), so the window that
    # ends on the very last row is never produced. Kept unchanged here so
    # the sample count stays the same as before.
    for end in range(n_steps, len(dataset)):
        windows.append(dataset[end - n_steps:end, :])
        targets.append(class_id)


# Class ids follow the order below: 0 = Iloveu, 1 = Thanks, 2 = Swinghand.
for class_id, frames in enumerate((Iloveu, Thanks, Swinghand)):
    _append_windows(frames, class_id, no_tiem_steps, X, y)

In [27]:
# Stack the list of windows into a single array.
X = np.array(X)

# Turn the integer class ids into one indicator column per class.
encoder = LabelBinarizer()
y = np.array(encoder.fit_transform(y))

# Displayed as the cell output.
(X.shape, y.shape)

((1470, 10, 258), (1470, 3))

In [28]:
# Hold out 25% of the windows. The seed is fixed so that re-running the
# notebook produces the same split (and therefore comparable metrics).
# NOTE(review): the windows were built with a stride of one frame, so
# neighbouring windows share most of their rows; a random split places such
# near-duplicates on both sides, which likely inflates validation accuracy.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=42)

In [29]:
# Four stacked LSTM layers with dropout in between, followed by a softmax
# over the three gesture classes. Only the last LSTM collapses the time axis.
model = Sequential([
    LSTM(units=50, return_sequences=True, input_shape=X.shape[1:]),
    Dropout(0.2),
    LSTM(units=50, return_sequences=True),
    Dropout(0.2),
    LSTM(units=50, return_sequences=True),
    Dropout(0.3),
    LSTM(units=50),
    Dropout(0.3),
    Dense(units=3, activation="softmax"),
])

model.compile(
    loss="categorical_crossentropy",
    optimizer="Adam",
    metrics=['accuracy'],
)

# The held-out split is used as validation data during training.
model.fit(X_train, y_train, epochs=16, validation_data=(X_test, y_test))

# Persist the trained network; the live-detection cell loads this file.
model.save("model.h5")

Epoch 1/16
Epoch 2/16
Epoch 3/16
Epoch 4/16
Epoch 5/16
Epoch 6/16
Epoch 7/16
Epoch 8/16
Epoch 9/16
Epoch 10/16
Epoch 11/16
Epoch 12/16
Epoch 13/16
Epoch 14/16
Epoch 15/16
Epoch 16/16


In [39]:
import threading
import keras.models
def detect(lm_lst, model):
    """Classify one window of frames and update the module-level `label`.

    The window is wrapped in a batch of size one and passed to
    `model.predict`. `label` is overwritten with the `act_dict` entry for the
    highest-scoring class only when that score is above 0.9; otherwise it
    keeps its previous value.

    Args:
        lm_lst: sequence of per-frame feature rows.
        model: object exposing `predict(batch)`.

    Returns:
        The current value of the global `label`.
    """
    global label

    # Add a leading batch axis.
    batch = np.array(lm_lst)[np.newaxis, ...]
    scores = model.predict(batch)

    confident = np.max(scores) > 0.9
    if confident:
        label = act_dict[np.argmax(scores)]

    return label

In [46]:
mp_holistic = mp.solutions.holistic
mp_drawing = mp.solutions.drawing_utils

# Load the trained classifier before opening the camera, so a missing or
# corrupt model file does not leave the camera device open.
model = keras.models.load_model("model.h5")
# Class index -> gesture name.
act_dict = {0: 'Iloveu', 1: 'Thanks', 2: 'Swinghand'}

# Frames shown before classification starts.
WARMUP_FRAMES = 50
# Frames per classified window; this is the window length used for training.
WINDOW_SIZE = 10

lm_lst = []
label = "..."
no_frames = 0

camera_id = 0
cap = cv2.VideoCapture(camera_id)
try:
    if not cap.isOpened():
        raise RuntimeError(f"Cannot open camera {camera_id}")

    with mp_holistic.Holistic(min_detection_confidence=0.5, min_tracking_confidence=0.5) as holistic:
        while True:
            ret, frame = cap.read()
            if not ret:
                # No frame was delivered (camera unplugged / stream ended);
                # stop instead of crashing in cvtColor on an unusable frame.
                break

            # Detect
            image, results = detection_landmark(frame, holistic)

            if no_frames < WARMUP_FRAMES:
                # NOTE(review): "Wating" is misspelled in this on-screen
                # text; left byte-identical here.
                label = "Wating for detecting..."

            else:
                # Draw
                draw_landmark(results, image)

                # Extract keypoints and collect them into the current window
                extract = extract_keypoints(results)
                lm_lst.append(extract)
                if len(lm_lst) == WINDOW_SIZE:
                    # Classify in a background thread so the preview keeps
                    # running; detect() publishes its answer via the global
                    # `label`. The thread keeps the filled list while the
                    # loop starts a fresh one.
                    t1 = threading.Thread(target=detect, args=(lm_lst, model))
                    t1.start()
                    lm_lst = []

            cv2.putText(image, label,
                        (200, 100), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2)

            cv2.imshow('image', image)
            no_frames += 1
            # Press 'q' to quit.
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
finally:
    # Always free the camera and close the preview window, even when a
    # frame raised an exception.
    cap.release()
    cv2.destroyAllWindows()

