In [None]:
!pip install mediapipe

In [None]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, LSTM, Bidirectional
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.callbacks import TensorBoard
from tensorflow.keras.models import load_model

from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score

import os
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import cv2
import mediapipe as mp

In [None]:
from google.colab import drive
# Mount Google Drive at /content/drive; the dataset archive and the saved
# models referenced later in this notebook live under that mount point.
drive.mount('/content/drive')

# Data Preparation

In [None]:
from zipfile import ZipFile
# Dataset archive on the mounted Drive.
file_name = "/content/drive/MyDrive/NSC/Data Set/DataSet LSTM/DataSet LSTM.zip"

# Extract into the current working directory.
# The handle is deliberately not named `zip`: at notebook top level that would
# shadow the built-in `zip()` for every cell that runs afterwards.
with ZipFile(file_name, 'r') as archive:
  archive.extractall()
  print("Done")

In [None]:
# Sign vocabulary. A label's position in this array is its integer class id.
actions = np.array(['Hello', 'Hungry', 'Sick', 'Sorry', 'Thank you', 'What', 'When', 'Where', 'Who', 'Why'])

# Lookup from action name to class id, built in array order.
label_map = dict((label, num) for num, label in enumerate(actions))
label_map

In [None]:
# Dataset layout used by the loading loop: `folder` numbered recordings per
# action, each made of `sequence_length` numbered frame files.
folder = sequence_length = 30

# Root directory that contains one sub-directory per action.
DATA_PATH = "/content"
print(DATA_PATH)

In [None]:
# Read every recording from disk.
# Files are expected at DATA_PATH/<action>/<recording index>/<frame index>.npy.
# `sequences` collects one list of per-frame arrays per recording and
# `labels` the matching integer class id.
sequences, labels = [], []
for action in actions:
  action_dir = os.path.join(DATA_PATH, action)
  for num in range(folder):
    recording_dir = os.path.join(action_dir, str(num))
    window = [
      np.load(os.path.join(recording_dir, "{}.npy".format(frame_num)))
      for frame_num in range(sequence_length)
    ]
    sequences.append(window)
    labels.append(label_map[action])
  # Progress marker: one line per fully loaded action.
  print(action)

In [None]:
# Stack the loaded recordings and their class ids into arrays.
# NOTE(review): X is presumably (recordings, frames, features) — the shapes
# echoed below are the place to confirm it.
X, y = np.array(sequences), np.array(labels)
X.shape, y.shape

# Modeling

In [None]:
from sklearn.model_selection import StratifiedKFold
from tqdm import tqdm

In [None]:
# Three stacked LSTM layers followed by a softmax over the action classes.
# The first two LSTMs return full sequences so the next recurrent layer
# receives one vector per frame; the last LSTM returns only its final output.
# NOTE(review): `input_shape` on the second LSTM looks redundant (only the
# first layer should need it) — confirm before dropping it.
model = Sequential([
    LSTM(64, return_sequences=True, activation='relu', input_shape=(30, 258)),
    LSTM(128, input_shape=(30, 258), return_sequences=True),
    Dropout(0.2),
    LSTM(64),
    Dropout(0.2),
    Dense(actions.shape[0], activation='softmax'),
])
model.compile(
    optimizer='Adam',
    loss='categorical_crossentropy',
    metrics=['categorical_accuracy'],
)
model.summary()

In [None]:
from tensorflow.keras.utils import plot_model

# Render the layer graph of the current `model` to model-lstm.png, annotated
# with tensor shapes (show_shapes=True).
plot_model(model, to_file='model-lstm.png', show_shapes=True)

In [None]:
def build_lstm_model(input_shape, num_classes, optimizer='SGD'):
    """Build and compile a fresh, untrained stacked-LSTM classifier.

    Args:
        input_shape: (frames, features) shape of a single sequence.
        num_classes: number of output classes (size of the softmax layer).
        optimizer: optimizer name or instance passed to `compile`.

    Returns:
        A compiled Sequential model with newly initialised weights.
    """
    net = Sequential()
    net.add(LSTM(64, return_sequences=True, activation='relu', input_shape=input_shape))
    net.add(LSTM(128, return_sequences=True))
    net.add(Dropout(0.2))
    net.add(LSTM(64))
    net.add(Dropout(0.2))
    net.add(Dense(num_classes, activation='softmax'))
    net.compile(optimizer=optimizer, loss='categorical_crossentropy', metrics=['categorical_accuracy'])
    return net


num_folds = 5
# Fixed seed so the fold assignment is reproducible. Any cell that re-creates
# the splitter must use the same seed to obtain the same folds.
skf = StratifiedKFold(n_splits=num_folds, shuffle=True, random_state=42)

# One-hot targets do not depend on the fold, so encode them once.
y_c = to_categorical(y).astype(int)

models = []
cos_mat = []
report = []
for fold, (train_indices, val_indices) in enumerate(tqdm(skf.split(X, y), total=num_folds), start=1):
    print(f'Fold: {fold}')

    # A new model per fold. Re-using one instance would carry the weights
    # learned on earlier folds into later ones (so later validation sets would
    # already have been trained on) and `models` would hold five references to
    # the same object. `reset_states()` does not re-initialise weights.
    model = build_lstm_model(X.shape[1:], actions.shape[0])

    # Separate TensorBoard log directory for each fold.
    log_dir = os.path.join('Logs_{0}'.format(fold))
    tb_callback = TensorBoard(log_dir=log_dir)

    X_train, X_val = X[train_indices], X[val_indices]
    y_train, y_val = y_c[train_indices], y_c[val_indices]

    model.fit(X_train, y_train, epochs=150, callbacks=[tb_callback], validation_data=(X_val, y_val), batch_size=64, verbose=0)

    loss, accuracy = model.evaluate(X_val, y_val, verbose=0)
    print(f'Validation loss: {loss}, Validation accuracy: {accuracy}')

    # Turn one-hot targets / softmax outputs back into action names.
    y_nn = model.predict(X_val)
    y_true_names = actions[y_val.argmax(axis=1)]
    y_pred_names = actions[y_nn.argmax(axis=1)]

    models.append(model)
    # labels=actions keeps every matrix at len(actions) x len(actions), in
    # `actions` order, even if a class is absent from a fold's predictions.
    cos_mat.append(confusion_matrix(y_true_names, y_pred_names, labels=actions))
    report.append(classification_report(y_true_names, y_pred_names))

In [None]:
# Single training run of the stacked-LSTM classifier with the Adam optimizer.
# NOTE(review): X_train / y_train / X_val / y_val are not defined in this
# cell; it trains on whatever the most recently executed fold loop left in
# those variables. Confirm that is the intended split.
model = Sequential([
    LSTM(64, return_sequences=True, activation='relu', input_shape=(30, 258)),
    LSTM(128, input_shape=(30, 258), return_sequences=True),
    Dropout(0.2),
    LSTM(64),
    Dropout(0.2),
    Dense(actions.shape[0], activation='softmax'),
])
model.compile(
    optimizer='Adam',
    loss='categorical_crossentropy',
    metrics=['categorical_accuracy'],
)
model.fit(
    X_train,
    y_train,
    epochs=150,
    validation_data=(X_val, y_val),
    batch_size=64,
    verbose=1,
)

# Evaluation

In [None]:
num_folds = 5
# Fixed seed: an unseeded shuffle produces different folds on every run, so
# the "validation" folds built here would be unrelated to the folds the saved
# models were trained with. The training run must use the same seed for the
# folds to line up.
skf = StratifiedKFold(n_splits=num_folds, shuffle=True, random_state=42)

In [None]:
# Re-evaluate the saved per-fold models: model k is loaded from Drive and
# scored on the k-th validation fold produced by `skf`.
# NOTE(review): the scores are only meaningful if `skf` yields the same folds
# that were used when the models were trained; otherwise each model is scored
# on samples it may have seen during training.
cos_mat = []
models = []
# Rebuilt together with `cos_mat` so report[i] and cos_mat[i] always describe
# the same model and the same fold.
report = []

# One-hot targets do not depend on the fold, so encode them once.
y_c = to_categorical(y).astype(int)

for num, (train_indices, val_indices) in enumerate(skf.split(X, y), start=1):
  model = load_model('/content/drive/MyDrive/NSC/Model/LSTM/Model_LSTM_{0}.h5'.format(num))
  models.append(model)
  X_train, X_val = X[train_indices], X[val_indices]
  y_train, y_val = y_c[train_indices], y_c[val_indices]

  # Turn one-hot targets / softmax outputs back into action names.
  y_nn = model.predict(X_val)
  y_true_names = actions[y_val.argmax(axis=1)]
  y_pred_names = actions[y_nn.argmax(axis=1)]

  # labels=actions keeps every matrix at len(actions) x len(actions), in
  # `actions` order, even if a class is absent from a fold's predictions.
  cos_mat.append(confusion_matrix(y_true_names, y_pred_names, labels=actions))
  report.append(classification_report(y_true_names, y_pred_names))

  loss, accuracy = model.evaluate(X_val, y_val, verbose=0)
  print(f'Validation loss: {loss}, Validation accuracy: {accuracy}')
  print('')

In [None]:
# Axis labels for the confusion-matrix plots below. Same ten names, in the
# same order, as the `actions` array used to build the matrices.
word = np.array(['Hello', 'Hungry', 'Sick', 'Sorry', 'Thank you', 'What', 'When', 'Where', 'Who', 'Why'])
word

## Model 1

In [None]:
# Confusion matrix of model 1 (rows: actual class, columns: predicted class).
# The class names are handed to seaborn so each label sits at the centre of
# its cell; setting them with plt.xticks(range(n), ...) moved the ticks to
# 0, 1, 2, ... which are the cell edges.
plt.figure(figsize=(10, 8))
sns.heatmap(cos_mat[0], annot=True, cmap='RdPu', xticklabels=word, yticklabels=word)
plt.yticks(rotation=0)  # keep the row labels horizontal
plt.xlabel('Predicted')
plt.ylabel('Actual')
plt.show()

In [None]:
# Text classification report for model 1 (`report` holds one entry per fold).
print(report[0])

In [None]:
# Write model 1 to Drive as an .h5 file. This is the path the evaluation
# loop loads "Model_LSTM_1" from, so the file must exist before that loop runs.
models[0].save('/content/drive/MyDrive/NSC/Model/LSTM/Model_LSTM_1.h5')

## Model 2

In [None]:
# Confusion matrix of model 2 (rows: actual class, columns: predicted class).
# The class names are handed to seaborn so each label sits at the centre of
# its cell; setting them with plt.xticks(range(n), ...) moved the ticks to
# 0, 1, 2, ... which are the cell edges.
plt.figure(figsize=(10, 8))
sns.heatmap(cos_mat[1], annot=True, cmap='RdPu', xticklabels=word, yticklabels=word)
plt.yticks(rotation=0)  # keep the row labels horizontal
plt.xlabel('Predicted')
plt.ylabel('Actual')
plt.show()

In [None]:
# Text classification report for model 2 (`report` holds one entry per fold).
print(report[1])

In [None]:
# Write model 2 to Drive as an .h5 file. This is the path the evaluation
# loop loads "Model_LSTM_2" from, so the file must exist before that loop runs.
models[1].save('/content/drive/MyDrive/NSC/Model/LSTM/Model_LSTM_2.h5')

## Model 3

In [None]:
# Confusion matrix of model 3 (rows: actual class, columns: predicted class).
# The class names are handed to seaborn so each label sits at the centre of
# its cell; setting them with plt.xticks(range(n), ...) moved the ticks to
# 0, 1, 2, ... which are the cell edges.
plt.figure(figsize=(10, 8))
sns.heatmap(cos_mat[2], annot=True, cmap='RdPu', xticklabels=word, yticklabels=word)
plt.yticks(rotation=0)  # keep the row labels horizontal
plt.xlabel('Predicted')
plt.ylabel('Actual')
plt.show()

In [None]:
# Text classification report for model 3 (`report` holds one entry per fold).
print(report[2])

In [None]:
# Write model 3 to Drive as an .h5 file. This is the path the evaluation
# loop loads "Model_LSTM_3" from, so the file must exist before that loop runs.
models[2].save('/content/drive/MyDrive/NSC/Model/LSTM/Model_LSTM_3.h5')

## Model 4

In [None]:
# Confusion matrix of model 4 (rows: actual class, columns: predicted class).
# The class names are handed to seaborn so each label sits at the centre of
# its cell; setting them with plt.xticks(range(n), ...) moved the ticks to
# 0, 1, 2, ... which are the cell edges.
plt.figure(figsize=(10, 8))
sns.heatmap(cos_mat[3], annot=True, cmap='RdPu', xticklabels=word, yticklabels=word)
plt.yticks(rotation=0)  # keep the row labels horizontal
plt.xlabel('Predicted')
plt.ylabel('Actual')
plt.show()

In [None]:
# Text classification report for model 4 (`report` holds one entry per fold).
print(report[3])

In [None]:
# Write model 4 to Drive as an .h5 file. This is the path the evaluation
# loop loads "Model_LSTM_4" from, so the file must exist before that loop runs.
models[3].save('/content/drive/MyDrive/NSC/Model/LSTM/Model_LSTM_4.h5')

## Model 5

In [None]:
# Confusion matrix of model 5 (rows: actual class, columns: predicted class).
# The class names are handed to seaborn so each label sits at the centre of
# its cell; setting them with plt.xticks(range(n), ...) moved the ticks to
# 0, 1, 2, ... which are the cell edges.
plt.figure(figsize=(10, 8))
sns.heatmap(cos_mat[4], annot=True, cmap='RdPu', xticklabels=word, yticklabels=word)
plt.yticks(rotation=0)  # keep the row labels horizontal
plt.xlabel('Predicted')
plt.ylabel('Actual')
plt.show()

In [None]:
# Text classification report for model 5 (`report` holds one entry per fold).
print(report[4])

In [None]:
# Write model 5 to Drive as an .h5 file. This is the path the evaluation
# loop loads "Model_LSTM_5" from, so the file must exist before that loop runs.
models[4].save('/content/drive/MyDrive/NSC/Model/LSTM/Model_LSTM_5.h5')

# Deployment

In [None]:
# Short aliases for the MediaPipe modules used by the helpers below:
# the holistic solution (tracker + connection tables) and the drawing utilities.
mp_holistic = mp.solutions.holistic
mp_drawing = mp.solutions.drawing_utils

def mediapipe_detection(image, model):
    """Run a MediaPipe model on one BGR frame.

    Args:
        image: frame in OpenCV's BGR channel order.
        model: object exposing `process(rgb_image)`, e.g. a Holistic instance.

    Returns:
        (bgr_image, results): the frame converted back to BGR after the
        round trip through RGB, and whatever `model.process` returned.
    """
    rgb_frame = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

    # The frame is flagged read-only only for the duration of the model call.
    rgb_frame.flags.writeable = False
    results = model.process(rgb_frame)
    rgb_frame.flags.writeable = True

    bgr_frame = cv2.cvtColor(rgb_frame, cv2.COLOR_RGB2BGR)
    return bgr_frame, results

def draw_landmarks(image, results):
    """Draw the pose and both hand landmark sets on `image`.

    No custom drawing specs are passed. Nothing is returned, so the drawing
    is presumably applied to `image` in place.
    """
    overlays = (
        (results.pose_landmarks, mp_holistic.POSE_CONNECTIONS),
        (results.left_hand_landmarks, mp_holistic.HAND_CONNECTIONS),
        (results.right_hand_landmarks, mp_holistic.HAND_CONNECTIONS),
    )
    for landmark_list, connections in overlays:
        mp_drawing.draw_landmarks(image, landmark_list, connections)

def draw_styled_landmarks(image, results):
    """Draw pose, left-hand and right-hand landmarks, each in its own colours.

    Every set is drawn with two DrawingSpec objects: the first (radius 4) and
    the second (radius 2) differ only in colour and circle radius. Nothing is
    returned, so the drawing is presumably applied to `image` in place.
    """
    make_spec = mp_drawing.DrawingSpec

    # (landmarks, connection table, first spec colour, second spec colour)
    styled_overlays = (
        (results.pose_landmarks, mp_holistic.POSE_CONNECTIONS, (80,22,10), (80,44,121)),
        (results.left_hand_landmarks, mp_holistic.HAND_CONNECTIONS, (121,22,76), (121,44,250)),
        (results.right_hand_landmarks, mp_holistic.HAND_CONNECTIONS, (245,117,66), (245,66,230)),
    )
    for landmark_list, connections, first_color, second_color in styled_overlays:
        mp_drawing.draw_landmarks(
            image,
            landmark_list,
            connections,
            make_spec(color=first_color, thickness=2, circle_radius=4),
            make_spec(color=second_color, thickness=2, circle_radius=2),
        )

def extract_keypoints(results):
    """Flatten one frame's landmarks into a single 1-D feature vector.

    Layout of the returned array (258 values):
      * pose: 33 landmarks x (x, y, z, visibility) = 132 values
      * left hand: 21 landmarks x (x, y, z) = 63 values
      * right hand: 21 landmarks x (x, y, z) = 63 values
    A landmark set that is missing (falsy) is replaced by zeros of the same
    length, so the vector size never changes.
    """
    if results.pose_landmarks:
        pose = np.array(
            [[lm.x, lm.y, lm.z, lm.visibility] for lm in results.pose_landmarks.landmark]
        ).flatten()
    else:
        pose = np.zeros(33*4)

    if results.left_hand_landmarks:
        lh = np.array(
            [[lm.x, lm.y, lm.z] for lm in results.left_hand_landmarks.landmark]
        ).flatten()
    else:
        lh = np.zeros(21*3)

    if results.right_hand_landmarks:
        rh = np.array(
            [[lm.x, lm.y, lm.z] for lm in results.right_hand_landmarks.landmark]
        ).flatten()
    else:
        rh = np.zeros(21*3)

    return np.concatenate([pose, lh, rh])

In [None]:
from google.colab.patches import cv2_imshow

In [None]:
# Class names in class-id order; the inference cells below index this array
# with the argmax of the model output.
# NOTE(review): presumably re-declared so this section can run without the
# data-preparation cells — it must stay identical to the training-time list.
actions = np.array(['Hello', 'Hungry', 'Sick', 'Sorry', 'Thank you', 'What', 'When', 'Where', 'Who', 'Why'])

In [None]:
# Model used by all inference cells below.
# NOTE(review): this path (.../Model/Model_LSTM.h5) is not one of the
# per-fold files saved earlier (.../Model/LSTM/Model_LSTM_<n>.h5) and no
# visible cell writes it — confirm where this file comes from.
model = load_model('/content/drive/MyDrive/NSC/Model/Model_LSTM.h5')

## Hello

In [None]:
# Replay one "Hello" clip through the recogniser.
# Key points of the latest 30 frames are kept as a sliding window; once the
# window is full, every new frame is classified and the annotated frame shown.
sequence = []
with mp_holistic.Holistic(min_detection_confidence=0.5, min_tracking_confidence=0.5) as holistic:
  cap = cv2.VideoCapture("/content/drive/MyDrive/NSC/Data Set/datasets clip/Hello 30 clip/IMG_3830.MOV")
  if not cap.isOpened():
    # Without this the loop below would end immediately with no output at all.
    print("Could not open the video clip")
  try:
    while True:
      ret, frame = cap.read()
      if not ret:
        # End of the clip (or a read failure): stop.
        break
      frame = cv2.resize(frame, (800, 480))
      image, results = mediapipe_detection(frame, holistic)
      draw_styled_landmarks(image, results)
      keypoints = extract_keypoints(results)
      sequence.append(keypoints)
      sequence = sequence[-30:]
      if len(sequence) == 30:
        # verbose=0: no per-call progress bar for every single frame.
        res = model.predict(np.expand_dims(sequence, axis=0), verbose=0)[0]
        best = np.argmax(res)
        name = actions[best]
        cal = res[best] * 100  # top-class probability in percent
        if cal <= 50:
          name = "Do not know"
        cv2.putText(image, name, (50, 50), cv2.FONT_HERSHEY_SIMPLEX, 2, (0,255,0), 2)
        cv2.putText(image, str('%.2f' %(cal)) + " %", (50, 100), cv2.FONT_HERSHEY_SIMPLEX, 1, (0,255,0), 2)
        cv2_imshow(image)
      if cv2.waitKey(1) & 0xFF == ord('q'):
        break
  finally:
    # Always free the video handle, even if a frame raises part-way through.
    cap.release()

cv2.destroyAllWindows()

## Sorry

In [None]:
# Replay one "Sorry" clip through the recogniser.
# Key points of the latest 30 frames are kept as a sliding window; once the
# window is full, every new frame is classified and the annotated frame shown.
sequence = []
with mp_holistic.Holistic(min_detection_confidence=0.5, min_tracking_confidence=0.5) as holistic:
  cap = cv2.VideoCapture("/content/drive/MyDrive/NSC/Data Set/datasets clip/Sorry 30 clip/30.mov")
  if not cap.isOpened():
    # Without this the loop below would end immediately with no output at all.
    print("Could not open the video clip")
  try:
    while True:
      ret, frame = cap.read()
      if not ret:
        # End of the clip (or a read failure): stop.
        break
      frame = cv2.resize(frame, (800, 480))
      image, results = mediapipe_detection(frame, holistic)
      draw_styled_landmarks(image, results)
      keypoints = extract_keypoints(results)
      sequence.append(keypoints)
      sequence = sequence[-30:]
      if len(sequence) == 30:
        # verbose=0: no per-call progress bar for every single frame.
        res = model.predict(np.expand_dims(sequence, axis=0), verbose=0)[0]
        best = np.argmax(res)
        name = actions[best]
        cal = res[best] * 100  # top-class probability in percent
        if cal <= 50:
          name = "Do not know"
        cv2.putText(image, name, (50, 50), cv2.FONT_HERSHEY_SIMPLEX, 2, (0,255,0), 2)
        cv2.putText(image, str('%.2f' %(cal)) + " %", (50, 100), cv2.FONT_HERSHEY_SIMPLEX, 1, (0,255,0), 2)
        cv2_imshow(image)
      if cv2.waitKey(1) & 0xFF == ord('q'):
        break
  finally:
    # Always free the video handle, even if a frame raises part-way through.
    cap.release()

cv2.destroyAllWindows()

## Thank you

In [None]:
# Replay one "Thank you" clip through the recogniser.
# Key points of the latest 30 frames are kept as a sliding window; once the
# window is full, every new frame is classified and the annotated frame shown.
sequence = []
with mp_holistic.Holistic(min_detection_confidence=0.5, min_tracking_confidence=0.5) as holistic:
  cap = cv2.VideoCapture("/content/drive/MyDrive/NSC/Data Set/datasets clip/Thank you 30 clip/IMG_3842.MOV")
  if not cap.isOpened():
    # Without this the loop below would end immediately with no output at all.
    print("Could not open the video clip")
  try:
    while True:
      ret, frame = cap.read()
      if not ret:
        # End of the clip (or a read failure): stop.
        break
      frame = cv2.resize(frame, (800, 480))
      image, results = mediapipe_detection(frame, holistic)
      draw_styled_landmarks(image, results)
      keypoints = extract_keypoints(results)
      sequence.append(keypoints)
      sequence = sequence[-30:]
      if len(sequence) == 30:
        # verbose=0: no per-call progress bar for every single frame.
        res = model.predict(np.expand_dims(sequence, axis=0), verbose=0)[0]
        best = np.argmax(res)
        name = actions[best]
        cal = res[best] * 100  # top-class probability in percent
        if cal <= 50:
          name = "Do not know"
        cv2.putText(image, name, (50, 50), cv2.FONT_HERSHEY_SIMPLEX, 2, (0,255,0), 2)
        cv2.putText(image, str('%.2f' %(cal)) + " %", (50, 100), cv2.FONT_HERSHEY_SIMPLEX, 1, (0,255,0), 2)
        cv2_imshow(image)
      if cv2.waitKey(1) & 0xFF == ord('q'):
        break
  finally:
    # Always free the video handle, even if a frame raises part-way through.
    cap.release()

cv2.destroyAllWindows()