### **Imports**

In [None]:
from os.path import join, realpath, dirname, exists, abspath, isfile, isdir
from os import mkdir as mk, name as os_name, getcwd, environ, pathsep, rename, listdir
from typing import Tuple

from mediapipe.python.solutions import drawing_utils as du 
from mediapipe.python.solutions import hands
from google.protobuf.json_format import MessageToDict

from sklearn.model_selection import train_test_split
from sklearn.metrics import ConfusionMatrixDisplay, confusion_matrix
import matplotlib.pyplot as plt
from tensorflow.python.keras.utils.all_utils import to_categorical;
from tensorflow.python.keras.models import Sequential
from tensorflow.python.keras.layers import LSTM, Dense, Conv1D
from tensorflow.python.keras.callbacks import TensorBoard
from tensorflow.python.keras.backend import set_session
from tensorflow.python.keras import optimizers
import tensorflow as tf
import tensorflowjs as tfjs

from numpy import array, zeros, concatenate, save, load, argmax, expand_dims
from uuid import uuid1

import cv2
from cv2 import imread, imshow, imwrite, flip, cvtColor, COLOR_BGR2RGB

### **Definitions**

In [None]:
# Options 💾
MODEL_NAME = 'v9_dan'  # sub-folder of MODELS_DIR that holds this model's artifacts

# MediaPipe Hands detector settings
MP_MODEL_COMPLEXITY = 0
MP_DETECTION_CONFIDENCE = 0.75
MP_TRACKING_CONFIDENCE = 0.75
MP_NUM_HANDS = 1

# One class per lowercase letter, 'a' through 'z'
SIGNS = [chr(code) for code in range(ord('a'), ord('z') + 1)]

# Full class list: the extra 'none' class sits at index 0, followed by SIGNS
ALL_SIGNS = ['none', *SIGNS]

CLASS_COUNT = len(ALL_SIGNS)
SEQUENCE_LENGHT = 10 # Amount of data per collection

# Constants 🚧
HAND_LANDMARK_COUNT = 21 # https://mediapipe.dev/images/mobile/hand_landmarks.png
HAND_LANDMARK_POINTS = HAND_LANDMARK_COUNT * 3 # (x, y, z)

# Paths 📁 (everything is resolved against the current working directory)
ROOT_DIR = getcwd()

# Model artifacts
MODELS_DIR = join(ROOT_DIR, 'models')
MODEL_DIR = join(MODELS_DIR, MODEL_NAME)
LOG_DIR = join(MODEL_DIR, 'logs')
SAVED_MODEL_PATH = join(MODEL_DIR, 'signs.h5')
EXPORTED_MODEL_DIR = join(MODEL_DIR)

# Extracted keypoint data
DATA_DIR = join(ROOT_DIR, 'data')

# Image pipeline folders
IMAGES_DIR = join(ROOT_DIR, 'images')
COLLECTED_IMAGES_DIR = join(IMAGES_DIR, 'collected')
PREPROCESSED_IMAGES_DIR = join(IMAGES_DIR, 'preprocessed')
PROCCESSED_IMAGES_DIR = join(IMAGES_DIR, 'processed')
REJECTED_IMAGES_DIR = join(IMAGES_DIR, 'rejected')

# Util 📐
def mkdir(path: str):
  """Create the directory `path`, including any missing parent directories.

  If something already exists at `path`, nothing is created and a notice is
  printed instead.

  Args:
    path: directory to create.
  """
  # Local import: the file-level imports only bring in `os.mkdir` (as `mk`),
  # which fails when a parent folder such as `models/` does not exist yet.
  from os import makedirs

  if exists(path):
    print(f'{path} already exists!')
    return

  # exist_ok=True covers the case where the directory appears between the
  # check above and this call.
  makedirs(path, exist_ok=True)

def dir_exists(dir_path: str) -> bool:
  """Tell whether `dir_path` exists and is a directory.

  Args:
    dir_path: path to check.

  Returns:
    True only when the path exists and is a directory.
  """
  if not exists(dir_path):
    return False
  return isdir(dir_path)


### **Mediapipe Util**

In [None]:
def draw_landmarks(image, results):
  """Draw every detected hand from `results` onto `image`, in place.

  Args:
    image: frame handed to `du.draw_landmarks` as the drawing target.
    results: detection result exposing `multi_hand_landmarks`.

  Returns:
    The same `image` object, whether or not anything was drawn.
  """
  if not results.multi_hand_landmarks:
    return image

  # The same style is used for every hand, so build the specs once
  # instead of once per loop iteration.
  point_spec = du.DrawingSpec(color=(119, 252, 3), thickness=1, circle_radius=2)
  edge_spec = du.DrawingSpec(color=(119, 252, 3), thickness=1, circle_radius=2)

  for hand in results.multi_hand_landmarks:
    du.draw_landmarks(image, hand, hands.HAND_CONNECTIONS, point_spec, edge_spec)

  # Return the frame on this path too, so both exits agree.
  return image
    
def draw_img_landmarks(image, hand_landmarks):
  """Draw each entry of `hand_landmarks` onto `image` with thick markers.

  Args:
    image: frame handed to `du.draw_landmarks` as the drawing target.
    hand_landmarks: iterable of per-hand landmark lists.
  """
  for hand in hand_landmarks:
    point_style = du.DrawingSpec(color=(224,0,0), thickness=32, circle_radius=5)  # points
    edge_style = du.DrawingSpec(color=(0,0,224), thickness=32, circle_radius=5)   # edges
    du.draw_landmarks(image, hand, hands.HAND_CONNECTIONS, point_style, edge_style)

def mediapipe_detection(image, hands: hands.Hands):
  """Run the given MediaPipe Hands detector on one frame.

  The frame is converted BGR -> RGB for the detector and converted back
  afterwards.

  Args:
    image: input frame, treated as BGR by the colour conversion.
    hands: detector whose `process` method is called on the RGB frame.

  Returns:
    A tuple `(image, results)`: the frame converted back to BGR, and
    whatever `hands.process` returned.
  """
  rgb_frame = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

  # The frame is flagged read-only for the duration of the detector call
  # and made writeable again right after.
  rgb_frame.flags.writeable = False
  results = hands.process(rgb_frame)
  rgb_frame.flags.writeable = True

  bgr_frame = cv2.cvtColor(rgb_frame, cv2.COLOR_RGB2BGR)
  return bgr_frame, results

def extract_keypoints_rh(results):
  """Flatten the first detected hand's landmarks into a 1-D array.

  Args:
    results: detection result exposing `multi_hand_landmarks`.

  Returns:
    An array laid out as x, y, z per landmark, in landmark order. When no
    hand was detected, an all-zero array of length HAND_LANDMARK_POINTS.
  """
  detected_hands = results.multi_hand_landmarks
  if not detected_hands:
    return zeros(HAND_LANDMARK_POINTS)

  # Only the first hand is used; any further hands are ignored.
  first_hand = MessageToDict(detected_hands[0])['landmark']
  return array([lk[axis] for lk in first_hand for axis in ('x', 'y', 'z')])

def get_handedness(results):
  """Return the handedness label of the first detected hand.

  Args:
    results: detection result exposing a non-empty `multi_handedness`.

  Returns:
    The `label` of the first classification entry for the first hand.
  """
  first_hand = MessageToDict(results.multi_handedness[0])
  top_classification = first_hand['classification'][0]
  return top_classification['label']

### **Model Utils**

#### *Model Topologies*

In [None]:
def model_0(input_shape: Tuple[int, int]) -> Sequential:
  """Build the first topology: LSTM 64 -> 128 -> 64, then Dense 64 -> 32 -> softmax.

  Args:
    input_shape: shape passed to the first LSTM layer as `input_shape`.

  Returns:
    An uncompiled Sequential model with CLASS_COUNT softmax outputs.
  """
  return Sequential([
    # Stacked LSTMs: all but the last hand the full sequence to the next layer
    LSTM(64, return_sequences=True, activation='relu', input_shape=input_shape),
    LSTM(128, return_sequences=True, activation='relu'),
    LSTM(64, return_sequences=False, activation='relu'),
    # Dense classification head
    Dense(64, activation='relu'),
    Dense(32, activation='relu'),
    Dense(CLASS_COUNT, activation='softmax'),
  ])

def model_1(input_shape: Tuple[int, int]) -> Sequential:
  """Build the second topology: LSTM 32 -> 64 -> 128, then Dense 128 -> 64 -> softmax.

  Args:
    input_shape: shape passed to the first LSTM layer as `input_shape`.

  Returns:
    An uncompiled Sequential model with CLASS_COUNT softmax outputs.
  """
  stack = (
    # Stacked LSTMs: all but the last hand the full sequence to the next layer
    LSTM(32, return_sequences=True, activation='relu', input_shape=input_shape),
    LSTM(64, return_sequences=True, activation='relu'),
    LSTM(128, return_sequences=False, activation='relu'),
    # Dense classification head
    Dense(128, activation='relu'),
    Dense(64, activation='relu'),
    Dense(CLASS_COUNT, activation='softmax'),
  )

  model = Sequential()
  for layer in stack:
    model.add(layer)
  return model

### **Capture w/Mediapipe**

In [None]:
# Open capture device 0 (NOTE(review): presumably the default webcam — confirm on the target machine)
cap = cv2.VideoCapture(0)

# Module-level selection state; the functions in this cell update it through `global`
selected_sign_index = 0
selected_sign = SIGNS[selected_sign_index]
# Number of entries currently in the selected sign's data folder.
# listdir raises if DATA_DIR/<sign> does not exist yet.
selected_sign_amount = len(listdir(join(DATA_DIR, selected_sign)))

def select_next_sign():
  """Advance the global selection to the next sign in SIGNS.

  Does nothing when the last sign is already selected. Otherwise updates
  `selected_sign_index`, `selected_sign`, and refreshes `selected_sign_amount`
  from the number of entries in the newly selected sign's data folder.
  """
  global selected_sign, selected_sign_index, selected_sign_amount

  last_index = len(SIGNS) - 1
  if selected_sign_index >= last_index:
    return  # already at the end of the list

  selected_sign_index += 1
  selected_sign = SIGNS[selected_sign_index]
  sign_dir = join(DATA_DIR, selected_sign)
  selected_sign_amount = len(listdir(sign_dir))

def select_prev_sign():
  """Move the global selection back to the previous sign in SIGNS.

  Does nothing when the first sign is already selected. Otherwise updates
  `selected_sign_index`, `selected_sign`, and refreshes `selected_sign_amount`
  from the number of entries in the newly selected sign's data folder.
  """
  global selected_sign, selected_sign_index, selected_sign_amount

  if selected_sign_index <= 0:
    return  # already at the start of the list

  selected_sign_index -= 1
  selected_sign = SIGNS[selected_sign_index]
  sign_dir = join(DATA_DIR, selected_sign)
  selected_sign_amount = len(listdir(sign_dir))

def capture(results):
  """Save the current hand keypoints as a new sample of the selected sign.

  Nothing is saved (a message is printed instead) when no hand is present
  or when the detected hand is labelled 'Left'. After a successful save,
  `selected_sign_amount` is refreshed from the sign's data folder.

  Args:
    results: detection result for the current frame.
  """
  global selected_sign, selected_sign_amount

  if not results.multi_hand_landmarks:
    print('no hand available')
    return

  if get_handedness(results) == 'Left':
    print('must use right hand')
    return

  hand_keypoints = extract_keypoints_rh(results)
  sign_dir = join(DATA_DIR, selected_sign)
  # A fresh uuid keeps every sample's file name unique
  data_path = join(sign_dir, f'{selected_sign}.{uuid1()}')
  save(data_path, hand_keypoints)
  print(f'saved at {data_path}')
  selected_sign_amount = len(listdir(sign_dir))

# Interactive capture loop: shows the webcam feed with the selected sign and
# its sample count, and reacts to keys:
#   space -> save a sample, right/left arrow -> next/previous sign, esc -> quit.
# try/finally guarantees the camera and the window are released even when an
# exception is raised inside the loop.
try:
  with hands.Hands(
    model_complexity=MP_MODEL_COMPLEXITY,
    min_detection_confidence=MP_DETECTION_CONFIDENCE,
    min_tracking_confidence=MP_TRACKING_CONFIDENCE,
    max_num_hands=MP_NUM_HANDS
  ) as mp_hands:
    while cap.isOpened():

      success, image = cap.read()

      # Check the read BEFORE touching the frame: on failure `image` is None
      # and flipping it would raise.
      if not success:
        print("Ignoring empty camera frame.")
        continue

      # Flip the image horizontally for a selfie-view display.
      image = flip(image, 1)

      image, results = mediapipe_detection(image, mp_hands)
      draw_landmarks(image, results)
      image = cv2.putText(
        image, f'{selected_sign} | {selected_sign_amount}',
        (32, 32), cv2.FONT_HERSHEY_SIMPLEX, 1, (119, 252, 3), 2, cv2.LINE_AA
      )

      imshow('signs', image)

      # waitKeyEx returns the full key code, which is what the arrow-key
      # checks below compare against.
      # NOTE(review): the arrow codes look platform-specific — print `key`
      # to find the right values on another OS.
      key = cv2.waitKeyEx(1)

      if key == 32: # space bar
        capture(results)

      if key == 2555904: # right key
        select_next_sign()

      if key == 2424832: # left key
        select_prev_sign()

      if key == 27: # esc
        break
finally:
  cap.release()
  cv2.destroyAllWindows()

### **Image Renaming**

In [None]:
def rename_folder_signs(_sign: str):
  """Move a sign's collected images into its preprocessed folder under new names.

  Every regular file in COLLECTED_IMAGES_DIR/<_sign> is moved to
  PREPROCESSED_IMAGES_DIR/<_sign> and renamed to `<_sign>.<uuid>.jpg`.
  Sub-folders of the source are left untouched.

  Args:
    _sign: sign whose collected images should be moved.

  Raises:
    Exception: if the source folder for `_sign` is missing or is not a directory.
  """
  SOURCE_SIGN_DIR = join(COLLECTED_IMAGES_DIR, _sign)
  DESTIN_SIGN_DIR = join(PREPROCESSED_IMAGES_DIR, _sign)

  # Validate the source first, so a bad sign name does not leave an empty
  # destination folder behind.
  if not isdir(SOURCE_SIGN_DIR): raise Exception(f'make sure {SOURCE_SIGN_DIR} exists!')

  mkdir(DESTIN_SIGN_DIR)

  for img_name in listdir(SOURCE_SIGN_DIR):
    src_img_path = join(SOURCE_SIGN_DIR, img_name)
    if isfile(src_img_path):
      # NOTE(review): the new name always ends in .jpg, whatever the source
      # extension was — confirm this is intended for non-JPEG inputs.
      dest_img_path = join(DESTIN_SIGN_DIR, f'{_sign}.{uuid1()}.jpg')
      rename(src_img_path, dest_img_path)

In [None]:
# ALL SIGNS
# Moves and renames the collected images of every sign in SIGNS.
# rename_folder_signs raises for a sign whose collected folder is missing,
# which stops the loop at that sign.
for sign in SIGNS:
  rename_folder_signs(sign)

In [None]:
# SINGLE SIGN
# Same operation for one sign only; change _SIGN to pick the sign.
_SIGN = 'k'
rename_folder_signs(_SIGN)

### **Image Data Extraction**

In [None]:
def reject(sign: str, img_name: str):
  """Move an image from the sign's preprocessed folder into REJECTED_IMAGES_DIR.

  Args:
    sign: sign folder the image currently lives in.
    img_name: file name of the image; it keeps this name after the move.
  """
  current_path = join(PREPROCESSED_IMAGES_DIR, sign, img_name)
  rejected_path = join(REJECTED_IMAGES_DIR, img_name)
  rename(current_path, rejected_path)
  
def accept(sign: str, img_name: str):
  """Move an image from the sign's preprocessed folder into PROCCESSED_IMAGES_DIR.

  Args:
    sign: sign folder the image currently lives in.
    img_name: file name of the image; it keeps this name after the move.
  """
  current_path = join(PREPROCESSED_IMAGES_DIR, sign, img_name)
  processed_path = join(PROCCESSED_IMAGES_DIR, img_name)
  rename(current_path, processed_path)

def extract_data(sign: str):
  """Extract hand keypoints from every preprocessed image of `sign`.

  Each image is mirrored and run through MediaPipe Hands. If exactly one
  hand labelled 'Right' is found, the keypoints are saved under
  DATA_DIR/<sign>/<image name> and the image is moved with `accept`.
  Otherwise the image is moved with `reject`. Entries that are not regular
  files, or that OpenCV cannot read, are skipped and left where they are.

  Args:
    sign: sign whose preprocessed images should be processed.
  """
  SIGN_DIR = join(PREPROCESSED_IMAGES_DIR, sign)
  DATA_SIGN_DIR = join(DATA_DIR, sign)
  if not exists(DATA_SIGN_DIR): mkdir(DATA_SIGN_DIR)

  with hands.Hands(
    # These are unrelated still images, not a video stream: run detection on
    # every image instead of tracking the hand from the previous one.
    static_image_mode=True,
    model_complexity=MP_MODEL_COMPLEXITY,
    min_detection_confidence=MP_DETECTION_CONFIDENCE,
    min_tracking_confidence=MP_TRACKING_CONFIDENCE,
    max_num_hands=1
  ) as mp_hands:
    for img_name in listdir(SIGN_DIR):
      img_path = join(SIGN_DIR, img_name)

      # Skip sub-folders and other non-file entries
      if not isfile(img_path):
        continue

      # imread returns None instead of raising when the file cannot be decoded
      raw_image = imread(img_path)
      if raw_image is None:
        print(f'unable to read image: {img_name}')
        continue

      image = flip(raw_image, 1)
      _, results = mediapipe_detection(image, mp_hands)

      if not results.multi_hand_landmarks:
        print(f'unable to detect any hands for image: {img_name}')
        reject(sign, img_name)
        continue

      if len(results.multi_handedness) > 1:
        print(f'detecting more than 1 hand for image: {img_name}')
        reject(sign, img_name)
        continue

      if get_handedness(results) != 'Right':
        print(f'detected hand is not a Right hand for image {img_name}')
        reject(sign, img_name)
        continue

      keypoints = extract_keypoints_rh(results)
      data_path = join(DATA_DIR, sign, img_name)
      save(data_path, keypoints)
      accept(sign, img_name)

In [None]:
# ALL SIGNS
# Runs keypoint extraction over the preprocessed images of every sign in SIGNS.
for sign in SIGNS:
  extract_data(sign)

In [None]:
# SINGLE SIGN
# Runs keypoint extraction for one sign only; change the argument to pick the sign.
extract_data('a')

### **Train**

##### *Load & Partition Data*

In [None]:
def get_collection_count():
  """Return the smallest number of entries found in any folder of DATA_DIR.

  Only directories directly inside DATA_DIR are counted, so stray files
  such as `.gitkeep` are ignored.

  Returns:
    The minimum entry count across the collection folders.

  Raises:
    ValueError: if DATA_DIR contains no folders at all.
  """
  data_amounts = [
    len(listdir(join(DATA_DIR, entry)))
    for entry in listdir(DATA_DIR)
    if isdir(join(DATA_DIR, entry))
  ]

  # min() of an empty list also raises ValueError, but with a message that
  # does not point at the missing data.
  if not data_amounts:
    raise ValueError(f'no collection folders found in {DATA_DIR}')

  return min(data_amounts)

# Every class is capped at the size of the smallest collection so classes stay balanced
COLLECTION_COUNT = get_collection_count()
print(f'currently using {COLLECTION_COUNT} data points')

# Seed for the train/test/validation splits, so re-running the notebook
# produces the same partitions.
SPLIT_SEED = 42

# Load Training Data
label_map = { label: num for num, label in enumerate(ALL_SIGNS) }

# The 'none' class is synthesized: COLLECTION_COUNT windows of all-zero
# keypoints, which is what extract_keypoints_rh returns when no hand is detected.
sequences = [
  [[0] * HAND_LANDMARK_POINTS for _ in range(SEQUENCE_LENGHT)]
  for _ in range(COLLECTION_COUNT)
]
labels = [label_map['none']] * COLLECTION_COUNT

for sign in SIGNS:
  sign_data_dir = join(DATA_DIR, sign)
  # listdir order is unspecified; sorting makes the selected subset stable
  for data_file_name in sorted(listdir(sign_data_dir))[:COLLECTION_COUNT]:
    data_path = join(sign_data_dir, data_file_name)
    keypoints = load(data_path)
    # One saved frame is repeated to fill a whole window
    window = [keypoints] * SEQUENCE_LENGHT
    sequences.append(window)
    labels.append(label_map[sign])

x = array(sequences)
# Pass num_classes explicitly: otherwise the one-hot width is inferred from the
# largest label present and would shrink if the last signs had no data.
y = to_categorical(labels, num_classes=CLASS_COUNT).astype(int)

input_shape = (SEQUENCE_LENGHT, HAND_LANDMARK_POINTS)

# Testing!
s_expected = (CLASS_COUNT * COLLECTION_COUNT, SEQUENCE_LENGHT, HAND_LANDMARK_POINTS)
s_result = x.shape
l_expected = (CLASS_COUNT * COLLECTION_COUNT, CLASS_COUNT)
l_result = y.shape
if s_result != s_expected:
  raise Exception(f'WARNING: expected sequence shape `{s_expected}` != from gotten `{s_result}`')
if l_result != l_expected:
  raise Exception(f'WARNING: expected labels shape `{l_expected}` != from gotten `{l_result}`')

print(f'input shape is {input_shape}')

# partitioning train, test, validation data (80% / 10% / 10% of the whole set)
# 10% test data: 0.1 of the whole set
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.1, random_state=SPLIT_SEED)
# 10% val data: (0.1 / 0.9) of the remaining 90% is 10% of the whole set
x_train, x_val,  y_train, y_val  = train_test_split(x_train, y_train, test_size=0.1/0.9, random_state=SPLIT_SEED)

##### *Training*

In [None]:
def get_model(): # Defining model to use
  """Build a fresh instance of the topology used for training and inference.

  Returns:
    Whatever `model_1` returns for the (SEQUENCE_LENGHT, HAND_LANDMARK_POINTS) input shape.
  """
  model_input_shape = (SEQUENCE_LENGHT, HAND_LANDMARK_POINTS)
  return model_1(model_input_shape)

In [None]:
# Make sure the output folders for this model exist, then log training to TensorBoard
mkdir(MODEL_DIR)
mkdir(LOG_DIR)
tb_callback = TensorBoard(log_dir=LOG_DIR)
# Forward-slash version of the log path, only used in the hint printed below
terminal_log_dir = LOG_DIR.replace('\\', '/')

# Build and compile a fresh model: Adam optimizer, categorical cross-entropy loss
model = get_model()
model.compile(
  optimizer=optimizers.adam_v2.Adam(learning_rate=0.0001),
  loss='categorical_crossentropy',
  metrics=['categorical_accuracy', 'categorical_crossentropy']
)

print("Num GPUs Available: ", len(tf.config.list_physical_devices('GPU')))
print(f'[RUN]: tensorboard --logdir={terminal_log_dir}')
# NOTE(review): steps_per_epoch=64 with batch_size=256 asks for 16384 samples per
# epoch — confirm the training set is large enough, otherwise Keras may run out
# of data and stop before 300 epochs.
# NOTE(review): use_multiprocessing / workers are documented by Keras for
# generator or Sequence inputs — confirm they have any effect with in-memory arrays.
model.fit(
  x_train, y_train, validation_data=(x_val, y_val),
  epochs=300, batch_size=256,
  use_multiprocessing=True, workers=4,
  callbacks=[tb_callback],
  steps_per_epoch=64,
  shuffle=True,
)

In [None]:
# Persist the trained model to SAVED_MODEL_PATH, then drop the in-memory
# reference; later cells rebuild the model and reload its weights from disk.
model.save(SAVED_MODEL_PATH)
del model

##### *Test Model*

In [None]:
# LOAD MODEL 💾
# Rebuild the architecture and restore the trained weights from SAVED_MODEL_PATH
model = get_model()
model.load_weights(SAVED_MODEL_PATH)
# summary() prints the table itself and returns None; wrapping it in print()
# would emit a stray "None" line after the table.
model.summary()

In [None]:
# POST-TRAINING PREDICTIONS ⚡
res = model.predict(x_test)

# Collapse the one-hot targets and the predicted scores into class indices,
# one per test sample.
y_real = argmax(y_test, axis=1)
y_pred = argmax(res, axis=1)

In [None]:
# GENERATE CONFUSION MATRIX 😵
# Pass the full label set explicitly: without it, a class missing from both
# y_real and y_pred would be left out of the matrix and ALL_SIGNS would no
# longer line up with its rows and columns.
cm = confusion_matrix(y_real, y_pred, labels=list(range(CLASS_COUNT)))
disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=ALL_SIGNS)
disp.plot(xticks_rotation=75)
disp.figure_.set_size_inches(12.8, 7.2)
disp.figure_.set_dpi(150)
plt.show()

In [None]:
# OVERALL ACC 🎯
total = len(res)

# Count the test samples whose predicted class index equals the true one
correct_preds = int((y_real == y_pred).sum())

acc_precentage = (correct_preds/total) * 100

print(f'model {MODEL_NAME} has an overall accuracy of {"{:.2f}".format(acc_precentage)}%')

### **Detection**

In [None]:
# Live detection: rebuild the model, restore its weights, and classify a
# sliding window of the most recent webcam frames. Press 'q' to quit.
model = get_model()
model.load_weights(SAVED_MODEL_PATH)

sequence = []
# NOTE(review): predicted_sign and threshold are defined but not used by the
# loop below — presumably meant for filtering low-confidence predictions.
predicted_sign = 'none'
threshold = 0.5

cap = cv2.VideoCapture(0)
# try/finally guarantees the camera and the window are released even when an
# exception is raised inside the loop.
try:
  with hands.Hands(
    model_complexity=MP_MODEL_COMPLEXITY,
    min_detection_confidence=MP_DETECTION_CONFIDENCE,
    min_tracking_confidence=MP_TRACKING_CONFIDENCE,
    max_num_hands=MP_NUM_HANDS
  ) as mp_hands:
    while cap.isOpened():

      success, image = cap.read()

      # Check the read BEFORE touching the frame: on failure `image` is None
      # and flipping it would raise.
      if not success:
        print("Ignoring empty camera frame.")
        continue

      # Flip the image horizontally for a selfie-view display.
      image = flip(image, 1)

      image, results = mediapipe_detection(image, mp_hands)
      draw_landmarks(image, results)

      # Keep the newest SEQUENCE_LENGHT keypoint vectors, newest first
      keypoints = extract_keypoints_rh(results)
      sequence.insert(0, keypoints)
      sequence = sequence[:SEQUENCE_LENGHT]

      # Predict only once a full window has been collected
      if len(sequence) == SEQUENCE_LENGHT:
        output = model.predict(expand_dims(sequence, axis=0))[0]
        res = argmax(output)
        print(ALL_SIGNS[res], output[res])

      imshow('MediaPipe Hands', image)

      # Mask to the low byte so the comparison with ord('q') also works when
      # waitKey reports extra high bits.
      key = cv2.waitKey(1) & 0xFF
      if key == ord('q'):
        break
finally:
  cap.release()
  cv2.destroyAllWindows()

### **Export Model**

#### *Topology*

In [None]:
# Rebuild the model, restore the trained weights, and write the
# TensorFlow.js export into EXPORTED_MODEL_DIR.
model = get_model()
model.load_weights(SAVED_MODEL_PATH)
tfjs.converters.save_keras_model(model, EXPORTED_MODEL_DIR)