### **Imports**

In [1]:
from os.path import join, realpath, dirname, exists, abspath, isfile, isdir
from os import mkdir as mk, name as os_name, getcwd, environ, pathsep, rename, listdir
from os import makedirs
from typing import Tuple
from uuid import uuid1

from mediapipe.python.solutions import drawing_utils as du 
from mediapipe.python.solutions import hands
from google.protobuf.json_format import MessageToDict

from sklearn.model_selection import train_test_split
from tensorflow.python.keras.utils.all_utils import to_categorical;
from tensorflow.python.keras.models import Sequential
from tensorflow.python.keras.layers import LSTM, Dense, Conv1D
from tensorflow.python.keras.callbacks import TensorBoard
from tensorflow.python.keras.backend import set_session
import tensorflow as tf
import tensorflowjs as tfjs

from numpy import array, zeros, concatenate, save, load, argmax, expand_dims

import cv2
from cv2 import imread, imshow, imwrite, flip, cvtColor, COLOR_BGR2RGB

### **Definitions**

In [3]:
# Options 💾
MODEL_NAME = 'v3' # sub-folder of MODELS_DIR that holds this model's files

# Settings passed to every `hands.Hands(...)` instance
MP_MODEL_COMPLEXITY = 0
MP_DETECTION_CONFIDENCE = 0.75
MP_TRACKING_CONFIDENCE = 0.75
MP_NUM_HANDS = 1

# One class per letter, plus a leading 'none' class at index 0
SIGNS = list('abcdefghijklmnopqrstuvwxyz')
ALL_SIGNS = ['none', *SIGNS]
CLASS_COUNT = len(ALL_SIGNS)

SEQUENCE_LENGHT = 10 # number of keypoint frames in one model input window

# Paths 📁 (everything is resolved relative to the current working directory)
ROOT_DIR = getcwd()

MODELS_DIR = join(ROOT_DIR, 'models')
MODEL_DIR = join(MODELS_DIR, MODEL_NAME)
LOG_DIR = join(MODEL_DIR, 'logs')
SAVED_MODEL_PATH = join(MODEL_DIR, 'signs.h5')
EXPORTED_MODEL_DIR = MODEL_DIR # the exported model is written next to the saved one

DATA_DIR = join(ROOT_DIR, 'data')

IMAGES_DIR = join(ROOT_DIR, 'images')
(
  COLLECTED_IMAGES_DIR,
  PREPROCESSED_IMAGES_DIR,
  PROCCESSED_IMAGES_DIR,
  REJECTED_IMAGES_DIR,
) = (
  join(IMAGES_DIR, stage)
  for stage in ('collected', 'preprocessed', 'processed', 'rejected')
)

# Constants 🚧
HAND_LANDMARK_COUNT = 21 # https://mediapipe.dev/images/mobile/hand_landmarks.png
HAND_LANDMARK_POINTS = 3 * HAND_LANDMARK_COUNT # (x, y, z) per landmark

# Util 📐
def mkdir(path: str):
  """Create the directory `path`, including any missing parent directories.

  Args:
    path: directory to create.

  Prints a notice and does nothing else when `path` already exists.
  """
  if exists(path):
    print(f'{path} already exists!')
    return

  # os.mkdir cannot create nested paths (e.g. `models/<name>` when `models`
  # is missing); makedirs creates the whole chain. exist_ok covers the path
  # appearing between the check above and this call.
  makedirs(path, exist_ok=True)

def dir_exists(dir_path: str) -> bool:
  """Tell whether `dir_path` exists and is a directory."""
  if not exists(dir_path):
    return False
  return isdir(dir_path)


### **Mediapipe Util**

In [4]:
def draw_landmarks(image, results):
  """Draw every hand found in `results` onto `image`.

  Args:
    image: frame passed to `du.draw_landmarks` for each detected hand.
    results: detection result exposing `multi_hand_landmarks`
      (falsy when no hand was detected).

  Returns:
    The same `image` object, whether or not anything was drawn. (Previously
    the function returned `image` only when no hand was detected and `None`
    otherwise.)
  """
  detected_hands = results.multi_hand_landmarks
  if not detected_hands:
    return image

  # The two styles are identical for every hand, so build them once.
  landmark_style = du.DrawingSpec(color=(121,22,76), thickness=2, circle_radius=4)
  connection_style = du.DrawingSpec(color=(121,44,250), thickness=2, circle_radius=2)

  for hand in detected_hands:
    du.draw_landmarks(
      image, hand, hands.HAND_CONNECTIONS,
      landmark_style,
      connection_style
    )

  return image
    
def draw_img_landmarks(image, hand_landmarks):
  """Draw each entry of `hand_landmarks` onto `image` with thick strokes.

  Args:
    image: frame passed to `du.draw_landmarks`.
    hand_landmarks: iterable of per-hand landmark lists.
  """
  point_style = du.DrawingSpec(color=(224,0,0), thickness=32, circle_radius=5)
  edge_style = du.DrawingSpec(color=(0,0,224), thickness=32, circle_radius=5)

  for hand in hand_landmarks:
    du.draw_landmarks(image, hand, hands.HAND_CONNECTIONS, point_style, edge_style)

def mediapipe_detection(image, hands: hands.Hands):
  """Run hand detection on a BGR frame.

  Args:
    image: BGR frame (as produced by OpenCV).
    hands: detector whose `process(image)` is called on the RGB frame.
      Note: this parameter shadows the module-level `hands` import inside
      the function body.

  Returns:
    `(image, results)`: the frame converted back to BGR and whatever
    `hands.process` returned.
  """
  rgb_frame = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

  # The frame is marked read-only only for the duration of the detection call.
  rgb_frame.flags.writeable = False
  results = hands.process(rgb_frame)
  rgb_frame.flags.writeable = True

  bgr_frame = cv2.cvtColor(rgb_frame, cv2.COLOR_RGB2BGR)
  return bgr_frame, results

def extract_keypoints_rh(results):
  """Flatten the first detected hand into a 1-D keypoint array.

  Args:
    results: detection result exposing `multi_hand_landmarks`.

  Returns:
    An array laid out as `[x0, y0, z0, x1, y1, z1, ...]` for the first hand,
    or `zeros(HAND_LANDMARK_POINTS)` when no hand was detected.
  """
  detected_hands = results.multi_hand_landmarks
  if detected_hands:
    first_hand = MessageToDict(detected_hands[0])
    return array([
      landmark[axis]
      for landmark in first_hand['landmark']
      for axis in ('x', 'y', 'z')
    ])

  return zeros(HAND_LANDMARK_POINTS)

def get_handedness(results):
  """Return the `label` of the first classification of the first detected hand.

  Callers in this notebook compare the result against `'Right'`.
  Expects `results.multi_handedness` to be non-empty.
  """
  first_hand = MessageToDict(results.multi_handedness[0])
  top_classification = first_hand['classification'][0]
  return top_classification['label']

### **Model Utils**

#### *Model Topologies*

In [5]:
def model_0(input_shape: Tuple[int, int]) -> Sequential:
  """Build the 64-128-64 LSTM topology (not compiled).

  Args:
    input_shape: shape of one input sample, given to the first LSTM layer.

  Returns:
    A `Sequential` model: three stacked LSTMs, two dense layers, and a
    softmax output with `CLASS_COUNT` units.
  """
  return Sequential([
    LSTM(64, return_sequences=True, activation='relu', input_shape=input_shape),
    LSTM(128, return_sequences=True, activation='relu'),
    # Last recurrent layer emits a single vector instead of a sequence.
    LSTM(64, return_sequences=False, activation='relu'),
    Dense(64, activation='relu'),
    Dense(32, activation='relu'),
    Dense(CLASS_COUNT, activation='softmax'),
  ])

def model_1(input_shape: Tuple[int, int]) -> Sequential:
  """Build the 32-64-128 LSTM topology (not compiled).

  Args:
    input_shape: shape of one input sample, given to the first LSTM layer.

  Returns:
    A `Sequential` model: three stacked LSTMs of growing width, two dense
    layers, and a softmax output with `CLASS_COUNT` units.
  """
  return Sequential([
    LSTM(32, return_sequences=True, activation='relu', input_shape=input_shape),
    LSTM(64, return_sequences=True, activation='relu'),
    # Last recurrent layer emits a single vector instead of a sequence.
    LSTM(128, return_sequences=False, activation='relu'),
    Dense(128, activation='relu'),
    Dense(64, activation='relu'),
    Dense(CLASS_COUNT, activation='softmax'),
  ])

### **Capture w/Mediapipe**

In [29]:
# Live webcam preview with the detected hand landmarks drawn on top.
# Press `q` in the preview window to stop.
cap = cv2.VideoCapture(0)
try:
  with hands.Hands(
    model_complexity=MP_MODEL_COMPLEXITY,
    min_detection_confidence=MP_DETECTION_CONFIDENCE,
    min_tracking_confidence=MP_TRACKING_CONFIDENCE,
    max_num_hands=MP_NUM_HANDS
  ) as mp_hands:
    while cap.isOpened():
      success, image = cap.read()

      # Check the read result BEFORE touching the frame: on a failed read
      # there is no image to flip and `flip` would raise.
      if not success:
        print("Ignoring empty camera frame.")
        continue

      # Flip the image horizontally for a selfie-view display.
      image = flip(image, 1)

      image, results = mediapipe_detection(image, mp_hands)
      draw_landmarks(image, results)

      imshow('MediaPipe Hands', image)

      # Mask to the low byte so the comparison also works on platforms
      # where waitKey reports extra modifier bits.
      key = cv2.waitKey(1) & 0xFF
      if key == ord('q'):
        break
finally:
  # Always free the camera and close the window, even if a frame fails.
  cap.release()
  cv2.destroyAllWindows()

### **Image Renaming**

In [10]:
def rename_folder_signs(_sign: str):
  """Move the collected images of `_sign` into the preprocessed folder.

  Every regular file in `COLLECTED_IMAGES_DIR/<_sign>` is moved to
  `PREPROCESSED_IMAGES_DIR/<_sign>/<_sign>.<uuid1>.jpg`. Sub-folders are left
  alone. Note that the `.jpg` suffix is applied whatever the original
  extension was.

  Args:
    _sign: name of the sign sub-folder to process.

  Raises:
    FileNotFoundError: when the source folder for `_sign` does not exist.
  """
  source_dir = join(COLLECTED_IMAGES_DIR, _sign)
  destination_dir = join(PREPROCESSED_IMAGES_DIR, _sign)

  # Validate first, so a missing or misspelt sign does not leave an empty
  # destination folder behind.
  if not isdir(source_dir):
    raise FileNotFoundError(f'make sure {source_dir} exists!')

  # Make sure the preprocessed root exists before creating the sign folder in it.
  makedirs(PREPROCESSED_IMAGES_DIR, exist_ok=True)
  mkdir(destination_dir)

  for img_name in listdir(source_dir):
    src_img_path = join(source_dir, img_name)
    if not isfile(src_img_path):
      continue
    dest_img_path = join(destination_dir, f'{_sign}.{uuid1()}.jpg')
    rename(src_img_path, dest_img_path)

In [6]:
# ALL SIGNS
# Move the collected images of every sign listed in SIGNS; rename_folder_signs
# raises for any sign whose collected folder is missing.
for sign in SIGNS:
  rename_folder_signs(sign)

c:\universidad\traductor-lsv-mp\images\preprocessed\a already exists!
c:\universidad\traductor-lsv-mp\images\preprocessed\b already exists!
c:\universidad\traductor-lsv-mp\images\preprocessed\c already exists!
c:\universidad\traductor-lsv-mp\images\preprocessed\d already exists!
c:\universidad\traductor-lsv-mp\images\preprocessed\e already exists!
c:\universidad\traductor-lsv-mp\images\preprocessed\f already exists!
c:\universidad\traductor-lsv-mp\images\preprocessed\g already exists!
c:\universidad\traductor-lsv-mp\images\preprocessed\h already exists!
c:\universidad\traductor-lsv-mp\images\preprocessed\i already exists!
c:\universidad\traductor-lsv-mp\images\preprocessed\j already exists!
c:\universidad\traductor-lsv-mp\images\preprocessed\k already exists!


In [None]:
# SINGLE SIGN
# Same as the cell above, but for one sign only (edit _SIGN as needed).
_SIGN = 'k'
rename_folder_signs(_SIGN)

### **Image Data Extraction**

In [15]:
def reject(sign: str, img_name: str):
  """Move an image of `sign` from the preprocessed folder to the rejected folder.

  Args:
    sign: sign sub-folder inside `PREPROCESSED_IMAGES_DIR` holding the image.
    img_name: file name of the image; it keeps this name in
      `REJECTED_IMAGES_DIR` (which has no per-sign sub-folders).
  """
  # Create the target folder on first use; os.rename fails when it is missing.
  makedirs(REJECTED_IMAGES_DIR, exist_ok=True)
  rename(
    join(PREPROCESSED_IMAGES_DIR, sign, img_name),
    join(REJECTED_IMAGES_DIR, img_name)
  )
  
def accept(sign: str, img_name: str):
  """Move an image of `sign` from the preprocessed folder to the processed folder.

  Args:
    sign: sign sub-folder inside `PREPROCESSED_IMAGES_DIR` holding the image.
    img_name: file name of the image; it keeps this name in
      `PROCCESSED_IMAGES_DIR` (which has no per-sign sub-folders).
  """
  # Create the target folder on first use; os.rename fails when it is missing.
  makedirs(PROCCESSED_IMAGES_DIR, exist_ok=True)
  rename(
    join(PREPROCESSED_IMAGES_DIR, sign, img_name),
    join(PROCCESSED_IMAGES_DIR, img_name)
  )

def extract_data(sign: str):
  """Extract hand keypoints from every preprocessed image of `sign`.

  Each image in `PREPROCESSED_IMAGES_DIR/<sign>` is mirrored and run through
  MediaPipe Hands, then either:
    - rejected (moved with `reject`) when it cannot be read, no hand is
      found, more than one hand is reported, or the hand is not labelled
      'Right'; or
    - accepted: its keypoints are saved under `DATA_DIR/<sign>/` using the
      image file name (numpy appends `.npy`), and the image is moved with
      `accept`.

  Args:
    sign: name of the sign sub-folder to process.
  """
  sign_dir = join(PREPROCESSED_IMAGES_DIR, sign)
  data_sign_dir = join(DATA_DIR, sign)
  makedirs(data_sign_dir, exist_ok=True)

  with hands.Hands(
    # The inputs are unrelated still images, not a video stream: run full
    # detection on each one instead of tracking from the previous image.
    static_image_mode=True,
    model_complexity=MP_MODEL_COMPLEXITY,
    min_detection_confidence=MP_DETECTION_CONFIDENCE,
    min_tracking_confidence=MP_TRACKING_CONFIDENCE,
    max_num_hands=1
  ) as mp_hands:
    for img_name in listdir(sign_dir):
      img_path = join(sign_dir, img_name)
      if not isfile(img_path):
        continue # sub-folders are not images

      raw_image = imread(img_path)
      if raw_image is None:
        # imread signals an unreadable / non-image file by returning None.
        print(f'unable to read image: {img_name}')
        reject(sign, img_name)
        continue

      _, results = mediapipe_detection(flip(raw_image, 1), mp_hands)

      if not results.multi_hand_landmarks:
        print(f'unable to detect any hands for image: {img_name}')
        reject(sign, img_name)
        continue

      # Defensive: with max_num_hands=1 this is not expected to trigger.
      if len(results.multi_handedness) > 1:
        print(f'detecting more than 1 hand for image: {img_name}')
        reject(sign, img_name)
        continue

      if get_handedness(results) != 'Right':
        print(f'detected hand is not a Right hand for image {img_name}')
        reject(sign, img_name)
        continue

      keypoints = extract_keypoints_rh(results)
      save(join(data_sign_dir, img_name), keypoints)
      accept(sign, img_name)

In [18]:
# ALL SIGNS
# Extract keypoints for every sign in SIGNS; images that fail the checks in
# extract_data are reported below and moved to the rejected folder.
for sign in SIGNS:
  extract_data(sign)

unable to detect any hands for image: f.8b3931a6-9e8f-11ed-b17b-1c872c4889a8.jpg
unable to detect any hands for image: g.8d27fe9a-9e8f-11ed-8b03-1c872c4889a8.jpg
unable to detect any hands for image: g.8d30d845-9e8f-11ed-b2cc-1c872c4889a8.jpg
unable to detect any hands for image: i.9185d228-9e8f-11ed-9a04-1c872c4889a8.jpg
unable to detect any hands for image: k.95256bdd-9e8f-11ed-b6d3-1c872c4889a8.jpg


In [None]:
# SINGLE SIGN
# Same as the cell above, but for one sign only.
extract_data('a')

### **Train**

##### *Load & partition data*

In [8]:
def get_collection_count():
  """Return the file count of the smallest collection folder in `DATA_DIR`.

  Only sub-folders of `DATA_DIR` are counted; stray files (such as
  `.gitkeep`) are ignored instead of crashing the listing.

  Returns:
    The smallest number of entries found in any collection folder.

  Raises:
    ValueError: when `DATA_DIR` contains no collection folder.
  """
  data_amounts = [
    len(listdir(join(DATA_DIR, collection_dir)))
    for collection_dir in listdir(DATA_DIR)
    if collection_dir != '.gitkeep' and isdir(join(DATA_DIR, collection_dir))
  ]

  if not data_amounts:
    raise ValueError(f'no collection folders found in {DATA_DIR}')

  return min(data_amounts)

COLLECTION_COUNT = get_collection_count()
print(f'currently using {COLLECTION_COUNT} data points')

SPLIT_SEED = 42 # fixed so the train/test partition is identical on every run

input_shape = (SEQUENCE_LENGHT, HAND_LANDMARK_POINTS)

# Load Training Data
label_map = { label: num for num, label in enumerate(ALL_SIGNS) }

# The 'none' class has no recorded data: it is represented by COLLECTION_COUNT
# all-zero windows, matching what extract_keypoints_rh returns with no hand.
sequences = [zeros(input_shape) for _ in range(COLLECTION_COUNT)]
labels = [label_map['none']] * COLLECTION_COUNT

for sign in SIGNS:
  sign_data_dir = join(DATA_DIR, sign)
  # listdir order is arbitrary; sort so the same files are selected every run.
  for data_file_name in sorted(listdir(sign_data_dir))[:COLLECTION_COUNT]:
    keypoints = load(join(sign_data_dir, data_file_name))
    # Each still image becomes a window of SEQUENCE_LENGHT identical frames.
    sequences.append([keypoints] * SEQUENCE_LENGHT)
    labels.append(label_map[sign])

x = array(sequences)
# Pin the width of the one-hot encoding to the model's output size instead of
# letting it be inferred from the largest label present.
y = to_categorical(labels, num_classes=CLASS_COUNT).astype(int)

# Testing!
s_expected = (CLASS_COUNT * COLLECTION_COUNT, SEQUENCE_LENGHT, HAND_LANDMARK_POINTS)
s_result = x.shape
l_expected = (CLASS_COUNT * COLLECTION_COUNT, CLASS_COUNT)
l_result = y.shape
if s_result != s_expected:
  raise Exception(f'WARNING: expected sequence shape `{s_expected}` != from gotten `{s_result}`')
if l_result != l_expected:
  raise Exception(f'WARNING: expected labels shape `{l_expected}` != from gotten `{l_result}`')

print(f'input shape is {input_shape}')
# partitioning train & test data
x_train, x_test, y_train, y_test = train_test_split(
  x, y, test_size=0.05, random_state=SPLIT_SEED # 5% test data
)
# TODO: add more diagnostics

currently using 480 data points
input shape is (10, 63)


##### *Training*

In [9]:
def get_model():
  """Build the topology used throughout the notebook (currently `model_1`).

  Change the call below to switch every train / test / export cell to
  another topology at once.
  """
  window_shape = (SEQUENCE_LENGHT, HAND_LANDMARK_POINTS)
  return model_1(window_shape)

In [None]:
# Logging for TB
# LOG_DIR sits inside MODEL_DIR, which sits inside MODELS_DIR: create the
# whole chain in one call (and stay silent when re-running the cell).
makedirs(LOG_DIR, exist_ok=True)
tb_callback = TensorBoard(log_dir=LOG_DIR)

# Forcing GPU usage
# tf_config = tf.compat.v1.ConfigProto(allow_soft_placement=False)
# tf_config.gpu_options.allow_growth = True
# s = tf.compat.v1.Session(config=tf_config)
# set_session(s)

model = get_model()
model.compile(optimizer='Adam', loss='categorical_crossentropy', metrics=['categorical_accuracy'])

print("Num GPUs Available: ", len(tf.config.list_physical_devices('GPU')))
print(f'[RUN]: tensorboard --logdir={LOG_DIR}')
# x_train / y_train are in-memory arrays; `workers` and `use_multiprocessing`
# only apply to generator / Sequence inputs, so they are not passed here.
model.fit(x_train, y_train, epochs=100, batch_size=1024, callbacks=[tb_callback])

model.save(SAVED_MODEL_PATH)
# Later cells rebuild the model and reload it from SAVED_MODEL_PATH.
del model

# if len(tf.config.list_physical_devices('GPU')) <= 0: raise Exception('no GPU available')

# with tf.device('/gpu:0'):

##### *Test Model*

In [10]:
# Rebuild the topology and restore the trained weights from disk.
model = get_model()
model.load_weights(SAVED_MODEL_PATH)

predictions = model.predict(x_test)

# List every held-out sample whose most probable class differs from its label.
for predicted_row, expected_row in zip(predictions, y_test):
  predicted_label = ALL_SIGNS[argmax(predicted_row)]
  expected_label = ALL_SIGNS[argmax(expected_row)]
  if predicted_label == expected_label:
    continue
  print(f'prediction: {predicted_label}, real: {expected_label}')

prediction: q, real: n
prediction: r, real: u
prediction: r, real: u
prediction: u, real: v
prediction: q, real: t
prediction: u, real: v
prediction: u, real: v
prediction: q, real: n


### **Detection**

In [11]:
# Live sign detection: classify the latest SEQUENCE_LENGHT webcam frames and
# print the most probable sign with its score. Press `q` in the window to stop.
model = get_model()
model.load_weights(SAVED_MODEL_PATH)

sequence = []            # rolling window of keypoint arrays, newest first
predicted_sign = 'none'  # NOTE(review): not used by this cell yet
threshold = 0.5          # NOTE(review): not applied to the printed scores yet

cap = cv2.VideoCapture(0)
try:
  with hands.Hands(
    model_complexity=MP_MODEL_COMPLEXITY,
    min_detection_confidence=MP_DETECTION_CONFIDENCE,
    min_tracking_confidence=MP_TRACKING_CONFIDENCE,
    max_num_hands=MP_NUM_HANDS
  ) as mp_hands:
    while cap.isOpened():
      success, image = cap.read()

      # Check the read result BEFORE touching the frame: on a failed read
      # there is no image to flip and `flip` would raise.
      if not success:
        print("Ignoring empty camera frame.")
        continue

      # Flip the image horizontally for a selfie-view display.
      image = flip(image, 1)

      image, results = mediapipe_detection(image, mp_hands)
      draw_landmarks(image, results)

      # Push the newest frame to the front and drop anything beyond the window.
      keypoints = extract_keypoints_rh(results)
      sequence.insert(0, keypoints)
      sequence = sequence[:SEQUENCE_LENGHT]

      # Predict only once a full window has been collected.
      if len(sequence) == SEQUENCE_LENGHT:
        output = model.predict(expand_dims(sequence, axis=0))[0]
        res = argmax(output)
        print(ALL_SIGNS[res], output[res])

      imshow('MediaPipe Hands', image)

      # Mask to the low byte so the comparison also works on platforms
      # where waitKey reports extra modifier bits.
      key = cv2.waitKey(1) & 0xFF
      if key == ord('q'):
        break
finally:
  # Always free the camera and close the window, even if a frame fails.
  cap.release()
  cv2.destroyAllWindows()

none 1.0
none 1.0
none 1.0
none 1.0
none 1.0
none 1.0
none 1.0
none 1.0
none 1.0
none 1.0
none 1.0
none 1.0
none 1.0
none 1.0
none 1.0
none 1.0
none 1.0
none 1.0
none 1.0
none 1.0
none 1.0
none 1.0
none 1.0
none 1.0
none 1.0
none 1.0
none 1.0
none 1.0
none 1.0
none 1.0
none 0.97728795
none 0.47301954
none 0.16820043
i 0.28457856
i 0.48263204
i 0.48256856
a 0.7304933
a 0.933807
a 0.97840905
a 0.9862487
a 0.9886911
a 0.9900997
a 0.9900684
a 0.99043745
a 0.99165535
a 0.99185514
a 0.99207425
a 0.99212986
a 0.99244434
a 0.9930561
a 0.99330044
a 0.9936069
a 0.99395657
a 0.9938958
a 0.99461234
a 0.99544597
a 0.99617624
a 0.9973099
a 0.9986035
a 0.9990521
a 0.9993212
a 0.99946064
a 0.9995322
a 0.99958366
a 0.9996228
a 0.99966073
a 0.99963176
a 0.9996687
a 0.99969816
a 0.9996369
a 0.9996213
a 0.9994336
i 0.6106169
c 0.80488783
b 0.996031
b 0.9756909
b 0.6630017
p 0.75057423
p 0.8854143
p 0.9265376
p 0.94821995
p 0.9582867
p 0.96688485
p 0.9697056
p 0.9720948
p 0.97383636
p 0.9724134
p 0.9680875

### **Export Model**

#### *Topology*

In [13]:
# Rebuild the topology, restore the trained weights, and hand the model to the
# TensorFlow.js converter.
model = get_model()
model.load_weights(SAVED_MODEL_PATH)
# EXPORTED_MODEL_DIR is the same folder as MODEL_DIR, so the exported files
# are written next to the saved model.
tfjs.converters.save_keras_model(model, EXPORTED_MODEL_DIR)

