### Necessary Packages

*   Pandas
*   NumPy
*   Matplotlib
*   MediaPipe
*   OpenCV
*   os
*   shutil
*   random
*   TensorFlow



In [None]:
# Install MediaPipe, which the import cell below loads as `mp`.
!pip install mediapipe

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
import mediapipe as mp
import cv2
import os
import shutil
import random
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator 
from tensorflow.keras import layers 
from tensorflow.keras import Model 
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.applications.inception_v3 import InceptionV3

### Image Pre-Process

In [None]:
def crop_img(path):
    """Crop the image at *path* down to the detected hand(s), in place.

    The image is resized to 540x960 (width x height) and passed through
    MediaPipe Hands. The bounding box enclosing every landmark of every
    detected hand is widened by 25 px on each side, clamped to the image,
    and the resulting crop overwrites the original file.

    If no hand is detected, or the clamped box is empty, the file is
    deleted. A file that OpenCV cannot decode is reported and left alone.

    Args:
        path: Path of the image file to process.
    """
    tol = 25  # margin, in pixels, kept around the outermost landmarks

    img = cv2.imread(path)
    if img is None:
        # cv2.imread reports failure by returning None instead of raising.
        print(path, 'Could not be read as an image. Skipped.')
        return

    img = cv2.resize(img, (540, 960))
    height, width = img.shape[:2]

    mp_hands = mp.solutions.hands
    with mp_hands.Hands(static_image_mode=True) as hands:
        # OpenCV loads BGR; the detector is fed an RGB copy.
        results = hands.process(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))

    if not results.multi_hand_landmarks:
        os.remove(path)
        print(path, 'No landmark found. File Deleted.')
        return

    # Landmark coordinates are normalised, so scale them to pixels.
    xs = []
    ys = []
    for hand_landmarks in results.multi_hand_landmarks:
        for landmark in hand_landmarks.landmark:
            xs.append(int(landmark.x * width))
            ys.append(int(landmark.y * height))

    # Clamp against the real image size rather than repeating 540/960.
    x_min = max(min(xs) - tol, 0)
    x_max = min(max(xs) + tol, width)
    y_min = max(min(ys) - tol, 0)
    y_max = min(max(ys) + tol, height)

    if x_min >= x_max or y_min >= y_max:
        # Every landmark fell outside the image; there is nothing to crop
        # and cv2.imwrite would fail on an empty array.
        os.remove(path)
        print(path, 'Hand outside image bounds. File Deleted.')
        return

    cv2.imwrite(path, img[y_min:y_max, x_min:x_max])

In [None]:
path = './Dataset'

# Walk each class folder and crop every image in it down to the hand.
for folder in os.listdir(path):
    src = os.path.join(path, folder)
    for img in os.listdir(src):
        name = os.path.join(src, img)
        crop_img(name)

In [None]:
def pad(img_path):
    """Pad the image at *img_path* to a square and resize it to 150x150.

    Black borders are added on the shorter dimension so the content stays
    centred, then the square image is resized to 150x150 and written back
    over the original file. A file that OpenCV cannot decode is reported
    and left untouched.

    Args:
        img_path: Path of the image file to pad and resize in place.
    """
    img = cv2.imread(img_path)
    if img is None:
        # cv2.imread reports failure by returning None instead of raising.
        print(img_path, 'Could not be read as an image. Skipped.')
        return

    # NumPy image arrays are indexed (rows, columns, channels).
    height, width = img.shape[:2]
    size = max(height, width)

    # Split the missing rows/columns between both sides. When the difference
    # is odd the extra pixel goes to the bottom/right, so the padded image
    # is exactly size x size before it is resized.
    top = (size - height) // 2
    bottom = size - height - top
    left = (size - width) // 2
    right = size - width - left

    img = cv2.copyMakeBorder(img, top, bottom, left, right,
                             borderType=cv2.BORDER_CONSTANT,
                             value=[0, 0, 0])
    img = cv2.resize(img, (150, 150))

    cv2.imwrite(img_path, img)

In [None]:
path = './Dataset'

# Square-pad and resize every image of every class folder in place.
for folders in os.listdir(path):
    class_dir = os.path.join(path, folders)
    for img in os.listdir(class_dir):
        pad(os.path.join(class_dir, img))

### Splitting Data into Training and Validation Set

In [None]:
# Create ./Split/train/<class> and ./Split/val/<class> for every class folder
# in ./Dataset. The split roots must be ./Split/..., the paths the splitting
# and generator cells read from. exist_ok makes the cell safe to re-run.
os.makedirs('./Split/train', exist_ok=True)
os.makedirs('./Split/val', exist_ok=True)

for classes in os.listdir('./Dataset'):
    os.makedirs(os.path.join('./Split/train', classes), exist_ok=True)
    os.makedirs(os.path.join('./Split/val', classes), exist_ok=True)

In [None]:
def data_split(train_perc, source, dest_train, dest_val):
    """Shuffle each class folder and copy its files into train/val folders.

    For every sub-folder of *source*, the file names are shuffled, the first
    ``round(train_perc * n)`` files are copied to ``dest_train/<class>/`` and
    the remaining ones to ``dest_val/<class>/``. Destination class folders
    are created when missing. Entries of *source* that are not directories
    are ignored.

    Args:
        train_perc: Fraction (0-1) of each class's files that goes to the
            training split.
        source: Directory holding one sub-folder per class.
        dest_train: Root directory of the training split.
        dest_val: Root directory of the validation split.
    """
    for folders in os.listdir(source):
        fol_path = os.path.join(source, folders)
        if not os.path.isdir(fol_path):
            # A stray file next to the class folders is not a class.
            continue

        files = os.listdir(fol_path)
        random.shuffle(files)
        train_idx = round(train_perc * len(files))

        subsets = (
            (files[:train_idx], dest_train),
            (files[train_idx:], dest_val),
        )
        for subset, dest_root in subsets:
            dest_dir = os.path.join(dest_root, folders)
            os.makedirs(dest_dir, exist_ok=True)
            for file_ in subset:
                shutil.copy(os.path.join(fol_path, file_),
                            os.path.join(dest_dir, file_))

# NOTE: data_split is not invoked in this cell. It needs the source and
# destination paths, which are only defined in a later cell.

In [None]:
source = './Dataset'
dest_train = './Split/train'
dest_val = './Split/val'

# Copy 70% of each class into the training split and the rest into the
# validation split. data_split has no defaults, so all paths are passed.
data_split(0.7, source, dest_train, dest_val)

In [None]:
# Tally how many images ended up in the training and validation splits

train_img = 0
val_img = 0

for classes in os.listdir(dest_train):
    train_class_dir = os.path.join(dest_train, classes)
    val_class_dir = os.path.join(dest_val, classes)
    train_img += len(os.listdir(train_class_dir))
    val_img += len(os.listdir(val_class_dir))

print(train_img)
print(val_img)

### Creating and Training the Model

In [None]:
# Getting Inception V3 pre-trained model
!wget --no-check-certificate \
    https://storage.googleapis.com/mledu-datasets/inception_v3_weights_tf_dim_ordering_tf_kernels_notop.h5 \
    -O ./tmp/inception_v3_weights_tf_dim_ordering_tf_kernels_notop.h5

In [None]:
# Build the training and validation image generators with ImageDataGenerator

base_dir = './Split'
train_dir = os.path.join(base_dir, 'train')
validation_dir = os.path.join(base_dir, 'val')

# Both generators rescale pixel values by 1/255; only the training one also
# applies random horizontal flips.
train_datagen = ImageDataGenerator(horizontal_flip=True, rescale=1./255.)
test_datagen = ImageDataGenerator(rescale=1./255.)

train_generator = train_datagen.flow_from_directory(
    train_dir,
    target_size=(150, 150),
    class_mode='categorical',
    batch_size=20,
)

validation_generator = test_datagen.flow_from_directory(
    validation_dir,
    target_size=(150, 150),
    class_mode='categorical',
    batch_size=10,
)

In [None]:
# Build Inception V3 without its top layers and load the downloaded weights

local_weight = './tmp/inception_v3_weights_tf_dim_ordering_tf_kernels_notop.h5'

pre_trained_model = InceptionV3(
    weights=None,
    include_top=False,
    input_shape=(150, 150, 3),
)
pre_trained_model.load_weights(local_weight)

# Freeze every pre-trained layer so training does not update its weights.
for layer in pre_trained_model.layers:
    layer.trainable = False

pre_trained_model.summary()

In [None]:
# Creating the Final Model

# One softmax unit per class discovered by the training generator.
num_classes = len(train_generator.class_indices)

last_out = pre_trained_model.get_layer('mixed10').output

# New classification head stacked on the frozen Inception V3 features.
x = layers.GlobalAveragePooling2D()(last_out)
x = layers.Dropout(0.2)(x)
x = layers.Dense(1024, activation='relu')(x)
x = layers.Dense(num_classes, activation='softmax')(x)

model = Model(pre_trained_model.input, x)

# `learning_rate` is the supported keyword; the old `lr` alias is deprecated
# and rejected by recent Keras releases.
model.compile(optimizer=Adam(learning_rate=0.0001),
              loss='categorical_crossentropy',
              metrics=['accuracy'])

model.summary()

In [None]:
# Checkpoint callback: keep only the weights of the epoch with the highest
# validation accuracy

checkpoint_filepath = './Checkpoint/model_cp.ckpt'
model_checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(
    filepath=checkpoint_filepath,
    monitor='val_accuracy',
    mode='max',
    save_best_only=True,
    save_weights_only=True,
)

In [None]:
# Fit the model for 100 epochs, checkpointing through the callback

history = model.fit(
    train_generator,
    epochs=100,
    validation_data=validation_generator,
    callbacks=[model_checkpoint_callback],
    verbose=2,
)

In [None]:
# Restore the weights written by the checkpoint callback, score them on the
# validation generator, then save the full model to disk

model.load_weights(checkpoint_filepath)

# evaluate() returns [loss, accuracy] for the metrics the model was compiled with
scores = model.evaluate(validation_generator)
print(f'val_loss: {scores[0]}, val_accuracy: {scores[1]}')

model.save('./final_model.h5')

### Results and Confusion Matrix

In [None]:
acc = history.history['accuracy']
val_acc = history.history['val_accuracy']
loss = history.history['loss']
val_loss = history.history['val_loss']

epochs = range(len(acc))

# Accuracy curves
plt.figure()
plt.plot(epochs, acc, 'r', label='Training accuracy')
plt.plot(epochs, val_acc, 'b', label='Validation accuracy')
plt.title('Training and validation accuracy')
plt.legend(loc=0)

# Loss curves on their own figure. Opening the figure before plotting avoids
# a trailing blank figure.
plt.figure()
plt.plot(epochs, loss, 'r', label='Training loss')
plt.plot(epochs, val_loss, 'b', label='Validation loss')
plt.title('Training and validation loss')
plt.legend(loc=0)

plt.show()

In [None]:
# Create a label dictionary to convert model prediction result to the word

# Only sub-directories are classes. Sorting the folder names is meant to
# reproduce the alphanumeric class-index order flow_from_directory uses.
word_list = sorted(
    word for word in os.listdir('./Split/train')
    if os.path.isdir(os.path.join('./Split/train', word))
)

# Map each class index to its word; the dict is created here before use.
label_dict = dict(enumerate(word_list))
print(label_dict)

In [None]:
def pred(path):
    """Predict the word shown in the image file at *path*.

    The image is loaded at 150x150, converted to an array, scaled by 1/255,
    given a batch axis and passed to the global ``model``. The index of the
    highest score is looked up in the global ``label_dict``.

    Args:
        path: Path of the image file to classify.

    Returns:
        The ``label_dict`` entry for the highest-scoring class index.
    """
    # Imported locally: the notebook's import cell never brings `image`
    # into scope, so the bare name would raise NameError.
    from tensorflow.keras.preprocessing import image

    # target_size is (height, width); the channel count is not part of it.
    img = image.load_img(path, target_size=(150, 150))
    img = image.img_to_array(img) / 255.0
    batch = np.expand_dims(img, axis=0)

    scores = model.predict(batch)
    return label_dict[int(np.argmax(scores, axis=1)[0])]

In [None]:
# Creating a Confusion Matrix DataFrame. The columns are the actual words and
# the rows are the predicted words.

pd.set_option("display.max_columns", None)

# Start from an integer frame of zeros. This avoids np.NaN (removed in
# NumPy 2.0) and the replace() downcast that newer pandas deprecates.
labels = list(label_dict.values())
df = pd.DataFrame(0, index=labels, columns=labels)

path = './Split/val'
for folders in os.listdir(path):
    for imgs in os.listdir(os.path.join(path, folders)):
        y = pred(os.path.join(path, folders, imgs))
        # Row = predicted word, column = folder (actual word).
        df.loc[y, folders] += 1

df

#### Function for processing an actual Image to match with the model input

In [None]:
def img_convert(path):
    """Turn an image file into a batch the trained model can predict on.

    The image is resized to 540x960 (width x height) and the hand(s) are
    located with MediaPipe Hands. The bounding box around all landmarks is
    widened by 25 px, clamped to the image and cropped. The crop is then
    padded with black to a square, resized to 150x150 (the input size the
    model in this notebook is built with), converted from BGR to RGB,
    scaled by 1/255 and given a leading batch axis.

    Args:
        path: Path of the image file to convert.

    Returns:
        A float array of shape (1, 150, 150, 3).

    Raises:
        ValueError: If the file cannot be read as an image, or no hand
            can be cropped from it.
    """
    tol = 25  # margin, in pixels, kept around the outermost landmarks

    img = cv2.imread(path)
    if img is None:
        # cv2.imread reports failure by returning None instead of raising.
        raise ValueError(f'Could not read image: {path}')

    img = cv2.resize(img, (540, 960))
    height, width = img.shape[:2]

    mp_hands = mp.solutions.hands
    with mp_hands.Hands(static_image_mode=True) as hands:
        results = hands.process(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))

    if not results.multi_hand_landmarks:
        raise ValueError(f'No hand detected in image: {path}')

    # Landmark coordinates are normalised, so scale them to pixels.
    xs = []
    ys = []
    for hand_landmarks in results.multi_hand_landmarks:
        for landmark in hand_landmarks.landmark:
            xs.append(int(landmark.x * width))
            ys.append(int(landmark.y * height))

    x_min = max(min(xs) - tol, 0)
    x_max = min(max(xs) + tol, width)
    y_min = max(min(ys) - tol, 0)
    y_max = min(max(ys) + tol, height)

    if x_min >= x_max or y_min >= y_max:
        raise ValueError(f'Detected hand lies outside the image: {path}')

    cropped_img = img[y_min:y_max, x_min:x_max]

    # Pad the shorter dimension with black so the crop becomes square; an
    # odd difference puts the extra pixel on the bottom/right.
    crop_h, crop_w = cropped_img.shape[:2]
    size = max(crop_h, crop_w)
    top = (size - crop_h) // 2
    bottom = size - crop_h - top
    left = (size - crop_w) // 2
    right = size - crop_w - left

    cropped_img = cv2.copyMakeBorder(cropped_img, top, bottom, left, right,
                                     borderType=cv2.BORDER_CONSTANT,
                                     value=[0, 0, 0])

    cropped_img = cv2.resize(cropped_img, (150, 150))
    # OpenCV arrays are BGR, while the Keras loaders used for training and
    # validation read image files as RGB.
    cropped_img = cv2.cvtColor(cropped_img, cv2.COLOR_BGR2RGB)
    cropped_img = cropped_img / 255
    cropped_img = np.expand_dims(cropped_img, axis=0)

    return cropped_img

### Inference

In [None]:
def pad_frame(frame):
    """Convert a cropped BGR frame into a (1, 150, 150, 3) model input.

    The loop below calls ``pad_frame``, which is not defined in the visible
    part of this notebook, so it is defined here. The crop is padded with
    black to a square, resized to 150x150, converted to RGB (the channel
    order Keras uses when loading image files), scaled by 1/255 and given a
    batch axis.
    """
    frame_h, frame_w = frame.shape[:2]
    size = max(frame_h, frame_w)
    top = (size - frame_h) // 2
    left = (size - frame_w) // 2
    frame = cv2.copyMakeBorder(frame, top, size - frame_h - top,
                               left, size - frame_w - left,
                               borderType=cv2.BORDER_CONSTANT,
                               value=[0, 0, 0])
    frame = cv2.resize(frame, (150, 150))
    frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    return np.expand_dims(frame / 255.0, axis=0)


mp_hands = mp.solutions.hands
mp_drawing = mp.solutions.drawing_utils

capture = cv2.VideoCapture(0)

# One vote counter per known word, sized from label_dict instead of a
# hard-coded 25.
prediction_pool = np.zeros((len(label_dict),))
frame_count = 1

try:
    with mp_hands.Hands(static_image_mode=False,
                        min_detection_confidence=0.7,
                        min_tracking_confidence=0.7,
                        max_num_hands=2) as hands:

        while True:
            ret, frame = capture.read()
            if not ret:
                # Camera unavailable or stream ended: frame is not usable.
                break
            frame = cv2.flip(frame, 1)

            results = hands.process(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
            height, width = frame.shape[:2]

            if results.multi_hand_landmarks:
                tol = 25

                # Landmarks are normalised; scale them to pixel coordinates.
                xs = []
                ys = []
                for hand_landmarks in results.multi_hand_landmarks:
                    for landmark in hand_landmarks.landmark:
                        xs.append(int(landmark.x * width))
                        ys.append(int(landmark.y * height))

                x_min = max(min(xs) - tol, 0)
                x_max = min(max(xs) + tol, width)
                y_min = max(min(ys) - tol, 0)
                y_max = min(max(ys) + tol, height)

                # Skip frames whose box is empty after clamping.
                if x_min < x_max and y_min < y_max:
                    inp_frame = pad_frame(frame[y_min:y_max, x_min:x_max])

                    # Predict once per frame and reuse the scores.
                    scores = model.predict(inp_frame, verbose=0)
                    max_pred = float(np.max(scores))

                    if max_pred >= 0.75:
                        pred_idx = int(np.argmax(scores, axis=1)[0])
                        prediction_pool[pred_idx] += 1
                        frame_count += 1

                        cv2.putText(frame, label_dict[pred_idx],
                                    (round((x_min + x_max) / 2), y_min - 10),
                                    cv2.FONT_HERSHEY_SIMPLEX, 1,
                                    (0, 0, 255), 2)

                    cv2.rectangle(frame, (x_min, y_min), (x_max, y_max),
                                  (255, 0, 0), 2)

            cv2.imshow('Test hand', frame)
            if frame_count % 45 == 0:
                result = int(np.argmax(prediction_pool))
                print(label_dict[result], end=' ')
                prediction_pool.fill(0)
                # Restart the counter; otherwise the same word would be
                # printed again on every frame until the next confident
                # prediction bumps frame_count.
                frame_count = 1

            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
finally:
    # Release the camera and close the window even if the loop raised.
    capture.release()
    cv2.destroyAllWindows()

### Converting to TFLite Model

In [None]:
# Convert the Keras model to TFLite with the default optimizations enabled,
# then write the serialized model to disk.
converter = tf.lite.TFLiteConverter.from_keras_model(model)
converter.optimizations = [tf.lite.Optimize.DEFAULT]
tflite_model = converter.convert()

with open("./Final_model_default.tflite", "wb") as tflite_file:
    tflite_file.write(tflite_model)