# Training a Neural Network Model with Mediapipe Input Data

In [62]:
# Import libs

import os
import cv2
import glob
import pandas as pd
import numpy as np
import tensorflow as tf
from keras.models import Sequential
from keras.layers import Dense
from keras.activations import linear, relu, sigmoid

import matplotlib.pyplot as plt
from cvzone.HandTrackingModule import HandDetector

In [89]:
# Shared cvzone hand detector used to locate hands in a frame.
# Configured with a detection confidence of 0.8 and at most two hands.
detector = HandDetector(maxHands=2, detectionCon=0.8)

# Returns mediapipe_hands data from a single frame.
#
# Only the first detected hand (hands[0]) is used. Every landmark's (x, y) is
# shifted so the smallest x and smallest y become 0, then x is divided by the
# bounding-box width and y by the bounding-box height, rounded to 3 decimals.
#
# Output is a flat tuple (x0, y0, x1, y1, ...): two values per landmark
# (42 values for MediaPipe's 21 hand landmarks).
#
# Raises Exception('No hand found!') when the detector reports no hand.
def extract_data_with_mediapipe(frame):

    hands, _ = detector.findHands(frame)

    if not hands:
        raise Exception('No hand found!')

    hand = hands[0]
    lmlist = hand['lmList']
    _, _, W, H = hand['bbox']

    # A degenerate bounding box (all landmarks sharing one x or one y) would
    # otherwise cause a ZeroDivisionError; every offset on that axis is 0
    # anyway, so dividing by 1 keeps the result at 0.
    W = W or 1
    H = H or 1

    # Shifting by the hand centre first is unnecessary: the centre offset
    # cancels out once the minimum is subtracted, so shift by the minimum
    # directly.
    min_x = min(x for x, _, _ in lmlist)
    min_y = min(y for _, y, _ in lmlist)

    # Note: Instead of dividing x by W and y by H, an alternative
    #       approach could be to divide x and y by sqrt(W*W + H*H) [TO TRY]
    return tuple(
        val
        for x, y, _ in lmlist
        for val in (round((x - min_x) / W, 3), round((y - min_y) / H, 3))
    )


# Generate mediapipe_hands data over an entire image set
# using the extract_data_with_mediapipe function.
#
# path   : directory containing one sub-folder per class; the folder name is
#          used as the label of every image inside it.
# labels : optional collection of folder names to keep (None keeps them all).
# shape  : size passed to cv2.resize for every image before detection.
#
# Returns (features, labels) as two np arrays with one entry per image:
# features has one row of landmark values per image, labels holds the
# matching folder name.
#
# Raises OSError when an image file cannot be decoded, and propagates any
# exception raised by extract_data_with_mediapipe (e.g. no hand found).
def generate_mediapipe_data(path, labels=None, shape=(128, 128)):

    # Sub-folders of `path`, optionally restricted to the requested labels,
    # in sorted order so the output order is reproducible.
    folders = sorted(
        f for f in os.listdir(path)
        if os.path.isdir(os.path.join(path, f))
        and (labels is None or f in labels)
    )

    # Print the list of folders
    print("Folders_Found = ", folders)

    # Separate accumulators so the `labels` argument is not shadowed.
    features, targets = [], []

    for folder in folders:
        # Only .jpg files are collected. glob gives no ordering guarantee,
        # so sort for a deterministic row order.
        image_paths = sorted(glob.glob(os.path.join(path, folder, '*.jpg')))

        # Images are handled one at a time so a whole folder never has to be
        # held in memory at once.
        for image_path in image_paths:
            # By default, the color space of the loaded image is BGR
            # (blue, green, red) rather than the typical RGB order.
            image = cv2.imread(image_path)
            if image is None:
                # cv2.imread signals an unreadable file by returning None.
                raise OSError(f"Could not read image: {image_path}")

            # Resizing images to reduce computational load.
            image = cv2.resize(image, shape)

            features.append(extract_data_with_mediapipe(image))
            # One label per image. (Repeating the folder *string* and
            # splitting it into characters breaks multi-character names.)
            targets.append(folder)

    return np.array(features), np.array(targets)


# Encoder for encoding labels: '0'-'9' -> 0-9 and 'A'-'Z' -> 10-35
# (e.g. '0' -> 0, 'A' -> 10, 'K' -> 20, 'Z' -> 35).
#
# Non-string labels (such as integers parsed from a CSV file) are converted
# with str() first. Raises ValueError for anything that is not exactly one
# digit or one uppercase ASCII letter, instead of silently returning an
# out-of-range class id.
def encoder(label):
    text = str(label)
    if len(text) == 1:
        if '0' <= text <= '9':
            return ord(text) - ord('0')
        if 'A' <= text <= 'Z':
            return ord(text) - ord('A') + 10
    raise ValueError(
        f"Unsupported label {label!r}: expected a single character in 0-9 or A-Z"
    )


def encode_labels(labels):
    # Run every raw label through encoder and pack the resulting
    # integer codes into a NumPy array, preserving the input order.
    codes = [encoder(raw) for raw in labels]
    return np.array(codes)

In [106]:
# Loading data

mediapipe_images, labels = generate_mediapipe_data(
    './Dataset/Indian/',
    labels=['1', '2', 'C'],
)

# Append the labels as one extra trailing column next to the landmark values.
combined_data = np.hstack(
    (mediapipe_images, np.array(labels).reshape(-1, 1))
)

print(mediapipe_images.shape)

# fmt='%s' writes every cell using its string form.
np.savetxt('mediapipe_data.csv', combined_data, fmt='%s', delimiter=',')

Folders_Found =  ['1', '2', 'C']


KeyboardInterrupt: 

In [107]:

df = pd.read_csv('mediapipe_data.csv', header=None)

# Extract the mediapipe features and labels as numpy arrays:
# every column except the last holds landmark values, the last holds the label.
images = df.iloc[:, :-1].values

# Force labels to str. When every class name is a digit, pandas infers an
# integer column, and the character-based encoder expects strings.
labels = df.iloc[:, -1].astype(str).values

y_labels = encode_labels(labels)

print(images.shape)
print(y_labels)


(3600, 42)
[ 1  1  1 ... 12 12 12]


In [100]:
# Setting up an Early Stopper

from keras.callbacks import EarlyStopping, ReduceLROnPlateau

class myCallback(tf.keras.callbacks.Callback):
    # Stops training as soon as the validation AUC exceeds 0.99.

    def on_epoch_end(self, epoch, logs=None):
        # `logs` defaults to None rather than a shared mutable dict.
        val_auc = (logs or {}).get('val_auc')

        # 'val_auc' is absent when the metric is not being tracked; skip the
        # check in that case instead of comparing None with a float.
        if val_auc is not None and val_auc > 0.99:
            print('\nValidation AUC has exceeded 0.99, '
                  'so stopping further training.')
            self.model.stop_training = True

# Stop when validation AUC has not improved for 3 epochs and roll back to the
# best weights. AUC is a "higher is better" metric, so the direction is stated
# explicitly rather than left to mode='auto', which infers it from the name.
es = EarlyStopping(
    monitor='val_auc',
    mode='max',
    patience=3,
    restore_best_weights=True,
)

# Halve the learning rate when validation loss has not improved for 2 epochs.
lr = ReduceLROnPlateau(
    monitor='val_loss',
    patience=2,
    factor=0.5,
    verbose=1,
)

In [108]:
# Setting up the model parameters

tf.random.set_seed(1234)  # for consistent results

# Layers are stacked one at a time: a 42-value input, a 39-unit ReLU hidden
# layer, and a 36-unit linear output layer.
model = Sequential(name="sign_recognition_model")
model.add(tf.keras.Input(shape=(42, )))  # To be adjusted for inputs
model.add(Dense(39, activation='relu'))
model.add(Dense(36, activation='linear'))

model.summary()

Model: "sign_recognition_model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_18 (Dense)            (None, 39)                1677      
                                                                 
 dense_19 (Dense)            (None, 36)                1440      
                                                                 
Total params: 3,117
Trainable params: 3,117
Non-trainable params: 0
_________________________________________________________________


In [110]:

# Compiling and training the model

# from_logits=True tells the loss that the model outputs are raw scores
# rather than probabilities.
model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.001),
    loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
)

# Note: the es, lr and myCallback() callbacks are currently not passed to fit.
history = model.fit(x=images, y=y_labels, epochs=20)

model.save('isl_model.h5')


Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
