Installing Libraries

In [29]:
# Install the notebook's third-party dependencies.
# `%pip` is used instead of `!pip` so the packages are installed into the
# environment of the running kernel rather than whichever `pip` is first on PATH.
%pip install pyDirectInput
%pip install pyautogui
%pip install tensorflow
%pip install scikit-learn
%pip install mediapipe



Importing Libraries

In [30]:
import csv
import numpy as np
import tensorflow as tf
import copy
import itertools
import cv2 as cv
import mediapipe as mp
import pyautogui
from collections import deque
from sklearn.model_selection import train_test_split

Initialising Inputs

In [31]:
# CSV holding the training samples; the training cell reads column 0 as the
# class label and the following 42 columns as the feature vector.
dataset = 'model_helper/train_data.csv'
# Destination of the Keras model (checkpointed during training, saved again afterwards).
model_save_path = 'model_helper/gesturesModel.hdf5'
# Destination of the converted TensorFlow Lite model.
tflite_save_path = 'model_helper/gestureLite.tflite'

# Width of the model's final softmax layer, i.e. the number of gesture classes.
NUM_CLASSES = 6

Training of Hand Gestures

In [32]:
# Seed NumPy and TensorFlow so that the train/test split and the training run
# are repeatable across "Restart Kernel -> Run All".
RANDOM_SEED = 42
np.random.seed(RANDOM_SEED)
tf.random.set_seed(RANDOM_SEED)

# Every CSV row is: class label, then one (x, y) pair per hand landmark.
NUM_LANDMARKS = 21
INPUT_DIM = NUM_LANDMARKS * 2

# Column 0 holds the label, columns 1..INPUT_DIM hold the features.
train_data = np.loadtxt(dataset, delimiter=',', dtype='float32', usecols=list(range(1, INPUT_DIM + 1)))
train_label = np.loadtxt(dataset, delimiter=',', dtype='int32', usecols=(0))
X_train, X_test, y_train, y_test = train_test_split(
    train_data, train_label, train_size=0.75, random_state=RANDOM_SEED)

# Small fully connected classifier: 42 inputs -> 20 -> 10 -> NUM_CLASSES.
model = tf.keras.models.Sequential([
    tf.keras.layers.Input((INPUT_DIM, )),
    tf.keras.layers.Dropout(0.2),
    tf.keras.layers.Dense(20, activation='relu'),
    tf.keras.layers.Dropout(0.4),
    tf.keras.layers.Dense(10, activation='relu'),
    tf.keras.layers.Dense(NUM_CLASSES, activation='softmax')
])
model.summary()

# Write the full model (not only the weights) to disk after every epoch.
cp_callback = tf.keras.callbacks.ModelCheckpoint(
    model_save_path, verbose=1, save_weights_only=False)

# Stop once the monitored validation metric has not improved for 20 epochs.
es_callback = tf.keras.callbacks.EarlyStopping(patience=20, verbose=1)

# Labels are integer class ids, hence the sparse categorical loss.
model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy']
)

model.fit(
    X_train,
    y_train,
    epochs=100,
    batch_size=128,
    validation_data=(X_test, y_test),
    callbacks=[cp_callback, es_callback]
)

val_loss, val_acc = model.evaluate(X_test, y_test, batch_size=128)

# Reload the checkpoint written by ModelCheckpoint and sanity-check it on one sample.
model = tf.keras.models.load_model(model_save_path)

predict_result = model.predict(np.array([X_test[0]]))
print(np.squeeze(predict_result))
print(np.argmax(np.squeeze(predict_result)))

Y_pred = model.predict(X_test)
y_pred = np.argmax(Y_pred, axis=1)

# Save an inference-only copy (optimizer state is dropped).
model.save(model_save_path, include_optimizer=False)


# Convert the Keras model to TensorFlow Lite with the default optimizations.
converter = tf.lite.TFLiteConverter.from_keras_model(model)
converter.optimizations = [tf.lite.Optimize.DEFAULT]
tflite_quantized_model = converter.convert()

# Use a context manager so the file handle is flushed and closed before the
# interpreter opens the same path.
with open(tflite_save_path, 'wb') as tflite_file:
    tflite_file.write(tflite_quantized_model)

# Run the same sample through the TFLite model to compare with the Keras output above.
interpreter = tf.lite.Interpreter(model_path=tflite_save_path)
interpreter.allocate_tensors()

input_details = interpreter.get_input_details()
output_details = interpreter.get_output_details()
interpreter.set_tensor(input_details[0]['index'], np.array([X_test[0]]))

interpreter.invoke()
tflite_results = interpreter.get_tensor(output_details[0]['index'])

print(np.squeeze(tflite_results))
print(np.argmax(np.squeeze(tflite_results)))

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dropout_2 (Dropout)         (None, 42)                0         
                                                                 
 dense_3 (Dense)             (None, 20)                860       
                                                                 
 dropout_3 (Dropout)         (None, 20)                0         
                                                                 
 dense_4 (Dense)             (None, 10)                210       
                                                                 
 dense_5 (Dense)             (None, 6)                 66        
                                                                 
Total params: 1,136
Trainable params: 1,136
Non-trainable params: 0
_________________________________________________________________
Epoch 1/100
 1/24 [>.............................] -



INFO:tensorflow:Assets written to: C:\Users\lahan\AppData\Local\Temp\tmpaqbn7k5e\assets


INFO:tensorflow:Assets written to: C:\Users\lahan\AppData\Local\Temp\tmpaqbn7k5e\assets


[9.7078192e-01 1.2624540e-02 3.2500820e-06 3.9385886e-05 1.8640583e-04
 1.6364533e-02]
0


Defining Gesture Class

In [33]:
class gestureClassifier:
    """Callable wrapper around a TensorFlow Lite gesture classification model.

    The interpreter is created and its tensors allocated once in the
    constructor; every call then runs a single inference.
    """

    def __init__(self, model_path='model_helper/gestureLite.tflite', num_threads=1):
        """Load the TFLite model at ``model_path``.

        Args:
            model_path: Path of the ``.tflite`` file to load.
            num_threads: Thread count handed to the TFLite interpreter.
        """
        tflite_interpreter = tf.lite.Interpreter(model_path=model_path, num_threads=num_threads)
        tflite_interpreter.allocate_tensors()

        self.interpreter = tflite_interpreter
        self.input_details = tflite_interpreter.get_input_details()
        self.output_details = tflite_interpreter.get_output_details()

    def __call__(self, landmark_list):
        """Classify one feature vector and return the index of the top score.

        Args:
            landmark_list: Flat sequence of numbers; it is wrapped into a
                batch of one and converted to float32 before inference.

        Returns:
            The ``np.argmax`` of the squeezed output tensor.
        """
        single_batch = np.array([landmark_list], dtype=np.float32)
        self.interpreter.set_tensor(self.input_details[0]['index'], single_batch)
        self.interpreter.invoke()

        scores = self.interpreter.get_tensor(self.output_details[0]['index'])
        return np.argmax(np.squeeze(scores))

Drawing and Preprocessing tools

In [34]:
def draw_rect(image, landmarks, h, w):
    """Compute the pixel bounding box that encloses all landmarks of a hand.

    Args:
        image: Unused; kept for signature compatibility.
        landmarks: Object whose ``landmark`` attribute iterates over points
            exposing ``x`` and ``y`` attributes (scaled here by ``w`` and ``h``).
        h: Image height in pixels.
        w: Image width in pixels.

    Returns:
        ``[x_min, y_min, x_min + box_width, y_min + box_height]`` as reported
        by ``cv.boundingRect``.
    """
    # Scale each landmark to pixel coordinates, capped at the last row/column.
    pixel_points = [
        (min(int(point.x * w), w - 1), min(int(point.y * h), h - 1))
        for point in landmarks.landmark
    ]

    # reshape keeps the (N, 2) layout even when there are no landmarks.
    point_array = np.array(pixel_points, dtype=int).reshape(-1, 2)

    left, top, box_width, box_height = cv.boundingRect(point_array)
    return [left, top, left + box_width, top + box_height]


def landmarks(image, landmarks, h, w):
    """Convert a hand's landmarks into a list of ``[x, y]`` pixel coordinates.

    Args:
        image: Unused; kept for signature compatibility.
        landmarks: Object whose ``landmark`` attribute iterates over points
            exposing ``x`` and ``y`` attributes (scaled here by ``w`` and ``h``).
        h: Image height in pixels.
        w: Image width in pixels.

    Returns:
        One ``[x, y]`` integer pair per landmark, each capped at
        ``w - 1`` / ``h - 1``.
    """
    return [
        [min(int(point.x * w), w - 1), min(int(point.y * h), h - 1)]
        for point in landmarks.landmark
    ]


def preprocessing(landmark_list):
    """Turn pixel landmarks into a flat, scale-normalised feature vector.

    The first point is used as the origin, so the x and y of every point are
    made relative to it. The points are then flattened and divided by the
    largest absolute value, which puts every component in ``[-1, 1]``.

    Args:
        landmark_list: Sequence of ``[x, y]`` points. It is not modified.

    Returns:
        A flat list of floats. An empty input yields ``[]``; if every point
        coincides with the first one the result is all zeros instead of a
        division by zero.
    """
    if len(landmark_list) == 0:
        return []

    base_x, base_y = landmark_list[0][0], landmark_list[0][1]

    # Build fresh point lists so the caller's data stays untouched; any
    # components beyond x and y are carried over unchanged.
    relative_points = [
        [point[0] - base_x, point[1] - base_y, *point[2:]]
        for point in landmark_list
    ]

    flattened = list(itertools.chain.from_iterable(relative_points))

    max_value = max(map(abs, flattened))
    if max_value == 0:
        # Degenerate hand (all landmarks on one pixel): nothing to scale.
        return [0.0] * len(flattened)

    return [value / max_value for value in flattened]

def draw_info_text(image, brect, handedness, hand_sign_text):
    """Draw a caption bar above a hand's bounding box.

    Args:
        image: Image that is drawn on in place.
        brect: Bounding box; indices 0, 1 and 2 are read as left, top and right.
        handedness: Object whose ``classification[0].label`` is the hand name.
        hand_sign_text: Gesture name appended after a colon; skipped when empty.

    Returns:
        The same ``image`` object that was passed in.
    """
    left, top, right = brect[0], brect[1], brect[2]

    # Filled black strip, 22 px tall, sitting on the top edge of the box.
    cv.rectangle(image, (left, top), (right, top - 22), (0, 0, 0), -1)

    hand_label = handedness.classification[0].label
    if hand_sign_text == "":
        caption = hand_label
    else:
        caption = hand_label + ':' + hand_sign_text

    cv.putText(image, caption, (left + 5, top - 4),
               cv.FONT_HERSHEY_SIMPLEX, 0.6, (100, 220, 50), 1, cv.LINE_AA)

    return image


Describe Camera Environment

In [35]:
# Index of the capture device to open and the frame size requested from it.
cap_device = 0
cap_width = 640
cap_height = 320

# Settings forwarded to MediaPipe Hands by the classification cell.
# NOTE(review): static_image_mode=True is normally meant for unrelated still
# images; for a webcam stream False enables tracking — confirm this is intended.
use_static_image_mode = True
min_detection_confidence = 0.7
min_tracking_confidence = 0.5

use_brect = True

# Open the configured device (previously the index was hard-coded to 0, so
# changing `cap_device` had no effect). CAP_DSHOW selects the DirectShow backend.
cap = cv.VideoCapture(cap_device, cv.CAP_DSHOW)
cap.set(cv.CAP_PROP_FRAME_WIDTH, cap_width)
cap.set(cv.CAP_PROP_FRAME_HEIGHT, cap_height)
cap.set(cv.CAP_PROP_FPS, 30)
cap.set(cv.CAP_PROP_FOURCC, cv.VideoWriter_fourcc(*'MJPG'))


True

Classification and Game Playing

In [36]:
mp_hands = mp.solutions.hands

gesture_classifier = gestureClassifier()

# One label per row, first column; blank rows are skipped instead of crashing.
with open('model_helper/gameActions.csv', encoding='utf-8-sig') as f:
    gesture_classifier_labels = [row[0] for row in csv.reader(f) if row]

# Keyboard key pressed for each predicted gesture class id.
GESTURE_KEYS = {0: 'g', 1: 'a', 2: 'd', 3: 'w', 4: 's', 5: 'f'}

try:
    # The context manager closes the MediaPipe graph when the loop ends.
    with mp_hands.Hands(
        static_image_mode=use_static_image_mode,
        max_num_hands=2,
        min_detection_confidence=min_detection_confidence,
        min_tracking_confidence=min_tracking_confidence,
    ) as hands:
        while True:
            # ESC (key code 27) ends the session.
            key = cv.waitKey(10)
            if key == 27:
                break

            ret, image = cap.read()
            if not ret:
                break

            # Mirror the frame so on-screen motion matches the user's motion.
            image = cv.flip(image, 1)
            debug_image = image.copy()
            height, width = debug_image.shape[0], debug_image.shape[1]

            # MediaPipe expects RGB, OpenCV delivers BGR.
            image = cv.cvtColor(image, cv.COLOR_BGR2RGB)

            image.flags.writeable = False
            results = hands.process(image)
            image.flags.writeable = True

            if results.multi_hand_landmarks is not None:
                for hand_landmarks, handedness in zip(results.multi_hand_landmarks, results.multi_handedness):
                    brect = draw_rect(debug_image, hand_landmarks, height, width)
                    landmark_list = landmarks(debug_image, hand_landmarks, height, width)
                    preprocessed_data = preprocessing(landmark_list)

                    hand_sign_id = int(gesture_classifier(preprocessed_data))

                    # Translate the gesture into a key press, if one is mapped.
                    key_to_press = GESTURE_KEYS.get(hand_sign_id)
                    if key_to_press is not None:
                        pyautogui.press(key_to_press)

                    # Fall back to an empty label when the CSV has fewer rows
                    # than the model has classes.
                    if hand_sign_id < len(gesture_classifier_labels):
                        hand_sign_text = gesture_classifier_labels[hand_sign_id]
                    else:
                        hand_sign_text = ""

                    debug_image = draw_info_text(debug_image, brect, handedness, hand_sign_text)

            cv.imshow('Hand controller', debug_image)
finally:
    # Always free the camera and close the window, even if the loop raised.
    cap.release()
    cv.destroyAllWindows()