Importing Dependencies

In [None]:
import cv2
import time
import mediapipe as mp
import numpy as np
import matplotlib.pyplot as plt
import os

import pandas as pd
import torch
import torch.nn as nn
import tensorflow as tf
from tensorflow import keras

from keras.models import Sequential
from keras.layers import Dense
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import KFold
from sklearn.preprocessing import LabelEncoder
from sklearn.pipeline import Pipeline
from keras.utils import to_categorical

from tqdm import tqdm
from keras import regularizers

Defining necessary functions

In [None]:
# handland takes the MediaPipe output landmarks of the first detected hand, collects their (x, y, z) coordinates and flattens them into an array of size 63 (21 landmarks x 3 coordinates)

def handland(results):
    """Flatten the landmarks of the first detected hand into a 1-D vector.

    Args:
        results: Object exposing ``multi_hand_landmarks`` (as produced by
            MediaPipe Hands). Only the first hand in that list is used.

    Returns:
        A 1-D NumPy array holding ``x, y, z`` of every landmark in order,
        or ``np.zeros(63)`` when ``multi_hand_landmarks`` is empty/None.
    """
    detected_hands = results.multi_hand_landmarks
    if not detected_hands:
        # No hand found: emit an all-zero placeholder of the expected length.
        return np.zeros(63)

    first_hand = detected_hands[0]
    coords = [(point.x, point.y, point.z) for point in first_hand.landmark]
    return np.array(coords).flatten()

def handscore(results):
    """Return the handedness confidence score of the first detected hand.

    Args:
        results: Object exposing ``multi_handedness`` (as produced by
            MediaPipe Hands).

    Returns:
        The ``score`` of the top classification of the first hand, or ``0.0``
        when ``multi_handedness`` is empty/None (no hand detected). Returning
        a neutral score mirrors ``handland``, which also tolerates a missing
        detection instead of raising.
    """
    handedness = results.multi_handedness
    if not handedness:
        return 0.0
    return handedness[0].classification[0].score

# choose combines the predictions of our three custom models: it returns b when b and c agree and falls back to a otherwise (an empty a yields ''). The combination of the three seems to work better than a single model.

def choose(a, b, c):
    """Combine three model predictions into a single letter.

    Args:
        a: Prediction of the primary model; ``''`` means "no prediction".
        b: Prediction of the second model.
        c: Prediction of the third model.

    Returns:
        ``''`` when ``a`` is empty; otherwise ``b`` if ``b`` and ``c`` agree on
        a non-empty prediction, else ``a``.
    """
    if a == '':
        return ''
    # Compare the strings directly: converting through ord()/chr() added
    # nothing and raised TypeError whenever b or c was an empty string.
    # An empty b/c is treated as an abstention, so a wins in that case.
    if b != '' and b == c:
        return b
    return a


#Augmentation Function

from PIL import Image as PILImage
def augment_image(image, rotation_angles=None, blur_kernel=(5, 5)):
    """Build a small set of augmented variants of one image.

    Args:
        image: 3-channel image as a NumPy array in OpenCV's BGR channel order
            (e.g. the output of ``cv2.imread``).
        rotation_angles: Iterable of rotation angles in degrees. Defaults to
            ``np.arange(-15, 16, 10)``, i.e. -15, -5, 5 and 15.
        blur_kernel: ``(width, height)`` kernel size handed to
            ``cv2.GaussianBlur``. Defaults to ``(5, 5)``.

    Returns:
        A list of NumPy images with the channel order swapped relative to the
        input (RGB for a BGR input): the converted original first, then one
        rotated copy per angle, then one Gaussian-blurred copy.
        Callers must not apply another BGR->RGB conversion to these images.
    """
    if rotation_angles is None:
        rotation_angles = np.arange(-15, 16, 10)

    # Swap the channel order BGR -> RGB. (COLOR_RGB2BGR performs the very same
    # swap; COLOR_BGR2RGB is used so the flag states the actual intent.)
    base_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

    augmented_images = [base_rgb]

    # No horizontal-flip variant is generated here (it was left disabled).

    # Rotated copies: build the PIL image once and rotate it per angle;
    # rotate() returns a new image, so the base is never modified.
    pil_base = PILImage.fromarray(base_rgb)
    for angle in rotation_angles:
        augmented_images.append(np.array(pil_base.rotate(angle)))

    # Blurred copy; sigma=0 lets OpenCV derive it from the kernel size.
    augmented_images.append(cv2.GaussianBlur(base_rgb, blur_kernel, 0))

    return augmented_images

Mediapipe Functions

In [None]:
# Shortcuts to the MediaPipe solution modules: the holistic and hands
# solutions, plus the helpers for drawing landmarks and their default styles.
mp_holistic = mp.solutions.holistic
mp_hands = mp.solutions.hands
mp_drawing = mp.solutions.drawing_utils
mp_drawing_styles = mp.solutions.drawing_styles

# Holistic model instance with permissive (0.1) detection and tracking
# confidence thresholds.
holistic_model = mp_holistic.Holistic(
    min_detection_confidence=0.1,
    min_tracking_confidence=0.1,
)

Reading images and labels from the Folder

In [None]:
#Replace the folder dir with r"<your path>\train"

import os
from os import listdir
# Image: mirrored BGR images read from disk; Label: their class letters.
Image = []
Label = []
folder_dir = r"C:\Users\Manishit\Documents\Academia\Projects\Sign_Language_Recognition\American_Sign_Language_Letters_Yolo\train"
count = 0  # number of directory entries visited (any file type)
# Sort the listing so the dataset order is the same on every run/platform.
for file_name in tqdm(sorted(os.listdir(folder_dir)), desc='Processing images', unit='items'):
    count += 1
    if not file_name.endswith('.jpg'):
        continue
    frame = cv2.imread(os.path.join(folder_dir, file_name))
    if frame is None:
        # cv2.imread returns None (no exception) for unreadable/corrupt
        # files; skip them instead of crashing inside cv2.flip.
        continue
    # Mirror the image horizontally before storing it.
    Image.append(cv2.flip(frame, 1))
    # The class label is the first character of the file name.
    Label.append(file_name[0])

Expanding the dataset by augmentation

In [None]:
# SI: augmented images; SL: the label of each augmented image (same order).
SI = []
SL = []
# Wrap the list itself (not enumerate) so tqdm knows the total and can show
# a real progress bar with an ETA.
for num, img in enumerate(tqdm(Image, desc='Augmenting images', unit='items')):
    Au = augment_image(img)
    SI.extend(Au)
    # Every variant of an image inherits that image's label.
    SL.extend([Label[num]] * len(Au))

# NOTE: `Image` now refers to the augmented set, while `Label` still holds the
# un-augmented labels -- the labels aligned with `Image` are in `SL`.
# This cell is not idempotent: re-running it augments the augmented images.
Image = SI

Extracting mediapipe landmarks for each data point

In [None]:
# H: one 63-value landmark vector per processed image; label: matching labels.
# This cell expects the augmentation cell to have run: `Image` holds the
# augmented images (already RGB, see augment_image) and `SL` their labels.
H = []
label = []
with mp_hands.Hands(
        static_image_mode=True,
        max_num_hands=2,
        min_detection_confidence=0.2) as hands:
    # Wrap the list (not enumerate) so tqdm can display the total.
    for num, image in enumerate(tqdm(Image, desc='Creating Features', unit='Items')):
        # Skip greyscale images, i.e. those whose three channels are identical.
        is_greyscale = ((image[:, :, 0] == image[:, :, 1]).all()
                        and (image[:, :, 1] == image[:, :, 2]).all())
        if is_greyscale:
            continue

        # `Image` is the augmented list, so the label must come from `SL`;
        # indexing the un-augmented `Label` gave wrong labels and eventually
        # raised IndexError.
        label.append(SL[num])

        # Flip around the y-axis for correct handedness output.
        image = cv2.flip(image, 1)

        # The image is already RGB (augment_image converted it), which is what
        # MediaPipe expects; a further BGR->RGB swap would hand it BGR data.
        results = hands.process(image)
        if results.multi_handedness and \
                results.multi_handedness[0].classification[0].label == 'Left':
            # Detected as a left hand: re-run on the mirrored image.
            results = hands.process(cv2.flip(image, 1))

        H.append(handland(results))

# Drop samples where no hand was found (handland returns an all-zero vector),
# filtering features and labels together so they stay aligned.
detected = [(features, letter) for features, letter in zip(H, label) if np.any(features)]
H_new = [features for features, _ in detected]
Label_new = [letter for _, letter in detected]
len(Label_new)

Saving the processed dataset to put into FFNN

In [None]:
# Write the landmark vectors, one sample per row, in savetxt's default
# numeric format.
np.savetxt(fname='Train_X_aug.txt', X=H_new)
# Write the labels as plain strings, one per line, in the same order.
np.savetxt(fname='Train_Y_aug.txt', X=Label_new, fmt="%s")