# SIGN LANGUAGE TRANSLATION

### DATA COLLECTION:
BY GANESH DEVADIGA


DATA COLLECTION IS DONE USING CAMERA USING OPENCV MODULE.

In [1]:
import os

import cv2


DATA_DIR = './data'
if not os.path.exists(DATA_DIR):
    os.makedirs(DATA_DIR)

number_of_classes = 3
dataset_size = 100

cap = cv2.VideoCapture(0)
for j in range(number_of_classes):
    if not os.path.exists(os.path.join(DATA_DIR, str(j))):
        os.makedirs(os.path.join(DATA_DIR, str(j)))

    print('Collecting data for class {}'.format(j))

    done = False
    while True:
        ret, frame = cap.read()
        cv2.putText(frame, 'Press q to collect data', (50, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 3,
                    cv2.LINE_AA)
        cv2.imshow('frame', frame)
        if cv2.waitKey(100) == ord('q'):
            break

    counter = 0
    while counter < dataset_size:
        ret, frame = cap.read()
        cv2.imshow('frame', frame)
        cv2.waitKey(100)
        cv2.imwrite(os.path.join(DATA_DIR, str(j), '{}.jpg'.format(counter)), frame)

        counter += 1

cap.release()
cv2.destroyAllWindows()

Collecting data for class 0


-

### DATA PREPROCESSING AND DATASET CREATION:

* ABOVE WE ARE COLLECTING FULL SIZED IMAGES OF THE SUBJECT AND FOR OUR MODEL WE ARE ONLY INTRESTED IN THE ONLY POSITION OF THE FINGER
* WE WILL EXTRACT THE POSTION OF THE FIGER USING THE MEDIAPIPE
* MEDIAPIPE WILL CLASSIFY THOSE IMAGES BASED ON THE LANDMARKS
* AND WE STORE THOSE LANDMARKS IN PICKLE FORMAT

In [1]:
import os
import pickle

import mediapipe as mp
import cv2
import matplotlib.pyplot as plt


mp_hands = mp.solutions.hands
mp_drawing = mp.solutions.drawing_utils
mp_drawing_styles = mp.solutions.drawing_styles

hands = mp_hands.Hands(static_image_mode=True, min_detection_confidence=0.3)

DATA_DIR = './data'

data = []
labels = []
for dir_ in os.listdir(DATA_DIR):
    for img_path in os.listdir(os.path.join(DATA_DIR, dir_)):
        data_aux = []

        x_ = []
        y_ = []

        img = cv2.imread(os.path.join(DATA_DIR, dir_, img_path))
        img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

        results = hands.process(img_rgb)
        if results.multi_hand_landmarks:
            for hand_landmarks in results.multi_hand_landmarks:
                for i in range(len(hand_landmarks.landmark)):
                    x = hand_landmarks.landmark[i].x
                    y = hand_landmarks.landmark[i].y

                    x_.append(x)
                    y_.append(y)

                for i in range(len(hand_landmarks.landmark)):
                    x = hand_landmarks.landmark[i].x
                    y = hand_landmarks.landmark[i].y
                    data_aux.append(x - min(x_))
                    data_aux.append(y - min(y_))

            data.append(data_aux)
            labels.append(dir_)

f = open('data.pickle', 'wb')
pickle.dump({'data': data, 'labels': labels}, f)
f.close()

-

### TRAINING THE MODEL WITH OBTAINED DATASET:

* WE TRAIN OUR MODEL WITH THE DATASET OBTAINED AFTER PROCESSING COLLECTED DATA
* WE USE SCIKIT-LEARN TO TRAIN OUR MODEL
* WE USE RANDOM FOREST CLASSIFIER TO TRAIN OUR MODEL
* WE USE 80% DATASET FOR TRAINING AND 20% DATA FOR TESTING PURPOSE
* THE ACCURACY OF THE MODEL IS DISPLAYED AT THE END


In [2]:
import pickle

from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
import numpy as np


data_dict = pickle.load(open('./data.pickle', 'rb'))

data = np.asarray(data_dict['data'])
labels = np.asarray(data_dict['labels'])

x_train, x_test, y_train, y_test = train_test_split(data, labels, test_size=0.2, shuffle=True, stratify=labels)

model = RandomForestClassifier()

model.fit(x_train, y_train)

y_predict = model.predict(x_test)

score = accuracy_score(y_predict, y_test)

print('{}% of samples were classified correctly !'.format(score * 100))

f = open('model.p', 'wb')
pickle.dump({'model': model}, f)
f.close()

100.0% of samples were classified correctly !


-


### LIVE DETECTION USING OUR MODEL

* WE USE USE OPENCV TO GET THE CAMERA INPUT
* WE PASS THE IMAGE DATA TO MEDIAPIPE
* MEDIAPIPE EXTRACTS THE LANDMARKS OF THE HAND
* THE MEDIAPIPE DATA IS PASSED INTO OUR MODEL
* THE MODEL DETECTS THE SIGN AND ITS MEANING AND DISPLAYS ON THE SCREEN


In [3]:
import pickle

import cv2
import mediapipe as mp
import numpy as np

model_dict = pickle.load(open('./model.p', 'rb'))
model = model_dict['model']

cap = cv2.VideoCapture(0)

mp_hands = mp.solutions.hands
mp_drawing = mp.solutions.drawing_utils
mp_drawing_styles = mp.solutions.drawing_styles

hands = mp_hands.Hands(static_image_mode=True, min_detection_confidence=0.3)

labels_dict = {0: 'O', 1: 'L', 2: '3'}
while True:

    data_aux = []
    x_ = []
    y_ = []

    ret, frame = cap.read()

    H, W , _= frame.shape

    frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

    results = hands.process(frame_rgb)
    if results.multi_hand_landmarks:
        for hand_landmarks in results.multi_hand_landmarks:
            mp_drawing.draw_landmarks(
                frame,  # image to draw
                hand_landmarks,  # model output
                mp_hands.HAND_CONNECTIONS,  # hand connections
                mp_drawing_styles.get_default_hand_landmarks_style(),
                mp_drawing_styles.get_default_hand_connections_style())

        for hand_landmarks in results.multi_hand_landmarks:
            for i in range(len(hand_landmarks.landmark)):
                x = hand_landmarks.landmark[i].x
                y = hand_landmarks.landmark[i].y

                x_.append(x)
                y_.append(y)

            for i in range(len(hand_landmarks.landmark)):
                x = hand_landmarks.landmark[i].x
                y = hand_landmarks.landmark[i].y
                data_aux.append(x - min(x_))
                data_aux.append(y - min(y_))

        x1 = int(min(x_) * W) - 10
        y1 = int(min(y_) * H) - 10

        x2 = int(max(x_) * W) - 10
        y2 = int(max(y_) * H) - 10

        prediction = model.predict([np.asarray(data_aux)])

        predicted_character = labels_dict[int(prediction[0])]

        cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 0, 0), 4)
        cv2.putText(frame, predicted_character, (x1, y1 - 10), cv2.FONT_HERSHEY_SIMPLEX, 1.3, (0, 0, 0), 3,
                    cv2.LINE_AA)

    cv2.imshow('frame', frame)
    print(frame)
    cv2.waitKey(1)


cap.release()
cv2.destroyAllWindows()

[[[ 35  13   0]
  [ 35   8   0]
  [ 35   3   0]
  ...
  [132 108  46]
  [131 109  48]
  [131 109  48]]

 [[ 35  10   0]
  [ 36   7   0]
  [ 38   4   0]
  ...
  [137 111  47]
  [137 114  52]
  [136 113  51]]

 [[ 38   7   0]
  [ 41   7   0]
  [ 43   7   0]
  ...
  [144 113  47]
  [142 116  53]
  [142 116  53]]

 ...

 [[ 64   8  12]
  [ 61   5  10]
  [ 60   5  10]
  ...
  [ 66  38  41]
  [ 66  38  41]
  [ 66  38  41]]

 [[ 71   8  10]
  [ 67   7  10]
  [ 62   5  10]
  ...
  [ 70  42  44]
  [ 67  39  42]
  [ 67  39  42]]

 [[ 74   8   9]
  [ 69   6  10]
  [ 60   2   7]
  ...
  [ 71  43  45]
  [ 71  43  45]
  [ 70  42  44]]]
[[[ 53   8   0]
  [ 54   9   0]
  [ 52   7   0]
  ...
  [125  92  30]
  [129  92  25]
  [128  91  24]]

 [[ 51   7   0]
  [ 51   7   0]
  [ 51   7   0]
  ...
  [126  92  30]
  [130  92  24]
  [130  92  24]]

 [[ 48   7   0]
  [ 49   7   0]
  [ 50   8   0]
  ...
  [126  90  28]
  [131  91  21]
  [135  96  26]]

 ...

 [[ 83  13  15]
  [ 80  14  15]
  [ 74  11  11]
  ..

KeyboardInterrupt: 