In [22]:
import os
import cv2
import pickle

import numpy as np
import mediapipe as mp

from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

In [19]:
# Capture `dataset_size` webcam frames for each of `number_of_classes` classes
# and store them as <directory>/<class index>/<frame index>.jpg.
directory = '../data/landry/'
# exist_ok keeps the cell re-runnable when the folders already exist.
os.makedirs(directory, exist_ok=True)

number_of_classes = 24
dataset_size = 250

cap = cv2.VideoCapture(0)
if not cap.isOpened():
    raise RuntimeError('Could not open webcam (device 0)')

try:
    for i in range(number_of_classes):
        class_dir = os.path.join(directory, str(i))
        os.makedirs(class_dir, exist_ok=True)

        print('Collecting data for class {}'.format(i))

        # Live preview: wait until the operator presses Q before recording.
        while True:
            ret, frame = cap.read()
            if not ret:
                # Without this check a failed grab hands None to OpenCV drawing calls.
                raise RuntimeError('Failed to read a frame from the webcam')
            cv2.putText(frame, 'Press Q to Start', (100, 50), cv2.FONT_HERSHEY_SIMPLEX, 1.3, (0, 255, 0), 3,
                        cv2.LINE_AA)
            cv2.imshow('frame', frame)
            # Mask to the low byte, consistent with the key check in the inference cell.
            if cv2.waitKey(25) & 0xFF == ord('q'):
                break

        # Record phase: one image per successfully grabbed frame.
        counter = 0
        while counter < dataset_size:
            ret, frame = cap.read()
            if not ret:
                raise RuntimeError('Failed to read a frame from the webcam')
            cv2.imshow('frame', frame)
            cv2.waitKey(25)
            cv2.imwrite(os.path.join(class_dir, '{}.jpg'.format(counter)), frame)
            counter += 1
finally:
    # Always free the camera and close the preview window, even after an error.
    cap.release()
    cv2.destroyAllWindows()

Collecting data for class 0
Collecting data for class 1
Collecting data for class 2
Collecting data for class 3
Collecting data for class 4
Collecting data for class 5
Collecting data for class 6
Collecting data for class 7
Collecting data for class 8
Collecting data for class 9
Collecting data for class 10
Collecting data for class 11
Collecting data for class 12
Collecting data for class 13
Collecting data for class 14
Collecting data for class 15
Collecting data for class 16
Collecting data for class 17
Collecting data for class 18
Collecting data for class 19
Collecting data for class 20
Collecting data for class 21
Collecting data for class 22
Collecting data for class 23


In [46]:
# Turn every stored image into a flat feature vector of hand-landmark
# coordinates: (x - min x, y - min y) for each landmark of the first detected
# hand. Images in which no hand is found are dropped.
mp_hands = mp.solutions.hands
mp_drawing = mp.solutions.drawing_utils
mp_drawing_styles = mp.solutions.drawing_styles

directory = '../data/landry/'

data = []
labels = []
# The context manager releases the MediaPipe graph once extraction is finished.
with mp_hands.Hands(static_image_mode=True, min_detection_confidence=0.3, max_num_hands=1) as hands:
    # Sorted so the sample order does not depend on filesystem listing order.
    for dir_ in sorted(os.listdir(directory)):
        class_dir = os.path.join(directory, dir_)
        if not os.path.isdir(class_dir):
            # Stray files in the data root would make os.listdir() fail below.
            continue

        for img_path in sorted(os.listdir(class_dir)):
            img = cv2.imread(os.path.join(class_dir, img_path))
            if img is None:
                # cv2.imread returns None for unreadable / non-image files.
                continue
            img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

            results = hands.process(img_rgb)
            if not results.multi_hand_landmarks:
                continue

            hand_landmarks = results.multi_hand_landmarks[0]  # Consider only the first hand detected

            x_ = [landmark.x for landmark in hand_landmarks.landmark]
            y_ = [landmark.y for landmark in hand_landmarks.landmark]
            # Compute the minima once instead of once per landmark.
            x_min = min(x_)
            y_min = min(y_)

            data_aux = []
            for x, y in zip(x_, y_):
                data_aux.append(x - x_min)
                data_aux.append(y - y_min)

            data.append(data_aux)
            labels.append(dir_)

if not data:
    raise ValueError('No hand landmarks were extracted from {}'.format(directory))

max_length = max(len(sublist) for sublist in data)

# Right-pad with zeros so every sample has the same number of features.
data_padded = [sublist + [0] * (max_length - len(sublist)) for sublist in data]

In [47]:
# Persist the padded landmark features together with their class labels.
landmark_payload = {'data': data_padded, 'labels': labels}
with open('../data/landmark_data.pkl', 'wb') as out_file:
    pickle.dump(landmark_payload, out_file)

In [48]:
# Reload the landmark dataset from disk and convert it to NumPy arrays.
# NOTE: pickle.load can execute arbitrary code; only load files you created.
with open('../data/landmark_data.pkl', 'rb') as file:
    # `with` closes the handle; pickle.load(open(...)) left it open.
    data_dict = pickle.load(file)

data = np.asarray(data_dict['data'])
labels = np.asarray(data_dict['labels'])

In [49]:
# Hold out 20% of the samples, stratified so every class keeps its proportion.
# random_state pins the shuffle so the split is identical on every run.
x_train, x_test, y_train, y_test = train_test_split(
    data, labels, test_size=0.2, shuffle=True, stratify=labels, random_state=42)

In [50]:
# Seed the forest so refitting on the same data yields the same model.
model = RandomForestClassifier(random_state=42)

model.fit(x_train, y_train)

In [51]:
# Score the classifier on the held-out split.
y_pred = model.predict(x_test)

# accuracy_score takes (y_true, y_pred); the arguments were previously swapped.
score = accuracy_score(y_test, y_pred)

# NOTE(review): the images are consecutive webcam frames, so a random split
# probably puts near-duplicate frames in both train and test; treat this score
# as optimistic until it is confirmed on a separately recorded session.
print('{}% of samples were classified correctly !'.format(score * 100))

100.0% of samples were classified correctly !


In [52]:
# Persist the trained classifier; `with` closes the file even if dump() raises.
with open('../data/model.pkl', 'wb') as file:
    pickle.dump({'model': model}, file)

In [53]:
# Reload the persisted classifier.
# NOTE: pickle.load can execute arbitrary code; only load files you created.
with open('../data/model.pkl', 'rb') as file:
    # `with` closes the handle; pickle.load(open(...)) left it open.
    model_dict = pickle.load(file)

model = model_dict['model']

In [45]:
# Live demo: read webcam frames, extract the landmarks of one hand, classify
# them with `model`, and draw the predicted letter next to the hand.
# Press Q in the preview window to stop.
cap = cv2.VideoCapture(0)

mp_hands = mp.solutions.hands
mp_drawing = mp.solutions.drawing_utils
mp_drawing_styles = mp.solutions.drawing_styles

# max_num_hands=1 mirrors the feature-extraction cell: the classifier was
# trained on the landmarks of a single hand, so a second hand only produced
# feature vectors it could not consume.
hands = mp_hands.Hands(static_image_mode=True, min_detection_confidence=0.3, max_num_hands=1)

# Class index -> letter shown on screen.
labels_dict = {0: 'A', 1: 'B', 2: 'C', 3: 'D', 4: 'E', 5: 'F', 6: 'G', 7: 'H', 8: 'I', 9: 'K',
               10: 'L', 11: 'M', 12: 'N', 13: 'O', 14: 'P', 15: 'Q', 16: 'R', 17: 'S', 18: 'T', 19: 'U',
               20: 'V', 21: 'W', 22: 'X', 23: 'Y'}

# Feature count the classifier was fitted on; 42 (x and y per landmark of one
# hand) is the fallback for estimators that do not expose n_features_in_.
expected_features = getattr(model, 'n_features_in_', 42)

try:
    while True:
        ret, frame = cap.read()
        if not ret:
            # A failed grab returns no image; stop instead of crashing on frame.shape.
            print('Failed to read a frame from the webcam')
            break

        H, W, _ = frame.shape

        frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

        results = hands.process(frame_rgb)
        if results.multi_hand_landmarks:
            hand_landmarks = results.multi_hand_landmarks[0]  # Same choice as during training

            mp_drawing.draw_landmarks(
                frame,  # image to draw
                hand_landmarks,  # model output
                mp_hands.HAND_CONNECTIONS,  # hand connections
                mp_drawing_styles.get_default_hand_landmarks_style(),
                mp_drawing_styles.get_default_hand_connections_style())

            x_ = [landmark.x for landmark in hand_landmarks.landmark]
            y_ = [landmark.y for landmark in hand_landmarks.landmark]
            x_min = min(x_)
            y_min = min(y_)

            # Same feature layout as the training data: offsets from the hand's
            # top-left corner, interleaved as x0, y0, x1, y1, ...
            data_aux = []
            for x, y in zip(x_, y_):
                data_aux.append(x - x_min)
                data_aux.append(y - y_min)

            # Bounding box in pixels with a 10 px margin on every side
            # (the far corner previously subtracted the margin).
            x1 = int(x_min * W) - 10
            y1 = int(y_min * H) - 10

            x2 = int(max(x_) * W) + 10
            y2 = int(max(y_) * H) + 10

            # Only predict when the vector has the length the model expects.
            if len(data_aux) == expected_features:
                prediction = model.predict([np.asarray(data_aux)])

                predicted_character = labels_dict[int(prediction[0])]

                cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 0, 0), 4)
                cv2.putText(frame, predicted_character, (x1, y1 - 10), cv2.FONT_HERSHEY_SIMPLEX, 1.3, (0, 0, 0), 3,
                            cv2.LINE_AA)

        cv2.imshow('frame', frame)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Release the detector, the camera and the window even if the loop raised.
    hands.close()
    cap.release()
    cv2.destroyAllWindows()