In [9]:
# Criando o Dataset

import os
import pickle
import mediapipe as mp
import cv2
import matplotlib.pyplot as plt

# Shortcuts to the MediaPipe solution modules used by this notebook
# (the third name keeps its original spelling so existing references still work).
mp_hands, mp_drawing, mp_drawing_syles = (
    mp.solutions.hands,
    mp.solutions.drawing_utils,
    mp.solutions.drawing_styles,
)

# Hand detector configured for independent still images with a permissive
# detection threshold of 0.3.
hands = mp_hands.Hands(min_detection_confidence=0.3, static_image_mode=True)

# Root folder of the image collection; the extraction loop reads one
# sub-folder per class and uses the sub-folder name as the label.
DATA_DIR = './data'

# Parallel accumulators: data[i] is the feature row for labels[i].
data, labels = [], []

# Walk every class folder under DATA_DIR and turn each image into one feature
# row made of the (x, y) coordinates of the detected hand landmarks.
for dir_ in os.listdir(DATA_DIR):
    class_dir = os.path.join(DATA_DIR, dir_)
    if not os.path.isdir(class_dir):
        # Stray files next to the class folders (e.g. .DS_Store) would make
        # os.listdir() raise; they are not classes, so skip them.
        continue

    for img_path in os.listdir(class_dir):
        img = cv2.imread(os.path.join(class_dir, img_path))
        if img is None:
            # cv2.imread returns None (it does not raise) for files it cannot
            # decode; cvtColor would crash on it, so skip the file.
            continue
        img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

        results = hands.process(img_rgb)
        if not results.multi_hand_landmarks:
            # No hand found: the image contributes nothing to the dataset.
            continue

        # All the information the model needs is in the landmark positions.
        # Every detected hand is appended to the same row, so the row length
        # grows with the number of hands found in the image.
        data_aux = []
        for hand_landmarks in results.multi_hand_landmarks:
            for landmark in hand_landmarks.landmark:
                data_aux.append(landmark.x)
                data_aux.append(landmark.y)

        data.append(data_aux)
        labels.append(dir_)

# Persist the extracted features and their labels in a single pickle file.
# The context manager guarantees the file is closed even if dumping fails.
with open('data.pickle', 'wb') as f:  # 'wb': write as bytes
    pickle.dump({'data': data, 'labels': labels}, f)

In [1]:
import pickle
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

# Load the dataset written by the dataset-creation cell.
# NOTE: unpickling can execute arbitrary code; only load files produced by
# this notebook. The context manager closes the file handle after reading.
with open('./data.pickle', 'rb') as f:
    data_dict = pickle.load(f)

data = data_dict['data']      # list of feature rows (one per image)
labels = data_dict['labels']  # class label of each row

# Rows can have different lengths (the dataset cell appends every detected
# hand to the same row), but the classifier needs a rectangular matrix.
if not data:
    # Same exception type max() would raise, with an actionable message.
    raise ValueError('data.pickle contains no samples; rebuild the dataset first')

# Length of the longest row in the dataset.
max_length = max(len(item) for item in data)

# Right-pad every shorter row with zeros up to max_length. No row can be
# longer than the maximum, so truncation is never needed.
processed_data = [
    list(item) + [0] * (max_length - len(item))
    for item in data
]

# Fixed seed so the train/test split and the forest (and therefore the
# reported accuracy) are reproducible across kernel restarts.
SEED = 42

# Stratified 80/20 split keeps the class proportions equal in both parts.
x_train, x_test, y_train, y_test = train_test_split(
    processed_data,
    labels,
    test_size=0.2,
    shuffle=True,
    stratify=labels,
    random_state=SEED,
)

model = RandomForestClassifier(random_state=SEED)
model.fit(x_train, y_train)

# Accuracy on the held-out part.
y_predict = model.predict(x_test)
score = accuracy_score(y_test, y_predict)

print('{}% da amostra foi classificada corretamente!'.format(score * 100))

# Store the fitted classifier for the inference cell.
with open('model.p', 'wb') as f:
    pickle.dump({'model': model}, f)


98.48111332007953% of samples were classified correctly!


In [15]:
# inference_classifier.py

import pickle
import cv2
import mediapipe as mp
import numpy as np

# Load the classifier saved by the training cell.
# NOTE: unpickling can execute arbitrary code; only load a model file you
# created yourself. The context manager closes the file handle after reading.
with open('./model.p', 'rb') as f:
    model_dict = pickle.load(f)
model = model_dict['model']

# Default camera (device index 0).
cap = cv2.VideoCapture(0)

mp_hands = mp.solutions.hands
mp_drawing = mp.solutions.drawing_utils
mp_drawing_styles = mp.solutions.drawing_styles

# Same detector settings as the dataset-creation cell, so the landmarks seen
# at inference time are produced the same way as the training ones.
hands = mp_hands.Hands(static_image_mode=True, min_detection_confidence=0.3)

# Maps the integer class id predicted by the model to its letter:
# 0 -> 'A', 1 -> 'B', ..., 25 -> 'Z'.
labels_dict = {index: chr(ord('A') + index) for index in range(26)}

# Real-time inference: grab webcam frames, detect hands, classify each hand
# and draw the landmarks, a bounding box and the predicted letter.
# Press 'q' in the preview window to stop.

# Feature count the classifier was fitted with; falls back to the 84 this
# notebook previously hard-coded if the estimator does not expose it.
n_features = getattr(model, 'n_features_in_', 84)

# Margin (in pixels) added around the hand when drawing its bounding box.
BOX_MARGIN = 10

try:
    while True:
        ret, frame = cap.read()
        if not ret or frame is None:
            # Camera unavailable or stream ended: frame.shape would crash.
            break

        H, W, _ = frame.shape

        frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

        results = hands.process(frame_rgb)
        if results.multi_hand_landmarks:
            # First pass: draw the skeleton of every detected hand.
            for hand_landmarks in results.multi_hand_landmarks:
                mp_drawing.draw_landmarks(
                    frame,  # image to draw
                    hand_landmarks,  # model output
                    mp_hands.HAND_CONNECTIONS,  # hand connections
                    mp_drawing_styles.get_default_hand_landmarks_style(),
                    mp_drawing_styles.get_default_hand_connections_style())

            # Second pass: classify each hand on its own and annotate it.
            for hand_landmarks in results.multi_hand_landmarks:
                data_aux = []
                for landmark in hand_landmarks.landmark:
                    data_aux.append(landmark.x)
                    data_aux.append(landmark.y)

                # One row, as many columns as coordinates collected.
                processed_data_aux = np.asarray(data_aux).reshape(1, -1)

                # Match the width the model expects: zero-pad on the right when
                # short, truncate when long (np.pad rejects negative widths).
                missing = n_features - processed_data_aux.shape[1]
                if missing > 0:
                    processed_data_aux = np.pad(processed_data_aux, [(0, 0), (0, missing)])
                elif missing < 0:
                    processed_data_aux = processed_data_aux[:, :n_features]

                prediction = model.predict(processed_data_aux)
                predicted_character = labels_dict[int(prediction[0])]

                # Landmark coordinates are scaled by the frame size to get
                # pixels; even indices hold x, odd indices hold y. The margin is
                # subtracted at the top-left and added at the bottom-right so
                # the box encloses the whole hand.
                x1 = int(min(data_aux[::2]) * W) - BOX_MARGIN
                y1 = int(min(data_aux[1::2]) * H) - BOX_MARGIN
                x2 = int(max(data_aux[::2]) * W) + BOX_MARGIN
                y2 = int(max(data_aux[1::2]) * H) + BOX_MARGIN

                cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 0, 0), 4)
                cv2.putText(frame, predicted_character, (x1, y1 - 10), cv2.FONT_HERSHEY_SIMPLEX, 1.3, (0, 0, 0), 3,
                            cv2.LINE_AA)

        cv2.imshow('frame', frame)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Always free the camera and close the preview window, even on errors.
    cap.release()
    cv2.destroyAllWindows()
