In [1]:
import os

import cv2
import string
import pickle
import mediapipe as mp
import matplotlib.pyplot as plt
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

In [None]:
# Collect webcam frames for each requested letter and save them as
# DATA_DIR/<LETTER>/<index>.jpg (index runs from 0 to dataset_size - 1).
DATA_DIR = './data'
dataset_size = 500  # number of frames saved per letter

choice = input("Do you want to collect data for (A) All letters or (B) Specific letters? (Enter A/B): ").strip().upper()

if choice == 'A':
    letters_to_collect = list(string.ascii_uppercase)
elif choice == 'B':
    letters_to_collect = input("Enter the letters you want to collect (comma-separated): ").strip().upper().split(',')
    # Drop empty entries produced by stray commas or surrounding whitespace.
    letters_to_collect = [letter.strip() for letter in letters_to_collect if letter.strip()]
else:
    print("Invalid choice. Exiting...")
    # `exit()` is the interactive helper injected by `site`; raising SystemExit
    # directly stops this cell without depending on that helper.
    raise SystemExit

# Make sure every target directory exists before the camera is opened.
for letter in letters_to_collect:
    os.makedirs(os.path.join(DATA_DIR, letter), exist_ok=True)

cap = cv2.VideoCapture(0)
try:
    camera_ok = True
    for letter in letters_to_collect:
        print(f'Collecting data for letter {letter}')
        letter_path = os.path.join(DATA_DIR, letter)

        # Phase 1: show a live preview until the user presses "q" to start.
        while True:
            ret, frame = cap.read()
            if not ret:
                print("Error: Unable to access the camera.")
                camera_ok = False
                break

            cv2.putText(frame, f'Ready to collect {letter}? Press "Q"!', (50, 50), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2, cv2.LINE_AA)
            cv2.imshow('frame', frame)

            if cv2.waitKey(1) & 0xFF == ord('q'):
                break

        if not camera_ok:
            # A failed read will keep failing; stop instead of repeating the
            # same error for every remaining letter.
            break

        # Phase 2: save frames until dataset_size is reached, or until "q"
        # is pressed to skip ahead to the next letter.
        counter = 0
        print(f"Move your hand in different angles, orientations, and lighting conditions for {letter}...")
        while counter < dataset_size:
            ret, frame = cap.read()
            if not ret:
                print("Error: Unable to access the camera.")
                camera_ok = False
                break

            cv2.imshow('frame', frame)

            img_path = os.path.join(letter_path, f'{counter}.jpg')
            cv2.imwrite(img_path, frame)

            if counter % 50 == 0 and counter > 0:
                print(f"{counter} images captured. Keep moving your hand to different positions!")

            counter += 1

            if cv2.waitKey(1) & 0xFF == ord('q'):
                break

        if not camera_ok:
            break
finally:
    # Always free the camera and close the preview window, including when the
    # cell is interrupted from the notebook or an exception is raised.
    cap.release()
    cv2.destroyAllWindows()

In [None]:
# Turn every collected image into a flat landmark feature vector and store the
# vectors together with their directory-name labels in data.pickle.
mp_hands = mp.solutions.hands
mp_drawing = mp.solutions.drawing_utils
mp_drawing_styles = mp.solutions.drawing_styles

DATA_DIR = './data'
# Index -> directory-name mapping, kept under its own name so the per-sample
# `labels` list below does not overwrite the list it was built from.
class_names = sorted(os.listdir(DATA_DIR))
labels_dict = {idx: label for idx, label in enumerate(class_names)}

data = []    # one feature vector per image in which a hand was detected
labels = []  # directory name of the corresponding image
skipped_files = 0  # files OpenCV could not decode

with mp_hands.Hands(static_image_mode=True, min_detection_confidence=0.3) as hands:
    # Sorted traversal makes the order of samples in the pickle deterministic.
    for dir_ in class_names:
        class_dir = os.path.join(DATA_DIR, dir_)
        if not os.path.isdir(class_dir):
            continue

        for img_name in sorted(os.listdir(class_dir)):
            img = cv2.imread(os.path.join(class_dir, img_name))
            if img is None:
                # imread returns None for unreadable / non-image files;
                # passing that to cvtColor would raise.
                skipped_files += 1
                continue

            results = hands.process(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
            if not results.multi_hand_landmarks:
                continue

            data_aux = []
            x_ = []
            y_ = []
            for hand_landmarks in results.multi_hand_landmarks:
                # x_/y_ intentionally accumulate across hands, so a second
                # hand is offset by the minimum over all hands seen so far.
                x_.extend(lm.x for lm in hand_landmarks.landmark)
                y_.extend(lm.y for lm in hand_landmarks.landmark)

                # The minima do not change while this hand is encoded, so
                # compute them once instead of once per landmark.
                min_x = min(x_)
                min_y = min(y_)
                for lm in hand_landmarks.landmark:
                    data_aux.append(lm.x - min_x)
                    data_aux.append(lm.y - min_y)

            data.append(data_aux)
            labels.append(dir_)

if skipped_files:
    print(f'Skipped {skipped_files} file(s) that could not be read as images.')

with open('data.pickle', 'wb') as f:
    pickle.dump({'data': data, 'labels': labels}, f)

In [None]:
# Train a random forest on the extracted landmark features and save it to model.p.
SEED = 42  # fixes the train/test split and the forest so reruns are reproducible

# NOTE: pickle.load can execute arbitrary code; only load a data.pickle that
# was produced locally by the feature-extraction cell.
with open('./data.pickle', 'rb') as f:
    data_dict = pickle.load(f)

samples = data_dict['data']
labels = np.asarray(data_dict['labels'])

if len(samples) == 0:
    raise ValueError("data.pickle contains no samples; run the feature-extraction cell first.")

# Feature vectors have different lengths when a different number of hands was
# detected; right-pad the shorter ones with zeros to a common width.
max_length = max(len(sample) for sample in samples)
data = np.array(
    [list(sample) + [0.0] * (max_length - len(sample)) for sample in samples],
    dtype=np.float32,
)

x_train, x_test, y_train, y_test = train_test_split(
    data, labels, test_size=0.2, shuffle=True, stratify=labels, random_state=SEED
)

model = RandomForestClassifier(random_state=SEED)
model.fit(x_train, y_train)

y_predict = model.predict(x_test)
# accuracy_score takes (y_true, y_pred) in that order.
score = accuracy_score(y_test, y_predict)

print('{}% of samples were classified correctly !'.format(score * 100))

with open('model.p', 'wb') as f:
    pickle.dump({'model': model}, f)

In [5]:
# Live recognition: read webcam frames, extract hand landmarks, classify them
# with the trained model and draw the prediction on the frame. Press "q" to quit.

# NOTE: pickle.load can execute arbitrary code; only load a model.p that was
# produced locally by the training cell.
with open('./model.p', 'rb') as model_file:
    model_dict = pickle.load(model_file)
model = model_dict['model']
expected_features = model.n_features_in_  # constant for a fitted model

mp_hands = mp.solutions.hands
mp_drawing = mp.solutions.drawing_utils
mp_drawing_styles = mp.solutions.drawing_styles

alphabet = list(string.ascii_uppercase)
labels_dict = {idx: letter for idx, letter in enumerate(alphabet)}

BOX_PADDING = 10  # pixels added around the detected hand on every side

cap = cv2.VideoCapture(0)
try:
    with mp_hands.Hands(static_image_mode=True, min_detection_confidence=0.3) as hands:
        while True:
            data_aux = []
            x_ = []
            y_ = []

            ret, frame = cap.read()
            if not ret:
                print("Error: Unable to capture video frame.")
                break

            H, W, _ = frame.shape

            frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

            results = hands.process(frame_rgb)
            if results.multi_hand_landmarks:
                for hand_landmarks in results.multi_hand_landmarks:
                    mp_drawing.draw_landmarks(
                        frame,
                        hand_landmarks,
                        mp_hands.HAND_CONNECTIONS,
                        mp_drawing_styles.get_default_hand_landmarks_style(),
                        mp_drawing_styles.get_default_hand_connections_style()
                    )

                    # x_/y_ accumulate across hands, matching how the
                    # training features were built.
                    for lm in hand_landmarks.landmark:
                        x_.append(lm.x)
                        y_.append(lm.y)

                    min_x = min(x_)
                    min_y = min(y_)
                    for lm in hand_landmarks.landmark:
                        data_aux.append(lm.x - min_x)
                        data_aux.append(lm.y - min_y)

                # Pad or truncate so the vector matches the width the model
                # was fitted with.
                if len(data_aux) < expected_features:
                    data_aux.extend([0.0] * (expected_features - len(data_aux)))
                elif len(data_aux) > expected_features:
                    data_aux = data_aux[:expected_features]

                # Expand the box outward on all four sides; subtracting the
                # padding from the max corner would cut into the hand.
                x1 = int(min(x_) * W) - BOX_PADDING
                y1 = int(min(y_) * H) - BOX_PADDING
                x2 = int(max(x_) * W) + BOX_PADDING
                y2 = int(max(y_) * H) + BOX_PADDING

                prediction = model.predict([np.asarray(data_aux)])
                # The training cell fits on directory-name strings, which are
                # never keys of the integer-indexed labels_dict. Fall back to
                # the predicted label itself so it is shown instead of a
                # constant placeholder; integer labels still map via the dict.
                predicted_character = labels_dict.get(prediction[0], str(prediction[0]))

                cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 0, 0), 4)
                cv2.putText(frame, predicted_character, (x1, y1 - 10), cv2.FONT_HERSHEY_SIMPLEX, 1.3, (0, 0, 0), 3, cv2.LINE_AA)

            cv2.imshow('frame', frame)
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
finally:
    # Release the camera and close the window even when the cell is
    # interrupted from the notebook (KeyboardInterrupt).
    cap.release()
    cv2.destroyAllWindows()

KeyboardInterrupt: 

: 