***Installations***

In [3]:
!pip install opencv-python
!pip install mediapipe
!pip install scikit-learn



***Capture The Images***

In [3]:
import os
import cv2

DATA_DIR = './data'
if not os.path.exists(DATA_DIR):
    os.makedirs(DATA_DIR)

number_of_classes = 4
dataset_size = 100

cap = cv2.VideoCapture(0)  
if not cap.isOpened():
    print("Error: Could not open video device.")
else:
    for j in range(number_of_classes):
        class_dir = os.path.join(DATA_DIR, str(j))
        if not os.path.exists(class_dir):
            os.makedirs(class_dir)

        print(f'Collecting data for class {j}')

        while True:
            ret, frame = cap.read()
            if not ret:
                print("Error: Could not read frame from camera.")
                break

            cv2.putText(frame, 'Ready? Press "Q" to start or "ESC" to stop.', (100, 50), cv2.FONT_HERSHEY_SIMPLEX, 1.0, (0, 255, 0), 2, cv2.LINE_AA)
            cv2.imshow('frame', frame)

            key = cv2.waitKey(25)
            if key == ord('q'):  
                break
            elif key == 27:  
                print("Stopping data collection.")
                cap.release()
                cv2.destroyAllWindows()
                exit()

        counter = 0
        while counter < dataset_size:
            ret, frame = cap.read()
            if not ret:
                print("Error: Could not read frame from camera.")
                break

            cv2.imshow('frame', frame)
            
            key = cv2.waitKey(25)
            if key == 27: 
                print("Stopping data collection.")
                cap.release()
                cv2.destroyAllWindows()
                exit()

            cv2.imwrite(os.path.join(class_dir, f'{counter}.jpg'), frame)
            counter += 1

    cap.release()
    cv2.destroyAllWindows()


Collecting data for class 0
Collecting data for class 1
Collecting data for class 2
Collecting data for class 3
Collecting data for class 4
Collecting data for class 5
Stopping data collection.
Error: Could not read frame from camera.
Collecting data for class 6
Error: Could not read frame from camera.
Error: Could not read frame from camera.
Collecting data for class 7
Error: Could not read frame from camera.
Error: Could not read frame from camera.
Collecting data for class 8
Error: Could not read frame from camera.
Error: Could not read frame from camera.
Collecting data for class 9
Error: Could not read frame from camera.
Error: Could not read frame from camera.
Collecting data for class 10
Error: Could not read frame from camera.
Error: Could not read frame from camera.
Collecting data for class 11
Error: Could not read frame from camera.
Error: Could not read frame from camera.
Collecting data for class 12
Error: Could not read frame from camera.
Error: Could not read frame from 

***Make the Pickle File***

In [5]:
import os
import pickle

import mediapipe as mp
import cv2
import matplotlib.pyplot as plt


mp_hands = mp.solutions.hands
mp_drawing = mp.solutions.drawing_utils
mp_drawing_styles = mp.solutions.drawing_styles

hands = mp_hands.Hands(static_image_mode=True, min_detection_confidence=0.3)

DATA_DIR = './data'

data = []
labels = []
for dir_ in os.listdir(DATA_DIR):
    for img_path in os.listdir(os.path.join(DATA_DIR, dir_)):
        data_aux = []

        x_ = []
        y_ = []

        img = cv2.imread(os.path.join(DATA_DIR, dir_, img_path))
        img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

        results = hands.process(img_rgb)
        if results.multi_hand_landmarks:
            for hand_landmarks in results.multi_hand_landmarks:
                for i in range(len(hand_landmarks.landmark)):
                    x = hand_landmarks.landmark[i].x
                    y = hand_landmarks.landmark[i].y

                    x_.append(x)
                    y_.append(y)

                for i in range(len(hand_landmarks.landmark)):
                    x = hand_landmarks.landmark[i].x
                    y = hand_landmarks.landmark[i].y
                    data_aux.append(x - min(x_))
                    data_aux.append(y - min(y_))

            data.append(data_aux)
            labels.append(dir_)

f = open('data.pickle', 'wb')
pickle.dump({'data': data, 'labels': labels}, f)
f.close()

***Train The Model***

In [7]:
import pickle
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

data_dict = pickle.load(open('./data.pickle', 'rb'))

data = data_dict['data']
labels = data_dict['labels']

max_length = max([len(entry) for entry in data])
print(f"Max length of data entries: {max_length}")

fixed_data = []
for entry in data:
    if len(entry) < max_length:
        fixed_data.append(entry + [0] * (max_length - len(entry)))
    elif len(entry) > max_length:
        fixed_data.append(entry[:max_length])
    else:
        fixed_data.append(entry)

data = np.asarray(fixed_data)
labels = np.asarray(labels)

x_train, x_test, y_train, y_test = train_test_split(data, labels, test_size=0.2, shuffle=True, stratify=labels)

model = RandomForestClassifier()
model.fit(x_train, y_train)

y_predict = model.predict(x_test)

score = accuracy_score(y_predict, y_test)
print('{}% of samples were classified correctly!'.format(score * 100))

with open('model.p', 'wb') as f:
    pickle.dump({'model': model}, f)

f.close()

Max length of data entries: 84
100.0% of samples were classified correctly!


***Test The Model***

In [1]:
import pickle
import cv2
import mediapipe as mp
import numpy as np

model_dict = pickle.load(open('./model.p', 'rb'))
model = model_dict['model']

cap = cv2.VideoCapture(0, cv2.CAP_DSHOW)

mp_hands = mp.solutions.hands
mp_drawing = mp.solutions.drawing_utils
mp_drawing_styles = mp.solutions.drawing_styles

hands = mp_hands.Hands(static_image_mode=True, min_detection_confidence=0.3)

labels_dict = {0: 'A', 1: 'B', 2: 'C', 3: 'D'}

while True:
    data_aux = []
    x_ = []
    y_ = []

    ret, frame = cap.read()

    if not ret:
        print("Error: Unable to capture video frame.")
        break

    H, W, _ = frame.shape

    frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

    results = hands.process(frame_rgb)

    if results.multi_hand_landmarks:
        for hand_landmarks in results.multi_hand_landmarks:
            mp_drawing.draw_landmarks(
                frame,  
                hand_landmarks,  
                mp_hands.HAND_CONNECTIONS,  
                mp_drawing_styles.get_default_hand_landmarks_style(),
                mp_drawing_styles.get_default_hand_connections_style())

            # Collect the x and y coordinates of each landmark
            x_ = [landmark.x for landmark in hand_landmarks.landmark]
            y_ = [landmark.y for landmark in hand_landmarks.landmark]

            # Compute the features (normalized x and y differences)
            for landmark in hand_landmarks.landmark:
                data_aux.append(landmark.x - min(x_))
                data_aux.append(landmark.y - min(y_))

        # Ensure we have exactly 84 features (42 x and 42 y coordinates)
        if len(data_aux) == 84:
            print(f"Features: {data_aux}")  # Print the extracted features

            # Predict the hand gesture
            prediction = model.predict([np.asarray(data_aux)])
            predicted_character = labels_dict[int(prediction[0])]
            print(f"Predicted Character: {predicted_character}")  # Print the predicted character

            # Calculate bounding box to draw around the hand
            x1 = int(min(x_) * W) - 10
            y1 = int(min(y_) * H) - 10
            x2 = int(max(x_) * W) - 10
            y2 = int(max(y_) * H) - 10

            # Draw a rectangle around the hand
            cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 0, 0), 4)

            # Add the predicted character above the bounding box
            cv2.putText(frame, predicted_character, (x1, y1 - 10), cv2.FONT_HERSHEY_SIMPLEX, 1.3, (0, 0, 0), 3, cv2.LINE_AA)
        else:
            print("Insufficient features for prediction.")

    # Display the frame with the hand landmarks and prediction
    cv2.imshow('frame', frame)

    # Break the loop when 'q' is pressed
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

# Release the camera and destroy all windows
cap.release()
cv2.destroyAllWindows()




Insufficient features for prediction.
Insufficient features for prediction.
Insufficient features for prediction.
Insufficient features for prediction.
Insufficient features for prediction.
Insufficient features for prediction.
Insufficient features for prediction.
Insufficient features for prediction.
Insufficient features for prediction.
Insufficient features for prediction.
Insufficient features for prediction.
Insufficient features for prediction.
Insufficient features for prediction.
Insufficient features for prediction.
Insufficient features for prediction.
Insufficient features for prediction.
Insufficient features for prediction.
Insufficient features for prediction.
Insufficient features for prediction.
Insufficient features for prediction.
Insufficient features for prediction.
Insufficient features for prediction.
Insufficient features for prediction.
Insufficient features for prediction.
Insufficient features for prediction.
Insufficient features for prediction.
Insufficient