In [3]:
%pip install mediapipe opencv-python scikit-learn numpy matplotlib

Note: you may need to restart the kernel to use updated packages.


In [4]:
import os
import cv2
import time

# Define the mapping of class indices to ASL letters
labels_dict = {
    0: 'A', 1: 'B', 2: 'C', 3: 'D', 4: 'E', 5: 'F', 6: 'G', 7: 'H', 8: 'I', 9: 'K',
    10: 'L', 11: 'M', 12: 'N', 13: 'O', 14: 'P', 15: 'Q', 16: 'R', 17: 'S', 18: 'T', 
    19: 'U', 20: 'V', 21: 'W', 22: 'X', 23: 'Y'
}

DATA_DIR = './data'
if not os.path.exists(DATA_DIR):
    os.makedirs(DATA_DIR)

number_of_classes = 24
dataset_size = 100

cap = cv2.VideoCapture(1)  # Change to 0 if you have only the default camera

for j in range(number_of_classes):
    class_dir = os.path.join(DATA_DIR, str(j))
    if not os.path.exists(class_dir):
        os.makedirs(class_dir)

    print(f'Collecting data for class {j} ({labels_dict[j]})')
    
    # Wait for 5 seconds before starting the data collection for each class
    print("Get ready...")
    time.sleep(5)

    # Capture images for each class
    counter = 0
    while counter < dataset_size:
        ret, frame = cap.read()
        if not ret:
            break
        cv2.imshow('frame', frame)
        cv2.imwrite(os.path.join(class_dir, f'{counter}.jpg'), frame)
        counter += 1
        if cv2.waitKey(25) == ord('q'):
            break

cap.release()
cv2.destroyAllWindows()


Collecting data for class 0 (A)
Get ready...
Collecting data for class 1 (B)
Get ready...
Collecting data for class 2 (C)
Get ready...


KeyboardInterrupt: 

In [5]:
import pickle
import mediapipe as mp
import cv2

mp_hands = mp.solutions.hands
mp_drawing = mp.solutions.drawing_utils

data = []
labels = []

DATA_DIR = './data'

for dir_ in os.listdir(DATA_DIR):
    class_dir = os.path.join(DATA_DIR, dir_)
    if not os.path.isdir(class_dir):
        continue

    for img_path in os.listdir(class_dir):
        data_aux = []
        x_ = []
        y_ = []

        img = cv2.imread(os.path.join(class_dir, img_path))
        img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

        # Detect hand landmarks
        with mp_hands.Hands() as hands:
            result = hands.process(img_rgb)
            if result.multi_hand_landmarks:
                for hand_landmarks in result.multi_hand_landmarks:
                    for i in range(len(hand_landmarks.landmark)):
                        x = hand_landmarks.landmark[i].x
                        y = hand_landmarks.landmark[i].y
                        x_.append(x)
                        y_.append(y)

                    for i in range(len(hand_landmarks.landmark)):
                        x = hand_landmarks.landmark[i].x
                        y = hand_landmarks.landmark[i].y
                        data_aux.append(x - min(x_))
                        data_aux.append(y - min(y_))

                data.append(data_aux)
                labels.append(int(dir_))

# Save data
with open('data.pickle', 'wb') as f:
    pickle.dump({'data': data, 'labels': labels}, f)


I0000 00:00:1730756386.257469  874341 gl_context.cc:357] GL version: 2.1 (2.1 Metal - 88.1), renderer: Apple M1 Pro
W0000 00:00:1730756386.266271  911730 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1730756386.272735  911730 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
I0000 00:00:1730756386.315173  874341 gl_context.cc:357] GL version: 2.1 (2.1 Metal - 88.1), renderer: Apple M1 Pro
W0000 00:00:1730756386.321479  911741 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1730756386.327358  911741 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
I0000 00:00:1730756386.356084  874341 gl

In [6]:
import pickle
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

# Load data
with open('data.pickle', 'rb') as f:
    data_dict = pickle.load(f)

# Filter data to ensure all elements are of the same length
data = [entry for entry in data_dict['data'] if len(entry) == len(data_dict['data'][0])]
labels = [label for entry, label in zip(data_dict['data'], data_dict['labels']) if len(entry) == len(data_dict['data'][0])]

# Convert to numpy arrays
data = np.array(data)
labels = np.array(labels)

# Split data into training and testing sets
x_train, x_test, y_train, y_test = train_test_split(data, labels, test_size=0.2, shuffle=True, stratify=labels)

# Train the Random Forest model
model = RandomForestClassifier()
model.fit(x_train, y_train)

# Save the trained model
with open('model.p', 'wb') as f:
    pickle.dump({'model': model}, f)

# Evaluate model on the test set
y_pred = model.predict(x_test)
accuracy = accuracy_score(y_test, y_pred)
print(f"Model accuracy: {accuracy * 100:.2f}%")


Model accuracy: 99.58%


In [2]:
import pickle
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score

# Load data
data_dict = pickle.load(open('data.pickle', 'rb'))
data = np.array([entry for entry in data_dict['data'] if len(entry) == len(data_dict['data'][0])])
labels = np.array([label for entry, label in zip(data_dict['data'], data_dict['labels']) if len(entry) == len(data_dict['data'][0])])

# Initialize lists to store training and testing data
x_train, x_test, y_train, y_test = [], [], [], []

# For each class, shuffle and select 80 samples for training and 20 for testing
unique_labels = np.unique(labels)
for label in unique_labels:
    # Find all samples of the current class
    class_indices = np.where(labels == label)[0]
    class_data = data[class_indices]
    class_labels = labels[class_indices]
    
    # Shuffle samples within the class
    shuffled_indices = np.random.permutation(len(class_data))
    class_data = class_data[shuffled_indices]
    class_labels = class_labels[shuffled_indices]
    
    # Select 80 samples for training and 20 for testing
    x_train.extend(class_data[:80])
    y_train.extend(class_labels[:80])
    x_test.extend(class_data[80:100])
    y_test.extend(class_labels[80:100])

# Convert lists to numpy arrays
x_train, x_test = np.array(x_train), np.array(x_test)
y_train, y_test = np.array(y_train), np.array(y_test)

# Train the Random Forest model
model = RandomForestClassifier()
model.fit(x_train, y_train)

# Save the model
with open('model.p', 'wb') as f:
    pickle.dump({'model': model}, f)

# Evaluate model
y_pred = model.predict(x_test)
accuracy = accuracy_score(y_test, y_pred)
print(f"Model accuracy: {accuracy * 100:.2f}%")


Model accuracy: 99.33%


In [3]:
import cv2
import pickle
import numpy as np
import mediapipe as mp

# Load the trained Random Forest model
with open('model.p', 'rb') as f:
    model_dict = pickle.load(f)
model = model_dict['model']

# Define labels for ASL alphabet (excluding 'J' and 'Z')
labels_dict = {
    0: 'A', 1: 'B', 2: 'C', 3: 'D', 4: 'E', 5: 'F', 6: 'G', 7: 'H', 8: 'I', 9: 'K',
    10: 'L', 11: 'M', 12: 'N', 13: 'O', 14: 'P', 15: 'Q', 16: 'R', 17: 'S', 18: 'T',
    19: 'U', 20: 'V', 21: 'W', 22: 'X', 23: 'Y'
}

# Initialize MediaPipe Hands
mp_hands = mp.solutions.hands
mp_drawing = mp.solutions.drawing_utils

# Open webcam
cap = cv2.VideoCapture(1)

# Real-time ASL Recognition
while True:
    ret, frame = cap.read()
    if not ret:
        break

    # Convert frame to RGB for MediaPipe processing
    img_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    
    # Use MediaPipe Hands
    with mp_hands.Hands() as hands:
        result = hands.process(img_rgb)

        if result.multi_hand_landmarks:
            for hand_landmarks in result.multi_hand_landmarks:
                data_aux = []
                x_, y_ = [], []
                
                # Gather landmark data
                for lm in hand_landmarks.landmark:
                    x_.append(lm.x)
                    y_.append(lm.y)

                for lm in hand_landmarks.landmark:
                    data_aux.append(lm.x - min(x_))
                    data_aux.append(lm.y - min(y_))

                # Predict the ASL letter using the model
                prediction = model.predict([np.asarray(data_aux)])
                predicted_character = labels_dict[int(prediction[0])]

                # Draw landmarks and bounding box
                x_min, y_min, x_max, y_max = int(min(x_) * frame.shape[1]), int(min(y_) * frame.shape[0]), int(max(x_) * frame.shape[1]), int(max(y_) * frame.shape[0])
                cv2.rectangle(frame, (x_min, y_min), (x_max, y_max), (0, 255, 0), 2)
                mp_drawing.draw_landmarks(frame, hand_landmarks, mp_hands.HAND_CONNECTIONS)

                # Display the prediction on the frame
                cv2.putText(frame, predicted_character, (x_min, y_min - 10), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)

    # Show the frame
    cv2.imshow("ASL Recognition with Random Forest", frame)

    # Break loop with 'q' key
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

# Release resources
cap.release()
cv2.destroyAllWindows()


I0000 00:00:1730756018.677157  874341 gl_context.cc:357] GL version: 2.1 (2.1 Metal - 88.1), renderer: Apple M1 Pro
INFO: Created TensorFlow Lite XNNPACK delegate for CPU.
W0000 00:00:1730756018.685511  874632 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1730756018.691834  874629 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
I0000 00:00:1730756018.826173  874341 gl_context.cc:357] GL version: 2.1 (2.1 Metal - 88.1), renderer: Apple M1 Pro
W0000 00:00:1730756018.839720  874680 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1730756018.846580  874680 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for fe