In [1]:
import cv2
import mediapipe as mp
import numpy as np
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
import matplotlib.pyplot as plt
import os

In [2]:
# Short aliases for the MediaPipe drawing helpers and the Holistic solution
mp_drawing = mp.solutions.drawing_utils
mp_holistic = mp.solutions.holistic

In [3]:
# Function to extract hand landmarks
def extract_hand_landmarks(image, holistic):
    """Return the left- and right-hand landmarks found in a BGR frame.

    The frame is converted from BGR to RGB and passed to ``holistic.process``.

    Args:
        image: Frame in BGR channel order (it is converted with
            ``cv2.COLOR_BGR2RGB`` before inference).
        holistic: Object exposing ``process(rgb_image)`` whose result has
            ``left_hand_landmarks`` and ``right_hand_landmarks`` attributes.

    Returns:
        A list of ``(x, y, z)`` tuples: the left-hand entries followed by the
        right-hand entries. A hand that was not detected contributes 21
        ``(0, 0, 0)`` placeholders instead.
        NOTE(review): the placeholder count assumes a detected hand always
        yields 21 landmarks — confirm against the MediaPipe documentation.
    """
    def _as_xyz(hand):
        # A missing hand is padded with zeros so every sample keeps one layout.
        if not hand:
            return [(0, 0, 0)] * 21
        return [(point.x, point.y, point.z) for point in hand.landmark]

    detection = holistic.process(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
    left_xyz = _as_xyz(detection.left_hand_landmarks)
    right_xyz = _as_xyz(detection.right_hand_landmarks)
    return left_xyz + right_xyz

In [4]:
# Capture landmarks for training data
def capture_landmarks(sign_name, num_samples):
    """Record labelled hand-landmark samples for one sign from camera 0.

    Reads up to ``num_samples`` frames, extracts the hand landmarks of each
    frame with ``extract_hand_landmarks`` and pairs them with ``sign_name``.
    Every frame is shown in a window titled ``'Frame'`` with the detected hands
    drawn on it. Capture stops early when a frame cannot be read or when ``q``
    is pressed in that window.

    Args:
        sign_name: Label stored alongside every captured sample.
        num_samples: Maximum number of frames to capture.

    Returns:
        A list of ``(landmarks, sign_name)`` tuples, one per captured frame.
        The list is empty when no frame could be read.
    """
    data = []
    cap = cv2.VideoCapture(0)
    try:
        # The context manager closes the MediaPipe graph when capture ends,
        # including when an exception interrupts the loop.
        with mp_holistic.Holistic() as holistic:
            for _ in range(num_samples):
                ret, frame = cap.read()
                if not ret:
                    break
                landmarks = extract_hand_landmarks(frame, holistic)
                data.append((landmarks, sign_name))

                # A single inference on the RGB frame feeds both hand overlays;
                # previously the raw BGR frame was processed once per overlay.
                results = holistic.process(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
                mp_drawing.draw_landmarks(frame, results.left_hand_landmarks, mp_holistic.HAND_CONNECTIONS)
                mp_drawing.draw_landmarks(frame, results.right_hand_landmarks, mp_holistic.HAND_CONNECTIONS)
                cv2.imshow('Frame', frame)

                if cv2.waitKey(1) & 0xFF == ord('q'):
                    break
    finally:
        # Always hand the camera and the preview window back to the OS.
        cap.release()
        cv2.destroyAllWindows()
    return data

In [5]:
# Record the training samples: one webcam capture session per sign
signs = ['I love you', 'Thank you', 'Hello']
num_samples_per_sign = 100

all_data = []
for sign in signs:
    print(f"Capturing data for {sign}")
    sign_data = capture_landmarks(sign, num_samples_per_sign)
    all_data += sign_data

Capturing data for I love you
Capturing data for Thank you
Capturing data for Hello


In [6]:
# Prepare the data
# Fail early with a clear message when nothing was captured (capture_landmarks
# returns an empty list if the camera cannot be read); unpacking an empty zip
# would otherwise raise a cryptic "not enough values to unpack" error.
if not all_data:
    raise ValueError("No samples were captured; check the camera and re-run the capture cell.")

# Split the (landmarks, label) pairs into two parallel arrays.
landmarks, labels = zip(*all_data)
landmarks = np.array(landmarks)
labels = np.array(labels)

In [7]:
# Map each sign name to its position in `signs`, then translate every label
label_map = dict(zip(signs, range(len(signs))))
labels_encoded = np.array([label_map[name] for name in labels])

In [8]:
# Flatten each sample's (landmark, coordinate) grid into one feature vector
num_samples, num_landmarks, num_coordinates = landmarks.shape
landmarks_reshaped = landmarks.reshape(num_samples, num_landmarks * num_coordinates)

# Train a classification model
# NOTE(review): the samples are consecutive video frames, so a random split can
# place near-identical frames in both sets — the accuracy printed below is
# likely optimistic. Confirm with a capture session held out entirely.
X_train, X_test, y_train, y_test = train_test_split(
    landmarks_reshaped, labels_encoded, test_size=0.2, random_state=42
)
# Seed the forest as well as the split so the reported accuracy is reproducible
# for the same captured data.
clf = RandomForestClassifier(n_estimators=100, random_state=42)
clf.fit(X_train, y_train)

# Evaluate the model
y_pred = clf.predict(X_test)
print(f"Accuracy: {accuracy_score(y_test, y_pred) * 100:.2f}%")

Accuracy: 91.67%


In [9]:
# Function to predict sign in real-time
def predict_sign():
    """Classify the sign shown to camera 0 and display it live.

    For every frame read from the camera, the hand landmarks are extracted
    with ``extract_hand_landmarks``, flattened to a single row and classified
    by the module-level ``clf``; the predicted index is looked up in ``signs``
    and written onto the frame together with the hand overlays. The loop ends
    when a frame cannot be read or when ``q`` is pressed in the ``'Frame'``
    window.

    Returns:
        None. The function only drives the preview window.
    """
    cap = cv2.VideoCapture(0)
    try:
        # The context manager closes the MediaPipe graph when the loop ends,
        # including when an exception interrupts it.
        with mp_holistic.Holistic() as holistic:
            while True:
                ret, frame = cap.read()
                if not ret:
                    break

                landmarks = extract_hand_landmarks(frame, holistic)
                features = np.array(landmarks).reshape(1, -1)
                prediction = clf.predict(features)
                sign = signs[prediction[0]]

                # Run the overlay inference once, on the RGB frame, and before
                # the caption is drawn so the text is not part of the input.
                results = holistic.process(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))

                cv2.putText(frame, f'Sign: {sign}', (50, 50), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 0, 0), 2, cv2.LINE_AA)
                mp_drawing.draw_landmarks(frame, results.left_hand_landmarks, mp_holistic.HAND_CONNECTIONS)
                mp_drawing.draw_landmarks(frame, results.right_hand_landmarks, mp_holistic.HAND_CONNECTIONS)
                cv2.imshow('Frame', frame)

                if cv2.waitKey(1) & 0xFF == ord('q'):
                    break
    finally:
        # Always hand the camera and the preview window back to the OS.
        cap.release()
        cv2.destroyAllWindows()

In [10]:
# Run the real-time sign prediction: opens camera 0 and loops until a frame
# cannot be read or 'q' is pressed in the preview window
predict_sign()