In [None]:
import cv2
import joblib
import mediapipe as mp
import numpy as np

# Restore the trained classifier from disk.
# NOTE(review): joblib.load unpickles arbitrary objects — only point this at a
# model file you trust.
model_path = "../Models/mlp_classifier_best_params.pkl"
mlp_model = joblib.load(model_path)

# MediaPipe handles: the hands solution module, its drawing helpers, and one
# tracker instance configured for video (non-static) input and a single hand.
mp_hands = mp.solutions.hands
mp_drawing = mp.solutions.drawing_utils
hands = mp_hands.Hands(
    static_image_mode=False,
    max_num_hands=1,
    min_detection_confidence=0.7,
)

# Class labels: the letters A-Z followed by 'del' and 'space'.
# Adjust based on your dataset labels.
class_labels = [chr(code) for code in range(ord('A'), ord('Z') + 1)] + ['del', 'space']

def extract_hand_landmarks(image):
    """Return the first detected hand's landmark coordinates as a model-ready row.

    The image is converted from BGR to RGB and passed to the module-level
    ``hands`` tracker. When at least one hand is reported, the ``x`` and ``y``
    attributes of every landmark of the first hand are laid out as
    ``[x0, y0, x1, y1, ...]`` in a NumPy array with a single row.

    Returns:
        The single-row array, or ``None`` when no hand is reported.
    """
    rgb_frame = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    detected_hands = hands.process(rgb_frame).multi_hand_landmarks

    # Guard clause: nothing detected in this frame.
    if not detected_hands:
        return None

    # One (x, y) pair per landmark; row-major flattening keeps x/y interleaved.
    xy_pairs = [(point.x, point.y) for point in detected_hands[0].landmark]
    return np.array(xy_pairs).reshape(1, -1)

# Open webcam (device index 0)
cap = cv2.VideoCapture(0)

# The try/finally guarantees the camera and the preview window are released
# even when the loop is left through an exception or a kernel interrupt;
# otherwise the device stays locked until the kernel is restarted.
try:
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            # The capture stopped delivering frames.
            break

        # Flip image for natural hand positioning (mirror view)
        frame = cv2.flip(frame, 1)

        # Extract hand landmarks; None means no hand was found in this frame
        landmarks = extract_hand_landmarks(frame)

        if landmarks is not None:
            # Make prediction; predict() returns one label per input row
            prediction = mlp_model.predict(landmarks)
            predicted_label = prediction[0]

            # Draw label on the frame
            cv2.putText(frame, f'Prediction: {predicted_label}', (10, 50),
                        cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2, cv2.LINE_AA)

        # Display the video feed
        cv2.imshow("ASL Hand Sign Prediction", frame)

        # Press 'q' to exit
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Release resources
    cap.release()
    cv2.destroyAllWindows()


I0000 00:00:1740545736.998465 18090294 gl_context.cc:369] GL version: 2.1 (2.1 Metal - 89.3), renderer: Apple M4 Pro
W0000 00:00:1740545737.005649 18260012 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1740545737.010845 18260023 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.


In [5]:
import cv2

def list_available_cameras(max_cameras=10):
    """Probe camera indices ``0 .. max_cameras - 1`` and print the ones that open.

    Each index is opened with ``cv2.VideoCapture``; indices whose capture
    reports ``isOpened()`` are collected and the capture is released again.
    The result is printed, nothing is returned.
    """
    usable_indices = []

    for camera_index in range(max_cameras):
        probe = cv2.VideoCapture(camera_index)
        if not probe.isOpened():
            continue
        usable_indices.append(camera_index)
        probe.release()

    if not usable_indices:
        print("No cameras found.")
    else:
        print(f"Available cameras: {usable_indices}")

# Probe the default range of camera indices (0-9) and print which ones open
list_available_cameras()


Available cameras: [0]


OpenCV: out device of bound (0-0): 1
OpenCV: camera failed to properly initialize!
OpenCV: out device of bound (0-0): 2
OpenCV: camera failed to properly initialize!
OpenCV: out device of bound (0-0): 3
OpenCV: camera failed to properly initialize!
OpenCV: out device of bound (0-0): 4
OpenCV: camera failed to properly initialize!
OpenCV: out device of bound (0-0): 5
OpenCV: camera failed to properly initialize!
OpenCV: out device of bound (0-0): 6
OpenCV: camera failed to properly initialize!
OpenCV: out device of bound (0-0): 7
OpenCV: camera failed to properly initialize!
OpenCV: out device of bound (0-0): 8
OpenCV: camera failed to properly initialize!
OpenCV: out device of bound (0-0): 9
OpenCV: camera failed to properly initialize!


In [3]:
import cv2
import joblib
import mediapipe as mp
import numpy as np
import pandas as pd
import time

# Restore the trained MLPClassifier from disk.
# NOTE(review): joblib.load unpickles arbitrary objects — only point this at a
# model file you trust.
model_path = "../Models/mlp_classifier_best_params.pkl"
mlp_model = joblib.load(model_path)

# Labels for the 42 feature columns (assuming training used these names).
# NOTE(review): even positions are labelled with the raw position (x0, x2, ...,
# x40) while odd positions use i // 2 (y0 ... y20), so the x and y suffixes do
# not line up. Confirm this matches the columns the model was trained on
# before changing it.
column_names = [f"y{i//2}" if i % 2 else f"x{i}" for i in range(42)]

# MediaPipe handles: the hands solution module, its drawing helpers, and one
# tracker instance configured for video (non-static) input and a single hand.
mp_hands = mp.solutions.hands
mp_drawing = mp.solutions.drawing_utils
hands = mp_hands.Hands(static_image_mode=False, max_num_hands=1, min_detection_confidence=0.7)

def extract_hand_landmarks(image):
    """Build the 42-value feature row for the first hand found in ``image``.

    The image is converted from BGR to RGB and run through the module-level
    ``hands`` tracker. The ``x`` and ``y`` attributes of each landmark of the
    first reported hand are interleaved as ``[x0, y0, x1, y1, ...]``.

    Returns:
        A NumPy array of shape (1, 42) when exactly 42 values were collected,
        otherwise ``None`` (no hand reported, or an unexpected landmark count).
    """
    detection = hands.process(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
    detected_hands = detection.multi_hand_landmarks
    if not detected_hands:
        return None

    flat_coords = [
        value
        for point in detected_hands[0].landmark
        for value in (point.x, point.y)
    ]
    # Anything other than 21 landmarks x 2 coordinates is rejected.
    if len(flat_coords) != 42:
        return None
    return np.array(flat_coords).reshape(1, -1)

# Open a video capture (built-in camera, device index 0)
cap = cv2.VideoCapture(0)

# The try/finally guarantees the camera and the preview window are released
# even when the loop is left through an exception or a kernel interrupt.
try:
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            # The capture stopped delivering frames.
            break

        # Flip frame horizontally for a mirror view
        frame = cv2.flip(frame, 1)

        # Extract landmarks from the current frame (None when no hand is found)
        landmarks = extract_hand_landmarks(frame)

        if landmarks is not None:
            # Draw the hand skeleton from the landmarks that were already
            # extracted. The previous version built a brand-new
            # mp_hands.Hands() on every frame (never closed) and ran a second
            # detection pass on the annotated frame just to draw; reusing the
            # extracted coordinates avoids both and guarantees the drawn hand
            # is the one being classified.
            # MediaPipe reports x/y normalised by image width/height, so scale
            # them to pixel coordinates.
            frame_height, frame_width = frame.shape[:2]
            points = [
                (int(x * frame_width), int(y * frame_height))
                for x, y in landmarks.reshape(-1, 2)
            ]
            # HAND_CONNECTIONS holds (start, end) landmark-index pairs.
            for start_idx, end_idx in mp_hands.HAND_CONNECTIONS:
                cv2.line(frame, points[start_idx], points[end_idx],
                         (255, 255, 255), 2, cv2.LINE_AA)
            for point in points:
                cv2.circle(frame, point, 4, (0, 255, 0), -1, cv2.LINE_AA)

            # Convert landmarks to a DataFrame with the configured column names
            landmarks_df = pd.DataFrame(landmarks, columns=column_names)

            # Get prediction probabilities (one row in, so take row 0)
            probs = mlp_model.predict_proba(landmarks_df)[0]
            classes = mlp_model.classes_

            # Pair each class with its probability, highest probability first
            ranked = sorted(zip(classes, probs), key=lambda pair: pair[1], reverse=True)

            # Take top 5 predictions
            top5 = ranked[:5]

            # Overlay the top 5 predictions on the frame, one line per rank
            y0 = 30
            dy = 30
            for i, (cls, prob) in enumerate(top5):
                text = f"Rank {i+1}: {cls} ({prob*100:.1f}%)"
                cv2.putText(frame, text, (10, y0 + i*dy),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.8, (0, 0, 255), 2, cv2.LINE_AA)

        # Display the frame
        cv2.imshow("ASL Live Prediction", frame)

        # Break the loop if 'q' is pressed
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Cleanup
    cap.release()
    cv2.destroyAllWindows()


I0000 00:00:1740628698.161617 19141616 gl_context.cc:369] GL version: 2.1 (2.1 Metal - 89.3), renderer: Apple M4 Pro
INFO: Created TensorFlow Lite XNNPACK delegate for CPU.
W0000 00:00:1740628698.169791 19143020 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1740628698.182307 19143028 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
2025-02-26 19:58:18.900 python[28478:19141616] +[IMKClient subclass]: chose IMKClient_Modern
2025-02-26 19:58:18.900 python[28478:19141616] +[IMKInputSession subclass]: chose IMKInputSession_Modern
W0000 00:00:1740628700.668853 19143016 landmark_projection_calculator.cc:186] Using NORM_RECT without IMAGE_DIMENSIONS is only supported for the square ROI. Provide IMAGE_DIMENSIONS or use PROJECTION_MATRIX.
I0000 00:00:1740628700.672989 19141616 gl_context.cc:36

: 