# Real-time classification

In [None]:
import cv2
import numpy as np
import mediapipe as mp
import joblib
from collections import deque, Counter

# Load the trained model.
# NOTE(review): joblib.load unpickles the file, and unpickling can execute
# arbitrary code -- only load model files that come from a trusted source.
model = joblib.load('RF_Hand_LandMark.pkl')

# Load MediaPipe Hands.
# static_image_mode=False treats the input as a video stream, and
# max_num_hands=1 limits detection to a single hand per frame.
mp_hands = mp.solutions.hands
mp_draw = mp.solutions.drawing_utils
hands = mp_hands.Hands(static_image_mode=False, max_num_hands=1, min_detection_confidence=0.5)

# Open webcam (device index 0).
cap = cv2.VideoCapture(0)
if not cap.isOpened():
    print("Error: Could not open camera")
    # Bare exit() is an interactive helper injected by the `site` module and
    # exits with status 0; raise SystemExit directly with a non-zero status so
    # the failure is visible to whatever launched the script.
    raise SystemExit(1)

# Buffer for smoothing predictions: holds at most the WINDOW_SIZE most recent
# per-frame predictions; older entries are dropped automatically by the deque.
WINDOW_SIZE = 30
predictions_buffer = deque(maxlen=WINDOW_SIZE)

def preprocess_landmarks(landmarks):
    """
    Turn hand landmarks into a flat, wrist-relative feature vector.

    The x and y columns are shifted so that landmark 0 becomes the origin and
    are then divided by the absolute x / y distance between landmark 0 and
    landmark 9. The z column is passed through unchanged.

    Args:
        landmarks: Sequence of objects exposing ``x``, ``y`` and ``z``
            attributes; entries 0 and 9 must exist.

    Returns:
        The flattened coordinate array (63 values for 21 landmarks), or
        ``np.zeros(63)`` when landmarks 0 and 9 share an x or a y coordinate,
        in which case the scaling would divide by zero.
    """
    points = np.array([(p.x, p.y, p.z) for p in landmarks])

    # Read the reference values as scalars up front: the array is rewritten
    # in place below, so views into it would change underneath us.
    origin_x, origin_y = points[0, 0], points[0, 1]
    span_x = abs(points[9, 0] - origin_x)
    span_y = abs(points[9, 1] - origin_y)

    # Degenerate hand extent on either axis -> neutral feature vector.
    if 0 in (span_x, span_y):
        return np.zeros(63)

    # Translate to the origin and scale, one axis at a time (x, then y).
    for axis, origin, span in ((0, origin_x, span_x), (1, origin_y, span_y)):
        points[:, axis] = (points[:, axis] - origin) / span

    return points.flatten()

# Main capture loop: grab a frame, detect hand landmarks, classify the
# gesture, smooth the label over the recent-predictions buffer and display it.
# The loop is wrapped in try/finally so the camera and the preview window are
# released even if a frame raises (e.g. inside model.predict) or the run is
# interrupted; otherwise the webcam would stay locked by this process.
try:
    while True:
        ret, frame = cap.read()
        if not ret:
            print("Error: Can't receive frame. Exiting ...")
            break

        frame = cv2.flip(frame, 1)  # Mirror effect for better usability
        # OpenCV delivers BGR frames; convert to RGB before passing to MediaPipe.
        rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        results = hands.process(rgb_frame)

        if results.multi_hand_landmarks:
            for hand_landmarks in results.multi_hand_landmarks:
                mp_draw.draw_landmarks(frame, hand_landmarks, mp_hands.HAND_CONNECTIONS)

                # Feature extraction
                input_data = preprocess_landmarks(hand_landmarks.landmark)
                # Defensive guard: skip classification if no features came back.
                if input_data is not None:
                    # predict() takes a batch, so wrap the single sample in a
                    # list and take the first (only) label of the result.
                    prediction = model.predict([input_data])[0]
                    predictions_buffer.append(prediction)

                    # Majority vote over the buffered predictions to suppress
                    # frame-to-frame flicker in the displayed label.
                    most_common_prediction = Counter(predictions_buffer).most_common(1)[0][0]

                    # Display classification
                    cv2.putText(frame, f'Gesture: {most_common_prediction}', (10, 50),
                                cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)

        else:
            cv2.putText(frame, "No hand detected", (10, 50), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2)

        # Show video feed
        cv2.imshow('Hand Gesture Classification', frame)

        # Press 'q' to exit
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Always give the camera back and close the preview window.
    cap.release()
    cv2.destroyAllWindows()
