In [None]:
import cv2
import numpy as np
import joblib
import mediapipe as mp
from collections import deque

# -----------------------
# CONFIG
# -----------------------
# Files handed to joblib.load below. Both paths are relative, so they resolve
# against the current working directory.
MODEL_PATH = "model1_best_svm_mediapipe_keypoints.joblib"
LE_PATH = "model1_label_encoder.joblib"

# Device index handed to cv2.VideoCapture.
CAM_INDEX = 0  # try 1 if you have multiple cameras
# Passed to MediaPipe Hands as max_num_hands. The webcam loop only classifies
# the first entry of multi_hand_landmarks, so raising this adds no predictions.
MAX_NUM_HANDS = 1

# simple prediction smoothing (helps reduce flicker)
# Number of most recent per-frame labels kept for the majority vote.
SMOOTHING_WINDOW = 7

# -----------------------
# LOAD MODEL + ENCODER
# -----------------------
# NOTE(review): joblib.load unpickles the file, which can execute arbitrary
# code - only load model/encoder files that come from a trusted source.
model = joblib.load(MODEL_PATH)
le = joblib.load(LE_PATH)

# Sanity output: the loaded estimator's type and the encoder's class names.
print("Loaded model:", type(model))
print("Loaded label encoder classes:", list(le.classes_))

# -----------------------
# MEDIAPIPE INIT
# -----------------------
# Short aliases for the MediaPipe hands solution and its drawing helpers.
mp_hands = mp.solutions.hands
mp_draw = mp.solutions.drawing_utils

# A single Hands instance is created up front and reused for every frame in
# the webcam loop; it is closed after the loop exits.
hands = mp_hands.Hands(
    static_image_mode=False,  # video-stream mode per MediaPipe docs (tracks between frames)
    max_num_hands=MAX_NUM_HANDS,
    model_complexity=1,
    min_detection_confidence=0.6,
    min_tracking_confidence=0.6
)

# -----------------------
# HELPERS
# -----------------------
def landmarks_to_63(hand_landmarks):
    """
    Flatten a MediaPipe hand-landmark set into one float32 feature vector.

    Every entry of ``hand_landmarks.landmark`` contributes its x, y and z
    attributes, in iteration order, so the result is laid out as
    [x1, y1, z1, x2, y2, z2, ...]. For the 21 landmarks MediaPipe reports per
    hand this yields a vector of shape (63,).
    """
    coords = [
        value
        for point in hand_landmarks.landmark
        for value in (point.x, point.y, point.z)
    ]
    return np.array(coords, dtype=np.float32)

def predict_letter(x63: np.ndarray):
    """
    Classify a single feature vector with the loaded model.

    Returns (label_str, score_float). The label is the model's predicted class
    index mapped back through the label encoder. The score is the largest value
    of the model's decision_function output - a raw margin where larger means
    more confident, not a probability - or None when the model exposes no
    decision_function.
    """
    # The model is called with a 2-D array holding exactly one row.
    sample = x63.reshape(1, -1)
    class_idx = model.predict(sample)[0]
    label = le.inverse_transform([class_idx])[0]

    if not hasattr(model, "decision_function"):
        return label, None

    # Flatten first so (n_classes,) and (1, n_classes) outputs are treated alike.
    flat_margins = np.asarray(model.decision_function(sample)).reshape(-1)
    return label, float(flat_margins.max())

# For smoothing: keep last N labels and majority vote
# (a deque with maxlen discards its oldest entry automatically once full)
recent = deque(maxlen=SMOOTHING_WINDOW)

def majority_vote(labels):
    """
    Return the most frequent entry of ``labels``, or None when it is empty.

    np.unique yields the distinct values in sorted order and argmax picks the
    first maximum, so a tie goes to the value that sorts first.
    """
    if labels:
        distinct, freq = np.unique(labels, return_counts=True)
        return distinct[freq.argmax()]
    return None

# -----------------------
# WEBCAM LOOP
# -----------------------
# Capture frames from the webcam, classify the first detected hand in each
# frame, and show the smoothed prediction in an OpenCV window until 'q' is
# pressed or a frame cannot be read.
cap = cv2.VideoCapture(CAM_INDEX)
try:
    if not cap.isOpened():
        raise RuntimeError("Could not open webcam. Try CAM_INDEX = 1 or check camera permissions.")

    print("Press 'q' to quit.")

    while True:
        ok, frame = cap.read()
        if not ok:
            # No frame delivered (camera unplugged / stream ended): stop.
            break

        frame = cv2.flip(frame, 1)  # mirror view
        # MediaPipe is fed RGB while OpenCV delivers BGR frames.
        rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

        result = hands.process(rgb)

        display_text = "No hand detected"
        score_text = ""

        if result.multi_hand_landmarks:
            # Only the first detected hand is classified.
            hand_lms = result.multi_hand_landmarks[0]

            # Draw landmarks
            mp_draw.draw_landmarks(frame, hand_lms, mp_hands.HAND_CONNECTIONS)

            # Extract features
            x63 = landmarks_to_63(hand_lms)

            # Predict
            label, score = predict_letter(x63)

            # Smooth over the last few frames; fall back to the raw label if
            # the vote yields nothing usable.
            recent.append(label)
            smooth_label = majority_vote(list(recent)) or label

            display_text = f"Pred: {smooth_label}"
            if score is not None:
                score_text = f"Margin: {score:.3f}"

        # UI overlay: filled black box with the prediction and margin on top.
        cv2.rectangle(frame, (10, 10), (320, 90), (0, 0, 0), thickness=-1)
        cv2.putText(frame, display_text, (20, 45),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.9, (255, 255, 255), 2)
        if score_text:
            cv2.putText(frame, score_text, (20, 75),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.6, (200, 200, 200), 1)

        cv2.imshow("ASL SVM - Real-time", frame)

        # waitKey also services the window's event loop; mask to the low byte
        # before comparing against the key code.
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Always free the camera, the MediaPipe graph and the window, even when the
    # loop is interrupted or a frame raises; otherwise the device stays locked
    # until the kernel/process is restarted.
    cap.release()
    hands.close()
    cv2.destroyAllWindows()


Loaded model: <class 'sklearn.pipeline.Pipeline'>
Loaded label encoder classes: ['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', 'del', 'space']
Press 'q' to quit.
