In [None]:
import cv2
import mediapipe as mp
import numpy as np
import tensorflow as tf
import pickle
import os
from sklearn.preprocessing import MinMaxScaler

# Load the trained MLP classifier and everything needed to decode its output.
# NOTE: pickle.load can execute arbitrary code while deserializing; only load
# label_encoder.pkl / scaler.pkl files that were produced by your own training run.
model = tf.keras.models.load_model("asl_mlp_model.h5")
# Re-compiling silences the "compile_metrics" warning emitted for loaded models;
# the optimizer and loss are never used during inference.
model.compile(optimizer="adam", loss="categorical_crossentropy", metrics=["accuracy"])

# Label encoder: maps the network's output index back to a letter.
with open("label_encoder.pkl", "rb") as f:
    label_encoder = pickle.load(f)

# Scaler fitted during training; fall back gracefully when the file is missing.
scaler_path = "scaler.pkl"
if os.path.exists(scaler_path):
    with open(scaler_path, "rb") as f:
        scaler = pickle.load(f)
    print("Scaler loaded successfully.")
else:
    print("⚠ Warning: scaler.pkl not found. Creating a new MinMaxScaler (may affect accuracy).")
    # An unfitted MinMaxScaler raises NotFittedError on transform(), which would
    # crash on the first detected hand. MediaPipe reports x/y normalized to the
    # image size (nominally [0, 1]), so fit the fallback on exactly those bounds:
    # it then acts as an identity transform instead of failing.
    n_features = model.input_shape[-1]
    scaler = MinMaxScaler().fit(
        np.vstack([np.zeros(n_features), np.ones(n_features)])
    )

# MediaPipe entry points: the drawing helpers and the hands solution module.
mp_draw = mp.solutions.drawing_utils
mp_hands = mp.solutions.hands

# Hand detector configured for a continuous stream, at most one hand per frame.
hands = mp_hands.Hands(
    static_image_mode=False,
    max_num_hands=1,
    min_detection_confidence=0.5,
)

# Open the webcam (capture device 0).
cap = cv2.VideoCapture(0)

# Set to False to silence the per-frame diagnostic prints.
DEBUG = True


def _landmark_features(hand_landmarks):
    """Flatten one detected hand into a (1, n) row of x, y coordinate pairs."""
    coords = [value for lm in hand_landmarks.landmark for value in (lm.x, lm.y)]
    return np.array(coords).reshape(1, -1)


print("Starting real-time ASL detection...")

if not cap.isOpened():
    # Without this the loop below is skipped silently and the run looks like a no-op.
    print("⚠ Warning: could not open the webcam (device 0).")

if DEBUG:
    # The label set never changes, so report it once instead of on every frame.
    print("Available Labels:", label_encoder.classes_)

# try/finally guarantees the camera, the MediaPipe graph and the preview window
# are released even if a frame raises or the notebook cell is interrupted.
try:
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break

        # MediaPipe expects RGB input; OpenCV delivers BGR frames.
        frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        result = hands.process(frame_rgb)

        # multi_hand_landmarks is falsy when no hand was detected in this frame.
        for hand_landmarks in result.multi_hand_landmarks or ():
            mp_draw.draw_landmarks(frame, hand_landmarks, mp_hands.HAND_CONNECTIONS)

            # Scale the raw coordinates with the scaler loaded at startup.
            landmarks = scaler.transform(_landmark_features(hand_landmarks))

            # verbose=0 suppresses the progress bar Keras would otherwise print
            # for every single frame.
            prediction = model.predict(landmarks, verbose=0)
            predicted_index = np.argmax(prediction)

            if DEBUG:
                print("Predicted Raw Output:", prediction)
                print("Argmax Index:", predicted_index)

            # Guard against a model whose output layer is wider than the label set.
            if predicted_index < len(label_encoder.classes_):
                predicted_label = label_encoder.inverse_transform([predicted_index])[0]
            else:
                predicted_label = "?"

            # Overlay the detected letter on the preview frame.
            cv2.putText(frame, f"Detected: {predicted_label}", (50, 50),
                        cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2, cv2.LINE_AA)

        cv2.imshow("ASL Recognition", frame)

        # Press 'q' in the preview window to stop.
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    cap.release()
    hands.close()
    cv2.destroyAllWindows()

print("Real-time ASL detection stopped.")




Scaler loaded successfully.
Starting real-time ASL detection...
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 43ms/step
Predicted Raw Output: [[9.5281763e-31 1.5834641e-02 4.7734257e-09 1.7815115e-14 1.4636963e-07
  6.0321753e-08 2.0616783e-12 7.6503633e-21 9.2186746e-20 2.5132695e-32
  4.0574753e-07 2.8506839e-10 5.6540139e-29 1.3681591e-29 4.3124181e-22
  1.6588105e-21 8.8752778e-23 1.3960424e-04 1.9228607e-20 4.7286581e-25
  5.5903415e-03 1.3839708e-01 8.4003770e-01 2.3883754e-15 2.1989685e-35
  1.6764531e-11]]
Argmax Index: 22
Available Labels: ['A' 'B' 'C' 'D' 'E' 'F' 'G' 'H' 'I' 'J' 'K' 'L' 'M' 'N' 'O' 'P' 'Q' 'R'
 'S' 'T' 'U' 'V' 'W' 'X' 'Y' 'Z']
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 22ms/step
Predicted Raw Output: [[2.7165251e-25 7.8065852e-03 1.3706471e-06 2.2146420e-08 1.0396844e-05
  2.0567621e-09 6.4719374e-10 2.0065640e-19 2.3918014e-15 1.7735290e-29
  6.7227980e-04 2.1374616e-04 7.9090683e-26 1.5064239e-25 1.1382502e-17
  5.4801688e