# In [12]:
import cv2
import mediapipe as mp
import torch
import torch.nn as nn
import numpy as np

# ------------------------------
# Model
# ------------------------------
class HandSignModel(nn.Module):
    """Small fully connected classifier that turns a feature vector into logits.

    The network is two ReLU-activated hidden layers (128 and 64 units)
    followed by a linear output layer. The output is raw, un-normalised
    class scores; no softmax is applied.

    Args:
        input_size: Number of features per input sample (default 63).
        num_classes: Number of output classes (default 26).
    """

    def __init__(self, input_size=63, num_classes=26):
        super().__init__()
        first_hidden, second_hidden = 128, 64
        # Attribute names and creation order are kept stable: the names are
        # the keys of the saved state dict, and the order determines which
        # random values each layer receives at initialisation.
        self.fc1 = nn.Linear(input_size, first_hidden)
        self.fc2 = nn.Linear(first_hidden, second_hidden)
        self.fc3 = nn.Linear(second_hidden, num_classes)
        self.relu = nn.ReLU()

    def forward(self, x):
        """Return the class logits for the input batch ``x``."""
        hidden = self.fc1(x)
        hidden = self.relu(hidden)
        hidden = self.fc2(hidden)
        hidden = self.relu(hidden)
        logits = self.fc3(hidden)
        return logits

# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

# Build the classifier with its default sizes and restore the trained weights,
# remapping the stored tensors onto the selected device.
# NOTE(review): torch.load unpickles the checkpoint file, so only load
# checkpoints from a trusted source (consider weights_only=True on PyTorch
# versions that support it).
model = HandSignModel().to(device)
model.load_state_dict(
    torch.load("models/best_model.pt", map_location=device)
)

# Put the module in evaluation mode; this script only runs inference.
model.eval()

# ------------------------------
# MediaPipe
# ------------------------------
# Short aliases for the MediaPipe hands solution and its drawing helpers.
mp_hands = mp.solutions.hands
mp_draw = mp.solutions.drawing_utils

# Hand detector used by the real-time loop: at most one hand per frame, with a
# minimum detection confidence of 0.7.
hands = mp_hands.Hands(
    max_num_hands=1,
    min_detection_confidence=0.7,
)

def extract_landmarks(hand_landmarks):
    """Flatten a hand's landmarks into a one-dimensional NumPy array.

    Args:
        hand_landmarks: Object with an iterable ``landmark`` attribute whose
            items each expose ``x``, ``y`` and ``z`` attributes.

    Returns:
        A 1-D ``numpy.ndarray`` laid out as ``[x0, y0, z0, x1, y1, z1, ...]``
        in landmark order (empty when there are no landmarks).
    """
    flat_values = [
        value
        for point in hand_landmarks.landmark
        for value in (point.x, point.y, point.z)
    ]
    return np.array(flat_values)

# ------------------------------
# Real-time Loop
# ------------------------------
# Key codes as returned by ``cv2.waitKey(...) & 0xFF``.
# Backspace and Enter are commonly reported with different codes depending on
# the platform / GUI backend (8 or 127, 13 or 10), so both variants are
# accepted.
KEY_SPACE = ord(" ")
KEY_ESC = 27
BACKSPACE_KEYS = (8, 127)
ENTER_KEYS = (13, 10)

# Webcam capture (device index 0) and the state shown in the overlay:
#   typed_message -- letters confirmed so far with the space bar
#   current_signe -- most recent predicted letter; it keeps its last value
#                    while no hand is visible in the frame
cap = cv2.VideoCapture(0)
typed_message = ""
current_signe = ""

# Controls: Space appends the current letter, Backspace removes the last
# letter, Enter prints and clears the message, Esc quits.
try:
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            # No frame could be read (camera unplugged / stream ended).
            break

        # OpenCV delivers BGR frames; the hand detector is fed RGB.
        frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        results = hands.process(frame_rgb)

        if results.multi_hand_landmarks:
            for hand_landmarks in results.multi_hand_landmarks:
                # Draw the detected hand skeleton onto the displayed frame.
                mp_draw.draw_landmarks(
                    frame,
                    hand_landmarks,
                    mp_hands.HAND_CONNECTIONS,
                    mp_draw.DrawingSpec(color=(0, 0, 255), thickness=2, circle_radius=2),
                    mp_draw.DrawingSpec(color=(0, 0, 255), thickness=2),
                )

                # Flattened landmark coordinates -> batch of one sample.
                landmarks = extract_landmarks(hand_landmarks)
                landmarks_tensor = (
                    torch.tensor(landmarks, dtype=torch.float32)
                    .unsqueeze(0)
                    .to(device)
                )

                with torch.no_grad():
                    logits = model(landmarks_tensor)
                    pred_class = torch.argmax(logits, dim=1).item()

                # Class index 0 maps to "A", 1 to "B", and so on.
                current_signe = chr(ord("A") + pred_class)

        # Controls
        key = cv2.waitKey(1) & 0xFF
        if key == KEY_SPACE:
            typed_message += current_signe
        elif key in BACKSPACE_KEYS:
            typed_message = typed_message[:-1]
        elif key in ENTER_KEYS:
            print("Final Message:", typed_message)
            typed_message = ""
        elif key == KEY_ESC:
            break

        # Overlay
        cv2.putText(frame, f"Signe: {current_signe}", (30, 50),
                    cv2.FONT_HERSHEY_SIMPLEX, 1.2, (255, 0, 0), 3)
        cv2.putText(frame, f"Message: {typed_message}", (30, 100),
                    cv2.FONT_HERSHEY_SIMPLEX, 1.2, (0, 255, 0), 3)

        cv2.imshow("Hand Sign Recognition", frame)
finally:
    # Always free the camera and close the window, even when the loop is
    # interrupted by an exception such as KeyboardInterrupt.
    cap.release()
    cv2.destroyAllWindows()
