In [6]:
import os
from PIL import Image
import torch
from torchvision import transforms, models

# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

# Update this list to match the labels of the checkpoint being loaded.
# NOTE(review): presumably the order must equal the class-index order used
# during training -- confirm against the training code.
class_names = [
    'Hi',
    'Thank You',
    'peace',
    'ok',
    'home',
    'indian',
    'you',
    'women',
]
num_classes = len(class_names)  # sizes the classifier's output layer

# Build a ResNet-18 without pretrained weights and replace its final
# fully-connected layer so it emits one logit per entry in `class_names`.
model = models.resnet18(pretrained=False)
model.fc = torch.nn.Linear(
    in_features=model.fc.in_features,
    out_features=num_classes,
)

# Restore the trained weights, remapping tensors onto the selected device.
# NOTE(review): torch.load unpickles the file, so only load checkpoints
# that come from a trusted source.
model.load_state_dict(
    torch.load("ep10.pth", map_location=device)
)

# Move the network to the target device and switch it to inference mode.
model = model.to(device).eval()

# Preprocessing applied to every PIL image before it is fed to the model:
#   1. resize to 224x224,
#   2. force three RGB channels,
#   3. convert to a float tensor.
# NOTE(review): no mean/std normalization is applied here -- confirm this
# matches the preprocessing used when the checkpoint was trained.
transform = transforms.Compose([
    transforms.Resize(size=(224, 224)),
    transforms.Lambda(lambda pil_img: pil_img.convert("RGB")),
    transforms.ToTensor(),
])


In [7]:
import cv2
import torch
from PIL import Image
import numpy as np
from torchvision import transforms
import mediapipe as mp

# --- Real-time sign detection from the webcam ---
# Uses `model`, `transform`, `device` and `class_names` created in the
# model-setup cell; run that cell first. Press 'q' in the video window to stop.
model.eval()  # make sure the network is in inference mode

mp_hands = mp.solutions.hands
mp_draw = mp.solutions.drawing_utils
hands = mp_hands.Hands(static_image_mode=False, max_num_hands=1, min_detection_confidence=0.5)

padding = 20  # extra pixels kept around the landmark bounding box

cap = cv2.VideoCapture(0)
if not cap.isOpened():
    # The loop below exits on the first failed read; say why instead of
    # finishing silently.
    print("Could not open webcam (device 0).")

try:
    while True:
        ret, frame = cap.read()
        if not ret:
            break

        # Mirror the frame horizontally so the preview behaves like a mirror.
        frame = cv2.flip(frame, 1)
        h, w, _ = frame.shape
        rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)  # MediaPipe is fed RGB
        results = hands.process(rgb)

        if results.multi_hand_landmarks:
            for handLms in results.multi_hand_landmarks:
                # Landmark coordinates are scaled by the frame size to get
                # the pixel bounding box of the hand.
                x_list = [lm.x for lm in handLms.landmark]
                y_list = [lm.y for lm in handLms.landmark]
                xmin, xmax = int(min(x_list) * w), int(max(x_list) * w)
                ymin, ymax = int(min(y_list) * h), int(max(y_list) * h)

                # Pad the box and clamp every edge to [0, w] / [0, h].
                # The lower clamp on x2/y2 matters: a negative upper bound
                # would wrap around in the slice and silently select the
                # wrong region instead of an empty one.
                x1, y1 = max(xmin - padding, 0), max(ymin - padding, 0)
                x2, y2 = max(min(xmax + padding, w), 0), max(min(ymax + padding, h), 0)

                # Crop from the already-converted RGB frame: this avoids a
                # second colour conversion and uses pixels that have no
                # overlay drawn on them.
                hand_roi = np.ascontiguousarray(rgb[y1:y2, x1:x2])
                if hand_roi.size == 0:
                    continue

                img = Image.fromarray(hand_roi)
                img = transform(img).unsqueeze(0).to(device)  # add batch dim

                with torch.no_grad():
                    output = model(img)
                    _, pred = torch.max(output, 1)
                    label = class_names[pred.item()]

                # Draw result; keep the label baseline inside the frame when
                # the box touches the top edge.
                cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)
                cv2.putText(frame, label, (x1, max(y1 - 10, 30)),
                            cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)

                mp_draw.draw_landmarks(frame, handLms, mp_hands.HAND_CONNECTIONS)

        cv2.imshow("Sign Detection", frame)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Always free the camera, the MediaPipe graph and the window, even when
    # the loop raises or the kernel is interrupted; otherwise the webcam
    # stays locked until the kernel is restarted.
    cap.release()
    hands.close()
    cv2.destroyAllWindows()
