In [2]:
pip install torch torchvision opencv-python pillow

Collecting torchvision
  Using cached torchvision-0.22.1-cp312-cp312-win_amd64.whl.metadata (6.1 kB)
Using cached torchvision-0.22.1-cp312-cp312-win_amd64.whl (1.7 MB)
Installing collected packages: torchvision
Successfully installed torchvision-0.22.1
Note: you may need to restart the kernel to use updated packages.


In [4]:
import cv2
import torch
import torch.nn as nn
from torchvision import transforms, models
from PIL import Image
import numpy as np

#Class labels: the ten digits '0'-'9' followed by the 26 uppercase letters 'A'-'Z'
#(36 labels in total); the position in this list is the class index
classes = (
    [str(digit) for digit in range(10)]
    + [chr(code) for code in range(ord('A'), ord('Z') + 1)]
)
num_classes = len(classes)

#Load model and weights, saved from colab notebook
#weights=None builds the MobileNetV2 architecture without downloading pretrained
#weights; it replaces the deprecated pretrained=False flag
model = models.mobilenet_v2(weights=None)

#Replace the final classifier layer so the output size matches our label count
model.classifier[1] = nn.Linear(model.last_channel, num_classes)

#weights_only=True limits unpickling to tensors and plain containers, so loading
#the checkpoint cannot execute arbitrary pickled code; map_location keeps it on CPU
state_dict = torch.load('best_sign_model2.pth',
                        map_location=torch.device('cpu'),
                        weights_only=True)
model.load_state_dict(state_dict)

#Inference mode for layers such as dropout / batch norm
model.eval()

#Preprocessing applied to every PIL image before it is fed to the model:
#resize to 224x224, convert to a tensor, then normalize each channel
transform = transforms.Compose([
    transforms.Resize(size=(224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(
        mean=[0.485, 0.456, 0.406],  # Standard ImageNet normalization values
        std=[0.229, 0.224, 0.225],
    ),
])

#Starting webcam
cap = cv2.VideoCapture(0)

#Box for hand placement; it never changes, so it is defined once outside the loop
x, y, w, h = 100, 100, 300, 300

#try/finally guarantees the camera and the preview window are released even when
#the loop is interrupted (e.g. kernel interrupt) or a frame raises an exception
try:
    if cap.isOpened():
        print("Press 'q' to quit")
    else:
        #Without this message a missing/busy camera would exit silently
        print("Could not open webcam (device 0)")

    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            #No frame could be read; stop the loop
            break

        #Crop the region of interest where the hand should be placed
        roi = frame[y:y+h, x:x+w]

        #BGR to RGB
        img_rgb = cv2.cvtColor(roi, cv2.COLOR_BGR2RGB)
        img_pil = Image.fromarray(img_rgb)

        #Apply transformation and add a leading batch dimension
        input_tensor = transform(img_pil).unsqueeze(0)

        #Get prediction from model (no gradients needed for inference)
        with torch.no_grad():
            outputs = model(input_tensor)
            probs = torch.softmax(outputs, dim=1)
            confidence, predicted = torch.max(probs, 1)
            label = classes[predicted.item()]
            conf_percent = confidence.item() * 100

        #Draw box and prediction
        cv2.rectangle(frame, (x, y), (x+w, y+h), (0, 255, 0), 2)
        cv2.putText(frame, f'{label} ({conf_percent:.1f}%)', (x, y - 10),
                    cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)

        #Show the frame
        cv2.imshow('Sign Language Interpreter', frame)

        #Quit when 'q' is pressed by user
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    #Cleaning up...
    cap.release()
    cv2.destroyAllWindows()


Press 'q' to quit
