In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision
import os

In [2]:
class ASLModel(nn.Module):
    """Convolutional classifier producing 29 class logits per image.

    The shape comments below assume a 3 x 64 x 64 input, which is the size
    required for the flattened feature map to match the first fully
    connected layer (1024 * 4 * 4 inputs).
    """

    def __init__(self):
        super().__init__()

        # The position of every module in this list is significant: the
        # parameters are stored under 'network.<index>.*', so reordering,
        # inserting or removing a layer would change the state_dict keys.
        layers = [
            # --- feature extractor -------------------------------------
            nn.Conv2d(3, 64, kernel_size=3, padding=1),      # 64 x 64 x 64
            nn.ReLU(),

            nn.Conv2d(64, 128, kernel_size=3, padding=1),    # 128 x 64 x 64
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),           # 128 x 32 x 32

            # Strided 2x2 convolution halves the resolution instead of pooling.
            nn.Conv2d(128, 256, kernel_size=2, stride=2),    # 256 x 16 x 16
            nn.ReLU(),
            nn.Dropout(p=0.25),

            # 1x1 channel reduction; note that no activation follows it.
            nn.Conv2d(256, 128, kernel_size=1),              # 128 x 16 x 16

            nn.Conv2d(128, 256, kernel_size=3, padding=1),   # 256 x 16 x 16
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),           # 256 x 8 x 8

            nn.Conv2d(256, 512, kernel_size=3, padding=1),   # 512 x 8 x 8
            nn.ReLU(),

            nn.Conv2d(512, 1024, kernel_size=3, padding=1),  # 1024 x 8 x 8
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),           # 1024 x 4 x 4

            # --- classifier head ---------------------------------------
            nn.Flatten(),                                    # 1024*4*4 = 16384
            nn.Linear(1024 * 4 * 4, 1024),
            nn.ReLU(),
            nn.Linear(1024, 512),
            nn.ReLU(),
            nn.Linear(512, 29),                              # one logit per class
        ]
        self.network = nn.Sequential(*layers)

    def forward(self, xb):
        """Run the batch ``xb`` through the network and return the raw logits."""
        logits = self.network(xb)
        return logits

In [3]:
# Path to the trained state_dict checkpoint. This is a placeholder: replace it
# with the location of the real weights file before running the cell.
MODEL_PATH = r"C:\Give directory here"

model = ASLModel()

# The notebook only performs inference, so switch to evaluation mode. Without
# this the Dropout layer stays active and predictions become random/noisy.
model.eval()

# NOTE(security): torch.load unpickles the file, which can execute arbitrary
# code. Only load checkpoints that come from a trusted source.
# map_location remaps the stored tensors onto the CPU so that a checkpoint
# saved on a GPU machine can be loaded here.
model.load_state_dict(torch.load(MODEL_PATH, map_location=torch.device('cpu')))

<All keys matched successfully>

In [4]:
import numpy as np
import cv2
#import tensorflow as tf
#import keras

# Maps the class index predicted by the model to its label: indices 0-25 are
# the letters A-Z, followed by the three control classes.
word_dict = dict(enumerate(list("ABCDEFGHIJKLMNOPQRSTUVWXYZ") + ['del', 'nothing', 'space']))


In [5]:
def predict(input, model):
    """Return the predicted class index for the first sample of a batch.

    The forward pass is executed in evaluation mode (so layers such as
    Dropout are disabled and the result is deterministic) and without
    gradient tracking. The model's previous train/eval mode is restored
    before returning, so calling this function does not change its state.

    Args:
        input: Batch tensor accepted by ``model``; the first dimension is
            the batch dimension.
        model: An ``nn.Module`` returning per-class scores of shape
            (batch, num_classes).

    Returns:
        A 0-dim tensor holding the index of the highest-scoring class for
        the first sample in the batch (use ``.item()`` to get a Python int).
    """
    was_training = model.training
    model.eval()
    try:
        # No gradients are needed for inference; skipping them saves memory
        # and time on every call.
        with torch.no_grad():
            predictions = model(input)
    finally:
        # Put the model back into whatever mode the caller had it in.
        model.train(was_training)

    _, preds = torch.max(predictions, dim=1)
    return preds[0]

In [None]:
# Live sign recognition: grab webcam frames, classify the region of interest
# (ROI) with the model and overlay the predicted label. Press 'q' to quit.

# ROI in pixel coordinates of the captured frame.
# (ROI_left, ROI_top) and (ROI_right, ROI_bottom) are the two opposite corners
# passed to cv2.rectangle. Note that ROI_right < ROI_left, i.e. the names are
# mirrored relative to the image x-axis; they are used consistently below, so
# the ROI covers x in [150, 350) and y in [100, 300).
ROI_top = 100
ROI_bottom = 300
ROI_right = 150
ROI_left = 350

# Per-channel normalisation constants on the 0..255 pixel scale:
# (pixel - 127.5) / 127.5 maps the range 0..255 onto -1..1.
# They never change, so they are built once instead of on every frame.
MEAN = 255 * torch.tensor([0.5, 0.5, 0.5])
STD = 255 * torch.tensor([0.5, 0.5, 0.5])

cap = cv2.VideoCapture(0)
#cap.set(3, 1280)
#cap.set(4, 720)
num_frames = 0

try:
    if not cap.isOpened():
        raise RuntimeError("Could not open the webcam (device index 0)")

    while True:
        # Capture frame-by-frame
        ret, frame = cap.read()
        if not ret or frame is None:
            # Camera disconnected or stream ended: stop instead of crashing
            # on a None frame.
            print("Failed to read a frame from the camera; stopping.")
            break

        #flipping the frame
        #frame = cv2.flip(frame, 1)

        # Crop the ROI from the untouched frame so that the rectangle drawn
        # on the display copy never ends up in the model input.
        crop_frame = frame[ROI_top:ROI_bottom, ROI_right:ROI_left]
        crop_frame = cv2.resize(crop_frame, (64, 64))

        # All drawing happens on a copy that is only used for display.
        frame_copy = frame.copy()
        cv2.rectangle(frame_copy, (ROI_left, ROI_top), (ROI_right, ROI_bottom), (255,0,0), 2)

        # NOTE(review): OpenCV delivers frames in BGR channel order. If the
        # model was trained on RGB images the crop needs a BGR->RGB
        # conversion first -- confirm against the training pipeline.

        # numpy (H, W, C) uint8 -> float32 tensor (C, H, W), then normalise.
        x = torch.from_numpy(crop_frame)
        x = x.type(torch.float32)
        x = x.permute(-1, 0, 1)
        x = (x - MEAN[:, None, None]) / STD[:, None, None]

        # first dimension of tensor = batch size, second = colour,
        # third = height, fourth = width
        pred = predict(x.view(1, 3, 64, 64), model)

        cv2.putText(frame_copy, word_dict[pred.item()], (170, 45), cv2.FONT_HERSHEY_SIMPLEX, 1, (0,0,255), 2)

        # Display the frame with the ROI, the hint text and the prediction
        cv2.putText(frame_copy, "Place handsign in region of interest...", (10, 20), cv2.FONT_ITALIC, 0.5, (51,255,51), 1)
        cv2.imshow("Sign Recognition", frame_copy)

        # incrementing the number of frames for tracking
        num_frames += 1

        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Always release the camera and close the window, even if the loop
    # raised, so the device is not left locked by the kernel.
    cap.release()
    cv2.destroyAllWindows()
