In [8]:
import cv2
import numpy as np
import torch
import torchvision.transforms as transforms
from PIL import Image
import sys

In [9]:
class CNN(torch.nn.Module):
    """Four-stage convolutional classifier that emits 5 raw class scores.

    Every convolution is followed by ReLU and a 2x2 max-pool; dropout
    (p=0.2) is applied after the second and the fourth stage. The pooled
    feature map is flattened and passed through four linear layers. The
    last linear layer has no activation, so ``forward`` returns
    unnormalised scores (logits), not probabilities.

    The first linear layer expects ``256 * 5 * 5`` features, which is what
    a ``3 x 128 x 128`` input produces
    (128 -> 126 -> 63 -> 61 -> 30 -> 28 -> 14 -> 11 -> 5).
    """

    def __init__(self):
        super().__init__()

        # Convolutional feature extractor (attribute names are part of the
        # checkpoint format and must not be renamed).
        self.conv1 = torch.nn.Conv2d(in_channels=3, out_channels=16, kernel_size=3)
        self.conv2 = torch.nn.Conv2d(in_channels=16, out_channels=32, kernel_size=3)
        self.conv3 = torch.nn.Conv2d(in_channels=32, out_channels=128, kernel_size=3)
        self.conv4 = torch.nn.Conv2d(in_channels=128, out_channels=256, kernel_size=4)

        # Dropout shared by both regularisation points in forward().
        self.dropout = torch.nn.Dropout(0.2)

        # 2x2 max-pooling with stride 2, shared by all four conv stages.
        self.pool = torch.nn.MaxPool2d(2, 2)

        # Fully connected classifier head.
        self.fc1 = torch.nn.Linear(256 * 5 * 5, 512)
        self.fc2 = torch.nn.Linear(512, 64)
        self.fc3 = torch.nn.Linear(64, 32)
        self.fc4 = torch.nn.Linear(32, 5)

    def forward(self, x):
        """Run the network on a batch of images.

        Args:
            x: Tensor of shape ``(N, 3, 128, 128)``. A single unbatched
                ``(3, 128, 128)`` image is also accepted and treated as a
                batch of one.

        Returns:
            Tensor of shape ``(N, 5)`` holding one raw score per class.

        Raises:
            RuntimeError: If the spatial size of ``x`` does not yield the
                ``256 * 5 * 5`` features expected by the first linear layer.
        """
        relu = torch.nn.functional.relu

        # Promote an unbatched image to a batch of one so the per-sample
        # flatten below always has a batch dimension to keep.
        if x.dim() == 3:
            x = x.unsqueeze(0)

        x = self.pool(relu(self.conv1(x)))
        x = self.pool(relu(self.conv2(x)))
        x = self.dropout(x)
        x = self.pool(relu(self.conv3(x)))
        x = self.pool(relu(self.conv4(x)))
        x = self.dropout(x)

        # Flatten per sample. Unlike view(-1, 256 * 5 * 5), this never folds
        # surplus spatial features into the batch dimension: a wrongly sized
        # input fails loudly in fc1 instead of returning extra rows.
        x = torch.flatten(x, start_dim=1)

        x = relu(self.fc1(x))
        x = relu(self.fc2(x))
        x = relu(self.fc3(x))
        return self.fc4(x)
        

In [10]:
def nothing(x):
    """No-op callback: accept one argument, ignore it, and return None."""
    return None

# Preprocessing pipeline applied to a PIL image before inference:
# resize, center-crop to 128x128, then convert to a tensor.
transform = transforms.Compose(
    [
        transforms.Resize(128),
        transforms.CenterCrop(128),
        transforms.ToTensor(),
    ]
)

def image_loader(image_name):
    """Load an image file and turn it into a single-item model input batch.

    Args:
        image_name: Path (or file object) accepted by ``PIL.Image.open``.

    Returns:
        Float tensor produced by the module-level ``transform`` pipeline
        with a leading batch dimension of size 1 added.
    """
    # The context manager closes the underlying file as soon as the pixel
    # data has been converted, instead of leaving it to garbage collection.
    with Image.open(image_name) as source:
        rgb_image = source.convert('RGB')
    tensor = transform(rgb_image).float()
    return tensor.unsqueeze(0)


# Build the network and restore the trained weights.
model = CNN()
# NOTE(security): torch.load unpickles the file, so only load checkpoints
# from a trusted source. map_location="cpu" lets a checkpoint that was saved
# on a GPU be restored on a CPU-only machine.
model.load_state_dict(
    torch.load("Hand_Finger_Classification_1.pth", map_location="cpu")
)
model.eval()

cap = cv2.VideoCapture(0)

try:
    if not cap.isOpened():
        raise RuntimeError("Could not open webcam (device index 0)")

    while True:
        ok, frame = cap.read()
        if not ok or frame is None:
            # No frame delivered (camera unplugged / stream ended): stop
            # cleanly instead of crashing inside cv2.flip.
            break

        # Mirror the frame horizontally before display and cropping.
        frame = cv2.flip(frame, 1)

        # Region of interest: the top-right square whose side is half the
        # frame width (numpy slicing clips it if the frame is shorter).
        x1 = int(0.5 * frame.shape[1])
        y1 = 0
        x2 = frame.shape[1]
        y2 = int(0.5 * frame.shape[1])

        # Copy the ROI *before* drawing the guide rectangle: the slice is a
        # view into `frame`, so drawing first would paint the 10 px border
        # into the pixels that are fed to the model.
        roi = frame[y1:y2, x1:x2].copy()
        cv2.rectangle(frame, (x1 - 1, y1 - 1), (x2 + 1, y2 + 1), (255, 255, 255), 10)

        # Segment the ROI: grayscale -> blur -> inverted binary threshold.
        roi = cv2.cvtColor(roi, cv2.COLOR_BGR2GRAY)
        roi = cv2.GaussianBlur(roi, (5, 5), 0)
        _, test_image = cv2.threshold(roi, 127, 255, cv2.THRESH_BINARY_INV)

        # Same preprocessing as image_loader, starting from an in-memory array.
        image = Image.fromarray(test_image).convert('RGB')
        image = transform(image).float().unsqueeze(0)

        with torch.no_grad():
            # Call the module (not .forward) so registered hooks still run.
            output = model(image)

        # The network returns raw scores; argmax over them gives the class
        # directly, so no exp() is needed. Class indices are 0-based, the
        # displayed label is 1-based.
        predicted_label = int(output.argmax(dim=1).item()) + 1
        print(predicted_label, end='\r')

        cv2.putText(frame, str(predicted_label), (50, 100), cv2.FONT_HERSHEY_SIMPLEX,
                    3, (255, 255, 255), 5, cv2.LINE_AA)

        cv2.imshow('frame', frame)

        # Esc (key code 27) ends the loop.
        if cv2.waitKey(5) & 0xFF == 27:
            break
finally:
    # Always free the camera and close the windows, even when the loop exits
    # through an exception; otherwise the device stays locked by the kernel.
    cap.release()
    cv2.destroyAllWindows()



1

In [None]:
# def nothing(x):
#     pass

# transform = transforms.Compose([
#                                 transforms.Resize(128),
#                                 transforms.CenterCrop(128),
#                                 transforms.ToTensor(),
#                                ])

# def image_loader(image_name):
#     image = Image.open(image_name)
#     image = image.convert('RGB')
#     image = transform(image).float()
#     image = image.unsqueeze(0) 
#     return image  


# model = CNN()
# model.load_state_dict(torch.load("Hand_Finger_Classification_1.pth"))
# model.eval()

# # print(model)

# cap = cv2.VideoCapture(0)


# # cv2.namedWindow('image')
# # cv2.createTrackbar('R','image',0,255,nothing)
# # cv2.createTrackbar('G','image',0,255,nothing)



# while(1):

#     _, frame = cap.read()
#     frame = cv2.flip(frame, 1)
    
#     x1 = int(0.5*frame.shape[1])
#     y1 = 0
#     x2 = frame.shape[1]
#     y2 = int(0.5*frame.shape[1])
#     cv2.rectangle(frame, (x1-1, y1-1), (x2+1, y2+1), (255,255,255) ,10)
#     roi = frame[y1:y2, x1:x2]
    
#     roi = cv2.cvtColor(roi, cv2.COLOR_BGR2GRAY)
# #     _, test_image = cv2.threshold(roi, 140, 255, cv2.THRESH_BINARY_INV)
    
#     _, test_image = cv2.threshold(roi, 140, 255, cv2.THRESH_BINARY_INV)
    
#     cv2.imshow("test", test_image)  # Segmented image
    
#     image = Image.fromarray(test_image)
#     image = image.convert('RGB')
#     image = transform(image).float()
#     image = image.unsqueeze(0) 
    
#     with torch.no_grad():
#         output = model.forward(image)
#     ps = torch.exp(output)
#     probability = ps.data.numpy().squeeze()
#     print(probability.argmax() + 1, end='\r')
    
#     image = cv2.putText(frame, str(probability.argmax() + 1 ), (50, 50) , cv2.FONT_HERSHEY_SIMPLEX ,  
#                    1, (255,255,255), 5, cv2.LINE_AA) 
    
    
# #     r = cv2.getTrackbarPos('R','image')
# #     g = cv2.getTrackbarPos('G','image')

    
#     cv2.imshow('frame',frame)

#     k = cv2.waitKey(5) & 0xFF
#     if k == 27:
#         break

# # When everything done, release the capture
# cap.release()
# cv2.destroyAllWindows()

