In [14]:
import torch
from PIL import Image
from torch import nn
from torch import optim
import torch.nn.functional as F
import torchvision.transforms.functional as transF
import numpy as np
from torch.utils.data import DataLoader, Dataset
from torchvision import datasets, transforms, models
from torchvision.datasets import ImageFolder
import math
import os
import cv2
import PIL 

In [16]:
# Pick the compute device once: CUDA when PyTorch reports it as available,
# otherwise the CPU. The chosen device is echoed so the cell output records it.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
print(device)

cpu


In [17]:
# Maps a class index (the position in the tuple below) to its gesture name.
# NOTE(review): the names are in alphabetical order, presumably mirroring the
# class indices assigned during training - confirm against the training code.
Labels = dict(enumerate((
    'ClickMode',
    'Continue',
    'Fan',
    'Light',
    'Off',
    'On',
    'One',
    'Stop',
    'Two',
)))

In [18]:
class SquarePad:
    """Callable transform that zero-pads an image until it is square.

    The shorter side is padded on both ends so the original content stays
    centred; when the missing amount is odd, the extra pixel goes to the
    right/bottom edge.
    """

    def __call__(self, image):
        # Assumes a PIL-style image whose ``size`` is a (width, height) pair.
        width, height = image.size
        side = max(image.size)

        left = (side - width) // 2
        top = (side - height) // 2
        # Whatever the floor division left over is added on the far edge.
        right = side - (width + left)
        bottom = side - (height + top)

        return transF.pad(image, (left, top, right, bottom), 0, 'constant')

In [19]:
# Inference-time preprocessing: resize to 240x240, convert to a tensor and
# normalise each channel with fixed statistics.
# NOTE(review): the mean/std values are presumably those of the training set -
# confirm against the training notebook.
test_transform = transforms.Compose([
    # SquarePad(),  (disabled)
    transforms.Resize((240, 240)),
    transforms.ToTensor(),
    transforms.Normalize(
        mean=[0.5345, 0.5550, 0.5419],
        std=[0.2360, 0.2502, 0.2615],
    ),
])

In [20]:
class BasicBlock(nn.Module):
    """Residual block: two 3x3 conv + batch-norm stages plus a shortcut.

    Args:
        inplanes: number of input channels.
        planes: number of output channels of both convolutions.
        stride: stride of the first convolution (the second always uses 1).
        downsample: optional module applied to the block input to produce the
            shortcut; when ``None`` the input itself is used.
    """

    expansion = 1

    def __init__(self, inplanes, planes, stride=1, downsample=None):
        super().__init__()
        # Options shared by both 3x3 convolutions.
        conv3x3_opts = dict(kernel_size=3, padding=1, bias=False)

        # Attribute names and creation order are kept as-is: they determine
        # the state_dict keys used when loading saved weights.
        self.conv1 = nn.Conv2d(inplanes, planes, stride=stride, **conv3x3_opts)
        self.bn1 = nn.BatchNorm2d(planes)
        self.relu = nn.ReLU(inplace=True)
        self.conv2 = nn.Conv2d(planes, planes, stride=1, **conv3x3_opts)
        self.bn2 = nn.BatchNorm2d(planes)
        self.downsample = downsample
        self.stride = stride

    def forward(self, x):
        # Main branch: conv -> bn -> relu -> conv -> bn.
        y = self.relu(self.bn1(self.conv1(x)))
        y = self.bn2(self.conv2(y))

        # Shortcut branch: the raw input, or its projection when one is given.
        shortcut = x if self.downsample is None else self.downsample(x)

        y += shortcut
        return self.relu(y)

In [21]:
class ResNet(nn.Module):
    """ResNet-style classifier: stem, four residual stages, pooled linear head.

    Args:
        block: residual block class. It is called as
            ``block(inplanes, planes, stride, downsample)`` and may define a
            class attribute ``expansion`` (output channels = planes *
            expansion); a missing attribute is treated as 1.
        layers: sequence of four ints, the number of blocks in each stage.
        num_classes: size of the output layer.

    The head is ``Sequential(Dropout(0.5), Linear)``, so the linear layer's
    parameters live under the ``fc.1.*`` state_dict keys.
    """

    def __init__(self, block, layers, num_classes=9):
        super().__init__()

        self.inplanes = 64

        # Stem: 7x7 stride-2 convolution followed by a stride-2 max pool.
        self.conv1 = nn.Conv2d(3, self.inplanes, kernel_size=7, stride=2, padding=3,
                               bias=False)
        self.bn1 = nn.BatchNorm2d(self.inplanes)
        self.relu = nn.ReLU(inplace=True)
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)

        self.layer1 = self._make_layer(block, 64, layers[0])
        self.layer2 = self._make_layer(block, 128, layers[1], stride=2)
        self.layer3 = self._make_layer(block, 256, layers[2], stride=2)
        self.layer4 = self._make_layer(block, 512, layers[3], stride=2)

        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
        # The last stage emits 512 * expansion channels; size the head to
        # match so blocks with expansion != 1 work as well.
        head_in = 512 * getattr(block, "expansion", 1)
        self.fc = nn.Sequential(nn.Dropout(0.5), nn.Linear(head_in, num_classes))

    def _make_layer(self, block, planes, blocks, stride=1):
        """Build one stage of ``blocks`` residual blocks.

        Only the first block receives ``stride`` and, when the shortcut's
        shape would not match the block output, a 1x1 conv + batch-norm
        projection as ``downsample``. Updates ``self.inplanes`` to the
        stage's output channel count.
        """
        out_channels = planes * getattr(block, "expansion", 1)

        downsample = None
        if stride != 1 or self.inplanes != out_channels:
            downsample = nn.Sequential(
                nn.Conv2d(self.inplanes, out_channels, 1, stride, bias=False),
                nn.BatchNorm2d(out_channels),
            )

        stage = [block(self.inplanes, planes, stride, downsample)]

        # Every following block consumes the previous block's output.
        self.inplanes = out_channels
        stage.extend(block(self.inplanes, planes) for _ in range(1, blocks))

        return nn.Sequential(*stage)

    def forward(self, x):
        """Return the class scores (one row per input sample) for batch ``x``."""
        x = self.conv1(x)
        x = self.bn1(x)
        x = self.relu(x)
        x = self.maxpool(x)

        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)

        x = self.avgpool(x)
        x = torch.flatten(x, 1)
        x = self.fc(x)

        return x

In [22]:
def ResNet34():
    """Build a ``ResNet`` of ``BasicBlock`` units with stage depths 3, 4, 6, 3."""
    return ResNet(BasicBlock, [3, 4, 6, 3])

In [23]:
# MyModel = ResNet34()
# path_model = "hand_model_3.pt"

# MyModel.to(device)
# MyModel.load_state_dict(torch.load(path_model, map_location=device))
# MyModel.eval()

In [24]:
# torchvision ResNet-18 with its head replaced by Dropout + a 9-way linear
# layer; the weights come from the local checkpoint below.
pretrain_model = models.resnet18()
pretrain_model.fc = nn.Sequential(nn.Dropout(0.5), nn.Linear(512, 9))
path_model_pretrain = "hand_model18.pt"

pretrain_model.to(device)

# NOTE(review): torch.load unpickles the file - only load checkpoints that
# come from a trusted source.
checkpoint_state = torch.load(path_model_pretrain, map_location=device)

# strict=False tolerates key mismatches, which would otherwise leave layers
# randomly initialised without any hint. Surface the mismatch instead of
# ignoring it.
load_report = pretrain_model.load_state_dict(checkpoint_state, strict=False)
if load_report.missing_keys or load_report.unexpected_keys:
    print("WARNING: checkpoint '{}' does not fully match the model.\n"
          "  missing keys (left at their initial values): {}\n"
          "  unexpected keys (ignored): {}".format(
              path_model_pretrain,
              list(load_report.missing_keys),
              list(load_report.unexpected_keys)))

# Inference mode: disables dropout and uses the stored batch-norm statistics.
pretrain_model.eval()

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
  

In [25]:
def argmax(prediction):
    """Return the label and score of the best class for the first sample.

    Args:
        prediction: 2-D tensor of model outputs indexed as [sample, class].
            Only the first sample is evaluated.

    Returns:
        ``(label, score)`` where ``label`` is looked up in ``Labels`` and
        ``score`` is the winning output value formatted as a string with six
        decimals. No softmax is applied here, so the score is whatever scale
        ``prediction`` is in.
    """
    # A tensor must be on the CPU before it can be viewed as a NumPy array;
    # .cpu() is a no-op when it already is.
    scores = prediction.detach().cpu().numpy()

    # Take label and score from the same sample so they always agree, even if
    # more than one sample is passed in.
    best = int(np.argmax(scores[0]))
    score = '{:6f}'.format(scores[0, best])

    return Labels[best], score

def preprocess(image):
    """Turn a NumPy image array into a model-ready batch of one on ``device``.

    Args:
        image: array accepted by ``PIL.Image.fromarray`` (e.g. a webcam frame).

    Returns:
        Float tensor produced by ``test_transform`` with a leading batch
        dimension of size 1, placed on ``device``.
    """
    # The transform pipeline is applied to a PIL image, while webcam frames
    # arrive as NumPy arrays.
    # NOTE(review): OpenCV frames are BGR whereas PIL treats the array as RGB;
    # confirm this matches how the training images were produced.
    pil_image = PIL.Image.fromarray(image)

    tensor = test_transform(pil_image).float()

    # The network expects a batch dimension in front of (C, H, W).
    return tensor.to(device).unsqueeze(0)

def classification(image):
    """Classify one image with ``pretrain_model``.

    Args:
        image: NumPy array accepted by ``PIL.Image.fromarray`` (e.g. the
            cropped webcam region).

    Returns:
        ``(label, probability)``: the ``Labels`` entry of the highest-scoring
        class and its softmax probability as a Python float.
    """
    # NOTE(review): OpenCV frames are BGR whereas PIL treats the array as RGB;
    # confirm this matches how the training images were produced.
    pil_image = PIL.Image.fromarray(image)
    batch = test_transform(pil_image).float().unsqueeze(0)

    # Tensor.to() returns a new tensor rather than moving in place, so the
    # result must be kept; otherwise the input stays on the CPU while the
    # model may live on the GPU.
    batch = batch.to(device)

    # Pure inference: skip building the autograd graph for every frame.
    with torch.no_grad():
        out = pretrain_model(batch)
        prob = F.softmax(out, dim=1)

    _, pre = torch.max(out, 1)
    class_index = pre.item()

    return Labels[class_index], prob[0, class_index].item()

In [26]:
# Live demo: grab webcam frames, classify the top-right 400x400 region and
# overlay the prediction. Press 'x' in an OpenCV window to stop.

# Region of interest inside the 600x600 frame (top-right corner).
x = 600 - 400
y = 0
h = 400
w = 400

cap = cv2.VideoCapture(0)
try:
    while True:
        grabbed, frame = cap.read()
        if not grabbed:
            # No camera or the stream ended: frame is unusable, so stop
            # instead of crashing inside cv2.resize.
            print("Could not read a frame from the camera; stopping.")
            break

        frame = cv2.resize(frame, (600, 600))
        frame = cv2.flip(frame, 1)  # mirror horizontally
        frame = cv2.GaussianBlur(frame, (3, 3), 0)
        frame = cv2.bilateralFilter(frame, 5, 20, 20)

        # NOTE(review): the rectangle is drawn before the crop, so its border
        # pixels are part of the classifier input. Kept as-is in case the
        # training images were captured the same way - confirm.
        frame = cv2.rectangle(frame, (x, y), (x + w, y + h), (255, 0, 0), 2)

        img_hand = frame[y:y + h, x:x + w]
        img_resize = cv2.resize(img_hand, (240, 240))

        result, score = classification(img_resize)

        cv2.putText(frame, result+", "+str(score), (0, 500), cv2.FONT_HERSHEY_SIMPLEX, 2,
                    (0, 0, 255), 2, cv2.LINE_AA)

        cv2.imshow("resize", img_resize)
        cv2.imshow("anh", frame)

        # Mask to the low byte so the comparison is robust to platforms that
        # set extra bits in the key code.
        k = cv2.waitKey(100) & 0xFF
        if k == ord('x'):
            break
finally:
    # Always free the camera and close the windows, even if a frame fails.
    cap.release()
    cv2.destroyAllWindows()