In [1]:
import torch
from PIL import Image
from sklearn.model_selection import train_test_split
from torch import nn
from torch import optim
import torch.nn.functional as F
import torchvision.transforms.functional as transF
import numpy as np
from torch.utils.data import DataLoader, Dataset
from torchvision import datasets, transforms, models
from torchvision.datasets import ImageFolder
import matplotlib.pyplot as plt
import matplotlib.ticker as mticker
from tqdm.notebook import tqdm
import math
import os
import cv2
import PIL 

In [2]:
# Run on the GPU when CUDA is usable, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
print(device)

cpu


In [3]:
# Maps the predicted class index to the gesture name shown to the user.
Labels = dict(enumerate((
    'Attack',
    'Bottom',
    'Left',
    'Right',
    'Stop',
    'Top',
)))

In [4]:
class SquarePad:
    """Callable transform that zero-pads an image to a square.

    The shorter side is padded on both ends so the content stays centred;
    when the missing amount is odd, the extra pixel goes to the right/bottom.
    """

    def __call__(self, image):
        """Return ``image`` padded to ``max(image.size)`` on both axes.

        ``image.size`` must be a 2-sequence; it is read as (width, height),
        which is the PIL convention -- TODO confirm for other input types.
        """
        width, height = image.size
        side = max(width, height)

        # Leading pad is the floor of half the gap; trailing pad takes the rest.
        left = (side - width) // 2
        top = (side - height) // 2
        right = side - (width + left)
        bottom = side - (height + top)

        return transF.pad(image, (left, top, right, bottom), 0, 'constant')

In [5]:
# Inference-time preprocessing: resize to 240x240, convert to a tensor, then
# normalise each channel with the given mean / std (presumably statistics of
# the training data -- confirm against the training notebook).
# SquarePad() is intentionally left out of the pipeline (it was commented out).
test_transform = transforms.Compose(
    [
        transforms.Resize((240, 240)),
        transforms.ToTensor(),
        transforms.Normalize(
            [0.5345, 0.5550, 0.5419],  # per-channel mean
            [0.2360, 0.2502, 0.2615],  # per-channel std
        ),
    ]
)

In [6]:
class BasicBlock(nn.Module):
    """Residual block made of two 3x3 conv + batch-norm layers.

    The input (optionally passed through ``downsample``) is added to the
    output of the second batch-norm before the final ReLU.

    Args:
        inplanes: number of input channels.
        planes: number of output channels of both convolutions.
        stride: stride of the first convolution (the second always uses 1).
        downsample: optional module applied to the input so that it matches
            the shape of the main branch; ``None`` uses the input unchanged.
    """

    expansion = 1

    def __init__(self, inplanes, planes, stride=1, downsample=None):
        super().__init__()
        # Both convolutions share everything except channel counts and stride.
        conv_kwargs = dict(kernel_size=3, padding=1, bias=False)
        self.conv1 = nn.Conv2d(inplanes, planes, stride=stride, **conv_kwargs)
        self.bn1 = nn.BatchNorm2d(planes)
        self.relu = nn.ReLU(inplace=True)
        self.conv2 = nn.Conv2d(planes, planes, stride=1, **conv_kwargs)
        self.bn2 = nn.BatchNorm2d(planes)
        self.downsample = downsample
        self.stride = stride

    def forward(self, x):
        """Return ``relu(main_branch(x) + shortcut(x))``."""
        branch = self.relu(self.bn1(self.conv1(x)))
        branch = self.bn2(self.conv2(branch))

        # Shortcut path: identity unless a projection module was supplied.
        shortcut = x if self.downsample is None else self.downsample(x)

        branch += shortcut
        return self.relu(branch)

In [7]:
# Build a MobileNetV3-Small network and restore the weights stored at
# `path_model_pretrain`.
model = models.mobilenet_v3_small()
# NOTE(review): this attaches an extra `fc` attribute (Dropout + Linear 512->6).
# The module tree printed for this model starts with `features`; torchvision's
# MobileNetV3 appears to route its output through `classifier`, not `fc`, so
# this head may never be used in forward() and the network may still emit the
# default number of classes -- confirm. It must stay as-is for now because
# load_state_dict() below presumably expects matching `fc.*` keys in the
# checkpoint.
model.fc = nn.Sequential(nn.Dropout(0.5), nn.Linear(512, 6))
path_model_pretrain = "../weight/hand_model_mobi_v3_small.pt"

# Move the parameters to the selected device before loading; map_location
# makes the checkpoint tensors land on that same device.
model.to(device)
model.load_state_dict(torch.load(path_model_pretrain, map_location=device))
# Switch to evaluation mode; as the last expression of the cell this also
# displays the module repr.
model.eval()

MobileNetV3(
  (features): Sequential(
    (0): ConvBNActivation(
      (0): Conv2d(3, 16, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
      (1): BatchNorm2d(16, eps=0.001, momentum=0.01, affine=True, track_running_stats=True)
      (2): Hardswish()
    )
    (1): InvertedResidual(
      (block): Sequential(
        (0): ConvBNActivation(
          (0): Conv2d(16, 16, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), groups=16, bias=False)
          (1): BatchNorm2d(16, eps=0.001, momentum=0.01, affine=True, track_running_stats=True)
          (2): ReLU(inplace=True)
        )
        (1): SqueezeExcitation(
          (fc1): Conv2d(16, 8, kernel_size=(1, 1), stride=(1, 1))
          (relu): ReLU(inplace=True)
          (fc2): Conv2d(8, 16, kernel_size=(1, 1), stride=(1, 1))
        )
        (2): ConvBNActivation(
          (0): Conv2d(16, 16, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (1): BatchNorm2d(16, eps=0.001, momentum=0.01, affine=True, track_r

In [8]:
def argmax(prediction):
    """Return the top-1 label and its formatted score for the first sample.

    Args:
        prediction: 2-D tensor of per-class scores, one row per sample.
            Only the first row is reported.

    Returns:
        ``(label, score)`` where ``label`` is the ``Labels`` entry of the
        highest-scoring class in row 0 and ``score`` is that class's value
        formatted as a fixed-point string.

    Raises:
        KeyError: if the winning class index has no entry in ``Labels``.
    """
    # NumPy conversion needs a CPU tensor without autograd history, so the
    # tensor is brought to the CPU here instead of being moved to `device`
    # (which made `.numpy()` fail whenever `device` was CUDA).
    scores = prediction.detach().cpu().numpy()

    # Label and score are both taken from row 0 so they always describe the
    # same sample (the score used to be the maximum over the whole batch).
    first_row = scores[0]
    top_1 = int(np.argmax(first_row))
    score = '{:6f}'.format(first_row[top_1])

    return Labels[top_1], score

def preprocess(image):
    """Turn a raw image array into a model-ready batch of one.

    Args:
        image: array accepted by ``PIL.Image.fromarray`` (e.g. an OpenCV
            frame; channel order is passed through unchanged).

    Returns:
        Float tensor on ``device`` with a leading batch dimension of size 1,
        produced by applying ``test_transform`` to the image.
    """
    # The per-call debug print of the PIL image was removed: it flooded the
    # cell output when called once per video frame.
    pil_image = PIL.Image.fromarray(image)
    tensor = test_transform(pil_image).float()
    # Move to the model's device, then add the batch axis expected by the net.
    return tensor.to(device).unsqueeze(0)

def classification(image):
    """Classify a single image array and return its label and confidence.

    Args:
        image: array accepted by ``PIL.Image.fromarray`` (e.g. an OpenCV
            frame; channel order is passed through unchanged).

    Returns:
        ``(label, probability)`` where ``label`` is the ``Labels`` entry of
        the highest-scoring class and ``probability`` is its softmax
        probability as a Python float.

    Raises:
        KeyError: if the winning class index has no entry in ``Labels``.
    """
    image = PIL.Image.fromarray(image)
    batch = test_transform(image).float().unsqueeze(0)
    # Tensor.to() is not in-place: the result must be kept, otherwise the
    # input stays on the CPU and a model living on CUDA rejects it.
    batch = batch.to(device)

    # Pure inference: skip autograd bookkeeping for every frame.
    with torch.no_grad():
        out = model(batch)
        prob = F.softmax(out, dim=1)

    pre = out.argmax(dim=1)
    top_p, _ = prob.topk(1, dim=1)
    return Labels[pre.item()], top_p.item()

In [9]:
# Live demo: grab webcam frames, classify the region of interest and overlay
# the predicted label and score. Press 'x' in an OpenCV window to stop.

# Region of interest inside the 600x600 frame: a 400x400 square whose left
# edge is at x=200 and whose top edge is at y=0 (i.e. the top-right corner).
x = 600 - 400
y = 0
h = 400
w = 400

cap = cv2.VideoCapture(0)
try:
    if not cap.isOpened():
        raise RuntimeError("Could not open the webcam (device index 0)")

    while True:
        ok, frame = cap.read()
        # A failed grab yields no frame; stop instead of crashing in resize().
        if not ok or frame is None:
            break

        frame = cv2.resize(frame, (600, 600))
        frame = cv2.flip(frame, 1)
        frame = cv2.GaussianBlur(frame, (3, 3), 0)
        frame = cv2.bilateralFilter(frame, 5, 20, 20)

        frame = cv2.rectangle(frame, (x, y), (x+w, y+h), (0, 255, 0), 2)

        # NOTE(review): the crop is taken after the rectangle is drawn, so the
        # green border pixels are part of the classifier input. Kept as in the
        # original pipeline -- confirm this matches how training images were
        # captured before changing it.
        img_hand = frame[y:y+h, x:x+w]
        img_resize = cv2.resize(img_hand, (240, 240))
        result, score = classification(img_resize)

        cv2.putText(frame, result+", "+str(score), (0, 500), cv2.FONT_HERSHEY_SIMPLEX, 2, (0, 0, 255), 2, cv2.LINE_AA)
        cv2.imshow("resize", img_resize)
        # Queue both windows before waitKey() so they refresh together.
        cv2.imshow("anh", frame)

        k = cv2.waitKey(100)
        if k == ord('x'):
            break
finally:
    # Always free the camera and close the windows, even after an exception
    # or a kernel interrupt.
    cap.release()
    cv2.destroyAllWindows()