In [2]:
import time
import math
import cv2
import mediapipe as mp
import numpy as np
from handUnits import HandDetector
from gestureUnits import GestureDetector
import os

In [3]:
import torch
import torch.optim
import torch.nn as nn
from torch.autograd import Variable
from torch.utils.data import TensorDataset,DataLoader
from sklearn.model_selection import train_test_split
import pandas as pd

%matplotlib inline

In [4]:
class CNN(nn.Module):
    """Four-stage convolutional classifier for single-channel 28x28 images.

    Each stage is Conv2d(3x3, padding 1) -> BatchNorm2d -> ReLU; stages 2 and 3
    additionally halve the spatial size with a 2x2 max-pool, so a 28x28 input
    reaches the classifier head as a 128 x 7 x 7 feature map (the size the first
    Linear layer is built for).

    Attribute names and the order of layers inside every ``nn.Sequential`` are
    part of the checkpoint format (state-dict keys such as ``layers1.0.weight``)
    and must not be changed.

    Args:
        num_classes: Number of output logits. Defaults to 26, the size of the
            original output layer.
    """

    def __init__(self, num_classes=26):
        super().__init__()
        self.layers1 = nn.Sequential(
            nn.Conv2d(1, 16, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(16),
            nn.ReLU(inplace=True),
        )
        self.layers2 = nn.Sequential(
            nn.Conv2d(16, 32, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(32),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),  # 28x28 -> 14x14
        )
        self.layers3 = nn.Sequential(
            nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(64),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),  # 14x14 -> 7x7
        )
        self.layers4 = nn.Sequential(
            nn.Conv2d(64, 128, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(128),
            nn.ReLU(inplace=True),
        )
        self.fc = nn.Sequential(
            nn.Linear(7 * 7 * 128, 1024),
            nn.ReLU(inplace=True),
            nn.Linear(1024, 100),
            nn.ReLU(inplace=True),
            nn.Linear(100, num_classes),
        )

    def forward(self, x):
        """Return raw class logits of shape (batch, num_classes).

        Args:
            x: Float tensor of shape (batch, 1, 28, 28).
        """
        x = self.layers1(x)
        x = self.layers2(x)
        x = self.layers3(x)
        x = self.layers4(x)
        # Flatten everything except the batch dimension for the linear head.
        x = x.view(x.size(0), -1)
        return self.fc(x)

In [5]:
cnn = CNN()
# Read the trained parameters. map_location='cpu' lets a checkpoint that was
# saved from a GPU load on a CPU-only machine; inference below runs on the CPU
# (predictions are converted with .numpy()).
state_dict = torch.load('E:/MyProject/GR/model/cnn_1.pth', map_location='cpu')
# Inference mode: BatchNorm layers must use their stored running statistics
# instead of the statistics of the single frame being classified.
cnn.eval()
# Kept as the last expression so the cell still displays the key-matching result.
cnn.load_state_dict(state_dict)

<All keys matched successfully>

In [6]:
def Frame(pTime):
    """Draw the current frames-per-second value on the global frame ``img``.

    Args:
        pTime: Timestamp (seconds, as returned by ``time.time()``) of the
            previous call.

    Returns:
        The timestamp taken during this call; pass it back in on the next call.
    """
    cTime = time.time()
    elapsed = cTime - pTime
    # Two consecutive time.time() readings can be identical on clocks with
    # coarse resolution; report 0 instead of dividing by zero.
    fps = 1 / elapsed if elapsed > 0 else 0
    cv2.putText(
        img, str(int(fps)), (10, 70), cv2.FONT_HERSHEY_PLAIN, 3, (255, 255, 255), 2
    )
    return cTime


# Circle marker
def joint_circle(img, finger, color=(0, 0, 255)):
    """Draw a filled circle of radius 8 on ``img`` at the point ``finger``.

    Does nothing when ``finger`` is empty or None.
    """
    if not finger:
        return
    center = (finger[0], finger[1])
    cv2.circle(img, center, 8, color, cv2.FILLED)


# Line segment
def joint_line(img, ptStart, ptEnd, color=(0, 255, 0), thickness=2):
    """Draw a line on ``img`` from ``ptStart`` to ``ptEnd``.

    Does nothing unless both end points are non-empty.
    """
    if not (ptStart and ptEnd):
        return
    cv2.line(img, ptStart, ptEnd, color, thickness)


# Distance between two points
def joint_distance(x, y):
    """Return the Euclidean distance between points ``x`` and ``y``, truncated to int.

    Only the first two components of each point are used.
    """
    dx, dy = x[0] - y[0], x[1] - y[1]
    squared = dx * dx + dy * dy
    return int(math.sqrt(squared))


# Distances from each joint to a base point
def jointList_distance(jointList, base, Name='None '):
    """Return an int array with the distance from every joint to ``base``.

    Args:
        jointList: Sequence of points.
        base: Reference point; when empty or None every distance stays 0.
        Name: Unused; kept for callers that pass a label.

    Returns:
        NumPy integer array with one entry per joint.
    """
    distance = np.zeros(len(jointList), dtype=int)
    if not base:
        return distance
    for idx, joint in enumerate(jointList):
        distance[idx] = joint_distance(joint, base)
    return distance


# Collect landmark coordinates
def get_lms(position, direction):
    """Return the 21 landmark points of one hand as ``[[x, y], ...]`` integers.

    Args:
        position: Mapping whose ``direction`` entry supports ``.get(i)`` and
            yields an (x, y)-like pair for landmark index ``i``.
        direction: Key selecting the hand inside ``position``.

    Returns:
        List of 21 ``[int(x), int(y)]`` pairs, or an empty list when
        ``any(position[direction])`` is false.
    """
    if not any(position[direction]):
        return []
    hand = position[direction]
    return [[int(hand.get(idx)[0]), int(hand.get(idx)[1])] for idx in range(21)]


# Select a subset of the landmarks
def get_partOflms(pos, lms):
    """Return ``lms[i]`` for every index ``i`` in ``pos``.

    An empty ``lms`` yields an empty list.
    """
    return [lms[idx] for idx in pos if lms]


# Convex hull of selected landmarks
def convexHull(lms, draw=True):
    """Compute the convex hull of a fixed subset of the landmarks.

    Args:
        lms: List of ``[x, y]`` landmark points.
        draw: When truthy, outline the hull in green on the global frame ``img``.

    Returns:
        The hull as returned by ``cv2.convexHull``, or None when ``lms`` is empty.
    """
    if not lms:
        return None
    hull_index = [0, 1, 2, 3, 6, 10, 14, 19, 18, 17, 10]
    points = np.array(lms, dtype=np.int32)
    hull = cv2.convexHull(points[hull_index])
    if draw:
        cv2.polylines(img, [hull], True, (0, 255, 0), 2)
    return hull


# Find the fingertips lying outside the convex hull
def outFingers(lms, tips=[4, 8, 12, 16, 20]):
    """Return the landmark indices in ``tips`` that lie outside the hull.

    A tip counts as outside when ``cv2.pointPolygonTest`` reports a negative
    signed distance to the hull built by ``convexHull``.

    Returns:
        List of indices taken from ``tips``; empty when ``lms`` is empty.
    """
    hull = convexHull(lms, draw=False)
    if not lms:
        return []
    return [
        tip
        for tip in tips
        if cv2.pointPolygonTest(hull, (int(lms[tip][0]), int(lms[tip][1])), True) < 0
    ]


# Recognise the (numeric) gesture
def gestureNum(lms, show=True, text_pos=(20, 300), color=(255, 0, 0)):
    """Classify the gesture for ``lms`` and optionally draw it on the global ``img``.

    Args:
        lms: List of ``[x, y]`` landmark points.
        show: When equal to True, the result is written onto ``img``.
        text_pos: Position of the text.
        color: Colour of the text.

    Returns:
        Whatever ``GestureDetector.get_guester()`` returns for these landmarks.
    """
    detector = GestureDetector(outFingers(lms), lms)
    if show == True:
        label = detector.get_guester()
        cv2.putText(img, label, text_pos, cv2.FONT_HERSHEY_PLAIN, 3, color, 2)
    return detector.get_guester()


# One-call gesture recognition
def gestureNum2(img, direction, hand_detector, text_pos=(20, 300), color=(255, 0, 255)):
    """Detect the hand in ``img``, classify its gesture and draw the result on ``img``.

    Args:
        img: Frame to analyse and annotate.
        direction: Key selecting which hand's landmarks to use.
        hand_detector: Object providing ``process`` and ``find_position``.
        text_pos: Position of the text.
        color: Colour of the text.
    """
    hand_detector.process(img, draw=False)
    lms = get_lms(hand_detector.find_position(img), direction)
    detector = GestureDetector(outFingers(lms), lms)
    label = detector.get_guester()
    cv2.putText(img, label, text_pos, cv2.FONT_HERSHEY_PLAIN, 3, color, 2)


def Move(img, lms):
    """Move the mouse pointer to follow landmark 12 of the hand.

    The top-left quarter of the frame (half its width and half its height) is
    outlined on ``img`` and mapped linearly onto the whole screen.

    NOTE(review): ``pag`` is not defined in the visible part of the notebook —
    presumably pyautogui imported elsewhere; confirm.
    """
    frame_h, frame_w, _ = img.shape
    screen_w, screen_h = pag.size()

    zone_h = int(0.5 * frame_h)
    zone_w = int(0.5 * frame_w)
    scale_y = screen_h / zone_h
    scale_x = screen_w / zone_w
    pag.PAUSE = 0.01
    pag.FAILSAFE = False

    cv2.rectangle(img, (0, 0), (zone_w, zone_h), (0, 255, 255), 2)

    if not any(lms):
        return
    tip = lms[12]
    joint_circle(img, tip)
    pag.moveTo(scale_x * tip[0], scale_y * tip[1], duration=0)


def Click(lms):
    """Issue a mouse click when the recognised gesture is '2'.

    The gap between landmarks 8 and 12 is compared with the distance between
    landmarks 9 and 0: a gap under a quarter of that distance triggers a single
    click, a gap nearly as large as it triggers a double click.

    NOTE(review): ``pag`` is not defined in the visible part of the notebook —
    presumably pyautogui imported elsewhere; confirm.
    """
    if gestureNum(lms, text_pos=(300, 300), color=(255, 255, 0)) != '2':
        return

    base_distance = joint_distance(lms[9], lms[0])
    click_distance = joint_distance(lms[8], lms[12])

    if base_distance > int(4 * click_distance):
        pag.click(interval=0.1)
    if base_distance < int(1.1 * click_distance):
        pag.click(clicks=2, interval=0.1)


def strart(img, hand_detector):
    """Run the hand detector on ``img`` and return the positions it reports.

    Calls ``hand_detector.process(img, draw=False)`` and then returns the
    result of ``hand_detector.find_position(img)``.
    """
    hand_detector.process(img, draw=False)
    return hand_detector.find_position(img)


# Extract the region-of-interest image
def get_roi(lms, draw=True, show=False):
    """Crop a smoothed region of interest around the hand from the global frame ``img``.

    The crop is sized from the distance between landmarks 9 and 0 and anchored
    relative to landmark 0 (presumably the MediaPipe wrist and middle-finger
    base — confirm against HandDetector).

    Args:
        lms: List of ``[x, y]`` landmark points.
        draw: When equal to True, outline the region on ``img``.
        show: When equal to True, display the crop in a window named 'Roi'.

    Returns:
        The bilateral-filtered crop, or None when ``lms`` holds no landmarks
        or the region is too small / lies outside the frame.
    """
    if not any(lms):
        return None

    # Relative scale of the hand in the frame.
    beta = joint_distance(lms[9], lms[0])
    alpha = beta

    # Top-left corner of the region, clamped to the frame, and its size.
    x_0 = max(lms[0][0] - int(1 * alpha), 0)
    y_0 = max(lms[0][1] - int(2.3 * beta), 0)
    width = int(2.5 * alpha)
    height = int(2.5 * beta)

    # The crop is inset by 2 px on every side; anything this small would give an
    # empty slice, or a negative stop index that wraps around the frame.
    if width <= 4 or height <= 4:
        return None

    if draw == True:
        cv2.rectangle(
            img, (x_0, y_0), (x_0 + width, y_0 + height), (0, 255, 255), 2
        )

    # Inset by 2 px so the outline drawn above is not part of the crop.
    roi = img[y_0 + 2 : y_0 + height - 2, x_0 + 2 : x_0 + width - 2]

    # The anchor landmark can lie at or beyond the frame border, which leaves
    # nothing to crop; cv2.bilateralFilter would raise on an empty image.
    if roi.size == 0:
        return None

    roi = cv2.bilateralFilter(roi, 9, 75, 75)
    if show == True:
        cv2.imshow('Roi', roi)
    return roi


# Prepare a captured crop for the network
def get_data_img(roi, size=28):
    """Resize ``roi`` to ``size`` x ``size`` (28 x 28 by default) and convert BGR to grayscale."""
    resized = cv2.resize(roi, (size, size))
    return cv2.cvtColor(resized, cv2.COLOR_BGR2GRAY)


# Build the path a file should be saved to, optionally inside a sub-folder
def get_file_path(file_name, putTofolder=False, folder=None):
    """Return the path of ``file_name`` under the current working directory.

    Args:
        file_name: Name of the file.
        putTofolder: When equal to True, place the file inside ``folder``
            (created if it does not exist yet).
        folder: Sub-folder name, used only when ``putTofolder`` is True.

    Returns:
        The joined file path.
    """
    base_dir = os.getcwd()
    if putTofolder == True:
        base_dir = os.path.join(base_dir, folder)
        os.makedirs(base_dir, exist_ok=True)
    return os.path.join(base_dir, file_name)


def skinMask(roi):
    """Mask ``roi`` with an Otsu threshold of its blurred Cr channel.

    Args:
        roi: BGR image.

    Returns:
        ``roi`` with every pixel outside the thresholded mask set to zero.
    """
    ycrcb = cv2.cvtColor(roi, cv2.COLOR_BGR2YCR_CB)  # convert to YCrCb space
    _y, cr, _cb = cv2.split(ycrcb)  # only the Cr channel is used
    cr_blurred = cv2.GaussianBlur(cr, (5, 5), 0)
    # Otsu picks the threshold automatically, so the 0 passed here is ignored.
    _, skin = cv2.threshold(cr_blurred, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
    return cv2.bitwise_and(roi, roi, mask=skin)



In [7]:
def cnnOut(img):
    """Classify a BGR image crop with the global ``cnn`` and return the class index.

    The crop is converted by ``get_data_img`` to a 28x28 grayscale image,
    scaled to [0, 1] and fed to the network as a (1, 1, 28, 28) float32 batch.

    Args:
        img: BGR image.

    Returns:
        Index of the largest logit, as a NumPy integer scalar.
    """
    gray = get_data_img(img)
    scaled = gray / 255
    batch = torch.from_numpy(scaled).to(torch.float32).view(1, 1, 28, 28)
    # Inference only: skip autograd bookkeeping. The old Variable wrapper is
    # deprecated and no longer needed to pass a tensor to a module.
    with torch.no_grad():
        out = cnn(batch)
    pre = torch.max(out, 1)[1]
    return pre.numpy()[0]

In [None]:
def modelOut(model, img):
    """Classify a BGR image crop with ``model`` and return the class index.

    The original definition had no body. This implementation follows the same
    steps as ``cnnOut`` but takes the network as an argument instead of using
    the global ``cnn`` — TODO confirm this is the intended behaviour.

    Args:
        model: Callable network accepting a (1, 1, 28, 28) float32 tensor.
        img: BGR image.

    Returns:
        Index of the largest logit, as a NumPy integer scalar.
    """
    gray = get_data_img(img)
    batch = torch.from_numpy(gray / 255).to(torch.float32).view(1, 1, 28, 28)
    with torch.no_grad():
        out = model(batch)
    return torch.max(out, 1)[1].numpy()[0]

In [8]:
# Class index (0-25) -> lowercase letter ('a'-'z') used to label predictions.
Characterdict = {index: chr(ord('a') + index) for index in range(26)}

In [9]:
# Live demo: read webcam frames, toggle letter recognition with the right hand,
# and label the left hand's gesture. Press 'q' in the video window to quit.
camera = cv2.VideoCapture(0)
hand_detector = HandDetector()

num = 0  # frame counter cycling 1..3; the toggle is only evaluated when it is 1
thickness = 2
turn_on = False

# Timestamp of the previous frame, for the frame-rate overlay.
pTime = 0

try:
    while True:
        success, img = camera.read()

        if success:
            img = cv2.flip(img, 1)  # mirror horizontally
            num = num + 1

            hand_detector.process(img, draw=False)
            position = hand_detector.find_position(img)

            # Landmark points of the right and left hand.
            right_lms = get_lms(position, 'Right')
            left_lms = get_lms(position, 'Left')

            h, w = img.shape[:2]

            # On-screen toggle button: a 20 px window centred at (0.9 w, 0.25 h).
            button_x, button_y = int(0.9 * w), int(0.25 * h)

            if right_lms:
                tip_x, tip_y = right_lms[12]
                if (
                    button_x - 10 <= tip_x < button_x + 10
                    and button_y - 10 <= tip_y < button_y + 10
                    and num == 1
                ):
                    turn_on = not turn_on

            # Filled circle while recognition is on, outline while it is off.
            thickness = cv2.FILLED if turn_on else 2
            cv2.circle(img, (button_x, button_y), 20, (0, 0, 255), thickness)

            if turn_on:
                if left_lms:
                    roi = get_roi(left_lms)
                    # get_roi yields None when no usable region exists; skip
                    # classification instead of passing an unusable crop on.
                    if roi is not None and roi.size > 0:
                        res = cnnOut(roi)
                        cv2.putText(
                            img,
                            Characterdict[res],
                            (200, 200),
                            cv2.FONT_HERSHEY_PLAIN,
                            3,
                            (0, 255, 255),
                            2,
                        )
            else:
                gestureNum(left_lms)

            # Frame-rate overlay.
            pTime = Frame(pTime)

            cv2.imshow('Video', img)

            if num == 3:
                num = 0
        else:
            print("获取失败")
            break

        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Always free the camera and close the windows, even if a frame raised.
    camera.release()
    cv2.destroyAllWindows()

In [9]:
# Single-image check: classify one stored grayscale test image with the network.
img = cv2.imread('E:/MyProject/GR/dataset/test_0.jpg', cv2.IMREAD_GRAYSCALE)
if img is None:
    # cv2.imread returns None instead of raising when the file cannot be read.
    raise FileNotFoundError('E:/MyProject/GR/dataset/test_0.jpg')
img = cv2.resize(img, (28, 28))
img = img / 255
img = torch.from_numpy(img)
x = img.to(torch.float32).view(1, 1, 28, 28)
# Inference only: no autograd bookkeeping, and no deprecated Variable wrapper.
with torch.no_grad():
    out = cnn(x)
pre = torch.max(out, 1)[1]

In [23]:
# NumPy form of the predicted class index.
pre_ = pre.numpy()
# String form of the prediction, shown as the cell output.
str(pre_)

'0'