In [None]:
import cv2
import numpy as np
from matplotlib import pyplot as plt
from pynq.overlays.base import BaseOverlay
from pynq.lib.video import *

In [None]:
# Load the base overlay bitstream; it exposes the HDMI output used below.
base = BaseOverlay("base.bit")

# Configure the HDMI output with VideoMode(640, 480, 24) and the PIXEL_BGR
# pixel format, then start it. The capture loop later requests 640x480 frames
# from the camera and copies OpenCV images straight into the output frames.
# (The original cell assigned `hdmi_out` twice; the duplicate was removed.)
Mode = VideoMode(640,480,24)
hdmi_out = base.video.hdmi_out
hdmi_out.configure(Mode,PIXEL_BGR)
hdmi_out.start()

In [None]:
def SkinDetect(frame):
    """Keep only the skin-coloured pixels of a BGR image.

    The image is converted to YCrCb, its Cr channel is binarised with an
    Otsu-selected threshold, and the resulting binary image is applied as a
    mask to the input. The masked image is dilated twice before it is
    returned.

    Args:
        frame: image that ``cv2.cvtColor`` can convert with
            ``cv2.COLOR_BGR2YCrCb``.

    Returns:
        The masked, dilated image.
    """
    # After the conversion the channels are ordered (Y, Cr, Cb); only Cr
    # is needed for the segmentation.
    cr_channel = cv2.split(cv2.cvtColor(frame, cv2.COLOR_BGR2YCrCb))[1]
    otsu_flags = cv2.THRESH_BINARY + cv2.THRESH_OTSU
    skin_mask = cv2.threshold(cr_channel, 0, 255, otsu_flags)[1]
    masked = cv2.bitwise_and(frame, frame, mask=skin_mask)
    return cv2.dilate(masked, None, iterations=2)

In [None]:
# Compute the ratio between a contour's area and the area of its bounding
# rectangle; the ratio is used to decide whether the contour is a hand.
def CalRatio(cnts):
    """Return a contour's bounding rectangle and its fill ratio.

    Args:
        cnts: a single contour, as accepted by ``cv2.contourArea`` and
            ``cv2.boundingRect``.

    Returns:
        Tuple ``(x, y, w, h, ratio)``: the bounding rectangle reported by
        ``cv2.boundingRect`` followed by ``contour area / (w * h)``.
    """
    box_x, box_y, box_w, box_h = cv2.boundingRect(cnts)
    fill_ratio = cv2.contourArea(cnts) / (box_w * box_h)
    return box_x, box_y, box_w, box_h, fill_ratio

In [None]:
# Hand gesture extraction
def HandExtract(frame, ratio_threshold=1.0, min_area=300):
    """Locate the hand in a BGR image and return it on a black background.

    The image is skin-segmented with ``SkinDetect``, binarised, and its
    external contours are extracted. Of the two largest contours (there may
    be two when both a hand and a face are visible) the one with the smaller
    area-to-bounding-box ratio is kept. It is accepted as a hand when its
    area exceeds ``min_area`` and its ratio is below ``ratio_threshold``.

    Args:
        frame: BGR image (uint8) to search.
        ratio_threshold: upper bound (exclusive) on the fill ratio returned
            by ``CalRatio``. Defaults to the original hard-coded 1.0.
        min_area: lower bound (exclusive) on the contour area. Defaults to
            the original hard-coded 300.

    Returns:
        Tuple ``(contour, HandFlag, res)``:
        * ``contour`` - the selected contour, or the integer ``0`` when no
          contour was found at all;
        * ``HandFlag`` - ``True`` only when the contour passed both checks;
        * ``res`` - uint8 image shaped like ``frame``; all zeros unless
          ``HandFlag`` is ``True``, in which case it holds the pixels of the
          contour's bounding box masked by the binary skin image.
    """
    HandFlag = False
    res = np.zeros(frame.shape, dtype=np.uint8)  # final extracted gesture

    # Skin segmentation, then binarise and close small gaps.
    skin = SkinDetect(frame)
    skin_gray = cv2.cvtColor(skin, cv2.COLOR_BGR2GRAY)
    _, thres = cv2.threshold(skin_gray, 20, 255, cv2.THRESH_BINARY)
    thres = cv2.dilate(thres, None, iterations=1)

    # cv2.findContours returns (image, contours, hierarchy) on OpenCV 3.x
    # but (contours, hierarchy) on 2.x/4.x. The contour list is always the
    # second-to-last element, so index it instead of unpacking.
    cnts = cv2.findContours(thres.copy(), cv2.RETR_EXTERNAL,
                            cv2.CHAIN_APPROX_SIMPLE)[-2]

    c = 0  # placeholder returned when there is no contour
    if len(cnts) == 0:
        return c, HandFlag, res

    by_area = sorted(cnts, key=cv2.contourArea, reverse=True)  # descending
    c = by_area[0]
    ratio = CalRatio(c)[4]
    if len(by_area) > 1:
        # Hand and face may both be present: prefer the contour that fills
        # less of its bounding box.
        ratio2 = CalRatio(by_area[1])[4]
        if ratio2 <= ratio:
            c = by_area[1]
            ratio = ratio2

    # Final decision from skin area and fill ratio.
    if cv2.contourArea(c) > min_area and ratio < ratio_threshold:
        HandFlag = True
        x, y, w, h, _ = CalRatio(c)
        res[y:y + h, x:x + w] = frame[y:y + h, x:x + w]
        res = cv2.bitwise_and(res, res, mask=thres)
    return c, HandFlag, res

In [None]:
def calSiftFeature(frame):
    """Return the convexity defects of the largest contour in a BGR image.

    Despite the name, no SIFT descriptors are computed: the image is
    converted to grayscale, thresholded at 50, and the convexity defects of
    the largest external contour are returned as the feature set.

    Args:
        frame: BGR image, e.g. the hand image produced by ``HandExtract``.

    Returns:
        The value returned by ``cv2.convexityDefects`` for the largest
        contour, or ``None`` when the thresholded image has no contour.
        NOTE(review): ``cv2.convexityDefects`` is itself expected to return
        ``None`` when the contour has no defects, so callers must handle
        ``None`` either way.
    """
    gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
    _, thres = cv2.threshold(gray, 50, 255, cv2.THRESH_BINARY)

    # cv2.findContours returns 3 values on OpenCV 3.x and 2 values on
    # 2.x/4.x; the contour list is always the second-to-last element.
    cnts = cv2.findContours(thres.copy(), cv2.RETR_EXTERNAL,
                            cv2.CHAIN_APPROX_SIMPLE)[-2]
    if len(cnts) == 0:
        # max() on an empty sequence would raise ValueError.
        return None

    c = max(cnts, key=cv2.contourArea)
    # convexityDefects needs hull *indices*, hence returnPoints=False.
    hull = cv2.convexHull(c, returnPoints=False)
    return cv2.convexityDefects(c, hull)

In [None]:
def calFeatVec(features, centers):
    """Build a bag-of-words histogram of features over vocabulary centers.

    Each feature is assigned to its nearest center (Euclidean distance) and
    the count for that center is incremented.

    Args:
        features: array whose first axis indexes the features; every
            feature is flattened to a vector before comparison (so both
            ``(N, d)`` and ``(N, 1, d)`` inputs are accepted). ``None`` or
            an empty array yields an all-zero histogram.
        centers: 2-D array of shape ``(K, d)`` holding the vocabulary.
            The original implementation only worked for ``K == 100``; the
            size is now taken from ``centers``.

    Returns:
        ``numpy.ndarray`` of shape ``(1, K)`` and dtype float64 with the
        per-center counts.
    """
    centers = np.asarray(centers, dtype=np.float64)
    featVec = np.zeros((1, centers.shape[0]))
    if features is None or len(features) == 0:
        return featVec

    descriptors = np.asarray(features, dtype=np.float64)
    descriptors = descriptors.reshape(len(features), -1)
    for descriptor in descriptors:
        dist = np.sqrt(((centers - descriptor) ** 2).sum(axis=1))
        # argmin gives the nearest center directly (lowest index on ties);
        # a full argsort is unnecessary.
        featVec[0][np.argmin(dist)] += 1
    return featVec

In [None]:
# Cache of (centers, svm) keyed by file paths, so the vocabulary and the SVM
# are read from disk once instead of on every classified frame. Re-running
# this cell resets the cache.
_CLASSIFIER_CACHE = {}


def _loadClassifier(vocab_path="./vocabulary.npy", svm_path="./svm.xml"):
    """Load (once) the vocabulary centers and the trained SVM model."""
    key = (vocab_path, svm_path)
    if key not in _CLASSIFIER_CACHE:
        # NOTE: allow_pickle=True unpickles the file contents; only load
        # vocabulary files from a trusted source.
        _, centers = np.load(vocab_path, allow_pickle=True)

        # On OpenCV 3+/4 `load` is a static factory that RETURNS the model;
        # calling `svm.load(path)` and discarding the result leaves `svm`
        # untrained. Prefer cv2.ml.SVM_load, and otherwise keep whatever
        # object `load` hands back.
        if hasattr(cv2.ml, "SVM_load"):
            svm = cv2.ml.SVM_load(svm_path)
        else:
            svm = cv2.ml.SVM_create()
            loaded = svm.load(svm_path)
            if loaded is not None:
                svm = loaded
        _CLASSIFIER_CACHE[key] = (centers, svm)
    return _CLASSIFIER_CACHE[key]


def testImg(img):
    """Classify the gesture shown in a hand image.

    Features are extracted with ``calSiftFeature``, turned into a histogram
    over the vocabulary stored in ``./vocabulary.npy`` by ``calFeatVec``,
    and classified by the SVM stored in ``./svm.xml``.

    Args:
        img: BGR hand image, e.g. the one produced by ``HandExtract``.

    Returns:
        The predicted class as an ``int``. ``0`` is returned when no
        features could be extracted; the display loop in this notebook
        treats class 0 as "No Gesture".
    """
    centers, svm = _loadClassifier()
    features = calSiftFeature(img)
    if features is None or len(features) == 0:
        return 0
    featVec = calFeatVec(features, centers)
    case = np.float32(featVec)
    # predict returns (retval, results); results holds one row per sample.
    _, results = svm.predict(case)
    return int(np.asarray(results).ravel()[0])

In [None]:
# Open the default camera and request 640x480 frames, the same size the HDMI
# output was configured with.
cap = cv2.VideoCapture(0)
cap.set(cv2.CAP_PROP_FRAME_WIDTH, 640)
cap.set(cv2.CAP_PROP_FRAME_HEIGHT, 480)

text = "The Result is: No Gesture"
result = 0       # class currently shown on screen (0 means no gesture)
res = []         # per-frame classes collected for the current voting window
frame_count = 0  # number of frames collected in the current window

print(cap.isOpened())

# The loop runs until the kernel is interrupted or a read fails. The
# try/finally guarantees that the camera and the HDMI output are released in
# either case (the original cleanup lines were unreachable on interrupt).
try:
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            # Stop instead of printing the same message forever.
            print("Failed to read from camera.")
            break

        img = frame.copy()
        class_num = 0

        # Only the central 320x240 region is searched for a hand.
        roi = img[120:360, 160:480]
        gesture, HandFlag, hand_img = HandExtract(roi)
        if HandFlag:
            class_num = testImg(hand_img)

        # Majority vote over every five frames, then reset the window.
        # Every frame is recorded; the original skipped each sixth frame
        # because the tally branch did not append the current class.
        res.append(class_num)
        frame_count += 1
        if frame_count >= 5:
            result = np.argmax(np.bincount(res))
            frame_count = 0
            res = []

        if result == 0:
            text = "The Result is: No Gesture"
        else:
            text = "The Result is: Gesture " + str(result)

        cv2.putText(img, text, (10, 400), cv2.FONT_HERSHEY_TRIPLEX, 1.0, (0, 0, 255), 1, 4)
        # NOTE(review): assumes the camera honoured the 640x480 request; a
        # frame of a different size would not fit the HDMI frame below.
        outframe = hdmi_out.newframe()
        outframe[:,:,:] = img[:,:,:]
        hdmi_out.writeframe(outframe)
finally:
    cap.release()
    hdmi_out.close()