In [1]:
import cv2
import mediapipe as mp

import json
import pandas as pd
from os import listdir
import re
import time
import numpy as np

import torch


# Short aliases for the MediaPipe solution modules used by the capture loop.
mp_drawing = mp.solutions.drawing_utils  # provides draw_landmarks()
mp_drawing_styles = mp.solutions.drawing_styles  # provides the default hand landmark/connection styles
mp_hands = mp.solutions.hands  # provides the Hands detector and HAND_CONNECTIONS

In [2]:
class Net(torch.nn.Module):
    """Fully connected classifier: 63 -> 40 -> 20 -> 10 -> 1.

    A sigmoid follows every linear layer, including the last one, so the
    single output value lies strictly between 0 and 1.

    The attribute names (``l1`` .. ``l4``, ``sigmoid``) are kept as-is so that
    previously saved checkpoints still map onto this class.
    """

    def __init__(self):
        super().__init__()
        self.l1 = torch.nn.Linear(in_features=63, out_features=40)
        self.l2 = torch.nn.Linear(in_features=40, out_features=20)
        self.l3 = torch.nn.Linear(in_features=20, out_features=10)
        self.l4 = torch.nn.Linear(in_features=10, out_features=1)
        self.sigmoid = torch.nn.Sigmoid()

    def forward(self, x):
        """Run ``x`` (last dimension of size 63) through the four layers."""
        activation = x
        for layer in (self.l1, self.l2, self.l3, self.l4):
            activation = self.sigmoid(layer(activation))
        return activation

# Load the trained classifier. The file holds a whole pickled module (it is
# called directly as `model(inputs)` later), so the `Net` class must already be
# defined when this runs.
# SECURITY: torch.load unpickles the file, which can execute arbitrary code;
# only load checkpoints you trust.
# NOTE(review): on PyTorch releases where torch.load defaults to
# weights_only=True, a whole-module pickle like this needs weights_only=False;
# confirm against the installed version.
# map_location='cpu' keeps the weights on the same device as the inputs, which
# are built from NumPy arrays on the CPU, even if the checkpoint was saved on GPU.
model = torch.load('model.pkl', map_location='cpu')
model.eval()  # inference only

In [3]:
def StandardScaler(x):
    '''Standardize one coordinate axis of a set of landmarks.

    The values are first shifted so that the first element (the first of the
    21 hand landmarks in this notebook) becomes the origin, then z-scored
    using the population mean and standard deviation of the shifted values.

    Args:
        x: Non-empty sequence of numbers (ints or floats). It is not modified.

    Returns:
        A list of floats with the same length as ``x``. If every value is
        identical (standard deviation 0) the result is all zeros instead of
        NaN/inf.

    Raises:
        IndexError: if ``x`` is empty.
    '''
    # Always work on a float copy: with integer input the in-place mean
    # subtraction and division below would raise a casting error.
    x_array = np.array(x, dtype=float)
    x_array -= x_array[0]

    mean = np.mean(x_array)
    std = np.std(x_array)

    x_array -= mean
    # Skip the division for a constant axis to avoid 0/0 -> NaN.
    if std > 0:
        x_array /= std

    return x_array.tolist()


def Landmarks2array(hand_landmarks):
    '''Convert one MediaPipe hand-landmark result into a flat feature vector.

    The x, y and z coordinates are read directly from
    ``hand_landmarks.landmark`` (no temporary text/JSON files are written),
    each axis is standardized independently with ``StandardScaler``, and the
    three axes are concatenated as ``[x0..xN, y0..yN, z0..zN]``.

    Args:
        hand_landmarks: Object exposing an iterable ``landmark`` attribute
            whose items have ``x``, ``y`` and ``z`` attributes, e.g. one entry
            of ``results.multi_hand_landmarks``.

    Returns:
        A 1-D ``numpy.ndarray`` of length ``3 * number_of_landmarks``
        (63 values for the 21 landmarks of a hand).
    '''
    points = list(hand_landmarks.landmark)

    # Standardize each axis on its own, relative to the first landmark.
    x = StandardScaler([pt.x for pt in points])
    y = StandardScaler([pt.y for pt in points])
    z = StandardScaler([pt.z for pt in points])

    # Lay the 21 * 3 = 63 values out as one flat vector for the model.
    return np.array(x + y + z)


In [4]:
# Live demo: grab webcam frames, detect one hand with MediaPipe, draw its
# landmarks, run the classifier on them and overlay the raw model output.
cap = cv2.VideoCapture(0)

# try/finally guarantees the camera is released and the preview window is
# closed even if detection or inference raises part-way through.
try:
  with mp_hands.Hands(min_detection_confidence=0.5, min_tracking_confidence=0.5, max_num_hands=1) as hands:
    while cap.isOpened():
      success, image = cap.read()
      if not success:
        print("Ignoring empty camera frame.")
        # If loading a video, use 'break' instead of 'continue'.
        continue

      # Flip the image horizontally for a later selfie-view display, and convert the BGR image to RGB.
      image = cv2.cvtColor(cv2.flip(image, 1), cv2.COLOR_BGR2RGB)
      # To improve performance, optionally mark the image as not writeable to pass by reference.
      image.flags.writeable = False
      results = hands.process(image)

      # Draw the hand annotations on the image.
      image.flags.writeable = True
      image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
      if results.multi_hand_landmarks:
        for hand_landmarks in results.multi_hand_landmarks:
          mp_drawing.draw_landmarks(
              image,
              hand_landmarks,
              mp_hands.HAND_CONNECTIONS,
              mp_drawing_styles.get_default_hand_landmarks_style(),
              mp_drawing_styles.get_default_hand_connections_style())

          # Classify on every frame. To classify on demand instead, gate the
          # three lines below on a key press (e.g. cv2.waitKey(...) == ord('t')).
          inputs = torch.from_numpy(Landmarks2array(hand_landmarks)).float()
          # Inference only: no autograd graph needed.
          with torch.no_grad():
            outputs = model(inputs)
          cv2.putText(image, str(outputs.data), (40, 80), cv2.FONT_HERSHEY_COMPLEX, 1, (0, 255, 0), 1, 4)

      cv2.imshow('MediaPipe Hands', image)
      if cv2.waitKey(100) & 0xFF == 27:  # Esc quits
        break
finally:
  cap.release()
  cv2.destroyAllWindows()