In [9]:
import cv2
import mediapipe as mp

import json
import pandas as pd
from os import listdir
import re
import time
import numpy as np

import torch


# Short aliases for the MediaPipe solution modules: landmark drawing helpers,
# their predefined drawing styles, and the hand-tracking solution.
mp_drawing = mp.solutions.drawing_utils
mp_drawing_styles = mp.solutions.drawing_styles
mp_hands = mp.solutions.hands

In [10]:
class Net(torch.nn.Module):
    """Small fully connected classifier: 63 inputs -> 21 -> 5 -> 3 output scores.

    The two hidden layers use a sigmoid activation; the final layer returns
    raw (un-normalised) scores, one per class.
    """

    def __init__(self):
        super().__init__()
        # Attribute names define the state-dict / pickle layout, so they must
        # stay exactly l1, l2, l3 and sigmoid.
        self.l1 = torch.nn.Linear(63, 21)
        self.l2 = torch.nn.Linear(21, 5)
        self.l3 = torch.nn.Linear(5, 3)
        self.sigmoid = torch.nn.Sigmoid()

    def forward(self, x):
        """Return the 3 class scores for a feature tensor whose last dimension is 63."""
        hidden = self.sigmoid(self.l1(x))
        hidden = self.sigmoid(self.l2(hidden))
        return self.l3(hidden)


# Load the trained gesture classifier from disk.
# SECURITY: torch.load unpickles arbitrary Python objects, so only load a
# 'model.pkl' that comes from a trusted source.
# map_location='cpu' keeps the parameters on the CPU, matching the CPU tensors
# the inference loop feeds to the model (and lets a checkpoint saved on a GPU
# machine load on a CPU-only one).
# NOTE(review): the file presumably holds a whole pickled Net instance rather
# than a state_dict; on PyTorch >= 2.6 that additionally needs
# weights_only=False because the default changed -- confirm the target version.
model=torch.load('model.pkl',map_location='cpu')

In [11]:
def StandardScaler(x):
    '''Standardize a 1-D sequence of coordinates to zero mean and unit variance.

    The values are first shifted so that element 0 becomes the origin, then
    centred on their mean and divided by their (population) standard deviation.

    Parameters
    ----------
    x : sequence of numbers (ints or floats)

    Returns
    -------
    list of float
        The standardized values, in the same order as the input. An empty
        input yields an empty list; a constant input (zero standard deviation)
        yields all zeros instead of NaN.
    '''
    # Force a float copy: in-place float arithmetic on an integer array raises.
    x_array=np.array(x,dtype=float)
    if x_array.size==0:
        return []

    x_array-=x_array[0]

    mean=np.mean(x_array)
    std=np.std(x_array)

    x_array-=mean
    # All values identical: the centred data is already all zeros, and dividing
    # by a zero std would turn it into NaN.
    if std!=0:
        x_array/=std

    return x_array.tolist()

def Landmarks2array(hand_landmarks):
    """Convert a hand-landmark object into a flat, normalised feature vector.

    Each coordinate axis (x, then y, then z) is collected across all entries of
    ``hand_landmarks.landmark``, normalised independently with StandardScaler,
    and the three results are concatenated as [x..., y..., z...].

    Returns a 1-D numpy array with three values per landmark.
    """
    points = hand_landmarks.landmark
    features = []
    for axis in ('x', 'y', 'z'):
        features += StandardScaler([getattr(point, axis) for point in points])
    return np.array(features)

def num_to01(num):
    """Clamp ``num`` to the closed interval [0, 1].

    Values below 0 become 0, values above 1 become 1, and anything in between
    is returned unchanged.
    """
    if num < 0:
        return 0
    return 1 if num > 1 else num



![MediaPipe hand landmark indices](mediapipe_handlandmarks.jpg)

In [12]:
# Landmark indices whose pixel positions span the eraser rectangle (see the
# hand-landmark diagram): in MediaPipe's numbering 8 is the index fingertip
# and 17 is the base of the pinky.
start_landmark=8
end_landmark=17

# Pen state in pixel coordinates. A falsy last_point means "pen lifted", so the
# next writing frame starts a new stroke instead of drawing a connecting line.
last_point=[]
currunt_point=[]

cap = cv2.VideoCapture(0)
width=int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
height=int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

# Single-channel drawing board that doubles as a display mask:
# 255 = blank (camera pixel shown), 0 = ink (camera pixel blacked out).
board=np.full((height,width),255,dtype=np.uint8)

try:
  with mp_hands.Hands(min_detection_confidence=0.75, min_tracking_confidence=0.75, max_num_hands=1) as hands:
    while cap.isOpened():
      success, image = cap.read()
      if not success:
        print("Ignoring empty camera frame.")
        continue

      # Flip the image horizontally for a later selfie-view display, and convert the BGR image to RGB.
      image = cv2.cvtColor(cv2.flip(image, 1), cv2.COLOR_BGR2RGB)
      # To improve performance, optionally mark the image as not writeable to pass by reference.
      image.flags.writeable = False
      results = hands.process(image)

      # Draw the hand annotations on the image.
      image.flags.writeable = True
      image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
      if results.multi_hand_landmarks: # with hands
        for hand_landmarks in results.multi_hand_landmarks:
          mp_drawing.draw_landmarks(image,hand_landmarks,mp_hands.HAND_CONNECTIONS)

          inputs=torch.from_numpy(Landmarks2array(hand_landmarks)).float()
          # Classify the gesture; no gradients are needed for inference.
          with torch.no_grad():
            outputs=model(inputs)
          _,predicted=torch.max(outputs,dim=0)
          # 0 for write/ 1 for erase/ 2 for others
          gesture=predicted.item()

          if gesture==0: # write
            cv2.putText(image, 'Writing', (10, 30),cv2.FONT_HERSHEY_COMPLEX,1,(0, 255, 0),1,4)
            fingertip=hand_landmarks.landmark[mp_hands.HandLandmark.INDEX_FINGER_TIP]
            # Returns None when the normalized point lies outside the frame.
            currunt_point = mp_drawing._normalized_to_pixel_coordinates(fingertip.x, fingertip.y, width, height)

            if currunt_point is None:
              # Fingertip left the frame: lift the pen rather than crash in cv2.line.
              last_point=[]
            else:
              if last_point:
                cv2.line(board, last_point, currunt_point, (0), 4, 4)
              last_point=currunt_point

          elif gesture==1: # erase
            cv2.putText(image, 'Erasing', (10, 30),cv2.FONT_HERSHEY_COMPLEX,1,(0, 255, 0),1,4)

            # Clamp both corners into the frame so the conversion to pixels succeeds.
            start_lm=hand_landmarks.landmark[start_landmark]
            end_lm=hand_landmarks.landmark[end_landmark]
            start_px=mp_drawing._normalized_to_pixel_coordinates(num_to01(start_lm.x), num_to01(start_lm.y), width, height)
            end_px=mp_drawing._normalized_to_pixel_coordinates(num_to01(end_lm.x), num_to01(end_lm.y), width, height)

            if start_px is not None and end_px is not None:
              (start_x,start_y)=start_px
              (end_x,end_y)=end_px

              cv2.rectangle(image, (start_x,start_y), (end_x,end_y), (255,0,0),4)

              # Order the corners: depending on hand orientation the start
              # corner may be right of / below the end corner, and a reversed
              # slice would be empty and erase nothing.
              left,right=sorted((start_x,end_x))
              top,bottom=sorted((start_y,end_y))
              board[top:bottom,left:right]=255
            last_point=[]

          else: # others
            cv2.putText(image, 'Try writing/Erasing', (10, 30),cv2.FONT_HERSHEY_COMPLEX,1,(0, 255, 0),1,4)
            last_point=[]

      else: # without hands
        cv2.putText(image, 'No hands', (10, 30),cv2.FONT_HERSHEY_COMPLEX,1,(255, 0, 0),1,4)
        last_point=[]

      # Black out the camera pixels wherever the board holds ink.
      image=cv2.bitwise_and(image, image,mask=board)
      cv2.imshow('Air writing', image)
      if cv2.waitKey(100) & 0xFF == 27: # press Esc to quit
        break
finally:
  # Always free the camera and close the preview window, even after an
  # exception or a kernel interrupt.
  cap.release()
  cv2.destroyAllWindows()