# 手势识别七分类-摄像头

同济子豪兄 https://space.bilibili.com/1900783

2021-07-12

## 导入工具包

In [1]:
import time
import cv2
import numpy as np

from matplotlib import pyplot as plt
%matplotlib inline

import mediapipe as mp
from mediapipe.framework.formats import landmark_pb2
from mediapipe.tasks import python
from mediapipe.tasks.python import vision

In [2]:
# 导入手部关键点检测模型
mp_hands = mp.solutions.hands
mp_drawing = mp.solutions.drawing_utils
mp_drawing_styles = mp.solutions.drawing_styles

# 载入手势识别七分类模型
base_options = python.BaseOptions(model_asset_path='gesture_recognizer.task')



options = vision.GestureRecognizerOptions(base_options=base_options, 
                                          num_hands=4, # 最多检测多少只手
                                         )

# def print_result(result, timestamp_ms):
#     pass
# options = vision.GestureRecognizerOptions(base_options=base_options, 
#                                           num_hands=4, # 最多检测多少只手
#                                           running_mode=mp.tasks.vision.RunningMode.LIVE_STREAM,
#                                           result_callback=print_result
#                                          )

recognizer = vision.GestureRecognizer.create_from_options(options)

W20230417 16:52:18.386955  9390 gesture_recognizer_graph.cc:128] Hand Gesture Recognizer contains CPU only ops. Sets HandGestureRecognizerGraph acceleration to Xnnpack.
I20230417 16:52:18.387935  9390 hand_gesture_recognizer_graph.cc:249] Custom gesture classifier is not defined.
INFO: Created TensorFlow Lite XNNPACK delegate for CPU.


In [3]:
# 字符串大小间距比例因子
scaler = 1

## 逐帧处理函数

In [4]:
def process_frame(img):
    '''输入BGR格式的 numpy array，输出BGR格式的 numpy array'''
    global scaler
    
    # 记录该帧开始处理的时间
    start_time = time.time()
    
    # 水平翻转
    # 水平镜像翻转图像，使图中左右手与真实左右手对应
    # 参数 1：水平翻转，0：竖直翻转，-1：水平和竖直都翻转
    img = cv2.flip(img, 1)
    
    # BGR 转 RGB
    img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    image = mp.Image(image_format=mp.ImageFormat.SRGB, data=img_rgb)
    
    # 手势识别七分类结果
    recognition_result = recognizer.recognize(image)
    
    # recognition_result = recognizer.recognize_async(image, timestamp_ms=100)
    
    # 手势识别预测结果：类别与置信度
    top_gesture = recognition_result.gestures
    print("top_g: ", top_gesture)
    
    # 左右手预测结果
    handness = recognition_result.handedness
    print("handness: ", handness)
    
    # 手部关键点坐标（相对图像宽高归一化）
    hand_landmarks = recognition_result.hand_landmarks
    
    img = image.numpy_view()
    
    # 在画面左上角写预测结果
    for i in range(len(hand_landmarks)): # 遍历每一只手
        id_str = str(i+1) # 手的ID号
        handness_str = handness[i][0].category_name # 左右手信息
        gesture_str = top_gesture[i][0].category_name # 手势预测类别
        confidence = '{:.2f}'.format(top_gesture[i][0].score)
        opencv_str = '{:<2}{:<6}{:<5}{}'.format(id_str, handness_str, confidence, gesture_str)
        # print(opencv_str)

        # 图片，添加的文字，左上角坐标，字体，字体大小，颜色，字体粗细
        color = (0,0,255) if i % 2 == 0 else (255,0,0)
        img = cv2.putText(img, opencv_str, (25 * scaler, 60 + 60 * scaler * (i+1)), cv2.FONT_HERSHEY_SIMPLEX, 1.25 * scaler, color, 2 * scaler)
    
    print("hand_landmarks", hand_landmarks)
    # 可视化手部关键点检测结果
    for hand_landmarks in hand_landmarks:
        hand_landmarks_proto = landmark_pb2.NormalizedLandmarkList()
        hand_landmarks_proto.landmark.extend([landmark_pb2.NormalizedLandmark(x=landmark.x, y=landmark.y, z=landmark.z) for landmark in hand_landmarks])
        print(hand_landmarks_proto)
        mp_drawing.draw_landmarks(
            img,
            hand_landmarks_proto,
            mp_hands.HAND_CONNECTIONS,
            mp_drawing_styles.get_default_hand_landmarks_style(),
            mp_drawing_styles.get_default_hand_connections_style())
    
    # 记录该帧处理完毕的时间
    end_time = time.time()
    # 计算每秒处理图像帧数FPS
    FPS = 1/(end_time - start_time)

    # 在画面上写字：图片，字符串，左上角坐标，字体，字体大小，颜色，字体粗细
    scaler = 1 # 文字大小
    FPS_string = 'FPS  '+str(int(FPS)) # 写在画面上的字符串
    img = cv2.putText(img, FPS_string, (25 * scaler, 60 * scaler), cv2.FONT_HERSHEY_SIMPLEX, 1.25 * scaler, (255, 0, 255), 2 * scaler)
    
    img = img[:,:,::-1] # RGB 转 BGR
    return img


## 调用摄像头实时画面

In [6]:
# 调用摄像头逐帧实时处理模板
# 不需修改任何代码，只需修改process_frame函数即可
# 同济子豪兄 2021-7-8

# 导入opencv-python
import cv2
import time

# 获取摄像头，传入0表示获取系统默认摄像头
cap = cv2.VideoCapture(1)

# 打开cap
cap.open(0)

# 无限循环，直到break被触发
while cap.isOpened():
    
    # 获取画面
    success, frame = cap.read()
    
    if not success: # 如果获取画面不成功，则退出
        print('获取画面不成功，退出')
        break
    
    ## 逐帧处理
    frame = process_frame(frame)
    
    # 展示处理后的三通道图像
    cv2.imshow('my_window',frame)
    
    key_pressed = cv2.waitKey(60) # 每隔多少毫秒毫秒，获取键盘哪个键被按下
    # print('键盘上被按下的键：', key_pressed)

    if key_pressed in [ord('q'),27]: # 按键盘上的q或esc退出（在英文输入法下）
        break
    
    time.sleep(1)
# 关闭摄像头
cap.release()

# 关闭图像窗口
cv2.destroyAllWindows()

[ WARN:0@8.624] global /io/opencv/modules/videoio/src/cap_v4l.cpp (902) open VIDEOIO(V4L2:/dev/video1): can't open camera by index


top_g:  [[Category(index=-1, score=0.6962020993232727, display_name='', category_name='Open_Palm')], [Category(index=-1, score=0.7948184013366699, display_name='', category_name='Open_Palm')]]
handness:  [[Category(index=1, score=0.9288433790206909, display_name='Right', category_name='Right')], [Category(index=0, score=0.9481089115142822, display_name='Left', category_name='Left')]]
hand_landmarks [[NormalizedLandmark(x=0.7983127236366272, y=0.8592564463615417, z=3.735851805686252e-07, visibility=0.0, presence=0.0), NormalizedLandmark(x=0.7063477039337158, y=0.8106436729431152, z=-0.053631994873285294, visibility=0.0, presence=0.0), NormalizedLandmark(x=0.6315605044364929, y=0.7125087380409241, z=-0.08613883703947067, visibility=0.0, presence=0.0), NormalizedLandmark(x=0.576614499092102, y=0.622820258140564, z=-0.11381294578313828, visibility=0.0, presence=0.0), NormalizedLandmark(x=0.5228546857833862, y=0.5612127780914307, z=-0.1423361599445343, visibility=0.0, presence=0.0), Normali



top_g:  [[Category(index=-1, score=0.6937075257301331, display_name='', category_name='Open_Palm')], [Category(index=-1, score=0.8024998307228088, display_name='', category_name='Open_Palm')]]
handness:  [[Category(index=1, score=0.9365276098251343, display_name='Right', category_name='Right')], [Category(index=0, score=0.9448603987693787, display_name='Left', category_name='Left')]]
hand_landmarks [[NormalizedLandmark(x=0.7981182336807251, y=0.8609946966171265, z=3.7021357002231525e-07, visibility=0.0, presence=0.0), NormalizedLandmark(x=0.7072067856788635, y=0.8119121789932251, z=-0.0554911307990551, visibility=0.0, presence=0.0), NormalizedLandmark(x=0.6325018405914307, y=0.7149474024772644, z=-0.08934742957353592, visibility=0.0, presence=0.0), NormalizedLandmark(x=0.5790634751319885, y=0.6241581439971924, z=-0.11804354190826416, visibility=0.0, presence=0.0), NormalizedLandmark(x=0.5255609154701233, y=0.5594772100448608, z=-0.14773154258728027, visibility=0.0, presence=0.0), Norma

top_g:  [[Category(index=-1, score=0.6970793604850769, display_name='', category_name='Open_Palm')], [Category(index=-1, score=0.7821234464645386, display_name='', category_name='Open_Palm')]]
handness:  [[Category(index=1, score=0.9416312575340271, display_name='Right', category_name='Right')], [Category(index=0, score=0.9472101926803589, display_name='Left', category_name='Left')]]
hand_landmarks [[NormalizedLandmark(x=0.8002603650093079, y=0.8586972951889038, z=3.7497366633942875e-07, visibility=0.0, presence=0.0), NormalizedLandmark(x=0.7100613117218018, y=0.8094390630722046, z=-0.05690491572022438, visibility=0.0, presence=0.0), NormalizedLandmark(x=0.6343566179275513, y=0.7128171920776367, z=-0.09215068817138672, visibility=0.0, presence=0.0), NormalizedLandmark(x=0.5799331068992615, y=0.620802640914917, z=-0.12188161164522171, visibility=0.0, presence=0.0), NormalizedLandmark(x=0.5271227359771729, y=0.5540696978569031, z=-0.15249140560626984, visibility=0.0, presence=0.0), Norma

top_g:  [[Category(index=-1, score=0.6990806460380554, display_name='', category_name='Open_Palm')], [Category(index=-1, score=0.7854285836219788, display_name='', category_name='Open_Palm')]]
handness:  [[Category(index=1, score=0.9474290609359741, display_name='Right', category_name='Right')], [Category(index=0, score=0.9387540221214294, display_name='Left', category_name='Left')]]
hand_landmarks [[NormalizedLandmark(x=0.7975132465362549, y=0.8563352823257446, z=3.8983534977887757e-07, visibility=0.0, presence=0.0), NormalizedLandmark(x=0.709072470664978, y=0.8076794147491455, z=-0.05475744232535362, visibility=0.0, presence=0.0), NormalizedLandmark(x=0.6343628764152527, y=0.7108147144317627, z=-0.08887461572885513, visibility=0.0, presence=0.0), NormalizedLandmark(x=0.5810683369636536, y=0.6196309924125671, z=-0.11786113679409027, visibility=0.0, presence=0.0), NormalizedLandmark(x=0.5282689332962036, y=0.555130124092102, z=-0.14795030653476715, visibility=0.0, presence=0.0), Normal

top_g:  [[Category(index=-1, score=0.6912922263145447, display_name='', category_name='Open_Palm')], [Category(index=-1, score=0.7935149073600769, display_name='', category_name='Open_Palm')]]
handness:  [[Category(index=1, score=0.9388414621353149, display_name='Right', category_name='Right')], [Category(index=0, score=0.954933226108551, display_name='Left', category_name='Left')]]
hand_landmarks [[NormalizedLandmark(x=0.7980126142501831, y=0.8548680543899536, z=3.8036094451854296e-07, visibility=0.0, presence=0.0), NormalizedLandmark(x=0.7094370722770691, y=0.8076992034912109, z=-0.05692090094089508, visibility=0.0, presence=0.0), NormalizedLandmark(x=0.6348649263381958, y=0.7120461463928223, z=-0.09192505478858948, visibility=0.0, presence=0.0), NormalizedLandmark(x=0.5815529823303223, y=0.6213374137878418, z=-0.12135499715805054, visibility=0.0, presence=0.0), NormalizedLandmark(x=0.5290571451187134, y=0.5564597249031067, z=-0.15177585184574127, visibility=0.0, presence=0.0), Norma

top_g:  [[Category(index=-1, score=0.6889620423316956, display_name='', category_name='Open_Palm')], [Category(index=-1, score=0.7757895588874817, display_name='', category_name='Open_Palm')]]
handness:  [[Category(index=1, score=0.9420912861824036, display_name='Right', category_name='Right')], [Category(index=0, score=0.9100846648216248, display_name='Left', category_name='Left')]]
hand_landmarks [[NormalizedLandmark(x=0.7943149209022522, y=0.8658655285835266, z=3.596008184558741e-07, visibility=0.0, presence=0.0), NormalizedLandmark(x=0.7064894437789917, y=0.8173069357872009, z=-0.05497182160615921, visibility=0.0, presence=0.0), NormalizedLandmark(x=0.6353347301483154, y=0.7213696837425232, z=-0.08830045908689499, visibility=0.0, presence=0.0), NormalizedLandmark(x=0.5855563879013062, y=0.6306426525115967, z=-0.11663293838500977, visibility=0.0, presence=0.0), NormalizedLandmark(x=0.5345975160598755, y=0.5650847554206848, z=-0.1457328200340271, visibility=0.0, presence=0.0), Normal

top_g:  [[Category(index=-1, score=0.6909418702125549, display_name='', category_name='Open_Palm')], [Category(index=-1, score=0.7431052327156067, display_name='', category_name='Open_Palm')]]
handness:  [[Category(index=1, score=0.9072278738021851, display_name='Right', category_name='Right')], [Category(index=0, score=0.9773321151733398, display_name='Left', category_name='Left')]]
hand_landmarks [[NormalizedLandmark(x=0.787664532661438, y=0.8616583943367004, z=3.5485896887621493e-07, visibility=0.0, presence=0.0), NormalizedLandmark(x=0.6958186030387878, y=0.8122316598892212, z=-0.05193308740854263, visibility=0.0, presence=0.0), NormalizedLandmark(x=0.6219182014465332, y=0.7120411396026611, z=-0.08225817233324051, visibility=0.0, presence=0.0), NormalizedLandmark(x=0.5696606040000916, y=0.6199545860290527, z=-0.1081206426024437, visibility=0.0, presence=0.0), NormalizedLandmark(x=0.5199571847915649, y=0.5587562918663025, z=-0.13435545563697815, visibility=0.0, presence=0.0), Normal

## 按`q`键或`Esc`键关闭画面