# 手势识别七分类-摄像头

同济子豪兄 https://space.bilibili.com/1900783

2021-07-12

## 导入工具包

In [1]:
import time
import cv2
import numpy as np

from matplotlib import pyplot as plt
%matplotlib inline

import mediapipe as mp
from mediapipe.framework.formats import landmark_pb2
from mediapipe.tasks import python
from mediapipe.tasks.python import vision

In [2]:
# 导入手部关键点检测模型
mp_hands = mp.solutions.hands
mp_drawing = mp.solutions.drawing_utils
mp_drawing_styles = mp.solutions.drawing_styles

# 载入手势识别七分类模型
base_options = python.BaseOptions(model_asset_path='gesture_recognizer.task')



options = vision.GestureRecognizerOptions(base_options=base_options, 
                                          num_hands=4, # 最多检测多少只手
                                         )

# def print_result(result, timestamp_ms):
#     pass
# options = vision.GestureRecognizerOptions(base_options=base_options, 
#                                           num_hands=4, # 最多检测多少只手
#                                           running_mode=mp.tasks.vision.RunningMode.LIVE_STREAM,
#                                           result_callback=print_result
#                                          )

recognizer = vision.GestureRecognizer.create_from_options(options)

RuntimeError: File loading is not yet supported on Windows

In [18]:
# 字符串大小间距比例因子
scaler = 1

## 逐帧处理函数

In [19]:
def process_frame(img):
    '''输入BGR格式的 numpy array，输出BGR格式的 numpy array'''
    global scaler
    
    # 记录该帧开始处理的时间
    start_time = time.time()
    
    # 水平翻转
    # 水平镜像翻转图像，使图中左右手与真实左右手对应
    # 参数 1：水平翻转，0：竖直翻转，-1：水平和竖直都翻转
    img = cv2.flip(img, 1)
    
    # BGR 转 RGB
    img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    image = mp.Image(image_format=mp.ImageFormat.SRGB, data=img_rgb)
    
    # 手势识别七分类结果
    recognition_result = recognizer.recognize(image)
    
    # recognition_result = recognizer.recognize_async(image, timestamp_ms=100)
    
    # 手势识别预测结果：类别与置信度
    top_gesture = recognition_result.gestures
    
    # 左右手预测结果
    handness = recognition_result.handedness
    
    # 手部关键点坐标（相对图像宽高归一化）
    hand_landmarks = recognition_result.hand_landmarks
    
    img = image.numpy_view()
    
    # 在画面左上角写预测结果
    for i in range(len(hand_landmarks)): # 遍历每一只手
        id_str = str(i+1) # 手的ID号
        handness_str = handness[i][0].category_name # 左右手信息
        gesture_str = top_gesture[i][0].category_name # 手势预测类别
        confidence = '{:.2f}'.format(top_gesture[i][0].score)
        opencv_str = '{:<2}{:<6}{:<5}{}'.format(id_str, handness_str, confidence, gesture_str)
        # print(opencv_str)

        # 图片，添加的文字，左上角坐标，字体，字体大小，颜色，字体粗细
        color = (0,0,255) if i % 2 == 0 else (255,0,0)
        img = cv2.putText(img, opencv_str, (25 * scaler, 60 + 60 * scaler * (i+1)), cv2.FONT_HERSHEY_SIMPLEX, 1.25 * scaler, color, 2 * scaler)
    
    # 可视化手部关键点检测结果
    for hand_landmarks in hand_landmarks:
        hand_landmarks_proto = landmark_pb2.NormalizedLandmarkList()
        hand_landmarks_proto.landmark.extend([landmark_pb2.NormalizedLandmark(x=landmark.x, y=landmark.y, z=landmark.z) for landmark in hand_landmarks])

        mp_drawing.draw_landmarks(
            img,
            hand_landmarks_proto,
            mp_hands.HAND_CONNECTIONS,
            mp_drawing_styles.get_default_hand_landmarks_style(),
            mp_drawing_styles.get_default_hand_connections_style())
    
    # 记录该帧处理完毕的时间
    end_time = time.time()
    # 计算每秒处理图像帧数FPS
    FPS = 1/(end_time - start_time)

    # 在画面上写字：图片，字符串，左上角坐标，字体，字体大小，颜色，字体粗细
    scaler = 1 # 文字大小
    FPS_string = 'FPS  '+str(int(FPS)) # 写在画面上的字符串
    img = cv2.putText(img, FPS_string, (25 * scaler, 60 * scaler), cv2.FONT_HERSHEY_SIMPLEX, 1.25 * scaler, (255, 0, 255), 2 * scaler)
    
    img = img[:,:,::-1] # RGB 转 BGR
    return img


## 调用摄像头实时画面

In [20]:
# 调用摄像头逐帧实时处理模板
# 不需修改任何代码，只需修改process_frame函数即可
# 同济子豪兄 2021-7-8

# 导入opencv-python
import cv2
import time

# 获取摄像头，传入0表示获取系统默认摄像头
cap = cv2.VideoCapture(1)

# 打开cap
cap.open(0)

# 无限循环，直到break被触发
while cap.isOpened():
    
    # 获取画面
    success, frame = cap.read()
    
    if not success: # 如果获取画面不成功，则退出
        print('获取画面不成功，退出')
        break
    
    ## 逐帧处理
    frame = process_frame(frame)
    
    # 展示处理后的三通道图像
    cv2.imshow('my_window',frame)
    
    key_pressed = cv2.waitKey(60) # 每隔多少毫秒毫秒，获取键盘哪个键被按下
    # print('键盘上被按下的键：', key_pressed)

    if key_pressed in [ord('q'),27]: # 按键盘上的q或esc退出（在英文输入法下）
        break
    
# 关闭摄像头
cap.release()

# 关闭图像窗口
cv2.destroyAllWindows()

## 按`q`键或`Esc`键关闭画面