# 导入工具包

In [1]:
import cv2
import mediapipe as mp

#进度条库
from tqdm import tqdm
import time
import matplotlib.pyplot as plt

#使用ipython魔法方法，将绘制得到的图像直接嵌入在notebook单元格中
%matplotlib inline

# 导入手部关键点检测模型

In [2]:
#导入solution
mp_hands = mp.solutions.hands

#导入模型
hands = mp_hands.Hands(static_image_mode=False,       #是静态图片还是连续视频帧
                      max_num_hands=4,                #最多检测几只手
                      min_detection_confidence=0.5,   #置信度阈值
                      min_tracking_confidence=0.5,    #追踪阈值
                      )

#导入绘图函数
mpDraw = mp.solutions.drawing_utils

## 一个问号查找用法

In [8]:
mp_hands.Hands?

[1;31mInit signature:[0m
[0mmp_hands[0m[1;33m.[0m[0mHands[0m[1;33m([0m[1;33m
[0m    [0mstatic_image_mode[0m[1;33m=[0m[1;32mFalse[0m[1;33m,[0m[1;33m
[0m    [0mmax_num_hands[0m[1;33m=[0m[1;36m2[0m[1;33m,[0m[1;33m
[0m    [0mmin_detection_confidence[0m[1;33m=[0m[1;36m0.5[0m[1;33m,[0m[1;33m
[0m    [0mmin_tracking_confidence[0m[1;33m=[0m[1;36m0.5[0m[1;33m,[0m[1;33m
[0m[1;33m)[0m[1;33m[0m[1;33m[0m[0m
[1;31mDocstring:[0m     
MediaPipe Hands.

MediaPipe Hands processes an RGB image and returns the hand landmarks and
handedness (left v.s. right hand) of each detected hand.

Note that it determines handedness assuming the input image is mirrored,
i.e., taken with a front-facing/selfie camera (
https://en.wikipedia.org/wiki/Front-facing_camera) with images flipped
horizontally. If that is not the case, use, for instance, cv2.flip(image, 1)
to flip the image first for a correct handedness output.

Please refer to https://solutions.mediap

## 两个问号查找源代码

In [9]:
mp_hands.Hands??

[1;31mInit signature:[0m
[0mmp_hands[0m[1;33m.[0m[0mHands[0m[1;33m([0m[1;33m
[0m    [0mstatic_image_mode[0m[1;33m=[0m[1;32mFalse[0m[1;33m,[0m[1;33m
[0m    [0mmax_num_hands[0m[1;33m=[0m[1;36m2[0m[1;33m,[0m[1;33m
[0m    [0mmin_detection_confidence[0m[1;33m=[0m[1;36m0.5[0m[1;33m,[0m[1;33m
[0m    [0mmin_tracking_confidence[0m[1;33m=[0m[1;36m0.5[0m[1;33m,[0m[1;33m
[0m[1;33m)[0m[1;33m[0m[1;33m[0m[0m
[1;31mSource:[0m        
[1;32mclass[0m [0mHands[0m[1;33m([0m[0mSolutionBase[0m[1;33m)[0m[1;33m:[0m[1;33m
[0m  [1;34m"""MediaPipe Hands.

  MediaPipe Hands processes an RGB image and returns the hand landmarks and
  handedness (left v.s. right hand) of each detected hand.

  Note that it determines handedness assuming the input image is mirrored,
  i.e., taken with a front-facing/selfie camera (
  https://en.wikipedia.org/wiki/Front-facing_camera) with images flipped
  horizontally. If that is not the case, use, for insta

### 处理单帧的函数

In [10]:
# 处理帧函数
def process_frame(img):
    
    #记录该帧开始处理的时间
    start_time = time.time()
    
    #获取图像的宽高
    h,w = img.shape[0],img.shape[1]
    
    
    # 水平镜像翻转图像，使得图中的左右手与真实左右手相对应
    #参数，1：水平翻转，0：竖直翻转，-1：水平和竖直都翻转
    img = cv2.flip(img,1)
    # BGR转RGB
    img_RGB = cv2.cvtColor(img,cv2.COLOR_BGRA2RGB)

    #将RGB图像输入模型。获取预测结果
    results = hands.process(img_RGB)
    
    
    if results.multi_hand_landmarks:   #如果检测到手
    
        handness_str = ''
        index_finger_tip_str = ''
        for hand_idx in range(len(results.multi_hand_landmarks)):

            #获取该手的21个关键点坐标
            hand_21 = results.multi_hand_landmarks[hand_idx] 

            #可视化关键点及骨架连线
            mpDraw.draw_landmarks(img,hand_21,connections = mp_hands.HAND_CONNECTIONS)

            #记录左右手信息
            temp_handness = results.multi_handedness[hand_idx].classification[0].label
    #        handness_str += str(hand_idx) + ':' + temp_handness + ' '       
            handness_str += '{}:{} '.format(hand_idx,temp_handness)

            #获取手腕根部深度坐标
            cz0 = hand_21.landmark[0].z

            for i in range(21):   #遍历该手的21个关键点

                #获取3D坐标
                cx = int(hand_21.landmark[i].x * w)
                cy = int(hand_21.landmark[i].y * h)
                cz = hand_21.landmark[i].z
                depth_z = cz0 - cz

                #用圆的半径反映深度大小
                radius = int(6 * (1 + depth_z))

                if i == 0:    #手腕
                    img = cv2.circle(img,(cx,cy),radius * 2,(0,0,255),-1)
                if i == 8:   #食指指尖
                    img = cv2.circle(img,(cx,cy),radius * 2,(193,184,67),-1)
                    index_finger_tip_str += '{}:{} '.format(hand_idx,depth_z)
                if i in [1,5,9,13,17]:  #指根
                    img = cv2.circle(img,(cx,cy),radius,(19,14,67),-1)
                if i in [2,6,10,14,18]:  #第一指节
                    img = cv2.circle(img,(cx,cy),radius,(122,164,67),-1)                
                if i in [3,7,11,15,19]:  #第二指节
                    img = cv2.circle(img,(cx,cy),radius,(12,150,89),-1)                
                if i in [4,12,16,20]:  #指尖（除食指指尖）
                    img = cv2.circle(img,(cx,cy),radius,(223,155,60),-1)            

        scaler = 1
        img = cv2.putText(img,handness_str,(25 * scaler,100 * scaler),cv2.FONT_HERSHEY_SIMPLEX,1.25 * scaler,(255,0,0),2 * scaler)
        img = cv2.putText(img,index_finger_tip_str,(25 * scaler,150 * scaler),cv2.FONT_HERSHEY_SIMPLEX,1.25 * scaler,(255,0,255),2 * scaler)
        
        #记录该帧处理完毕的时间
        end_time = time.time()
        #计算每秒处理图像帧数FPS
        FPS = 1/(end_time - start_time)   #FPS大于30，成为实时目标检测算法
        
        #在图像上写FPS数值，参数依次为：图片、添加的文字、左上角坐标、字体、字体大小、颜色、字体粗细
        scaler = 1
        img = cv2.putText(img,'FPS '+ str(int(FPS)), (25 * scaler,50 * scaler),cv2.FONT_HERSHEY_SIMPLEX,1.25 * scaler,(255,0,0),2 * scaler)
    return img

### 视频逐帧处理（模板）

In [28]:
## 视频逐帧处理模板
## 不需修改任何代码，只需定义process_frame函数即可

def generate_video(input_path = './videos/hands.video.mp4'):
    filehead = input_path.split('/')[-1]
    output_path = 'out_' + filehead
    
    print('视频开始处理',input_path)
    
    #获取视频总帧数
    cap = cv2.VideoCapture(input_path)
    frame_count = 0
    while cap.isOpened():
        success,frame = cap.read()
        frame_count += 1
        if not success:
            break
    cap.release()
    print('视频总帧数为:',frame_count)
        
        
    #设置输出视频参数
    cap = cv2.VideoCapture(input_path)
    frame_size = (cap.get(cv2.CAP_PROP_FRAME_WIDTH),cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    
    #fourcc = int(cap.get(cv2.CAP_PROP_FOURCC))
    #fourcc = cv2.VideoWriter_fourcc(*'XVID')
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    fps = cap.get(cv2.CAP_PROP_FPS)
    
    out = cv2.VideoWriter(output_path,fourcc,fps,(int(frame_size[0]),int(frame_size[1])))
    
    
    #进度条绑定视频总帧数
    with tqdm(total = frame_count - 1) as pbar:
        try:
            while cap.isOpened():
                success,frame = cap.read()
                if not success:
                    break
                    
                try:
                    frame = process_frame(frame)
                except:
                    print('error')
                    pass
                
                if success == True:
                    #cv2.imshow('Video Processing',frame)
                    out.write(frame)
                    
                    #进度条更新一帧
                    pbar.update(1)
                    
                #if cv2.waitKey(1) in [ord('q'),27]:    #按键盘上的q或esc退出（在英文输入法下）
                    #break
                    
        except:
                print('中途中断')
                pass
    cv2.destroyAllWindows()
    out.release()
    cap.release()
    print('视频已保存:',output_path)

In [29]:
generate_video(input_path = './videos/hands.video.mp4')

视频开始处理 ./videos/hands.video.mp4
视频总帧数为: 256


100%|████████████████████████████████████████████████████████████████████████████████| 255/255 [00:07<00:00, 31.97it/s]

视频已保存: out_hands.video.mp4



