# 使用Mediapipe
> 由Google Research 所開發的多媒體機器學習模型應用框架，透過 MediaPipe 可以簡單地實現手部追蹤、人臉檢測或物體檢測等功能
### MediaPipe的AI功能
請進入[MediaPipe Studio](https://mediapipe-studio.webapps.google.com/home)可看到MediaPipe提供可直接使用的AI功能

使用 Python 語言進行開發，MediaPipe 支援下列幾種辨識模型
* MediaPipe Face Detection ( 人臉偵測 )
* MediaPipe Face Mesh ( 人臉網格 )
* MediaPipe Hands ( 手掌偵測 )
* MediaPipe Holistic ( 全身偵測 )
* MediaPipe Pose ( 姿勢偵測 )
* MediaPipe Objectron ( 物體偵測 )
* MediaPipe Selfie Segmentation ( 人物去背 )

ref: https://hackmd.io/@am534143/r1pch8Y1p

# MediaPipe-Pose提取骨架做角度運算
以使用Pose(姿勢預測)為例，則會顯示出33個點，其點Index如下圖：
![](data/pose1.png)

In [1]:
import cv2
import numpy as np 
import mediapipe as mp

In [1]:
# Aliases for the MediaPipe drawing helpers and the Holistic solution.
mp_drawing = mp.solutions.drawing_utils
mp_drawing_styles = mp.solutions.drawing_styles
mp_holistic = mp.solutions.holistic

cap = cv2.VideoCapture(0)

try:
    # Check the camera before building the model. `exit()` is the helper
    # meant for interactive sessions; raising SystemExit instead lets the
    # `finally` clause below release the camera on this path as well.
    if not cap.isOpened():
        print("Cannot open camera")
        raise SystemExit(1)

    # Holistic model initialisation: mp_holistic.Holistic creates the object
    # used for the pose detection and tracking below.
    #
    # Confidence thresholds:
    #   min_detection_confidence=0.5 -> a detection is treated as valid only
    #       when the model's confidence in it is at least 50%.
    #   min_tracking_confidence=0.5  -> tracking of a pose continues only
    #       while the model's confidence in it is at least 50%.
    with mp_holistic.Holistic(
            min_detection_confidence=0.5,
            min_tracking_confidence=0.5) as holistic:
        while True:
            ret, img = cap.read()
            if not ret:
                print("Cannot receive frame")
                break

            # Resize for display/processing; the frame is converted from
            # BGR to RGB before it is handed to the model.
            img = cv2.resize(img, (640, 480))
            img2 = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

            # Run the holistic model on the RGB frame.
            results = holistic.process(img2)

            # Draw the pose landmarks on the original (BGR) frame.
            mp_drawing.draw_landmarks(
                img,
                results.pose_landmarks,
                mp_holistic.POSE_CONNECTIONS,  # which landmarks to join with lines
                landmark_drawing_spec=mp_drawing_styles
                .get_default_pose_landmarks_style())  # landmark colour/thickness

            cv2.imshow('warmup', img)
            # Key code 27 is ESC: leave the loop when it is pressed.
            if cv2.waitKey(5) & 0xFF == 27:
                break
finally:
    # Always give the camera back and close the preview window.
    cap.release()
    cv2.destroyAllWindows()

Cannot receive frame


# Mediapipe 手掌特徵點偵測
Mediapipe 偵測手掌後，會在手掌與手指上產生 21 個具有 x、y、z 座標的節點<br>
透過包含立體深度的節點，就能在 3D 場景中做出多種不同的應用，下圖標示出每個節點的順序和位置
![](data/hand.png)


### 設定值(Configuration Options)
```
1. STATIC_IMAGE_MODE
設置為false (default)適合影片、攝像頭做為input；設置為true 適合一整批靜態資料，如圖片。

2. MAX_NUM_HANDS
可以一次檢測幾個手的最大數量，預設為2。

3. MIN_DETECTION_CONFIDENCE
model產出的最小信度檢測為成功門檻，也就是threshold。Range為[0.0, 1.0]，預設為0.5 。

4. MIN_TRACKING_CONFIDENCE
Landmark(手的座標)追蹤model產出的最小信度成功門檻，成功則會在下一張圖像，取用前一圖像，作為手部檢測參考。Range為[0.0, 1.0]，預設為0.5 。

TRACKING_CONFIDENCE是為了讓手指在移動中，也能透過前一個手的座標進行判斷追蹤，例如部分手指已不在畫面中(鏡頭拍不到的位置)，但mediapipe的Hands還是會持續辨識。
```
### 輸出(Output)
1. `MULTI_HAND_LANDMARKS`<br>
每一隻手都會有21個Landmark，其座標包含x, y, z。<br>
x和y分別以圖像的寬度和高度歸一化到[0.0, 1.0]的範圍。<br>
z為深度，以手腕作原點計算深度，值越小，Landmark就越靠近相機。<br>
2. `MULTI_HANDEDNESS`<br>
判斷為左手或右手，每隻手資料包含`label`和`score`。
    * label用來表示 "Left" 或"Right" 。
    * score是所預測慣用手（左手或右手）的估計機率。

In [None]:
import cv2
import mediapipe as mp
# Aliases for the MediaPipe drawing helpers and the Hands solution.
mp_drawing = mp.solutions.drawing_utils
mp_drawing_styles = mp.solutions.drawing_styles
mp_hands = mp.solutions.hands

# For static images: list the image paths to annotate here.
IMAGE_FILES = []
with mp_hands.Hands(
    static_image_mode=True,
    max_num_hands=2,
    min_detection_confidence=0.5) as hands:
  for idx, file in enumerate(IMAGE_FILES):
    # cv2.imread returns None when the file cannot be read; skip such files
    # instead of letting cv2.flip fail and abort the whole batch.
    source = cv2.imread(file)
    if source is None:
      print('Cannot read image:', file)
      continue

    # Flip the image around the y-axis for correct handedness output.
    image = cv2.flip(source, 1)
    # Convert the BGR image to RGB before processing.
    results = hands.process(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))

    # Print handedness and draw hand landmarks on the image.
    print('Handedness:', results.multi_handedness)
    if not results.multi_hand_landmarks:
      continue
    image_height, image_width, _ = image.shape
    annotated_image = image.copy()
    for hand_landmarks in results.multi_hand_landmarks:
      print('hand_landmarks:', hand_landmarks)
      # Landmark x/y are multiplied by the image size to get pixel values.
      tip = hand_landmarks.landmark[mp_hands.HandLandmark.INDEX_FINGER_TIP]
      print(
          'Index finger tip coordinates: (',
          f'{tip.x * image_width}, '
          f'{tip.y * image_height})'
      )
      mp_drawing.draw_landmarks(
          annotated_image,
          hand_landmarks,
          mp_hands.HAND_CONNECTIONS,
          mp_drawing_styles.get_default_hand_landmarks_style(),
          mp_drawing_styles.get_default_hand_connections_style())
    # Flip back before saving so the written file matches the input image.
    cv2.imwrite(
        '/tmp/annotated_image' + str(idx) + '.png', cv2.flip(annotated_image, 1))
    # Draw hand world landmarks.
    if not results.multi_hand_world_landmarks:
      continue
    for hand_world_landmarks in results.multi_hand_world_landmarks:
      mp_drawing.plot_landmarks(
        hand_world_landmarks, mp_hands.HAND_CONNECTIONS, azimuth=5)

# For webcam input:
# Live hand-landmark preview: read frames from camera index 0, run the Hands
# model on each frame, draw the detected landmarks and show the result until
# ESC is pressed or the capture is no longer open.
cap = cv2.VideoCapture(0)
with mp_hands.Hands(
    model_complexity=0,
    min_detection_confidence=0.5,
    min_tracking_confidence=0.5) as hands:
  while cap.isOpened():
    success, image = cap.read()
    if not success:
      print("Ignoring empty camera frame.")
      # If loading a video, use 'break' instead of 'continue'.
      # NOTE(review): 'continue' skips the waitKey call below, so while reads
      # keep failing the loop retries immediately and ESC is not checked.
      continue

    # To improve performance, optionally mark the image as not writeable to
    # pass by reference.
    image.flags.writeable = False
    # The frame is converted from BGR to RGB before being passed to the model.
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    results = hands.process(image)

    # Draw the hand annotations on the image.
    # The frame is made writeable again and converted back to BGR first.
    image.flags.writeable = True
    image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
    if results.multi_hand_landmarks:
      for hand_landmarks in results.multi_hand_landmarks:
        mp_drawing.draw_landmarks(
            image,
            hand_landmarks,
            mp_hands.HAND_CONNECTIONS,
            mp_drawing_styles.get_default_hand_landmarks_style(),
            mp_drawing_styles.get_default_hand_connections_style())
    # Flip the image horizontally for a selfie-view display.
    cv2.imshow('MediaPipe Hands', cv2.flip(image, 1))
    # Key code 27 is ESC: leave the loop when it is pressed.
    if cv2.waitKey(5) & 0xFF == 27:
      break
# Release the camera and close the preview window once the loop has ended.
cap.release()
cv2.destroyAllWindows()

# 手勢辨識 
```
啟用手掌偵測並將偵測到的節點座標，帶入 hand_angle 函式
將計算出的角度串列帶入 hand_pos 求出目前的手勢。
如果偵測到手指的角度小於 50 度，表示手指伸直，大於等於 50 度表示手指捲縮
可使用 print 先印出結果，再根據結果調整角度範圍。
```
參考 https://steam.oxxostudio.tw/category/python/ai/ai-mediapipe-gesture.html

In [None]:
import cv2
import mediapipe as mp
import math

# Aliases for MediaPipe sub-modules. Only mp_hands is referenced by the code
# visible further down in this section; the two drawing aliases appear unused
# here.
mp_drawing = mp.solutions.drawing_utils
mp_drawing_styles = mp.solutions.drawing_styles
mp_hands = mp.solutions.hands

# Compute the angle between two 2-D vectors.
def vector_2d_angle(v1, v2):
    """Return the unsigned angle between two 2-D vectors, in degrees.

    Args:
        v1: Indexable whose items 0 and 1 are the x and y components.
        v2: Same layout as ``v1``.

    Returns:
        The angle in the range [0, 180]. When either vector has zero length
        the angle is undefined and 180 is returned (the value the previous
        implementation fell back to).

    Raises:
        TypeError: If a component is not a real number.
        IndexError: If a vector has fewer than two components.
    """
    v1_x, v1_y = v1[0], v1[1]
    v2_x, v2_y = v2[0], v2[1]

    norm_product = math.hypot(v1_x, v1_y) * math.hypot(v2_x, v2_y)
    if norm_product == 0:
        return 180

    cosine = (v1_x * v2_x + v1_y * v2_y) / norm_product
    # Rounding can push the cosine of (anti)parallel vectors a hair outside
    # [-1, 1], which would make acos raise; clamp it back. Plain comparisons
    # are used so that a NaN passes through untouched.
    if cosine > 1.0:
        cosine = 1.0
    elif cosine < -1.0:
        cosine = -1.0
    return math.degrees(math.acos(cosine))

# Turn the 21 hand landmark coordinates into one angle per finger.
def hand_angle(hand_):
    """Return five finger angles computed from hand landmark coordinates.

    Args:
        hand_: Indexable of (x, y) points; indices 0 through 20 are read.
            Every coordinate is truncated with ``int()`` before use.

    Returns:
        A list of five values from ``vector_2d_angle``, ordered thumb,
        index, middle, ring, pinky. For each finger the angle is taken
        between (landmark 0 - lower landmark) and (upper landmark - tip
        landmark).
    """
    # (lower, upper, tip) landmark indices, thumb first and pinky last.
    finger_landmarks = (
        (2, 3, 4),
        (6, 7, 8),
        (10, 11, 12),
        (14, 15, 16),
        (18, 19, 20),
    )

    def to_int_point(index):
        point = hand_[index]
        return int(point[0]), int(point[1])

    origin_x, origin_y = to_int_point(0)
    angles = []
    for lower, upper, tip in finger_landmarks:
        lower_x, lower_y = to_int_point(lower)
        upper_x, upper_y = to_int_point(upper)
        tip_x, tip_y = to_int_point(tip)
        palm_vector = (origin_x - lower_x, origin_y - lower_y)
        tip_vector = (upper_x - tip_x, upper_y - tip_y)
        angles.append(vector_2d_angle(palm_vector, tip_vector))
    return angles

# Map the list of finger angles to a gesture name.
# Keys spell the five fingers in order thumb, index, middle, ring, pinky:
# 'S' = straight (angle below the threshold), 'B' = bent (angle at or above).
_GESTURE_BY_FINGER_STATE = {
    'SBBBB': 'good',
    'BBSBB': 'no!!!',
    'SSBBS': 'ROCK!',
    'BBBBB': '0',
    'BBBBS': 'pink',
    'BSBBB': '1',
    'BSSBB': '2',
    'BBSSS': 'ok',
    'SBSSS': 'ok',
    'BSSSB': '3',
    'BSSSS': '4',
    'SSSSS': '5',
    'SBBBS': '6',
    'SSBBB': '7',
    'SSSBB': '8',
    'SSSSB': '9',
}


def hand_pos(finger_angle, threshold=50):
    """Return the gesture name matching five finger angles.

    Args:
        finger_angle: Indexable holding the thumb, index, middle, ring and
            pinky angles at positions 0 through 4.
        threshold: Angle separating straight from bent. A finger counts as
            straight when its angle is strictly below this value and as bent
            otherwise. Defaults to 50.

    Returns:
        The gesture label, or an empty string when the combination of
        straight and bent fingers is not a known gesture.

    Raises:
        IndexError: If fewer than five angles are supplied.
    """
    # Every finger uses the same "< threshold" rule. The previous chain tested
    # the pinky with "> 50" for gesture '3', so an angle of exactly 50 matched
    # no gesture at all.
    state = ''.join(
        'S' if finger_angle[i] < threshold else 'B' for i in range(5)
    )
    return _GESTURE_BY_FINGER_STATE.get(state, '')

cap = cv2.VideoCapture(0)            # open camera index 0
fontFace = cv2.FONT_HERSHEY_SIMPLEX  # font used for the gesture label
lineType = cv2.LINE_AA               # line type used when drawing the label

try:
    # Check the camera before building the model. `exit()` is the helper
    # meant for interactive sessions; raising SystemExit instead lets the
    # `finally` clause below release the camera on this path as well.
    if not cap.isOpened():
        print("Cannot open camera")
        raise SystemExit(1)

    # Enable MediaPipe hand detection.
    with mp_hands.Hands(
            model_complexity=0,
            min_detection_confidence=0.5,
            min_tracking_confidence=0.5) as hands:
        w, h = 640, 480                                  # working frame size
        while True:
            ret, img = cap.read()
            # Test `ret` before touching `img`: resizing the frame of a
            # failed read raises instead of reaching this message.
            if not ret:
                print("Cannot receive frame")
                break
            img = cv2.resize(img, (w, h))                # shrink to speed up processing
            img2 = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)  # convert BGR to RGB for the model
            results = hands.process(img2)                # detect hands
            if results.multi_hand_landmarks:
                for hand_landmarks in results.multi_hand_landmarks:
                    # Scale each landmark's x/y by the frame size.
                    finger_points = [
                        (landmark.x * w, landmark.y * h)
                        for landmark in hand_landmarks.landmark
                    ]
                    if finger_points:
                        # One angle per finger (a list of length 5).
                        finger_angle = hand_angle(finger_points)
                        # print(finger_angle)  # uncomment to tune the angle threshold
                        text = hand_pos(finger_angle)    # gesture label for these angles
                        cv2.putText(img, text, (30, 120), fontFace, 5,
                                    (200, 100, 10), 10, lineType)

            cv2.imshow('oxxostudio', img)
            # Leave the loop when 'q' is pressed.
            if cv2.waitKey(5) == ord('q'):
                break
finally:
    # Always give the camera back and close the preview window.
    cap.release()
    cv2.destroyAllWindows()