# Importing libraries and mounting Google Drive

In [None]:
# Pandas and Numpy
import numpy as np
import pandas as pd

# Charting and plotting
from matplotlib import pyplot as plt
import seaborn as sns
from pylab import rcParams
rcParams['figure.figsize'] = 15,8

# Ignore Warnings
from warnings import filterwarnings
filterwarnings("ignore")

In [None]:
# Mount Google Drive at /content/drive/ so the files referenced below
# (model .task files and dataset videos under /content/drive/MyDrive/...) are reachable.
from google.colab import drive
drive.mount('/content/drive/')

Mounted at /content/drive/


## Installing MediaPipe

In [None]:
# installing MediaPipe
!pip install -q mediapipe==0.10.0

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m33.9/33.9 MB[0m [31m60.5 MB/s[0m eta [36m0:00:00[0m
[?25h

In [None]:
# Locations of the MediaPipe Tasks model bundles on the mounted Drive.
hand_model_path, face_model_path, pose_model_path = (
    '/content/drive/MyDrive/Mtech_Project/Code/hand_landmarker.task',
    '/content/drive/MyDrive/Mtech_Project/Code/face_landmarker.task',
    '/content/drive/MyDrive/Mtech_Project/Code/pose_landmarker.task',
)

In [None]:
# Import the mediapipe modules and Hand and Face Landmarker API 
import mediapipe as mp
from mediapipe.tasks import python
from mediapipe.tasks.python import vision
from mediapipe import solutions
from mediapipe.framework.formats import landmark_pb2

In [None]:
# Short module aliases for the MediaPipe `solutions` sub-packages used below.
# Drawing helpers:
mp_drawing, mp_drawing_styles = solutions.drawing_utils, solutions.drawing_styles
# Individual models:
mp_hands, mp_face, mp_pose = solutions.hands, solutions.face_mesh, solutions.pose
# Combined hands + face + pose model:
mp_holistic = solutions.holistic

In [None]:
# visualization tools
import cv2
from google.colab.patches import cv2_imshow

# Videos - Holistic, without Face detection

## Create MP Objects

In [None]:
# Aliases for the MediaPipe Tasks classes (hand, pose and face landmarkers).
_mp_vision = mp.tasks.vision
BaseOptions = mp.tasks.BaseOptions
VisionRunningMode = _mp_vision.RunningMode
HandLandmarker, HandLandmarkerOptions = _mp_vision.HandLandmarker, _mp_vision.HandLandmarkerOptions
PoseLandmarker, PoseLandmarkerOptions = _mp_vision.PoseLandmarker, _mp_vision.PoseLandmarkerOptions
FaceLandmarker, FaceLandmarkerOptions = _mp_vision.FaceLandmarker, _mp_vision.FaceLandmarkerOptions

# Options for each landmarker, all using the VIDEO running mode and the
# model bundle paths defined earlier in the notebook.
options_hand = HandLandmarkerOptions(
    running_mode=VisionRunningMode.VIDEO,
    base_options=BaseOptions(model_asset_path=hand_model_path),
)
options_pose = PoseLandmarkerOptions(
    running_mode=VisionRunningMode.VIDEO,
    base_options=BaseOptions(model_asset_path=pose_model_path),
)
options_face = FaceLandmarkerOptions(
    running_mode=VisionRunningMode.VIDEO,
    base_options=BaseOptions(model_asset_path=face_model_path),
)

In [None]:
# Instantiate one Tasks-API detector per options object created above.
detector_face = FaceLandmarker.create_from_options(options_face)
detector_hand = HandLandmarker.create_from_options(options_hand)
detector_pose = PoseLandmarker.create_from_options(options_pose)

## Not used

In [None]:
# Legacy `solutions` API objects. All three are created with
# static_image_mode=False, i.e. they are NOT configured for static images.

# Hands: up to two hands per frame.
hands = mp_hands.Hands(
    min_detection_confidence=0.5,
    max_num_hands=2,
    static_image_mode=False,
)

# Body pose.
pose = mp_pose.Pose(
    min_tracking_confidence=0.5,
    min_detection_confidence=0.5,
    static_image_mode=False,
)

# Face mesh: a single face, without refined landmarks.
face = mp_face.FaceMesh(
    min_tracking_confidence=0.5,
    min_detection_confidence=0.5,
    refine_landmarks=False,
    max_num_faces=1,
    static_image_mode=False,
)


## Load Video

In [None]:
# Holistic model (hands + face + pose) used by the frame loops below.
_holistic_settings = dict(
    static_image_mode=False,
    model_complexity=1,
    smooth_landmarks=True,
    enable_segmentation=False,
    refine_face_landmarks=True,
    min_detection_confidence=0.5,
    min_tracking_confidence=0.5,
)
holistic = mp_holistic.Holistic(**_holistic_settings)

In [None]:
# Load the input video
VIDEO_PATH = '/content/drive/MyDrive/Mtech_Project/Dataset_Video/MVI_9855.MOV'
cap = cv2.VideoCapture(VIDEO_PATH)
# cap = cv2.VideoCapture('Advantage_gesture.mp4')

# VideoCapture does not raise when the file is missing or cannot be decoded;
# it just returns an unopened capture. Fail here instead of letting the
# `while cap.isOpened()` loops below silently do nothing.
if not cap.isOpened():
    raise RuntimeError(f'Could not open video: {VIDEO_PATH}')

In [None]:
# Read the frame rate OpenCV reports for the opened capture and print it.
fps = cap.get(cv2.CAP_PROP_FPS)
print('frames per second =',fps)

frames per second = 25.0


## Conversion

In [None]:
# Run the holistic model on every frame of `cap` and display the frame with
# both hands and the body pose drawn on it. The face mesh is intentionally
# not drawn in this section.

# The default drawing styles do not depend on the frame, so build them once.
hand_landmark_style = mp_drawing_styles.get_default_hand_landmarks_style()
hand_connection_style = mp_drawing_styles.get_default_hand_connections_style()
pose_landmark_style = mp_drawing_styles.get_default_pose_landmarks_style()

try:
    while cap.isOpened():
        success, image = cap.read()
        if not success:
            print("Ignoring empty camera frame.")
            # The source is a video file, so a failed read ends the loop.
            break

        # OpenCV decodes frames as BGR; the model is fed an RGB copy.
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        results = holistic.process(image)

        # Convert back to BGR before drawing and displaying.
        image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)

        # Left and right hand share the same connections and styles.
        for hand_landmarks in (results.left_hand_landmarks, results.right_hand_landmarks):
            mp_drawing.draw_landmarks(
                image,
                hand_landmarks,
                mp_hands.HAND_CONNECTIONS,
                landmark_drawing_spec=hand_landmark_style,
                connection_drawing_spec=hand_connection_style,
            )

        # Pose uses the default connection style.
        mp_drawing.draw_landmarks(
            image,
            results.pose_landmarks,
            mp_holistic.POSE_CONNECTIONS,
            landmark_drawing_spec=pose_landmark_style,
        )

        # Flip the image horizontally for a selfie-view display.
        cv2_imshow(cv2.flip(image, 1))
finally:
    # Release the capture even if processing or drawing raised part-way through.
    cap.release()
    cv2.destroyAllWindows()

Output hidden; open in https://colab.research.google.com to view.

## Not used

In [None]:
# Pose-only variant of the display loop: prints the pose landmarks of every
# frame and shows the frame with the pose skeleton drawn on it.
#
# NOTE(review): the preceding display loop releases `cap`, so `cap.isOpened()`
# is False here unless the video-loading cell is re-run first.

pose_landmark_style = mp_drawing_styles.get_default_pose_landmarks_style()

try:
    while cap.isOpened():
        success, image = cap.read()
        if not success:
            print("Ignoring empty camera frame.")
            # The source is a video file, so a failed read ends the loop.
            break

        # The model is fed an RGB copy of the BGR frame decoded by OpenCV.
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        results = holistic.process(image)
        print(results.pose_landmarks)

        # cv2_imshow expects BGR data; without converting back, the red and
        # blue channels of the displayed frame are swapped.
        image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
        mp_drawing.draw_landmarks(
            image,
            results.pose_landmarks,
            mp_holistic.POSE_CONNECTIONS,
            landmark_drawing_spec=pose_landmark_style,
        )

        # Flip the image horizontally for a selfie-view display.
        cv2_imshow(cv2.flip(image, 1))
finally:
    # Release the capture even if processing or drawing raised part-way through.
    cap.release()
    cv2.destroyAllWindows()

Output hidden; open in https://colab.research.google.com to view.

# Videos - Face and Pose - Not used

In [None]:
# Re-declare the Tasks-API aliases used in this section.
_mp_vision = mp.tasks.vision
BaseOptions = mp.tasks.BaseOptions
VisionRunningMode = _mp_vision.RunningMode
HandLandmarker, HandLandmarkerOptions = _mp_vision.HandLandmarker, _mp_vision.HandLandmarkerOptions
PoseLandmarker, PoseLandmarkerOptions = _mp_vision.PoseLandmarker, _mp_vision.PoseLandmarkerOptions

# Re-declare the `solutions` aliases used for drawing and the holistic model.
_mp_solutions = mp.solutions
mp_drawing, mp_drawing_styles = _mp_solutions.drawing_utils, _mp_solutions.drawing_styles
mp_holistic = _mp_solutions.holistic

# Hand and pose landmarker options, both in VIDEO running mode.
options_hand = HandLandmarkerOptions(
    running_mode=VisionRunningMode.VIDEO,
    base_options=BaseOptions(model_asset_path=hand_model_path),
)
options_pose = PoseLandmarkerOptions(
    running_mode=VisionRunningMode.VIDEO,
    base_options=BaseOptions(model_asset_path=pose_model_path),
)

In [None]:
# Face landmarker built from the face options created earlier in the notebook.
# The original passed `options`, a name no visible cell defines (NameError);
# the FaceLandmarkerOptions object is called `options_face`.
detector_vid = vision.FaceLandmarker.create_from_options(options_face)

In [None]:
# Load the input video
VIDEO_PATH = '/content/drive/MyDrive/Mtech_Project/Dataset_Video/MVI_9855.MOV'
cap = cv2.VideoCapture(VIDEO_PATH)
# cap = cv2.VideoCapture('Advantage_gesture.mp4')

# VideoCapture does not raise when the file is missing or cannot be decoded;
# it just returns an unopened capture. Fail here instead of letting the
# frame loop below silently do nothing.
if not cap.isOpened():
    raise RuntimeError(f'Could not open video: {VIDEO_PATH}')

In [None]:
# Read the frame rate OpenCV reports for the opened capture and print it.
fps = cap.get(cv2.CAP_PROP_FPS)
print('frames per second =',fps)

frames per second = 25.0


In [None]:
# Holistic model with explicit detection/tracking confidence thresholds;
# every other setting is left at the library default.
holistic = mp_holistic.Holistic(
    min_detection_confidence=0.5,
    min_tracking_confidence=0.5)

face_contour_style = mp_drawing_styles.get_default_face_mesh_contours_style()
pose_landmark_style = mp_drawing_styles.get_default_pose_landmarks_style()

try:
    while cap.isOpened():
        success, image = cap.read()
        if not success:
            print("Ignoring empty camera frame.")
            # The source is a video file: once reads start failing the stream
            # is finished. `continue` would spin forever because the capture
            # stays open, so leave the loop instead.
            break

        # To improve performance, optionally mark the image as not writeable to
        # pass by reference.
        image.flags.writeable = False
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        results = holistic.process(image)

        # Draw the landmark annotations on the image.
        image.flags.writeable = True
        image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)

        if results.face_landmarks:
            mp_drawing.draw_landmarks(
                image,
                results.face_landmarks,
                mp_holistic.FACEMESH_CONTOURS,
                landmark_drawing_spec=None,
                connection_drawing_spec=face_contour_style)

        # Pose is drawn independently of the face result; previously it was
        # nested under the face check and skipped whenever no face was found.
        if results.pose_landmarks:
            mp_drawing.draw_landmarks(
                image,
                results.pose_landmarks,
                mp_holistic.POSE_CONNECTIONS,
                landmark_drawing_spec=pose_landmark_style)

        # Flip the image horizontally for a selfie-view display.
        cv2_imshow(cv2.flip(image, 1))
        if cv2.waitKey(1) & 0xFF == ord('r'):
            break
finally:
    # Release the capture even if processing or drawing raised part-way through.
    cap.release()
    cv2.destroyAllWindows()

# Creating Dataframe structure

In [None]:
# Holistic model for the landmark-extraction loops below, created with
# static_image_mode=False because the input is a sequence of video frames.
holistic = mp_holistic.Holistic(
    min_tracking_confidence=0.5,
    min_detection_confidence=0.5,
    static_image_mode=False,
)

In [None]:
# Enumeration of hand landmark identifiers; print one member per line.
hand_points = mp_holistic.HandLandmark
print(*hand_points, sep='\n')

HandLandmark.WRIST
HandLandmark.THUMB_CMC
HandLandmark.THUMB_MCP
HandLandmark.THUMB_IP
HandLandmark.THUMB_TIP
HandLandmark.INDEX_FINGER_MCP
HandLandmark.INDEX_FINGER_PIP
HandLandmark.INDEX_FINGER_DIP
HandLandmark.INDEX_FINGER_TIP
HandLandmark.MIDDLE_FINGER_MCP
HandLandmark.MIDDLE_FINGER_PIP
HandLandmark.MIDDLE_FINGER_DIP
HandLandmark.MIDDLE_FINGER_TIP
HandLandmark.RING_FINGER_MCP
HandLandmark.RING_FINGER_PIP
HandLandmark.RING_FINGER_DIP
HandLandmark.RING_FINGER_TIP
HandLandmark.PINKY_MCP
HandLandmark.PINKY_PIP
HandLandmark.PINKY_DIP
HandLandmark.PINKY_TIP


In [None]:
def _landmark_columns(prefix, points):
    """Return the column names `<prefix>_<LANDMARK>_{x,y,z,vis}` for every member of `points`.

    `points` is an enum class such as `mp_holistic.HandLandmark`. The member's
    `.name` attribute is used instead of slicing `str(member)`: the text
    produced by `str()` on an enum member is not a stable format (for
    IntEnum-based enums it changed in Python 3.11), whereas `.name` always
    yields the bare member name.
    """
    return [
        f'{prefix}_{point.name}_{suffix}'
        for point in points
        for suffix in ('x', 'y', 'z', 'vis')
    ]


# Alias retained so existing references to `left_hand_points` keep working.
left_hand_points = mp_holistic.HandLandmark
pose_points = mp_holistic.PoseLandmark

# Column order: all left-hand landmarks, then right-hand, then pose. The
# extraction loop later in the notebook appends values in the same order.
li_headers = (
    _landmark_columns('LEFT', hand_points)
    + _landmark_columns('RIGHT', hand_points)
    + _landmark_columns('POSE', pose_points)
)

In [None]:
# Prepend the three per-row metadata columns (pose label, frame number,
# timestamp). The guard makes the cell safe to re-run: without it every
# execution would insert another copy of these columns.
_meta_columns = ['POSE', 'FRAME', 'TS']
if li_headers[:len(_meta_columns)] != _meta_columns:
    li_headers[:0] = _meta_columns

In [None]:
# Total column count: the 3 metadata columns plus 4 values (x, y, z, vis) per landmark.
len(li_headers)

303

In [None]:
# Empty frame with one column per header; rows are added by the extraction loop further down.
df_landmark = pd.DataFrame(columns=li_headers)

In [None]:
# Display the frame (no rows yet) to check the column layout.
df_landmark

Unnamed: 0,POSE,FRAME,TS,LEFT_WRIST_x,LEFT_WRIST_y,LEFT_WRIST_z,LEFT_WRIST_vis,LEFT_THUMB_CMC_x,LEFT_THUMB_CMC_y,LEFT_THUMB_CMC_z,...,POSE_RIGHT_HEEL_z,POSE_RIGHT_HEEL_vis,POSE_LEFT_FOOT_INDEX_x,POSE_LEFT_FOOT_INDEX_y,POSE_LEFT_FOOT_INDEX_z,POSE_LEFT_FOOT_INDEX_vis,POSE_RIGHT_FOOT_INDEX_x,POSE_RIGHT_FOOT_INDEX_y,POSE_RIGHT_FOOT_INDEX_z,POSE_RIGHT_FOOT_INDEX_vis


# Extracting the pose data

In [None]:
# Load the input video
VIDEO_PATH = '/content/drive/MyDrive/Mtech_Project/Dataset_Video/MVI_9855.MOV'
cap = cv2.VideoCapture(VIDEO_PATH)
# cap = cv2.VideoCapture('Advantage_gesture.mp4')

# VideoCapture does not raise when the file is missing or cannot be decoded;
# it just returns an unopened capture. Fail here instead of letting the
# extraction loops below silently produce no rows.
if not cap.isOpened():
    raise RuntimeError(f'Could not open video: {VIDEO_PATH}')

## NOT USED - Validation only

In [None]:
# Validation only: print the x/y/z coordinates of every left-hand landmark,
# frame by frame.
frame_no = 0
left_hand_points = mp_holistic.HandLandmark

try:
    while cap.isOpened():
        success, image = cap.read()
        if not success:
            print("Ignoring empty camera frame.")
            # The source is a video file, so a failed read ends the loop.
            break

        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        results = holistic.process(image)

        print('Left landmark for frame', frame_no)

        # `left_hand_landmarks` is None on frames where no left hand was
        # detected; accessing `.landmark` on it raised AttributeError.
        if results.left_hand_landmarks is not None:
            landmarks = results.left_hand_landmarks.landmark
            for _, lm in zip(left_hand_points, landmarks):
                print(lm.x, lm.y, lm.z)
        else:
            print('No left hand detected')

        frame_no += 1
finally:
    # Release the capture even if processing raised part-way through.
    cap.release()
    cv2.destroyAllWindows()

Left landmark for frame 0
0.5631774067878723 0.8469591736793518 2.50969538484469e-08
0.549839437007904 0.8656520843505859 -0.001150613185018301
0.5428478121757507 0.890868604183197 -0.0020210205111652613
0.5405250787734985 0.9143166542053223 -0.0028105750679969788
0.5393763184547424 0.93081134557724 -0.0030845783185213804
0.5531207323074341 0.9141701459884644 -0.0022379769943654537
0.5487013459205627 0.9444729685783386 -0.004369852598756552
0.5432438254356384 0.9476161599159241 -0.00540073961019516
0.5398918986320496 0.942460298538208 -0.005369588732719421
0.5621703863143921 0.9148458242416382 -0.002091221045702696
0.5575021505355835 0.9453299641609192 -0.002875594887882471
0.5513681769371033 0.9462419152259827 -0.0028240554966032505
0.5478171110153198 0.9396279454231262 -0.002684203675016761
0.568996250629425 0.9113269448280334 -0.001957901753485203
0.565208911895752 0.9379764795303345 -0.001527378219179809
0.5591727495193481 0.938581109046936 -0.00026650942163541913
0.555361270904541

In [None]:
# Validation only: print the x/y/z coordinates of every right-hand landmark,
# frame by frame.
#
# NOTE(review): the left-hand validation loop releases `cap`; re-run the
# video-loading cell first or this loop never executes.
frame_no = 0
left_hand_points = mp_holistic.HandLandmark  # same enum is used for both hands

try:
    while cap.isOpened():
        success, image = cap.read()
        if not success:
            print("Ignoring empty camera frame.")
            # The source is a video file, so a failed read ends the loop.
            break

        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        results = holistic.process(image)

        # This cell reads the RIGHT hand; the label previously said "Left".
        print('Right landmark for frame', frame_no)

        # `right_hand_landmarks` is None on frames where no right hand was
        # detected; accessing `.landmark` on it raised AttributeError.
        if results.right_hand_landmarks is not None:
            landmarks = results.right_hand_landmarks.landmark
            for _, lm in zip(left_hand_points, landmarks):
                print(lm.x, lm.y, lm.z)
        else:
            print('No right hand detected')

        frame_no += 1
finally:
    # Release the capture even if processing raised part-way through.
    cap.release()
    cv2.destroyAllWindows()

AttributeError: ignored

## Updating dataframe

In [None]:
def _flatten_landmarks(landmark_list, points):
    """Flatten one landmark group into [x, y, z, visibility, ...].

    `landmark_list` is one of the holistic result fields (it is falsy when
    nothing was detected) and `points` is the matching landmark enum. When
    nothing was detected, four zeros are emitted per enum member so every row
    keeps the same length.
    """
    if not landmark_list:
        return [0] * (4 * len(points))
    values = []
    for _, lm in zip(points, landmark_list.landmark):
        values.extend((lm.x, lm.y, lm.z, lm.visibility))
    return values


hand_points = mp_holistic.HandLandmark
pose_points = mp_holistic.PoseLandmark

frame_no = 0
frame_rows = []  # one flat list per frame, in `li_headers` column order

try:
    while cap.isOpened():
        success, image = cap.read()
        if not success:
            print("Ignoring empty camera frame.")
            # The source is a video file, so a failed read ends the loop.
            break

        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        results = holistic.process(image)

        # Metadata columns: pose label, frame index, capture position in ms
        # (stored as a string, as before).
        frame_li = ['Pose1', frame_no, str(cap.get(cv2.CAP_PROP_POS_MSEC))]

        # Landmark columns: left hand, right hand, then pose.
        frame_li += _flatten_landmarks(results.left_hand_landmarks, hand_points)
        frame_li += _flatten_landmarks(results.right_hand_landmarks, hand_points)
        frame_li += _flatten_landmarks(results.pose_landmarks, pose_points)

        frame_rows.append(frame_li)
        frame_no += 1
finally:
    # Release the capture even if processing raised part-way through.
    cap.release()
    cv2.destroyAllWindows()

# Build the rows in a single step: assigning `df_landmark.loc[len(df_landmark)]`
# inside the loop re-allocates the frame for every video frame.
if frame_rows:
    _new_rows = pd.DataFrame(frame_rows, columns=li_headers)
    if df_landmark.empty:
        df_landmark = _new_rows
    else:
        # Rows already present in df_landmark are kept and the new ones appended.
        df_landmark = pd.concat([df_landmark, _new_rows], ignore_index=True)

Ignoring empty camera frame.


In [None]:
# Length of the last row built by the loop; it should equal len(li_headers).
len(frame_li)

303

In [None]:
# Preview the first extracted rows.
df_landmark.head()

Unnamed: 0,POSE,FRAME,TS,LEFT_WRIST_x,LEFT_WRIST_y,LEFT_WRIST_z,LEFT_WRIST_vis,LEFT_THUMB_CMC_x,LEFT_THUMB_CMC_y,LEFT_THUMB_CMC_z,...,POSE_RIGHT_HEEL_z,POSE_RIGHT_HEEL_vis,POSE_LEFT_FOOT_INDEX_x,POSE_LEFT_FOOT_INDEX_y,POSE_LEFT_FOOT_INDEX_z,POSE_LEFT_FOOT_INDEX_vis,POSE_RIGHT_FOOT_INDEX_x,POSE_RIGHT_FOOT_INDEX_y,POSE_RIGHT_FOOT_INDEX_z,POSE_RIGHT_FOOT_INDEX_vis
0,Pose1,0,0.0,0.563406,0.853281,5.745335e-08,0.0,0.549474,0.867098,-0.001324,...,0.221925,0.066953,0.518232,1.385464,0.156647,0.057172,0.452479,1.376694,0.030312,0.06158
1,Pose1,1,40.0,0.563565,0.852163,5.860454e-08,0.0,0.549964,0.866835,-0.001807,...,0.219452,0.064969,0.518285,1.392965,0.160012,0.056635,0.452505,1.383184,0.024569,0.059586
2,Pose1,2,80.0,0.564077,0.853164,5.997504e-08,0.0,0.549865,0.866497,-0.00218,...,0.21983,0.06521,0.517627,1.392435,0.162183,0.057387,0.452151,1.382686,0.025703,0.059775
3,Pose1,3,120.0,0.564347,0.852983,6.545045e-08,0.0,0.550394,0.866365,-0.002463,...,0.22812,0.065242,0.517412,1.391068,0.161473,0.057118,0.451844,1.381673,0.031206,0.059265
4,Pose1,4,160.0,0.564635,0.852615,7.277987e-08,0.0,0.551099,0.865584,-0.0023,...,0.18521,0.064941,0.517016,1.378976,0.12818,0.056529,0.4518,1.368085,-0.01329,0.059577


# Testing 

In [None]:
# NOTE(review): `list_landmark` only appears in commented-out lines of the
# visible cells, so this relies on leftover kernel state and will raise
# NameError on a fresh run — confirm where it should be built.
type(list_landmark[3])

mediapipe.framework.formats.landmark_pb2.NormalizedLandmarkList

In [None]:
# Take one stored entry and inspect its type.
# NOTE(review): `list_landmark` is not assigned in any visible cell — confirm
# where it is supposed to come from.
temp = list_landmark[3]
type(temp)

mediapipe.framework.formats.landmark_pb2.NormalizedLandmarkList

In [None]:
# Print every individual landmark stored in `list_landmark`.
# NOTE(review): `list_landmark` is not assigned in any visible cell — confirm
# where it is supposed to come from.
for temp in list_landmark:
    # Entries are None for frames where that body part was not detected.
    if temp is None:
        continue
    # A NormalizedLandmarkList is not iterable itself (iterating it raised
    # TypeError); the individual points live in its repeated `landmark` field.
    for temp2 in temp.landmark:
        print('hand', temp2)

TypeError: ignored