# This notebook will be used to retrieve all handposes from all dataset videos. 

In [119]:
import mediapipe as mp
import cv2
import numpy as np
import uuid
import os
from tqdm import tqdm

In [3]:
mp_drawing = mp.solutions.drawing_utils
mp_hands = mp.solutions.hands

## Open a video

In [16]:
cap = cv2.VideoCapture("dataset/videos/00335.mp4")
while(cap.isOpened()):
    ret, frame = cap.read()
    if ret:
        cv2.imshow('frame',frame)
    else:
        break
    if cv2.waitKey(20) & 0xFF == ord('q'):
            break

cap.release()
cv2.destroyAllWindows()

## Detect handpose one a single video

In [79]:
hand_position = []
cap = cv2.VideoCapture("dataset/videos/00335.mp4")
with mp_hands.Hands(min_detection_confidence=0.8, min_tracking_confidence=0.5) as hands: 
    while cap.isOpened():
        ret, frame = cap.read()
        
        if ret:

            # Flip on horizontal
            image = cv2.flip(frame, 1)

            # Set flag
            image.flags.writeable = False

            # Detections
            results = hands.process(image)

            # Set flag to true
            image.flags.writeable = True

            # Rendering results
            if results.multi_hand_landmarks:
                temp_hand_position = []
                for num, hand in enumerate(results.multi_hand_landmarks):
                    mp_drawing.draw_landmarks(image, hand, mp_hands.HAND_CONNECTIONS, 
                                            mp_drawing.DrawingSpec(color=(121, 22, 76), thickness=2, circle_radius=4),
                                            mp_drawing.DrawingSpec(color=(250, 44, 250), thickness=2, circle_radius=2),
                                             )
                    temp_hand_position.append(hand)
                hand_position.append(temp_hand_position[0])

            # Save our image    
            cv2.imshow('Hand Tracking', image)
        else :
            break

        if cv2.waitKey(20) & 0xFF == ord('q'):
            break

cap.release()
cv2.destroyAllWindows()


### get the first coordinate from the first recognized hand placement. Each tracked hand is composed of 21 reference points characterized by 3 coordinates: x, y (2D coordinates) and z (depth).

In [80]:
hand_position[0].landmark[0]

x: 0.6498881578445435
y: 0.9029449820518494
z: 3.3803087262640474e-07

In [82]:
hand_position[0].landmark[0].x

0.6498881578445435

In [111]:
def get_coordinate_from_landmark(landmark):
    return np.array([[landmark.x, landmark.y, landmark.z]])

def get_positions_from_track(hand):
    
    all_positions = np.concatenate([get_coordinate_from_landmark(landmark) for landmark in hand.landmark], axis = 0)
    return all_positions

def get_all_tracked_hand(finename_video):
    
    hand_position = []
    cap = cv2.VideoCapture(finename_video)
    with mp_hands.Hands(min_detection_confidence=0.8, min_tracking_confidence=0.5) as hands: 
        while cap.isOpened():
            ret, frame = cap.read()

            if ret:

                # Flip on horizontal
                image = cv2.flip(frame, 1)

                # Set flag
                image.flags.writeable = False

                # Detections
                results = hands.process(image)
                
                if results.multi_hand_landmarks:
                    # Hands has been recognized
                    hand_position.append(get_positions_from_track(results.multi_hand_landmarks[0]))

            else :
                break

    cap.release()
    hand_position = np.array(hand_position)
    return hand_position

In [114]:
single_track = get_all_tracked_hand("dataset/videos/00335.mp4")

In [115]:
single_track.shape

(22, 21, 3)

In [116]:
single_track[0]

array([[ 6.49888158e-01,  9.02944982e-01,  3.38030873e-07],
       [ 6.28637075e-01,  8.64731610e-01, -1.25175426e-02],
       [ 6.01684928e-01,  8.39013338e-01, -2.33906657e-02],
       [ 5.76628625e-01,  8.22816014e-01, -3.21975984e-02],
       [ 5.59654772e-01,  8.05503666e-01, -4.15830463e-02],
       [ 5.79283714e-01,  8.80465508e-01, -3.07945199e-02],
       [ 5.35873353e-01,  8.83060515e-01, -4.49427404e-02],
       [ 5.08747935e-01,  8.84980261e-01, -5.30005805e-02],
       [ 4.86826181e-01,  8.87657225e-01, -5.76398112e-02],
       [ 5.80925167e-01,  9.12913680e-01, -3.12421340e-02],
       [ 5.34501970e-01,  9.13887620e-01, -4.38396782e-02],
       [ 5.02465010e-01,  9.14558411e-01, -5.05705252e-02],
       [ 4.76479590e-01,  9.16072905e-01, -5.51613569e-02],
       [ 5.86200535e-01,  9.41723466e-01, -3.16360258e-02],
       [ 5.43561637e-01,  9.43803966e-01, -4.22130413e-02],
       [ 5.15513062e-01,  9.43563044e-01, -4.82520759e-02],
       [ 4.94611114e-01,  9.42920208e-01

In [128]:
path_to_videos = os.getcwd()+'/dataset/videos'

In [130]:
hands_tracked = {}
for filename in tqdm(os.listdir(path_to_videos)):
    hands_tracked[filename] = get_all_tracked_hand(path_to_videos+'/'+filename)

  0%|▎                                                                           | 45/11980 [02:21<10:26:51,  3.15s/it]


KeyboardInterrupt: 