In [1]:
import cv2 as cv
import mediapipe as mp
import pandas as pd

In [2]:
def landmark_bbox(hand_landmarks, height, width) -> tuple:
    """Return the tight pixel-space bounding box around a hand's landmarks.

    Args:
        hand_landmarks: Object exposing a ``landmark`` iterable whose items
            carry ``x`` and ``y`` attributes. Each ``x`` is scaled by
            ``width`` and each ``y`` by ``height``.
        height: Frame height in pixels.
        width: Frame width in pixels.

    Returns:
        ``(x_min, y_min, x_max, y_max)`` in pixels. If there are no
        landmarks, the inverted box ``(width, height, 0, 0)`` is returned.
    """
    points = [
        (landmark.x * width, landmark.y * height)
        for landmark in hand_landmarks.landmark
    ]
    if not points:
        # Nothing to bound: keep the historical "inverted box" result.
        return width, height, 0, 0

    # Take the extremes from the landmarks themselves rather than seeding them
    # with the frame bounds; otherwise a hand lying entirely beyond one frame
    # edge would get a box stretched back to that edge.
    xs, ys = zip(*points)
    return min(xs), min(ys), max(xs), max(ys)

In [3]:
def normalized_landmarks(hand_landmarks, bbox, height, width) -> list:
    """Express each landmark relative to a bounding box.

    Every landmark is converted to pixels (``x * width``, ``y * height``),
    shifted by the box origin and divided by the box extent, so a landmark on
    the box's min corner maps to ``(0, 0)`` and one on the max corner to
    ``(1, 1)``.

    Args:
        hand_landmarks: Object exposing a ``landmark`` iterable whose items
            carry ``x`` and ``y`` attributes.
        bbox: ``(x_min, y_min, x_max, y_max)`` in pixels.
        height: Frame height in pixels.
        width: Frame width in pixels.

    Returns:
        A list of ``(x, y)`` tuples, one per landmark, in input order. An axis
        along which the box has zero extent yields ``0.0`` for every landmark.
    """
    x_min, y_min, x_max, y_max = bbox
    bbox_w, bbox_h = x_max - x_min, y_max - y_min

    def _scale(value, origin, extent):
        # A zero-extent box has no range to normalise over; pin the coordinate
        # to 0.0 instead of raising ZeroDivisionError.
        return (value - origin) / extent if extent else 0.0

    return [
        (
            _scale(landmark.x * width, x_min, bbox_w),
            _scale(landmark.y * height, y_min, bbox_h),
        )
        for landmark in hand_landmarks.landmark
    ]

In [4]:
def save_landmarks(landmarks, num_landmarks=21):
    """Build a DataFrame with one row per hand and one column per coordinate.

    Columns are named ``landmark_<i>_x`` / ``landmark_<i>_y`` for
    ``i`` in ``range(num_landmarks)``, ordered ``0_x, 0_y, 1_x, 1_y, ...``.

    Args:
        landmarks: Iterable of hands; each hand is an indexable sequence of
            ``(x, y)`` pairs with at least ``num_landmarks`` entries.
        num_landmarks: Number of landmarks read from every hand (default 21).

    Returns:
        A ``pandas.DataFrame`` with ``2 * num_landmarks`` columns. The columns
        are present even when ``landmarks`` is empty, so a CSV written from
        the result always has a header row.

    Raises:
        IndexError: If a hand has fewer than ``num_landmarks`` entries.
    """
    columns = [
        f'landmark_{index}_{axis}'
        for index in range(num_landmarks)
        for axis in ('x', 'y')
    ]
    # Collect plain rows and build the frame once; concatenating a one-row
    # frame per hand copies the whole table on every iteration.
    rows = [
        [hand[index][component] for index in range(num_landmarks) for component in (0, 1)]
        for hand in landmarks
    ]
    return pd.DataFrame(rows, columns=columns)

In [5]:
def collect(PATH):
    """Run MediaPipe hand detection over a video and gather normalised landmarks.

    Each frame is converted from BGR to RGB and passed to a MediaPipe
    ``Hands`` instance. For every detected hand the landmarks are normalised
    to that hand's bounding box (via ``landmark_bbox`` and
    ``normalized_landmarks``) and appended to the result. Progress is printed
    every 100 frames and once more when processing ends.

    Args:
        PATH: Path of the video, passed to ``cv.VideoCapture``.

    Returns:
        A list with one entry per detected hand (not per frame); each entry is
        the list returned by ``normalized_landmarks``. The list is empty if
        no frame could be read or no hand was detected.
    """
    video = cv.VideoCapture(PATH)
    total_frames = int(video.get(cv.CAP_PROP_FRAME_COUNT))
    mp_hands = mp.solutions.hands

    positions = []
    frame_index = 0
    try:
        # The context manager closes the Hands solution even if a frame fails.
        with mp_hands.Hands(min_detection_confidence=0.5,
                            min_tracking_confidence=0.5) as hands:
            while video.isOpened():
                ret, frame = video.read()
                if not ret:
                    # read() reported no frame: stop processing.
                    break

                frame_rgb = cv.cvtColor(frame, cv.COLOR_BGR2RGB)
                results = hands.process(frame_rgb)
                height, width = frame.shape[:2]

                if results.multi_hand_landmarks:
                    for hand_landmarks in results.multi_hand_landmarks:
                        bbox = landmark_bbox(hand_landmarks, height, width)
                        positions.append(
                            normalized_landmarks(hand_landmarks, bbox, height, width)
                        )

                # No window is ever shown, so there is no key press to poll
                # for; the loop simply runs until the video is exhausted.
                if frame_index % 100 == 0:
                    print(f'frame : {frame_index} / {total_frames}')
                frame_index += 1
    finally:
        # Always hand the capture back, including on errors mid-video.
        video.release()

    print(f'frame : {frame_index} / {total_frames}')
    return positions

In [6]:
# (input video, output CSV) pairs, one per recorded gesture.
PATH_NAMES = (
    ('data/videos/index_finger_up.mp4', 'data/dataframes/index_finger_up.csv'),
    ('data/videos/clenched_fist.mp4', 'data/dataframes/clenched_fist.csv'),
    ('data/videos/open_palm.mp4', 'data/dataframes/open_palm.csv'),
)

# For each gesture: extract landmarks from the video, tabulate them, write the CSV.
for path_name in PATH_NAMES:
    video_path, csv_path = path_name
    banner = '-' * 10
    print(f'{banner} {video_path} {banner}')
    posotions = collect(video_path)
    df = save_landmarks(posotions)
    df.to_csv(csv_path, index=False)

---------- data/videos/index_finger_up.mp4 ----------
frame : 0 / 1186
frame : 100 / 1186
frame : 200 / 1186
frame : 300 / 1186
frame : 400 / 1186
frame : 500 / 1186
frame : 600 / 1186
frame : 700 / 1186
frame : 800 / 1186
frame : 900 / 1186
frame : 1000 / 1186
frame : 1100 / 1186
frame : 1186 / 1186
---------- data/videos/clenched_fist.mp4 ----------
frame : 0 / 451
frame : 100 / 451
frame : 200 / 451
frame : 300 / 451
frame : 400 / 451
frame : 451 / 451
---------- data/videos/open_palm.mp4 ----------
frame : 0 / 609
frame : 100 / 609
frame : 200 / 609
frame : 300 / 609
frame : 400 / 609
frame : 500 / 609
frame : 600 / 609
frame : 609 / 609
