In [2]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory
import os
#for dirname, _, filenames in os.walk('/kaggle/input'):
    #for filename in filenames:
        #print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [21]:
import shutil

def delete_extra_folders(root_dir='/kaggle/input'):
    """Recursively delete every directory named ``extra`` or ``Extra``.

    Args:
        root_dir: Top of the directory tree to scan. Defaults to
            ``/kaggle/input``, the location the notebook originally
            hard-coded.

    Each matching directory is printed and then removed with
    ``shutil.rmtree``. Nothing is returned.
    """
    unwanted = ('extra', 'Extra')
    for root, dirnames, _ in os.walk(root_dir):
        for name in [d for d in dirnames if d in unwanted]:
            folder_path = os.path.join(root, name)
            if os.path.exists(folder_path):
                print(folder_path)
                shutil.rmtree(folder_path)
        # Prune in place so os.walk does not try to descend into the
        # directories that were just removed.
        dirnames[:] = [d for d in dirnames if d not in unwanted]
#delete_extra_folders()

In [6]:
pip install mediapipe

Collecting mediapipe
  Downloading mediapipe-0.9.0.1-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (33.0 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m33.0/33.0 MB[0m [31m30.8 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
Installing collected packages: mediapipe
Successfully installed mediapipe-0.9.0.1
[0mNote: you may need to restart the kernel to use updated packages.


In [26]:
import os
import json
import multiprocessing
import argparse
import os.path
import cv2
import mediapipe as mp
from tqdm.auto import tqdm
from joblib import Parallel, delayed
import numpy as np
import gc
import warnings

def process_landmarks(landmarks):
    """Split a landmark container into parallel x and y coordinate lists.

    Args:
        landmarks: Object exposing an iterable ``landmark`` attribute whose
            items have ``x`` and ``y`` attributes, or ``None``.

    Returns:
        ``(x_list, y_list)``; both lists are empty when ``landmarks`` is None.
    """
    if landmarks is None:
        return [], []

    # Single pass over the container, reading x then y for every point.
    pairs = [(point.x, point.y) for point in landmarks.landmark]
    xs = [px for px, _ in pairs]
    ys = [py for _, py in pairs]
    return xs, ys


def process_hand_keypoints(results):
    """Return x/y coordinate lists for up to two detected hands.

    Args:
        results: Object with a ``multi_hand_landmarks`` attribute that is
            either ``None`` or a sequence of landmark containers.

    Returns:
        ``(hand1_x, hand1_y, hand2_x, hand2_y)``. A hand that was not
        detected is represented by two empty lists.
    """
    first_x, first_y, second_x, second_y = [], [], [], []

    detected = results.multi_hand_landmarks
    if detected is None:
        return first_x, first_y, second_x, second_y

    n_hands = len(detected)
    if n_hands >= 1:
        first_x, first_y = process_landmarks(detected[0])
    if n_hands >= 2:
        second_x, second_y = process_landmarks(detected[1])

    return first_x, first_y, second_x, second_y


def process_pose_keypoints(results):
    """Return ``(pose_x, pose_y)`` coordinate lists for ``results.pose_landmarks``.

    Both lists are empty when no pose landmarks are present (``None``).
    """
    xs, ys = process_landmarks(results.pose_landmarks)
    return xs, ys


def swap_hands(pose_x, pose_y, hand, input_hand):
    """Decide whether a lone detected hand sits in the wrong hand slot.

    The hand point is compared against pose entries 15 and 16, which the
    code treats as the left and right wrist respectively.

    Args:
        pose_x, pose_y: Pose coordinate lists; the wrists are only read when
            both lists have at least 17 entries.
        hand: ``(x, y)`` of the reference hand point.
        input_hand: ``"h1"`` or ``"h2"``, the slot the hand currently occupies.

    Returns:
        True when the hand is strictly closer to the left wrist but stored as
        ``"h2"``, or strictly closer to the right wrist but stored as ``"h1"``;
        False otherwise (including when the wrists are unavailable, because
        comparisons against NaN are never true).
    """
    if len(pose_x) >= 17 and len(pose_y) >= 17:
        left_wrist = (pose_x[15], pose_y[15])
        right_wrist = (pose_x[16], pose_y[16])
    else:
        left_wrist = right_wrist = (np.nan, np.nan)

    hand_x, hand_y = hand

    def squared_distance(wrist):
        return (wrist[0] - hand_x) ** 2 + (wrist[1] - hand_y) ** 2

    to_left = squared_distance(left_wrist)
    to_right = squared_distance(right_wrist)

    wrongly_second = to_left < to_right and input_hand == "h2"
    wrongly_first = to_right < to_left and input_hand == "h1"
    return bool(wrongly_second or wrongly_first)


def process_video(path, save_dir):
    """Extract per-frame pose and hand keypoints from a video and save them as JSON.

    The label is the name of the video's parent directory with every
    non-letter removed and lower-cased; the uid is ``<label>_<file stem>``.
    The result is written to ``<save_dir>/<uid>.json`` with the keys
    ``uid``, ``label``, ``pose_x``, ``pose_y``, ``hand1_x``, ``hand1_y``,
    ``hand2_x``, ``hand2_y`` and ``n_frames``.

    Args:
        path: Path of the video file. A missing file only triggers a warning;
            a JSON with a single NaN placeholder frame is still written.
        save_dir: Existing directory that receives the JSON file.

    Changes compared with the first version of this function:
        * The video capture and both MediaPipe models are released in a
          ``finally`` block, so they are not leaked when a frame fails.
        * Detected pose frames are cut to the same 25 points as the NaN
          placeholder, so every frame of a video has the same width.
          NOTE(review): this assumes the first 25 pose landmarks are the
          ones the downstream code expects (the ``upper_body_only=True``
          option is commented out) - confirm against the MediaPipe docs.
    """
    # Widths of the NaN placeholders used for frames without a detection.
    n_pose_points, n_hand_points = 25, 21

    label = path.split("/")[-2]
    label = "".join([i for i in label if i.isalpha()]).lower()
    uid = os.path.splitext(os.path.basename(path))[0]
    uid = "_".join([label, uid])

    pose_points_x, pose_points_y = [], []
    hand1_points_x, hand1_points_y = [], []
    hand2_points_x, hand2_points_y = [], []
    n_frames = 0

    if not os.path.isfile(path):
        warnings.warn(path + " file not found")

    cap = cv2.VideoCapture(path)
    hands = pose = None
    try:
        hands = mp.solutions.hands.Hands(
            min_detection_confidence=0.5, min_tracking_confidence=0.5
        )
        pose = mp.solutions.pose.Pose(
            min_detection_confidence=0.5, min_tracking_confidence=0.5
        )

        while cap.isOpened():
            ret, image = cap.read()
            if not ret:
                break
            # OpenCV decodes frames as BGR; the models are fed RGB.
            image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

            hand_results = hands.process(image)
            pose_results = pose.process(image)

            hand1_x, hand1_y, hand2_x, hand2_y = process_hand_keypoints(hand_results)
            pose_x, pose_y = process_pose_keypoints(pose_results)

            # Keep detected frames the same width as the NaN placeholder.
            # The wrist entries (15 and 16) read by swap_hands are preserved.
            pose_x, pose_y = pose_x[:n_pose_points], pose_y[:n_pose_points]

            # When only one hand slot is filled, move it to the other slot
            # if swap_hands says it is nearer the opposite wrist.
            if len(hand1_x) > 0 and len(hand2_x) == 0:
                if swap_hands(
                    pose_x,
                    pose_y,
                    hand=(hand1_x[0], hand1_y[0]),
                    input_hand="h1",
                ):
                    hand1_x, hand1_y, hand2_x, hand2_y = hand2_x, hand2_y, hand1_x, hand1_y
            elif len(hand1_x) == 0 and len(hand2_x) > 0:
                if swap_hands(
                    pose_x,
                    pose_y,
                    hand=(hand2_x[0], hand2_y[0]),
                    input_hand="h2",
                ):
                    hand1_x, hand1_y, hand2_x, hand2_y = hand2_x, hand2_y, hand1_x, hand1_y

            # Missing detections become NaN so they can be interpolated later.
            pose_points_x.append(pose_x if pose_x else [np.nan] * n_pose_points)
            pose_points_y.append(pose_y if pose_y else [np.nan] * n_pose_points)
            hand1_points_x.append(hand1_x if hand1_x else [np.nan] * n_hand_points)
            hand1_points_y.append(hand1_y if hand1_y else [np.nan] * n_hand_points)
            hand2_points_x.append(hand2_x if hand2_x else [np.nan] * n_hand_points)
            hand2_points_y.append(hand2_y if hand2_y else [np.nan] * n_hand_points)

            n_frames += 1
    finally:
        cap.release()
        for model in (hands, pose):
            if model is not None:
                model.close()

    # A video that yielded no frames still gets one all-NaN placeholder frame.
    save_data = {
        "uid": uid,
        "label": label,
        "pose_x": pose_points_x if pose_points_x else [[np.nan] * n_pose_points],
        "pose_y": pose_points_y if pose_points_y else [[np.nan] * n_pose_points],
        "hand1_x": hand1_points_x if hand1_points_x else [[np.nan] * n_hand_points],
        "hand1_y": hand1_points_y if hand1_points_y else [[np.nan] * n_hand_points],
        "hand2_x": hand2_points_x if hand2_points_x else [[np.nan] * n_hand_points],
        "hand2_y": hand2_points_y if hand2_points_y else [[np.nan] * n_hand_points],
        "n_frames": n_frames,
    }
    with open(os.path.join(save_dir, f"{uid}.json"), "w") as f:
        json.dump(save_data, f)

    gc.collect()

def _with_mp4_extension(path):
    """Return ``path`` with a trailing ``.MOV`` extension replaced by ``.MP4``."""
    stem, ext = os.path.splitext(path)
    return stem + ".MP4" if ext == ".MOV" else path


def correct_paths(paths):
    """Repair video paths that do not exist on disk.

    For every missing path the following candidates are tried in order:

    1. the same path with a ``.MP4`` extension instead of ``.MOV``;
    2. the path with ``include-50`` replaced by ``include-50-2``;
    3. candidate 2 with a ``.MP4`` extension (used without an existence
       check as the last resort, as before).

    Only the file extension is rewritten; a directory or file name that
    merely contains the letters "MOV" is left untouched.

    Args:
        paths: List of path strings. It is updated in place.

    Returns:
        The same list object that was passed in.
    """
    for i, original in enumerate(paths):
        if os.path.exists(original):
            continue

        mp4_path = _with_mp4_extension(original)
        if os.path.exists(mp4_path):
            paths[i] = mp4_path
            continue

        alt_path = original.replace('include-50', 'include-50-2')
        paths[i] = alt_path if os.path.exists(alt_path) else _with_mp4_extension(alt_path)

    return paths

def load_file(path, include_dir):
    """Read a list of relative video paths and resolve them under ``include_dir``.

    Args:
        path: Text file with one relative video path per line.
        include_dir: Directory that each relative path is joined onto.

    Returns:
        List of paths after ``correct_paths`` has repaired missing ones.
        Blank lines (including the empty entry produced by a trailing
        newline) are skipped; previously they turned into a bogus entry
        equal to ``include_dir`` itself.
    """
    with open(path, "r") as fp:
        lines = [line.rstrip("\n") for line in fp]
    data = [os.path.join(include_dir, rel) for rel in lines if rel.strip()]
    return correct_paths(data)


def load_train_test_val_paths(args):
    """Load the train, validation and test video path lists for a dataset.

    Args:
        args: Mapping with the keys ``'dataset'`` (used to pick the split
            files) and ``'include_dir'`` (root the listed paths are joined to).

    Returns:
        ``(train_paths, val_paths, test_paths)``.
    """
    include_dir = args['include_dir']
    train_list = f"/kaggle/input/train-test-paths/train_test_paths/{args['dataset']}_train.txt"
    val_list = f"/kaggle/input/train-test-paths/train_test_paths/{args['dataset']}_val.txt"
    test_list = f"/kaggle/input/train-test-paths/train_test_paths/{args['dataset']}_test.txt"

    train_paths = load_file(train_list, include_dir)
    val_paths = load_file(val_list, include_dir)
    test_paths = load_file(test_list, include_dir)
    return train_paths, val_paths, test_paths

def save_keypoints(dataset, file_paths, mode, args, n_jobs=None):
    """Extract keypoints for every video in ``file_paths`` in parallel.

    Args:
        dataset: Dataset name; output goes to
            ``<args['save_direc']>/<dataset>_keypoints``.
        file_paths: Iterable of video paths handed to ``process_video``.
        mode: Split name (e.g. ``"train"``). Currently unused.
        args: Mapping that provides ``'save_direc'``.
        n_jobs: Number of worker processes. Defaults to the machine's CPU
            count, the value the notebook previously read from a global
            ``n_cores`` variable.
    """
    save_dir = os.path.join(args['save_direc'], f"{dataset}_keypoints")
    # makedirs also creates the missing parent directory, which os.mkdir
    # refused to do, and tolerates the directory already existing.
    os.makedirs(save_dir, exist_ok=True)

    if n_jobs is None:
        n_jobs = multiprocessing.cpu_count()

    Parallel(n_jobs=n_jobs, backend="multiprocessing")(
        delayed(process_video)(path, save_dir)
        for path in tqdm(file_paths, desc="processing videos")
    )
#689 train
#77 val
#192 test

# Settings for the keypoint-extraction run below.
args = {
    # Root directory that the relative paths in the split files are joined onto.
    'include_dir':'/kaggle/input/include-50/',
    #/kaggle/input/include-50-2/
    # Output root; overwritten per split further down before each save_keypoints call.
    'save_direc':'/kaggle/working/keypoints1/',
    # Dataset name used to select the *_train/_val/_test.txt split files.
    'dataset':'include50'
}

# Number of CPU cores available on this machine.
n_cores = multiprocessing.cpu_count()
train_paths, val_paths, test_paths = load_train_test_val_paths(args)


# The validation split is currently disabled, so no val keypoints are written by this cell.
# args['save_direc']='/kaggle/working/val_keypoints/'
# save_keypoints('include50', val_paths, "val", args)
# Test split -> /kaggle/working/test_keypoints/include50_keypoints/
args['save_direc']='/kaggle/working/test_keypoints/'
save_keypoints('include50', test_paths, "test", args)

# Train split -> /kaggle/working/train_keypoints/include50_keypoints/
args['save_direc']='/kaggle/working/train_keypoints/'
save_keypoints('include50', train_paths, "train", args)

processing videos:   0%|          | 0/192 [00:00<?, ?it/s]

INFO: Created TensorFlow Lite XNNPACK delegate for CPU.
INFO: Created TensorFlow Lite XNNPACK delegate for CPU.
INFO: Created TensorFlow Lite XNNPACK delegate for CPU.
INFO: Created TensorFlow Lite XNNPACK delegate for CPU.


processing videos:   0%|          | 0/689 [00:00<?, ?it/s]

INFO: Created TensorFlow Lite XNNPACK delegate for CPU.
INFO: Created TensorFlow Lite XNNPACK delegate for CPU.
INFO: Created TensorFlow Lite XNNPACK delegate for CPU.
INFO: Created TensorFlow Lite XNNPACK delegate for CPU.


In [27]:
#Execute only this box vee
import pandas as pd
import json
import glob

def convert_json_to_dataframe(file_paths):
    """Load keypoint JSON files into a DataFrame with one row per file.

    Args:
        file_paths: Iterable of paths to JSON files, each holding one object.

    Returns:
        DataFrame restricted to the columns ``uid``, ``label``, ``pose_x``,
        ``pose_y``, ``hand1_x``, ``hand1_y``, ``hand2_x``, ``hand2_y`` and
        ``n_frames``, in that order.
    """
    def _read(json_path):
        with open(json_path, 'r') as handle:
            return json.load(handle)

    records = [_read(json_path) for json_path in file_paths]
    wanted_columns = ["uid", "label", "pose_x", "pose_y", "hand1_x", "hand1_y", "hand2_x", "hand2_y", "n_frames"]
    return pd.DataFrame(records, columns=wanted_columns)

# Example usage
# Collect the per-video JSON files written by save_keypoints for each split.
# glob returns an empty list when a directory has no matching files; the val
# directory is only populated if the val save_keypoints call is enabled.
val_file_paths = glob.glob("/kaggle/working/val_keypoints/include50_keypoints/*.json")
test_file_paths = glob.glob("/kaggle/working/test_keypoints/include50_keypoints/*.json")
train_file_paths = glob.glob("/kaggle/working/train_keypoints/include50_keypoints/*.json")

# One DataFrame per split, one row per video.
val_df = convert_json_to_dataframe(val_file_paths)
test_df = convert_json_to_dataframe(test_file_paths)
train_df = convert_json_to_dataframe(train_file_paths)

#convert to csvs
# Written to the current working directory; the DataFrame index is included as the first column.
val_df.to_csv('val.csv')
test_df.to_csv('test.csv')
train_df.to_csv('train.csv')

Vee, you can ignore everything below this point.

In [73]:
# Settings for the training/evaluation cells below. This rebinds the `args`
# name that the keypoint-extraction cell also uses.
# NOTE(review): 'data_dir' is set to 'ls', which looks like a placeholder - confirm.
args = {'data_dir':'ls',
       'dataset':'include50',
       'use_augs': False,
       'save_dir': '/kaggle/working/'}

In [68]:
import pandas as pd
import numpy as np


class Augmentation:
    """Callable wrapper that applies an augmentation with probability ``p``."""

    def __init__(self, aug_func, p=1):
        """Store the augmentation callable and its application probability."""
        self.aug_func, self.p = aug_func, p

    def __call__(self, df):
        """Return ``aug_func(df)`` when a uniform draw is <= ``p``, else ``df`` unchanged."""
        should_apply = np.random.rand() <= self.p
        return self.aug_func(df) if should_apply else df


def OneOf(aug_a, aug_b):
    """Return ``aug_a`` or ``aug_b``, picked by one uniform draw against 0.5."""
    return aug_a if np.random.rand() < 0.5 else aug_b


def _rotate_keypoints(df, degrees):
    """Rotate the ``pose``, ``hand1`` and ``hand2`` keypoints of every row.

    Each cell is read as an ``(n_points, 2)`` float array and right-multiplied
    by the 2x2 rotation matrix for ``degrees``. NaN coordinates are written
    back as ``None``.

    Returns:
        New DataFrame with the columns ``uid``, ``pose``, ``hand1``,
        ``hand2`` and ``label``; ``df`` itself is not modified.
    """
    theta = degrees * (np.pi / 180)
    c, s = np.cos(theta), np.sin(theta)
    rotation_matrix = np.array([[c, -s], [s, c]])
    keypoint_cols = ["pose", "hand1", "hand2"]

    df_augmented = pd.DataFrame()
    df_augmented["uid"] = df["uid"]
    for col in keypoint_cols:
        # Explicit object dtype so a nested list can be stored in each cell.
        df_augmented[col] = pd.Series([None] * len(df), index=df.index, dtype=object)
    df_augmented["label"] = df["label"]

    for idx in df.index:
        for col in keypoint_cols:
            # Builtin float replaces the np.float alias, which newer NumPy
            # releases no longer provide.
            matrix = np.array(df.loc[idx, col], dtype=float)
            matrix = np.matmul(matrix, rotation_matrix)
            df_augmented.at[idx, col] = np.where(np.isnan(matrix), None, matrix).tolist()

    return df_augmented


def plus7rotation(df):
    """Return a copy of the keypoint frame rotated by +7 degrees."""
    return _rotate_keypoints(df, 7)


def minus7rotation(df):
    """Return a copy of the keypoint frame rotated by -7 degrees."""
    return _rotate_keypoints(df, -7)


def gaussSample(df):
    """Jitter every keypoint with Gaussian noise.

    Coordinates are divided by a 1920x1080 frame size, perturbed with
    ``np.random.normal`` and multiplied back. Pose points use a per-point
    standard deviation (25 entries); both hands use ``dv`` for every point.
    Processing stops at the first row whose ``pose`` cell is all zeros;
    that row and the rows after it are left untouched. NaN coordinates are
    written back as ``None``.

    Args:
        df: DataFrame with ``pose``, ``hand1`` and ``hand2`` columns holding
            ``(n_points, 2)`` nested lists, indexed 0..n-1. The pose cells
            must have 25 points to match the sigma table.

    Returns:
        Augmented copy of ``df``.
    """
    df_augmented = df.copy()
    dv = 0.05 * 10 ** -2
    sv = 0.08 * 10 ** -2
    lv = 0.08 * 10 ** -1
    # Per-pose-point standard deviations (25 values, same order as before).
    sigma = [sv] + [dv] * 6 + [sv] * 4 + [lv] * 4 + [sv] * 8 + [lv] * 2
    pose_variance = np.column_stack((sigma, sigma))

    ## Check if keypoints is range [0, 1]
    x_width = 1920
    y_height = 1080

    def _jitter(points, std):
        """Return ``points`` with noise of scale ``std`` added in normalised space."""
        # Builtin float replaces the np.float alias, which newer NumPy
        # releases no longer provide.
        pts = np.array(points, dtype=float)
        pts[:, 0] /= x_width
        pts[:, 1] /= y_height
        pts = np.random.normal(pts, std)
        pts[:, 0] *= x_width
        pts[:, 1] *= y_height
        return np.where(np.isnan(pts), None, pts).tolist()

    for i in range(df.shape[0]):
        if np.count_nonzero(df.loc[i, "pose"]) == 0:
            break

        # Draw order (pose, hand1, hand2) matches the previous implementation.
        pose = _jitter(df.loc[i, "pose"], pose_variance)
        hand1 = _jitter(df.loc[i, "hand1"], dv)
        hand2 = _jitter(df.loc[i, "hand2"], dv)

        df_augmented.at[i, "pose"] = pose
        df_augmented.at[i, "hand1"] = hand1
        df_augmented.at[i, "hand2"] = hand2

    return df_augmented


def cutout(df):
    """Zero out three random keypoints per part in every real frame.

    For each row, up to the first row whose ``pose`` cell is all zeros
    (treated as padding), three distinct points of ``pose``, ``hand1`` and
    ``hand2`` are set to ``[0, 0]``.

    Fixes compared with the first version:
        * The inner loops reused the row variable ``i``, so the result was
          written to the row numbered like the last dropped hand2 point
          instead of the current frame.
        * Clips without any padding row were returned unchanged because the
          padding index defaulted to 0.

    Args:
        df: DataFrame with ``pose``, ``hand1`` and ``hand2`` columns holding
            ``(n_points, 2)`` nested lists, indexed 0..n-1.

    Returns:
        Augmented copy of ``df``.
    """
    df_augmented = df.copy()
    n_drop = 3

    for row in range(df.shape[0]):
        if np.count_nonzero(df.loc[row, "pose"]) == 0:
            break  # first padding frame: nothing after it is augmented

        # Draw order (pose, hand1, hand2) matches the previous implementation.
        for col in ("pose", "hand1", "hand2"):
            points = np.array(df.loc[row, col])
            if len(points) == 0:
                continue
            drop_idx = np.random.choice(
                len(points), min(n_drop, len(points)), replace=False
            )
            points[drop_idx] = 0
            df_augmented.at[row, col] = points.tolist()

    return df_augmented


def downsample(df):
    """Return a copy of ``df`` with randomly chosen frames removed.

    Frames shorter than 15 rows are copied unchanged. Otherwise 15 index
    labels are drawn with ``np.random.choice`` (with replacement, so fewer
    than 15 distinct rows may be dropped) and removed from the copy.
    """
    n_frames = df.shape[0]
    thinned = df.copy()
    if n_frames >= 15:
        unlucky = np.random.choice(n_frames, 15)  # 154 frames , 15 frames
        thinned = thinned.drop(index=unlucky)
    return thinned


def upsample(df):
    """Lengthen a clip by inserting an averaged frame at every 10th position.

    The output has ``n + n // 10`` rows. Every output position that is a
    non-zero multiple of 10 holds the mean of the two neighbouring source
    frames; all other positions copy the source frames in order. NaN
    coordinates in an averaged frame are written back as ``None``.

    Fixes compared with the first version:
        * ``np.float`` (no longer provided by newer NumPy releases) is
          replaced by the builtin ``float``.
        * When an insert position falls after the last source frame
          (e.g. a 10-frame clip) the last frame is repeated instead of
          raising ``KeyError``.
        * An empty frame is returned as an empty copy instead of raising.

    Args:
        df: DataFrame indexed 0..n-1 with ``uid``, ``pose``, ``hand1``,
            ``hand2`` and ``label`` columns.

    Returns:
        New DataFrame with the columns ``uid``, ``pose``, ``hand1``,
        ``hand2`` and ``label``.
    """
    keypoint_cols = ("pose", "hand1", "hand2")

    def get_avg(df, lower, upper, col):
        """Mean of the keypoints in rows ``lower`` and ``upper`` of ``col``."""
        aug_points = (
            np.array(df.loc[lower, col], dtype=float)
            + np.array(df.loc[upper, col], dtype=float)
        ) / 2
        return np.where(np.isnan(aug_points), None, aug_points).tolist()

    frame_length = df.shape[0]
    if frame_length == 0:
        return df.copy()

    additional_frames = frame_length // 10
    df_augmented = pd.DataFrame(
        index=np.arange(frame_length + additional_frames),
        columns=["uid", "pose", "hand1", "hand2", "label"],
    )
    df_augmented["uid"] = df.iloc[0].loc["uid"]

    j = 0  # index of the next source frame to copy
    last = frame_length - 1
    for i in range(df_augmented.shape[0]):
        if i % 10 != 0 or i == 0:
            for col in keypoint_cols:
                df_augmented.at[i, col] = df.loc[j, col]
            j += 1
            continue

        # Clamp the upper neighbour so a trailing insert slot reuses the
        # last real frame.
        upper = min(j, last)
        for col in keypoint_cols:
            df_augmented.at[i, col] = get_avg(df, j - 1, upper, col)

    df_augmented["label"] = df.iloc[0].loc["label"]
    return df_augmented

In [78]:
import os

from sklearn.metrics import accuracy_score
import pandas as pd
import numpy as np

#from models import Xgboost
#from configs import XgbConfig
#from utils import get_experiment_name, load_label_map
# from augment import (
#     plus7rotation,
#     minus7rotation,
#     gaussSample,
#     cutout,
#     upsample,
#     downsample,
# )
from tqdm.auto import tqdm

def load_label_map(dataset):
    """Load the label map for ``dataset`` from ``label_maps/label_map_<dataset>.json``.

    The path is relative to the current working directory. The file is read
    with the standard ``json`` module because the ``load_json`` helper the
    earlier version called is neither defined nor imported in this notebook.

    Returns:
        The parsed JSON content.
        NOTE(review): presumably a mapping from label name to class index,
        since ``preprocess`` indexes it by label - confirm.
    """
    import json  # local import: this cell does not import json itself

    file_path = f"label_maps/label_map_{dataset}.json"
    with open(file_path, "r") as fp:
        return json.load(fp)


def get_experiment_name(args):
    """Build the experiment name ``[cnn_][augs_]<model>`` from the run settings.

    Args:
        args: Either a dict or an attribute-style object (e.g. an argparse
            namespace). The earlier version mixed ``args.use_cnn`` with
            ``args['use_augs']`` and therefore worked with neither.
            Recognised options: ``use_cnn`` (default False), ``use_augs``
            (default False) and ``model``.
            NOTE(review): ``model`` defaults to ``"xgboost"`` because that is
            the only model this notebook instantiates - confirm the default.

    Returns:
        The experiment name string.
    """
    def _option(name, default):
        if isinstance(args, dict):
            return args.get(name, default)
        return getattr(args, name, default)

    exp_name = ""
    if _option("use_cnn", False):
        exp_name += "cnn_"
    if _option("use_augs", False):
        exp_name += "augs_"
    exp_name += _option("model", "xgboost")
    return exp_name

def flatten(arr, max_seq_len=200):
    """Turn a ``(n_frames, n_points)`` sequence into a fixed-length 1-D vector.

    Sequences shorter than ``max_seq_len`` are zero-padded at the end.
    Longer sequences are cut to their first ``max_seq_len`` frames;
    previously they made ``np.pad`` fail on a negative pad width.

    Args:
        arr: 2-D array-like of per-frame values.
        max_seq_len: Number of frames in the output.

    Returns:
        1-D array of length ``max_seq_len * n_points``.
    """
    arr = np.array(arr)[:max_seq_len]
    arr = np.pad(arr, ((0, max_seq_len - arr.shape[0]), (0, 0)), "constant")
    return arr.flatten()


def combine_xy(x, y):
    """Pair up x and y coordinates into an ``(n_frames, n_points, 2)`` float32 array.

    Args:
        x, y: 2-D array-likes of shape ``(n_frames, n_points)``.
    """
    xs = np.array(x)
    ys = np.array(y)
    n_frames, n_points = xs.shape  # raises if x is not 2-D
    paired = np.dstack(
        [xs.reshape((-1, n_points, 1)), ys.reshape((-1, n_points, 1))]
    )
    return paired.astype(np.float32)


def split_xy(data):
    """Split per-frame ``(n_points, 2)`` keypoints into separate x and y arrays.

    Entries that convert to a 0-dimensional array (for example a lone NaN)
    are skipped.

    Returns:
        ``(value_x, value_y)`` arrays with one row per kept frame.
    """
    frames = (np.asarray(entry) for entry in data)
    kept = [frame for frame in frames if frame.shape != ()]
    value_x = np.asarray([frame[:, 0] for frame in kept])
    value_y = np.asarray([frame[:, 1] for frame in kept])
    return value_x, value_y


def augment_sample(df, augs):
    """Apply each augmentation in ``augs`` to one video sample.

    Args:
        df: One sample with the fields ``uid``, ``label``, ``pose_x``,
            ``pose_y``, ``hand1_x``, ``hand1_y``, ``hand2_x``, ``hand2_y``
            and ``n_frames``.
        augs: Iterable of augmentation functions; each receives a per-frame
            DataFrame with ``uid``, ``pose``, ``hand1``, ``hand2`` and
            ``label`` columns and must expose ``__name__``.

    Returns:
        The per-augmentation ``pd.Series`` records concatenated along axis 0,
        i.e. one long Series in which the field names repeat once per
        augmentation. ``n_frames`` is copied from the input sample even when
        an augmentation changes the number of frames.
    """
    sample = df.copy()
    frames = pd.DataFrame.from_dict(
        {
            "uid": sample.uid,
            "pose": combine_xy(sample.pose_x, sample.pose_y).tolist(),
            "hand1": combine_xy(sample.hand1_x, sample.hand1_y).tolist(),
            "hand2": combine_xy(sample.hand2_x, sample.hand2_y).tolist(),
            "label": sample.label,
        }
    )

    def _as_record(augmentation):
        augmented = augmentation(frames)
        pose_x, pose_y = split_xy(augmented.pose)
        hand1_x, hand1_y = split_xy(augmented.hand1)
        hand2_x, hand2_y = split_xy(augmented.hand2)
        return pd.Series(
            {
                "uid": sample.uid + "_" + augmentation.__name__,
                "label": sample.label,
                "pose_x": pose_x.tolist(),
                "pose_y": pose_y.tolist(),
                "hand1_x": hand1_x.tolist(),
                "hand1_y": hand1_y.tolist(),
                "hand2_x": hand2_x.tolist(),
                "hand2_y": hand2_y.tolist(),
                "n_frames": sample.n_frames,
            }
        )

    records = [_as_record(augmentation) for augmentation in augs]
    return pd.concat(records, axis=0)


def preprocess(df, use_augs, label_map, mode):
    """Convert a frame of video samples into a feature matrix and label vector.

    Every sample contributes one feature vector: its six keypoint columns
    are each passed through ``flatten`` and concatenated. When ``use_augs``
    is true and ``mode == "train"``, the augmented variants of a sample are
    added right after it with the same label.

    Fixes compared with the first version:
        * Augmented samples were concatenated onto ``df`` but never turned
          into features, because the loop only covered the original rows.
        * ``df`` is no longer re-concatenated on every iteration.
        * Rows are read by position, so a non-0..n-1 index also works.
        * The progress bar is closed when the loop ends.

    Args:
        df: DataFrame with one row per video and the columns ``pose_x``,
            ``pose_y``, ``hand1_x``, ``hand1_y``, ``hand2_x``, ``hand2_y``
            and ``label``.
        use_augs: Whether to add augmented samples (train mode only).
        label_map: Mapping from label to the target value.
        mode: Split name, e.g. ``"train"``, ``"val"`` or ``"test"``.

    Returns:
        ``(x, y)``: stacked feature matrix and label array.
    """
    feature_cols = ["pose_x", "pose_y", "hand1_x", "hand1_y", "hand2_x", "hand2_y"]
    apply_augs = use_augs and mode == "train"

    def _features(sample):
        flat = np.hstack([flatten(sample[col]) for col in feature_cols])
        # Augmentations store missing coordinates as None, which yields an
        # object array; convert it back to floats (None becomes NaN).
        if flat.dtype == object:
            flat = flat.astype(float)
        return flat

    def _augmented_samples(row, augs):
        augmented = augment_sample(row, augs)
        if isinstance(augmented, pd.DataFrame):
            return [sample for _, sample in augmented.iterrows()]
        # augment_sample concatenates its per-augmentation records into one
        # long Series; cut it back into one record per augmentation.
        width = len(augmented) // len(augs)
        return [
            augmented.iloc[k * width:(k + 1) * width] for k in range(len(augs))
        ]

    x, y = [], []
    no_of_videos = df.shape[0]
    with tqdm(total=no_of_videos, desc=f"Processing {mode} file....") as pbar:
        for i in range(no_of_videos):
            row = df.iloc[i]
            samples = [row]
            if apply_augs:
                # The list is built only here so the augmentation functions
                # need to exist only when augmentation is requested.
                augs = [
                    plus7rotation,
                    minus7rotation,
                    gaussSample,
                    cutout,
                    upsample,
                    downsample,
                ]
                samples.extend(_augmented_samples(row, augs))

            for sample in samples:
                x.append(_features(sample))
                y.append(label_map[sample["label"]])
            pbar.update(1)

    x = np.stack(x)
    y = np.array(y)
    return x, y

def load_dataframe(files):
    """Load keypoint JSON files into a DataFrame with one row per file.

    The earlier version concatenated one Series per file along axis 0,
    which produced a single long Series instead of the row-per-video frame
    that ``preprocess`` indexes by row and column.

    Args:
        files: Sequence of paths to JSON files, each holding one object.

    Returns:
        DataFrame indexed 0..n-1 whose columns are the JSON keys.

    Raises:
        ValueError: If ``files`` is empty.
    """
    import json  # local import: this cell does not import json itself

    files = list(files)
    if not files:
        raise ValueError(
            "No keypoint files to load; check the glob pattern and that "
            "keypoint extraction has been run for this split."
        )

    records = []
    for file_path in files:
        with open(file_path, "r") as fp:
            records.append(json.load(fp))
    return pd.DataFrame(records)


def fit(args):
    """Train the XGBoost model on the extracted keypoints and save it.

    Args:
        args: Dict with the keys ``'dataset'``, ``'use_augs'`` and
            ``'save_dir'`` (plus whatever ``get_experiment_name`` reads).

    Fixes compared with the first version: the unbalanced parenthesis
    around the train-file glob (a syntax error) is closed, and
    ``save_dir`` is read with the same dict access as the other settings.

    NOTE(review): ``XgbConfig`` and ``Xgboost`` are not defined in this
    notebook (their imports are commented out) - they must be made
    available before this function can run.
    """
    import glob  # local import: this cell does not import glob itself

    train_files = sorted(
        glob.glob("/kaggle/working/train_keypoints/include50_keypoints/*.json")
    )
    val_files = sorted(
        glob.glob("/kaggle/working/val_keypoints/include50_keypoints/*.json")
    )

    train_df = load_dataframe(train_files)
    val_df = load_dataframe(val_files)

    label_map = load_label_map(args['dataset'])
    x_train, y_train = preprocess(train_df, args['use_augs'], label_map, "train")
    x_val, y_val = preprocess(val_df, args['use_augs'], label_map, "val")

    config = XgbConfig()
    model = Xgboost(config=config)
    model.fit(x_train, y_train, x_val, y_val)

    exp_name = get_experiment_name(args)
    # The path layout is kept as is because evaluate() builds the identical path.
    save_path = os.path.join(args['save_dir'], exp_name, ".pickle.dat")
    model.save(save_path)


def evaluate(args):
    """Load the saved model and print its accuracy on the test keypoints.

    Args:
        args: Run settings, either a dict (as used by ``fit`` and the rest
            of this notebook) or an attribute-style object. The options
            ``data_dir``, ``dataset``, ``use_augs`` and ``save_dir`` are read.
            The earlier version only supported attribute access.

    NOTE(review): ``XgbConfig`` and ``Xgboost`` are not defined in this
    notebook (their imports are commented out) - they must be made
    available before this function can run.
    """
    import glob  # local import: this cell does not import glob itself

    def _option(name):
        return args[name] if isinstance(args, dict) else getattr(args, name)

    test_files = sorted(
        glob.glob(
            os.path.join(
                _option("data_dir"), f"{_option('dataset')}_test_keypoints", "*.json"
            )
        )
    )

    test_df = load_dataframe(test_files)

    label_map = load_label_map(_option("dataset"))
    x_test, y_test = preprocess(test_df, _option("use_augs"), label_map, "test")

    exp_name = get_experiment_name(args)
    config = XgbConfig()
    model = Xgboost(config=config)
    load_path = os.path.join(_option("save_dir"), exp_name, ".pickle.dat")
    model.load(load_path)
    print("### Model loaded ###")

    test_preds = model(x_test)
    print("Test accuracy:", accuracy_score(y_test, test_preds))
    

Train Files:
[]
Val Files:
[]


ValueError: No objects to concatenate

Ignore the following code. Pursuing a different approach.

To do for the group:
1. Make a dataframe like the previous one, with the columns hand1_x, hand1_y, hand2_x, hand2_y, label and n_frames.
2. Instead of processing all frames, select only the middle 30 or 40 frames and process those.
3. Write a function to extract the keypoints; for that we have to understand the structure of hand_landmarker_result.

In [None]:
#our box

import mediapipe as mp

def process_hand_landmarker_result(result):
    """Return x/y coordinate lists for up to two hands of a landmarker result.

    Mirrors ``process_hand_keypoints`` for the Tasks API result object.
    NOTE(review): assumes ``result.hand_landmarks`` is a list with one entry
    per detected hand, each a list of landmarks with ``x`` and ``y``
    attributes - confirm against the Hand Landmarker documentation.

    Returns:
        ``(hand1_x, hand1_y, hand2_x, hand2_y)``; a hand that was not
        detected is represented by two empty lists.
    """
    coords = []
    for hand in result.hand_landmarks[:2]:
        coords.append([landmark.x for landmark in hand])
        coords.append([landmark.y for landmark in hand])
    coords.extend([] for _ in range(4 - len(coords)))
    hand1_x, hand1_y, hand2_x, hand2_y = coords
    return hand1_x, hand1_y, hand2_x, hand2_y


BaseOptions = mp.tasks.BaseOptions
HandLandmarker = mp.tasks.vision.HandLandmarker
HandLandmarkerOptions = mp.tasks.vision.HandLandmarkerOptions
VisionRunningMode = mp.tasks.vision.RunningMode

# Create a hand landmarker instance with the video mode:
options = HandLandmarkerOptions(
    base_options=BaseOptions(model_asset_path='/kaggle/input/hand-landmark/hand_landmarker.task'),
    running_mode=VisionRunningMode.VIDEO)

with HandLandmarker.create_from_options(options) as landmarker:
    # Use OpenCV's VideoCapture to load the input video.
    cap = cv2.VideoCapture('/kaggle/input/include-50/Adjectives/1. loud/MVI_5177.MOV')
    video_framerate = cap.get(cv2.CAP_PROP_FPS)
    vdo_keypoints = []  # one (hand1_x, hand1_y, hand2_x, hand2_y) tuple per frame

    try:
        while cap.isOpened():
            ret, image = cap.read()  # read returns (frame_exists, current_frame)
            if not ret:
                break
            # OpenCV loads frames as BGR; convert to RGB for MediaPipe.
            image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
            # Convert the frame received from OpenCV to a MediaPipe Image object.
            mp_image = mp.Image(image_format=mp.ImageFormat.SRGB, data=image)

            # The hand landmarker must be created with the video mode; the
            # timestamp of the current position is passed along with the frame.
            frame_timestamp_ms = int(cap.get(cv2.CAP_PROP_POS_MSEC))
            hand_landmarker_result = landmarker.detect_for_video(mp_image, frame_timestamp_ms)

            # Result layout is described in the last section of:
            # https://developers.google.com/mediapipe/solutions/vision/hand_landmarker/python#video_2
            hand1_x, hand1_y, hand2_x, hand2_y = process_hand_landmarker_result(hand_landmarker_result)
            vdo_keypoints.append((hand1_x, hand1_y, hand2_x, hand2_y))
    finally:
        # Released once after the loop; releasing inside the loop ended the
        # processing after the first frame.
        cap.release()
        
    
    

In [None]:
# Show the Python type of the `hand_landmarks` attribute of the most recent detection result.
print(type(hand_landmarker_result.hand_landmarks))

In [None]:
# Show the Python type of the `handedness` attribute of the most recent detection result.
print(type(hand_landmarker_result.handedness))

In [None]:
# Show the Python type of the `hand_world_landmarks` attribute of the most recent detection result.
print(type(hand_landmarker_result.hand_world_landmarks))

In [None]:
# Number of entries in the first element of `hand_world_landmarks`.
# NOTE(review): the [0] index presumably fails when no hand was detected in that frame - confirm.
print(len(hand_landmarker_result.hand_world_landmarks[0]))

In [None]:
# Number of entries in the first element of `hand_landmarks`.
# NOTE(review): the [0] index presumably fails when no hand was detected in that frame - confirm.
print(len(hand_landmarker_result.hand_landmarks[0]))

In [None]:
# Display the first element of `hand_landmarks` to inspect its structure.
hand_landmarker_result.hand_landmarks[0]

In [None]:
# Display the first element of `hand_world_landmarks` to inspect its structure.
hand_landmarker_result.hand_world_landmarks[0]