#### Reference

+ [WLASL (Word-Level American Sign Language) Video](https://www.kaggle.com/datasets/risangbaskoro/wlasl-processed)


In [1]:
!pip install mediapipe==0.9.0.1 scikit-video

Looking in indexes: https://pypi.org/simple, https://pypi.ngc.nvidia.com
[0m

In [2]:
import os

os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3"

import cv2
import json
import skvideo
import skvideo.io
import numpy as np
import pandas as pd
import mediapipe as mp
import matplotlib.pyplot as plt

from tqdm import tqdm
from base64 import b64encode
from IPython.display import HTML
from IPython.display import Video
from joblib import Parallel, delayed
from google.protobuf.json_format import MessageToDict

# Short module-level aliases for the three MediaPipe solutions used by
# process_video (pose, hands and face mesh).
_mp_solutions = mp.solutions
mp_pose = _mp_solutions.pose
mp_hands = _mp_solutions.hands
mp_face_mesh = _mp_solutions.face_mesh

In [3]:
# Root of the read-only inputs; every other location is derived from it.
DATA_PATH = "../input/"

# CSV with a "sign" column and the WLASL JSON index (gloss -> instances).
TRAIN_FILE = f"{DATA_PATH}train.csv"
INDEX_FILE = f"{DATA_PATH}wlasl/WLASL_v0.3.json"

# Folder holding "<video_id>.mp4" files; the trailing slash matters because
# paths are built by plain string concatenation.
VID_PATH = f"{DATA_PATH}wlasl/videos/videos/"
# Destination for the per-video landmark parquet files.
SAVE_FOLDER = f"{DATA_PATH}wlasl/train_landmark_files/"

In [4]:
# Distinct sign labels present in the training CSV, in order of first
# appearance (sl), plus a one-column frame of those labels sorted A-Z (df).
sl = pd.read_csv(TRAIN_FILE)["sign"].unique()
df = pd.DataFrame({"sign": sl}).sort_values(by="sign", ascending=True)

In [5]:
# Build one record per WLASL instance whose video file is actually present.
# The JSON index is a list of {"gloss": ..., "instances": [...]} entries.
with open(INDEX_FILE) as index_fh:  # close the handle deterministically
    root = json.load(index_fh)

VIDEO_COLUMNS = [
    "path",
    "participant_id",
    "sequence_id",
    "sign",
    "video_id",
    "video_path",
    "fps",
    "frame_start",
    "frame_end",
    "bbox_0",
    "bbox_1",
    "bbox_2",
    "bbox_3",
]

rows = []
for item in tqdm(root):
    gloss = item["gloss"]
    #     if not gloss in sl:
    #         continue

    for inst in item["instances"]:
        video_id = str(inst["video_id"])
        video_path = VID_PATH + video_id + ".mp4"

        # Skip index entries whose video is missing from the local copy.
        if not os.path.exists(video_path):
            continue

        # NOTE(review): the 1_000_000 offset presumably keeps WLASL signer ids
        # from colliding with participant ids of another dataset - confirm.
        signer_id = inst["signer_id"] + 1000000
        bbox = inst["bbox"]

        rows.append(
            {
                # Same location process_video writes to. The previous
                # concatenation dropped the "/" after "wlasl" and produced
                # "../input/wlasltrain_landmark_files/...".
                "path": os.path.join(
                    SAVE_FOLDER, str(signer_id), f"{video_id}.parquet"
                ),
                "participant_id": signer_id,
                "sequence_id": video_id,
                "sign": gloss,
                "video_id": video_id,
                "video_path": video_path,
                "fps": inst["fps"],
                "frame_start": inst["frame_start"],
                "frame_end": inst["frame_end"],
                "bbox_0": bbox[0],
                "bbox_1": bbox[1],
                "bbox_2": bbox[2],
                "bbox_3": bbox[3],
            }
        )

# Passing the column list keeps the schema (and the sort below) valid even
# when no video was found.
df = pd.DataFrame(rows, columns=VIDEO_COLUMNS)
df = df.sort_values(["sign"], ascending=[True])
video_df = df.copy()

100%|██████████| 2000/2000 [00:00<00:00, 3932.44it/s]


In [6]:
def play(filename):
    """Return an inline HTML5 player for an MP4 file.

    The whole file is read and embedded in the page as a base64 data URI, so
    this is only suitable for short clips.

    Args:
        filename: Path of the MP4 file to embed.

    Returns:
        An ``IPython.display.HTML`` object that renders an auto-playing,
        looping ``<video>`` element.
    """
    # Context manager so the file handle is closed instead of leaked.
    with open(filename, "rb") as video_fh:
        video = video_fh.read()

    src = "data:video/mp4;base64," + b64encode(video).decode()
    return HTML(
        '<video width=1000 controls autoplay loop><source src="%s" type="video/mp4"></video>'
        % src
    )

In [7]:
def extract_video(video_id, video_file):
    """Dump every frame of a video to ``../output/tmp/<video_id>/`` as JPEG.

    Frames are written as ``<video_id>_<n>.jpg`` with ``n`` starting at 0.

    Args:
        video_id: Identifier used for the folder name and file prefix.
        video_file: Path of the video to decode.
    """
    folder = f"../output/tmp/{video_id}"
    # Pure-Python replacement for shelling out to `mkdir -p`: portable and
    # immune to shell metacharacters in video_id.
    os.makedirs(folder, exist_ok=True)

    vid = skvideo.io.vread(video_file)

    for count, image in enumerate(vid):
        # skvideo decodes to RGB while cv2.imwrite interprets arrays as BGR;
        # without this conversion the JPEGs are stored with red and blue
        # swapped and every later cv2.imread sees wrong colours.
        cv2.imwrite(
            f"{folder}/{video_id}_{count}.jpg",
            cv2.cvtColor(image, cv2.COLOR_RGB2BGR),
        )


# Fixed number of rows emitted per frame for each landmark group.
_HAND_LANDMARKS = 21
_FACE_LANDMARKS = 468
_POSE_LANDMARKS = 33


def _landmark_row(frame, atype, landmark_index, x=0.0, y=0.0, z=0.0):
    """Build one output record; coordinates default to the 0.0 padding value."""
    return {
        "frame": frame,
        "row_id": f"{frame}-{atype}-{landmark_index}",
        "type": atype,
        "landmark_index": landmark_index,
        "x": x,
        "y": y,
        "z": z,
    }


def _fixed_size_rows(frame, atype, landmarks, size):
    """Return exactly ``size`` records: extra landmarks dropped, missing ones zero-filled."""
    group_rows = []
    for landmark_index in range(size):
        if landmark_index < len(landmarks):
            point = landmarks[landmark_index]
            group_rows.append(
                _landmark_row(frame, atype, landmark_index, point.x, point.y, point.z)
            )
        else:
            group_rows.append(_landmark_row(frame, atype, landmark_index))
    return group_rows


def _hand_points_by_label(results, debug=False):
    """Map MediaPipe's "Left"/"Right" handedness labels to landmark lists (empty if absent)."""
    points = {"Left": [], "Right": []}
    if results.multi_hand_landmarks is None:
        return points

    hand_index = {"Left": -1, "Right": -1}
    if results.multi_handedness is not None:
        for idx, handedness in enumerate(results.multi_handedness):
            label = MessageToDict(handedness)["classification"][0]["label"]
            # If two detections share a label, the later one wins.
            if label in hand_index:
                hand_index[label] = idx

    if debug:
        print(f"=> LH_IDX: {hand_index['Left']}")
        print(f"=> RH_IDX: {hand_index['Right']}")

    for label, idx in hand_index.items():
        if idx >= 0:
            points[label] = list(results.multi_hand_landmarks[idx].landmark)
    return points


def process_video(video_df, video_idx, debug=False, skip_existing=False):
    """Extract per-frame MediaPipe landmarks for one video and save them as parquet.

    The video is first dumped to JPEG frames with ``extract_video``; each frame
    is then run through the hands, face-mesh and pose solutions. Every frame
    contributes a fixed block of rows in this order: 468 ``face``,
    21 ``left_hand``, 33 ``pose``, 21 ``right_hand``. Landmarks that were not
    detected are written with x = y = z = 0.0.

    Args:
        video_df: Frame with ``video_id``, ``participant_id`` and
            ``video_path`` columns.
        video_idx: Positional index of the row to process.
        debug: When true, print the frame counter and detected hand indices.
        skip_existing: When true and the output parquet already exists, do
            nothing and return ``None`` (useful for resuming a batch run).

    Returns:
        DataFrame with columns ``frame``, ``row_id``, ``type``,
        ``landmark_index``, ``x``, ``y``, ``z``; also written to
        ``{SAVE_FOLDER}/{participant_id}/{video_id}.parquet``.

    Raises:
        AssertionError: If the video file does not exist.
    """
    video_id = video_df["video_id"].iloc[video_idx]
    signer_id = video_df["participant_id"].iloc[video_idx]
    video_file = video_df["video_path"].iloc[video_idx]

    out_dir = f"{SAVE_FOLDER}/{signer_id}/"
    out_file = f"{SAVE_FOLDER}/{signer_id}/{video_id}.parquet"
    if skip_existing and os.path.exists(out_file):
        return None

    assert os.path.exists(video_file), f"missing video file: {video_file}"

    extract_video(video_id, video_file)
    frame_dir = f"../output/tmp/{video_id}"

    rows = []
    with mp_hands.Hands(
        static_image_mode=True, max_num_hands=2, min_detection_confidence=0.5
    ) as hands, mp_face_mesh.FaceMesh(
        static_image_mode=True,
        max_num_faces=1,
        refine_landmarks=True,
        min_detection_confidence=0.5,
    ) as face_mesh, mp_pose.Pose(
        static_image_mode=True,
        model_complexity=2,
        enable_segmentation=True,
        min_detection_confidence=0.5,
    ) as pose:
        frame = 0
        while True:
            frame_file = f"{frame_dir}/{video_id}_{frame}.jpg"
            # Frames are numbered contiguously from 0; the first gap ends the video.
            if not os.path.exists(frame_file):
                break
            if debug:
                print(f"=> PF: {frame}")

            # cv2 loads BGR; MediaPipe expects RGB. Convert once and reuse the
            # same array for all three models.
            rgb = cv2.cvtColor(cv2.imread(frame_file), cv2.COLOR_BGR2RGB)

            # NOTE(review): MediaPipe documents handedness as assuming a
            # mirrored (selfie) image - confirm whether "Left"/"Right" should
            # be swapped for these videos.
            hand_points = _hand_points_by_label(hands.process(rgb), debug)

            face_results = face_mesh.process(rgb)
            face_points = []
            if face_results.multi_face_landmarks is not None:
                face_points = [
                    landmark
                    for face in face_results.multi_face_landmarks
                    for landmark in face.landmark
                ]

            pose_results = pose.process(rgb)
            pose_points = []
            if pose_results.pose_landmarks is not None:
                pose_points = list(pose_results.pose_landmarks.landmark)

            # Row order within a frame is part of the output format.
            # Only the first 468 face landmarks are kept; any extra ones the
            # mesh returns are dropped.
            rows.extend(_fixed_size_rows(frame, "face", face_points, _FACE_LANDMARKS))
            rows.extend(
                _fixed_size_rows(
                    frame, "left_hand", hand_points["Left"], _HAND_LANDMARKS
                )
            )
            rows.extend(_fixed_size_rows(frame, "pose", pose_points, _POSE_LANDMARKS))
            rows.extend(
                _fixed_size_rows(
                    frame, "right_hand", hand_points["Right"], _HAND_LANDMARKS
                )
            )

            frame += 1

    df = pd.DataFrame(rows)

    os.makedirs(out_dir, exist_ok=True)
    df.to_parquet(out_file)
    return df

In [8]:
# Order the videos by id so positional indices are stable across runs.
# video_id was stored as a string, so this is a lexicographic sort.
video_df = video_df.sort_values(by="video_id", ascending=True)

In [9]:
# Smoke test: process a single video and keep its landmarks as an example
# file. The loop always stops after the first iteration that passes the
# index guard, i.e. after video 0.
for video_idx in tqdm(range(10)):  # range(len(video_df))):
    data = process_video(video_df, video_idx)

    if video_idx < 0:
        continue

    data.to_parquet("../output/wsasl_ex.pq")
    break

  0%|          | 0/10 [00:00<?, ?it/s]INFO: Created TensorFlow Lite XNNPACK delegate for CPU.
  0%|          | 0/10 [00:18<?, ?it/s]


In [None]:
# Full run: one joblib task per video, 80 workers. Each task writes its own
# parquet file, so the returned frames are discarded.
# NOTE(review): the progress bar wraps the task generator, so it presumably
# tracks tasks being dispatched rather than tasks finishing - confirm.
tasks = (
    delayed(process_video)(video_df, video_idx)
    for video_idx in tqdm(range(len(video_df)))
)
_ = Parallel(n_jobs=80)(tasks)

  1%|          | 80/13791 [00:02<06:42, 34.04it/s]INFO: Created TensorFlow Lite XNNPACK delegate for CPU.
INFO: Created TensorFlow Lite XNNPACK delegate for CPU.
INFO: Created TensorFlow Lite XNNPACK delegate for CPU.
INFO: Created TensorFlow Lite XNNPACK delegate for CPU.
INFO: Created TensorFlow Lite XNNPACK delegate for CPU.
INFO: Created TensorFlow Lite XNNPACK delegate for CPU.
INFO: Created TensorFlow Lite XNNPACK delegate for CPU.
INFO: Created TensorFlow Lite XNNPACK delegate for CPU.
INFO: Created TensorFlow Lite XNNPACK delegate for CPU.
INFO: Created TensorFlow Lite XNNPACK delegate for CPU.
INFO: Created TensorFlow Lite XNNPACK delegate for CPU.
INFO: Created TensorFlow Lite XNNPACK delegate for CPU.
INFO: Created TensorFlow Lite XNNPACK delegate for CPU.
INFO: Created TensorFlow Lite XNNPACK delegate for CPU.
INFO: Created TensorFlow Lite XNNPACK delegate for CPU.
INFO: Created TensorFlow Lite XNNPACK delegate for CPU.
INFO: Created TensorFlow Lite XNNPACK delegate for CPU

In [None]:
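# Debug helper: process one video verbosely, then play it back.
# (process_video returns the landmark DataFrame, not a tuple.)
# video_idx = 0
# landmarks = process_video(video_df, video_idx, True)
# print(f'=> Sign: {video_df["sign"].iloc[video_idx]}')
# play(video_df["video_path"].iloc[video_idx])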

Done !