# In [None]:  (Jupyter cell prompt retained from the notebook export)
import argparse
import csv
import os
from datetime import datetime
from typing import List, Dict
!pip install pose_format
!pip install sign_language_datasets
!pip install tfds-nightly
from pose_format import PoseHeader, Pose
from pose_format.numpy import NumPyPoseBody
from pose_format.utils.reader import BufferReader
from tqdm import tqdm
from typing import Generator

# Column order of index.csv: used both as the header row and to order each data row.
LEXICON_INDEX = [
    'filename',
    'start',
    'end',
    'word',
]


def init_index(index_path: str):
    """Create the index CSV containing only its header row, unless it already exists.

    An existing file at ``index_path`` is left untouched, so calling this
    repeatedly never truncates previously written rows.

    Args:
        index_path: Location of the CSV index file.
    """
    if os.path.isfile(index_path):
        return

    # newline='' lets the csv module control line endings itself.
    with open(index_path, 'w', encoding='utf-8', newline='') as index_file:
        csv.writer(index_file).writerow(LEXICON_INDEX)

def load_signsuisse(directory_path: str) -> Generator[Dict[str, str], None, None]:
    """Export the poses of the dataset's ``train`` split and yield one index row each.

    Every datum whose pose has a non-zero ``fps`` is written to
    ``<directory_path>/<id>.pose``; data with ``fps == 0`` are skipped.
    This is a generator, so nothing (including reading the pose header and
    loading the dataset) happens until the first item is requested.

    NOTE(review): the function name refers to SignSuisse, but the dataset
    actually loaded is ``asl_signs`` — confirm which one is intended.

    Args:
        directory_path: Directory that receives the ``.pose`` files. It is
            created if it does not exist yet.

    Yields:
        A dict with the keys ``filename`` (the ``.pose`` file name), ``word``
        (the datum's ``name``), ``start`` (always ``"0"``) and ``end`` (the
        pose duration, ``len(pose_body.data) / fps``, as a string).

    Raises:
        FileNotFoundError: If ``holistic.poseheader`` is not present in the
            current working directory.
    """
    # The shared pose header is read from a file relative to the working directory.
    with open("holistic.poseheader", "rb") as buffer:
        pose_header = PoseHeader.read(BufferReader(buffer.read()))

    # Load the dataset with holistic poses and without videos.
    # These imports stay local to the function, as in the original code.
    from sign_language_datasets.datasets.config import SignDatasetConfig
    # The config is named after today's date.
    config = SignDatasetConfig(name=datetime.now().strftime("%Y-%m-%d"), version="1.0.0", include_video=False, include_pose="holistic")
    import tensorflow_datasets as tfds
    dataset = tfds.load(name='asl_signs', builder_kwargs={"config": config})

    # Make sure the output directory exists so the function also works when the
    # caller has not created it. An empty path means "current directory" and
    # must not be passed to os.makedirs, which rejects it.
    if directory_path:
        os.makedirs(directory_path, exist_ok=True)

    # Iterate over dataset
    for datum in tqdm(dataset["train"]):
        uid_raw = datum['id'].numpy().decode('utf-8')
        words = datum['name'].numpy().decode('utf-8')

        # Load pose and save to file
        tf_pose = datum['pose']
        fps = int(tf_pose["fps"].numpy())
        if fps == 0:
            # No usable frame rate: the duration below would divide by zero.
            continue
        pose_body = NumPyPoseBody(fps, tf_pose["data"].numpy(), tf_pose["conf"].numpy())
        pose = Pose(pose_header, pose_body)

        pose_filename = f"{uid_raw}.pose"
        with open(os.path.join(directory_path, pose_filename), "wb") as f:
            pose.write(f)

        yield {
            'filename': pose_filename,
            'word': words,
            'start': "0",
            'end': str(len(pose_body.data) / fps),  # pose duration
        }



def add_data(data: List[Dict[str, str]], directory: str):
    """Append rows to ``<directory>/index.csv``, creating directory and file if needed.

    Args:
        data: Row dicts to append. Each must provide every key listed in
            ``LEXICON_INDEX``; values are written in that column order. The
            rows are iterated exactly once.
        directory: Directory that holds ``index.csv``.
    """
    os.makedirs(directory, exist_ok=True)
    index_path = os.path.join(directory, 'index.csv')
    # Writes the header row first if the index file does not exist yet.
    init_index(index_path)

    with open(index_path, 'a', encoding='utf-8', newline='') as index_file:
        csv.writer(index_file).writerows(
            [entry[column] for column in LEXICON_INDEX]
            for entry in tqdm(data)
        )

    print(f"Added entries to {index_path}")


def main():
    """Export the poses into the ``dir`` directory and append them to its index."""
    output_dir = "dir"
    # load_signsuisse is a generator: poses are written while add_data consumes it.
    entries = load_signsuisse(output_dir)
    add_data(entries, output_dir)


# Run the export only when this file is executed directly, not when it is imported.
if __name__ == '__main__':
    main()