In [1]:
import pandas as pd
import numpy as np
import tensorflow as tf
from skimage.transform import resize
import json
from tqdm import tqdm
import os



In [2]:
# Root directory of the competition data as mounted in the runtime.
inpdir = "/kaggle/input/asl-fingerspelling"

# Load the training index and, in the same chain, attach a UTF-8 encoded copy of
# each phrase so it can later be stored in a bytes feature.
df = (
    pd.read_csv(f'{inpdir}/train.csv')
    .assign(phrase_bytes=lambda frame: frame["phrase"].map(lambda text: text.encode("utf-8")))
)

# Preview the first rows, including the newly added phrase_bytes column.
display(df.head())

Unnamed: 0,path,file_id,sequence_id,participant_id,phrase,phrase_bytes
0,train_landmarks/5414471.parquet,5414471,1816796431,217,3 creekhouse,b'3 creekhouse'
1,train_landmarks/5414471.parquet,5414471,1816825349,107,scales/kuhaylah,b'scales/kuhaylah'
2,train_landmarks/5414471.parquet,5414471,1816862427,0,hentaihubs.com,b'hentaihubs.com'
3,train_landmarks/5414471.parquet,5414471,1816909464,1,1383 william lanier,b'1383 william lanier'
4,train_landmarks/5414471.parquet,5414471,1816967051,63,988 franklin lane,b'988 franklin lane'


In [3]:
# Face landmark indices kept for the lip region (39 entries).
# NOTE(review): commonly used lip index lists also contain landmark 0 between
# 37 and 267 — confirm that its omission here is intentional.
LIP = [
    61, 185, 40, 39, 37, 267, 269, 270, 409,
    291, 146, 91, 181, 84, 17, 314, 405, 321, 375,
    78, 191, 80, 81, 82, 13, 312, 311, 310, 415,
    95, 88, 178, 87, 14, 317, 402, 318, 324, 308,
]

# Column names are grouped per axis: every x column first, then every y, then
# every z. Within one axis the landmark order follows LIP / range(...).
FACE = [f'{axis}_face_{i}' for axis in "xyz" for i in LIP]
LHAND = [f'{axis}_left_hand_{i}' for axis in "xyz" for i in range(21)]
RHAND = [f'{axis}_right_hand_{i}' for axis in "xyz" for i in range(21)]
POSE = [f'{axis}_pose_{i}' for axis in "xyz" for i in range(33)]

# Every column read from the landmark files, in face, left hand, right hand, pose order.
SEL_COLS = [*FACE, *LHAND, *RHAND, *POSE]

# Number of frames each sequence is resampled to before it is serialized.
FRAME_LEN = 128

In [4]:
def load_relevant_data_subset(pq_path):
    """Read only the selected landmark columns from a parquet file.

    Args:
        pq_path: Location of the parquet file, handed to ``pd.read_parquet``.

    Returns:
        A DataFrame restricted to the columns listed in ``SEL_COLS``; all
        other columns in the file are not loaded.
    """
    landmarks = pd.read_parquet(pq_path, columns=SEL_COLS)
    return landmarks

# Convert every landmark parquet file into a TFRecord file named after its file_id.
# The output directory is created once, before the loop; exist_ok=True keeps the
# cell safe to re-run and avoids a separate check-then-create step per file.
os.makedirs("tfds", exist_ok=True)

for file_id in tqdm(df.file_id.unique()):
    pqfile = f"{inpdir}/train_landmarks/{file_id}.parquet"
    tffile = f"tfds/{file_id}.tfrecord"
    # Rows of train.csv that belong to this parquet file.
    seq_refs = df.loc[df.file_id == file_id]
    seqs = load_relevant_data_subset(pqfile)

    with tf.io.TFRecordWriter(tffile) as file_writer:
        for seq_id, phrase in zip(seq_refs.sequence_id, seq_refs.phrase_bytes):
            # Select the frames whose index value equals this sequence id
            # (.loc is the indexer intended for boolean masks).
            frames = seqs.loc[seqs.index == seq_id]

            # Missing coordinates are replaced by the sentinel -10, then the
            # frame axis is resampled to FRAME_LEN rows; the number of columns
            # is passed through unchanged.
            frames128 = resize(frames.fillna(-10).to_numpy(), (FRAME_LEN, len(SEL_COLS)))

            # Map each column name to its resized values directly from the
            # array instead of rebuilding a DataFrame for every sequence.
            column_values = dict(zip(frames.columns, frames128.T))

            # One float-list feature per selected column plus the encoded phrase.
            features = {
                COL: tf.train.Feature(float_list=tf.train.FloatList(value=column_values[COL]))
                for COL in SEL_COLS
            }
            features["phrase"] = tf.train.Feature(bytes_list=tf.train.BytesList(value=[phrase]))
            record_bytes = tf.train.Example(features=tf.train.Features(feature=features)).SerializeToString()
            file_writer.write(record_bytes)

100%|██████████| 68/68 [52:01<00:00, 45.91s/it]


In [5]:
def decode_fn(record_bytes):
    """Parse one serialized example into a dict of tensors.

    Args:
        record_bytes: A single serialized example, as read from a TFRecord file.

    Returns:
        The result of ``tf.io.parse_single_example``: one float32 feature of
        length ``FRAME_LEN`` for every name in ``SEL_COLS`` and a scalar
        string feature under ``"phrase"``.
    """
    # Every landmark column is stored as a fixed-length float vector.
    landmark_spec = {}
    for column in SEL_COLS:
        landmark_spec[column] = tf.io.FixedLenFeature([FRAME_LEN], dtype=tf.float32)

    # The target phrase is a single byte string.
    phrase_spec = {"phrase": tf.io.FixedLenFeature([], dtype=tf.string)}

    return tf.io.parse_single_example(record_bytes, {**landmark_spec, **phrase_spec})

# Sanity check: decode the first two records of the first file listed in df and
# print one feature key from each, confirming the file parses with decode_fn.
# Only the first file_id is inspected, so it is looked up directly rather than
# through a loop that breaks after one pass. The TFRecord must already exist,
# so no directory is created here and the parquet path is not needed.
if not df.empty:
    file_id = df.file_id.iloc[0]
    tffile = f"tfds/{file_id}.tfrecord"
    for batch in tf.data.TFRecordDataset([tffile]).map(decode_fn).take(2):
        print(list(batch.keys())[0])


phrase
phrase
