In [None]:
import pandas as pd
import numpy as np
import tensorflow as tf
from skimage.transform import resize
import json
from tqdm import tqdm
import os
from tensorflow.keras import layers
import glob

In [None]:
!pip install scikit-image

# Initialization

In [None]:
# Stable alias of the autotune sentinel; the input pipeline below already
# uses tf.data.AUTOTUNE, so this does not raise the minimum TF version.
AUTO = tf.data.AUTOTUNE

# Detect TPU, return appropriate distribution strategy
try:
    tpu = tf.distribute.cluster_resolver.TPUClusterResolver()
    print('Running on TPU ', tpu.master())
except ValueError:
    # The resolver could not be constructed: fall back to the default strategy.
    tpu = None

if tpu:
    tf.config.experimental_connect_to_cluster(tpu)
    tf.tpu.experimental.initialize_tpu_system(tpu)
    # tf.distribute.experimental.TPUStrategy is deprecated in favour of the
    # non-experimental class.
    strategy = tf.distribute.TPUStrategy(tpu)
else:
    strategy = tf.distribute.get_strategy()

print("REPLICAS: ", strategy.num_replicas_in_sync)

In [None]:
!pip install cached-property
from cached_property import cached_property
from shutil import copyfile

In [None]:
from CTC_TPU import classic_ctc_loss

In [None]:
# Training metadata, plus a UTF-8 encoded copy of every phrase.
inpdir = "/kaggle/input/asl-fingerspelling"
df = pd.read_csv(f'{inpdir}/train.csv')
df["phrase_bytes"] = [phrase.encode("utf-8") for phrase in df["phrase"]]
display(df.head())

In [None]:
# Face-mesh landmark ids kept as model input.
LIP = [
    61, 185, 40, 39, 37, 267, 269, 270, 409,
    291, 146, 91, 181, 84, 17, 314, 405, 321, 375,
    78, 191, 80, 81, 82, 13, 312, 311, 310, 415,
    95, 88, 178, 87, 14, 317, 402, 318, 324, 308,
]

# Pose landmark ids, split into the two id groups used for the flip mapping.
LPOSEID = [13, 15, 17, 19, 21]
RPOSEID = [14, 16, 18, 20, 22]
POSEID = LPOSEID + RPOSEID


def _xyz_columns(part, ids):
    """Return the column names for `part`: all x columns, then all y, then all z."""
    return [f'{axis}_{part}_{i}' for axis in ('x', 'y', 'z') for i in ids]


FACE = _xyz_columns('face', LIP)
LHAND = _xyz_columns('left_hand', range(21))
RHAND = _xyz_columns('right_hand', range(21))
POSE = _xyz_columns('pose', POSEID)

# Column order of the model input; every *_IDX list below indexes into it.
SEL_COLS = FACE + LHAND + RHAND + POSE
FRAME_LEN = 300

_INDEXED_COLS = list(enumerate(SEL_COLS))

# Positions of each coordinate axis.
X_IDX = [pos for pos, name in _INDEXED_COLS if "x_" in name]
Y_IDX = [pos for pos, name in _INDEXED_COLS if "y_" in name]
Z_IDX = [pos for pos, name in _INDEXED_COLS if "z_" in name]

# Positions of each body part; pose columns are split by their two-digit id.
RHAND_IDX = [pos for pos, name in _INDEXED_COLS if "right" in name]
LHAND_IDX = [pos for pos, name in _INDEXED_COLS if "left" in name]
RPOSE_IDX = [pos for pos, name in _INDEXED_COLS if "pose" in name and int(name[-2:]) in RPOSEID]
LPOSE_IDX = [pos for pos, name in _INDEXED_COLS if "pose" in name and int(name[-2:]) in LPOSEID]

FACE_IDX = [pos for pos, name in _INDEXED_COLS if "face" in name]
HANDIDS = [pos for pos, name in _INDEXED_COLS if 'hand' in name]

In [None]:
import tensorflow as tf

def remove_nan_rows_at_indices(x, column_indices):
    """Drop the rows of `x` whose entries at `column_indices` are all NaN.

    Args:
        x: 2-D tensor; rows are filtered, columns are left untouched.
        column_indices: column positions inspected for NaN.

    Returns:
        `x` restricted to rows with at least one non-NaN inspected value.
    """
    selected = tf.gather(x, column_indices, axis=1)
    all_nan = tf.reduce_all(tf.math.is_nan(selected), axis=1)
    return tf.boolean_mask(x, tf.math.logical_not(all_nan))

def drop_nan_timesteps(tensor, handids=HANDIDS):
    """Remove rows in which every hand column is NaN.

    Args:
        tensor: 2-D tensor filtered along axis 0.
        handids: column positions to inspect (defaults to HANDIDS).

    Returns:
        The rows of `tensor` that have at least one non-NaN hand value.
    """
    hand_cols = tf.gather(tensor, tf.convert_to_tensor(handids), axis=1)
    keep = tf.logical_not(tf.reduce_all(tf.math.is_nan(hand_cols), axis=1))
    return tf.boolean_mask(tensor, keep, axis=0)


def resize_pad(x):
    """Bring a 2-D sequence to exactly FRAME_LEN rows.

    Sequences with fewer than FRAME_LEN rows are zero-padded at the end.
    All others are resampled along axis 0 with `tf.image.resize` (default
    interpolation), treating the matrix as a single-channel image.
    """
    n_frames = tf.shape(x)[0]
    if n_frames < FRAME_LEN:
        x = tf.pad(x, [[0, FRAME_LEN - n_frames], [0, 0]])
    else:
        # tf.image.resize needs a channel axis; add it and strip it again.
        x = tf.expand_dims(x, axis=-1)
        x = tf.image.resize(x, (FRAME_LEN, tf.shape(x)[1]))
        x = tf.squeeze(x, axis=-1)
    return x

In [None]:
# NOTE(review): this cell repeats, statement for statement, the index lists
# already built in the landmark-selection cell above. SEL_COLS is not modified
# in between, so the values are identical; the cell looks like a candidate for
# removal.
X_IDX = [i for i, col in enumerate(SEL_COLS)  if "x_" in col]
Y_IDX = [i for i, col in enumerate(SEL_COLS)  if "y_" in col]
Z_IDX = [i for i, col in enumerate(SEL_COLS)  if "z_" in col]

RHAND_IDX = [i for i, col in enumerate(SEL_COLS)  if "right" in col]
LHAND_IDX = [i for i, col in enumerate(SEL_COLS)  if  "left" in col]
RPOSE_IDX = [i for i, col in enumerate(SEL_COLS)  if  "pose" in col and int(col[-2:]) in RPOSEID]
LPOSE_IDX = [i for i, col in enumerate(SEL_COLS)  if  "pose" in col and int(col[-2:]) in LPOSEID]

FACE_IDX = [i for i, col in enumerate(SEL_COLS)  if "face" in col]
HANDIDS = [i for i, _ in  enumerate(SEL_COLS) if 'hand' in _]

In [None]:
# Column permutation for the flip: each right-hand / right-pose column is
# exchanged with its left counterpart; every other column maps to itself.
FLIP_IDX_DICT = {}
for right_idx, left_idx in list(zip(RHAND_IDX, LHAND_IDX)) + list(zip(RPOSE_IDX, LPOSE_IDX)):
    FLIP_IDX_DICT[right_idx] = left_idx
    FLIP_IDX_DICT[left_idx] = right_idx

for col_idx in range(len(SEL_COLS)):
    FLIP_IDX_DICT.setdefault(col_idx, col_idx)

In [None]:
# All hand and pose column positions; these are the columns that receive a
# -1 multiplier when the sign vector `alph` is built.
IDX_TO_FLIP = RHAND_IDX + LHAND_IDX + RPOSE_IDX + LPOSE_IDX

In [None]:
# Per-column sign: -1 for every column listed in IDX_TO_FLIP, +1 otherwise.
alph = [-1 if col_idx in IDX_TO_FLIP else 1 for col_idx in range(len(SEL_COLS))]
# Float32 row vector with a leading axis of size 1, for broadcasting over rows.
ALP_TENSOR = tf.cast(tf.expand_dims(tf.convert_to_tensor(alph), 0), tf.float32)

In [None]:
# Gather order for the flipped layout: output column i is read from input
# column FLIP_IDX_DICT[i].
new_flip_idx = [FLIP_IDX_DICT[i] for i in range(len(SEL_COLS))]

In [None]:
def flip_augm(x):
    """Swap left/right hand and pose columns, then apply the ALP_TENSOR signs.

    NOTE(review): ALP_TENSOR is -1 for every hand and pose column, so the x, y
    and z values of those landmarks are all negated, not only x. Confirm this
    is the intended mirror for the normalised inputs.
    """
    swapped = tf.gather(x, new_flip_idx, axis=1)
    return swapped * ALP_TENSOR

In [None]:
def augm(x, phrace):
    """Dataset map function: apply `flip_augm` when a uniform draw exceeds 0.9.

    The label `phrace` is passed through unchanged.
    """
    do_flip = tf.random.uniform(()) > 0.9
    if do_flip:
        x = flip_augm(x)
    return x, phrace

In [None]:
# Normalisation statistics loaded from .npy files in the current working
# directory; decode_fn and PreprocessLayer compute (x - MEAN) / STD with them.
# NOTE(review): presumably one float32 value per SEL_COLS entry — confirm
# against the files, which are not produced by this notebook.
MEAN = tf.convert_to_tensor(np.load('mean.npy'))
STD = tf.convert_to_tensor(np.load('std.npy'))

# Load dataset

In [None]:
# Global batch size: 16 examples per replica.
batch_size = 16 * strategy.num_replicas_in_sync

# Character vocabulary shipped with the competition data.
with open("/kaggle/input/asl-fingerspelling/character_to_prediction_index.json", "r") as f:
    char_to_num = json.load(f)

# Extra class appended to the vocabulary; used for label padding and as the
# CTC blank index.
pad_token = 'P'
pad_token_idx = 59
char_to_num[pad_token] = pad_token_idx

# Reverse mapping, index -> character.
num_to_char = {index: char for char, index in char_to_num.items()}

inpdir = "/kaggle/input/asl-fingerspelling"
df = pd.read_csv(f'{inpdir}/train.csv')
df_s = pd.read_csv(f'{inpdir}/supplemental_metadata.csv')

# In-graph character -> index lookup; characters missing from the map give -1.
_vocab_init = tf.lookup.KeyValueTensorInitializer(
    keys=list(char_to_num.keys()),
    values=list(char_to_num.values()),
)
table = tf.lookup.StaticHashTable(
    initializer=_vocab_init,
    default_value=tf.constant(-1),
    name="class_weight"
)


def filter_fn(record_bytes):
    """Dataset predicate: keep a serialized example only if it has more than 5 frames."""
    schema = {col: tf.io.VarLenFeature(dtype=tf.float32) for col in SEL_COLS}
    schema["phrase"] = tf.io.FixedLenFeature([], dtype=tf.string)
    parsed = tf.io.parse_single_example(record_bytes, schema)
    # Stack the per-column series and transpose so that axis 0 indexes frames.
    frames = tf.transpose([tf.sparse.to_dense(parsed[col]) for col in SEL_COLS])
    n_frames = tf.shape(frames)[0]
    return n_frames > 5

def decode_fn(record_bytes):
    """Parse one serialized example into (landmarks, label indices).

    Landmarks are normalised with MEAN/STD, NaNs are replaced by 0, and the
    sequence is brought to FRAME_LEN rows with `resize_pad`. The phrase is
    split into bytes, mapped through `table`, and right-padded with
    `pad_token_idx` to length 64.
    """
    schema = {col: tf.io.VarLenFeature(dtype=tf.float32) for col in SEL_COLS}
    schema["phrase"] = tf.io.FixedLenFeature([], dtype=tf.string)
    parsed = tf.io.parse_single_example(record_bytes, schema)

    # Stack the per-column series and transpose so that axis 0 indexes frames.
    frames = tf.transpose([tf.sparse.to_dense(parsed[col]) for col in SEL_COLS])
    frames = (frames - MEAN[None]) / STD[None]
    frames = tf.where(tf.math.is_nan(frames), tf.zeros_like(frames), frames)
    frames = resize_pad(frames)
    frames = tf.reshape(frames, (FRAME_LEN, frames.shape[1]))

    tokens = table.lookup(tf.strings.bytes_split(parsed["phrase"]))
    pad_len = 64 - tf.shape(tokens)[0]
    tokens = tf.pad(tokens, paddings=[[0, pad_len]], constant_values=pad_token_idx)

    return frames, tokens

# NOTE(review): inpdir is reassigned here but is not read again in any later
# visible cell.
inpdir = "/kaggle/working"
# One TFRecord path per distinct file_id in train.csv.
tffiles = list(df.file_id.map(lambda x: f'/kaggle/input/sign-language-recg-dataset-training/tfds/{x}.tfrecord').unique())
# Alias of pad_token_idx.
PAD = pad_token_idx

In [None]:
# Number of distinct TFRecord paths.
len(tffiles)

In [None]:
# `train_fls` was not assigned in any earlier cell, so this cell failed with
# NameError on a fresh kernel. Train on a copy of every TFRecord path.
# NOTE(review): if a train/validation split is intended, slice `tffiles` here.
train_fls = list(tffiles)

# Fixed seed so the file order is reproducible when the cell is re-run.
np.random.seed(17)
np.random.shuffle(train_fls)

In [None]:
# Cache right after the deterministic parse/normalise step. With `.cache()` as
# the final stage, the first epoch's shuffled and augmented batches were
# replayed unchanged in every later epoch. Shuffling and augmentation now run
# after the cache, so they are re-drawn each epoch. `prefetch` is the last
# stage so it overlaps input processing with the training step.
train_dataset = (
    tf.data.TFRecordDataset(train_fls, num_parallel_reads=tf.data.AUTOTUNE, compression_type='GZIP')
    .filter(filter_fn)
    .map(decode_fn, num_parallel_calls=tf.data.AUTOTUNE)
    .cache()
    .map(augm, num_parallel_calls=tf.data.AUTOTUNE)
    .shuffle(5000)
    .batch(batch_size, drop_remainder=True)
    .prefetch(buffer_size=tf.data.AUTOTUNE)
)

# One sample batch, used later to build the model and for sanity checks.
batch = next(iter(train_dataset))

In [None]:
# Shape of the landmark tensor of the sample batch.
batch[0].shape

# Define model

In [None]:
def positional_encoding(length, depth):
    """Build a sinusoidal position table.

    Args:
        length: number of positions (rows).
        depth: encoding width; half the columns are sines, half are cosines.

    Returns:
        float32 tensor whose rows are [sin(...), cos(...)] concatenated on the
        last axis.
    """
    half_depth = depth / 2

    pos = np.arange(length)[:, np.newaxis]
    frac = np.arange(half_depth)[np.newaxis, :] / half_depth

    rates = 1 / (10000**frac)
    angles = pos * rates

    encoding = np.concatenate([np.sin(angles), np.cos(angles)], axis=-1)
    return tf.cast(encoding, dtype=tf.float32)

class PositionalEmbedding(tf.keras.layers.Layer):
    """Scale the input by sqrt(d_model) and add a fixed sinusoidal encoding."""

    def __init__(self, d_model):
        super().__init__()
        self.d_model = d_model
        # Precomputed for up to 2048 positions; sliced to the input length in call().
        self.pos_encoding = positional_encoding(length=2048, depth=d_model)

    def call(self, x):
        """Return x * sqrt(d_model) plus the first `length` rows of the encoding."""
        seq_len = tf.shape(x)[1]
        scale = tf.math.sqrt(tf.cast(self.d_model, tf.float32))
        scaled = x * scale
        return scaled + self.pos_encoding[tf.newaxis, :seq_len, :]

In [None]:
class TransformerEncoder(layers.Layer):
    """Post-norm Transformer encoder block: self-attention followed by a 2-layer MLP.

    Args:
        embed_dim: width of the block input/output; also used as the per-head
            `key_dim` of the attention layer.
        num_heads: number of attention heads.
        feed_forward_dim: hidden width of the feed-forward network.
        rate: dropout rate applied after attention and after the MLP.
    """

    def __init__(self, embed_dim, num_heads, feed_forward_dim, rate=0.1):
        super().__init__()
        self.att = layers.MultiHeadAttention(num_heads=num_heads, key_dim=embed_dim)
        # `tf.keras` instead of the bare `keras` name: `keras` is only imported
        # in a later cell, so this class relied on cell ordering to work.
        self.ffn = tf.keras.Sequential(
            [
                layers.Dense(feed_forward_dim, activation="swish"),
                layers.Dense(embed_dim),
            ]
        )
        self.layernorm1 = layers.LayerNormalization(epsilon=1e-6)
        self.layernorm2 = layers.LayerNormalization(epsilon=1e-6)
        self.dropout1 = layers.Dropout(rate)
        self.dropout2 = layers.Dropout(rate)

    def call(self, inputs, training=False):
        """Apply attention and feed-forward sub-blocks, each with residual + LayerNorm."""
        attn_output = self.att(inputs, inputs)
        attn_output = self.dropout1(attn_output, training=training)
        out1 = self.layernorm1(inputs + attn_output)
        ffn_output = self.ffn(out1)
        ffn_output = self.dropout2(ffn_output, training=training)
        return self.layernorm2(out1 + ffn_output)

In [None]:
class ECA(tf.keras.layers.Layer):
    """Channel gate: a sigmoid weight per channel, derived from the time-averaged input.

    Args:
        kernel_size: width of the 1-D convolution run across the channel axis.
    """

    def __init__(self, kernel_size=5, **kwargs):
        super().__init__(**kwargs)
        self.supports_masking = True
        self.kernel_size = kernel_size
        self.conv = tf.keras.layers.Conv1D(1, kernel_size=kernel_size, strides=1, padding="same", use_bias=False)

    def call(self, inputs, mask=None):
        """Scale `inputs` channel-wise by the learned gate."""
        # Average over axis 1 (honouring the mask).
        pooled = tf.keras.layers.GlobalAveragePooling1D()(inputs, mask=mask)
        # Treat the channel axis as a length-C sequence with a single feature.
        scores = self.conv(tf.expand_dims(pooled, -1))
        gate = tf.nn.sigmoid(tf.squeeze(scores, -1))
        # Re-insert axis 1 so the gate broadcasts over it.
        return inputs * gate[:, None, :]


class CausalDWConv1D(tf.keras.layers.Layer):
    """Depthwise 1-D convolution that is zero-padded on the left only.

    The left padding is `dilation_rate * (kernel_size - 1)`. Combined with
    'valid' convolution and stride 1, the output has the same length as the
    input and each step only sees current and earlier inputs.
    """

    def __init__(self,
                 kernel_size=17,
                 dilation_rate=1,
                 use_bias=False,
                 depthwise_initializer='glorot_uniform',
                 **kwargs):
        super().__init__(**kwargs)
        left_pad = dilation_rate * (kernel_size - 1)
        self.causal_pad = tf.keras.layers.ZeroPadding1D((left_pad, 0))
        self.dw_conv = tf.keras.layers.DepthwiseConv1D(
            kernel_size,
            strides=1,
            dilation_rate=dilation_rate,
            padding='valid',
            use_bias=use_bias,
            depthwise_initializer=depthwise_initializer,
        )
        self.supports_masking = True

    def call(self, inputs):
        """Left-pad `inputs`, then apply the depthwise convolution."""
        return self.dw_conv(self.causal_pad(inputs))

class Conv1DBlock(tf.keras.layers.Layer):
    """Convolutional block: expand -> causal depthwise conv -> BN -> ECA -> project.

    A residual connection is added when the input channel count equals
    `channel_size`.

    Args:
        channel_size: number of output channels of the projection.
        kernel_size: kernel size of the causal depthwise convolution.
        dilation_rate: dilation of the depthwise convolution.
        drop_rate: dropout rate applied after the projection; no dropout layer
            is created when it is 0.
        expand_ratio: the expansion Dense has `expand_ratio * channel_size` units.
        se_ratio: stored on the instance but not used by this class.
        activation: activation of the expansion Dense layer.
    """
    def __init__(self,
                 channel_size,
                 kernel_size,
                 dilation_rate=1,
                 drop_rate=0.0,
                 expand_ratio=2,
                 se_ratio=0.25,
                 activation='swish',
                 **kwargs):
        super(Conv1DBlock, self).__init__(**kwargs)

        self.channel_size = channel_size
        self.kernel_size = kernel_size
        self.dilation_rate = dilation_rate
        self.drop_rate = drop_rate
        self.expand_ratio = expand_ratio
        self.se_ratio = se_ratio
        self.activation = activation

        # NOTE: sub-layers are created in a fixed order; keep it stable, since
        # weights are restored from an HDF5 checkpoint later in the notebook.
        self.dense_expand = tf.keras.layers.Dense(
            self.expand_ratio * channel_size,
            use_bias=True,
            activation=self.activation
        )

        self.dwconv = CausalDWConv1D(
            self.kernel_size,
            dilation_rate=self.dilation_rate,
            use_bias=False
        )

        self.batch_norm = tf.keras.layers.BatchNormalization(momentum=0.95)

        self.eca = ECA()

        self.dense_project = tf.keras.layers.Dense(
            self.channel_size,
            use_bias=True
        )

        if self.drop_rate > 0:
            # noise_shape=(None,1,1): presumably one keep/drop decision per
            # sample, broadcast over the other two axes — confirm against the
            # Keras Dropout documentation.
            self.dropout = tf.keras.layers.Dropout(self.drop_rate, noise_shape=(None,1,1))

    def call(self, inputs):
        """Run the block on `inputs` and return the (optionally residual) output."""
        channels_in = tf.keras.backend.int_shape(inputs)[-1]
        # NOTE(review): channels_expand is computed but never read.
        channels_expand = channels_in * self.expand_ratio

        x = self.dense_expand(inputs)

        x = self.dwconv(x)

        x = self.batch_norm(x)

        x = self.eca(x)

        x = self.dense_project(x)

        if self.drop_rate > 0:
            x = self.dropout(x)

        # The residual is only valid when input and output widths match.
        if channels_in == self.channel_size:
            x = tf.keras.layers.add([x, inputs])

        return x


In [None]:
from tensorflow import keras
class CnnTransformer(tf.keras.Model):
    """Stack of (Conv1DBlock x len(ksizes) -> TransformerEncoder) groups with a per-frame classifier.

    Args:
        num_hid: width of the input projection and of the Transformer encoders.
        num_head: attention heads per encoder.
        num_feed_forward: hidden width of each encoder's feed-forward network.
        num_blocks: number of conv+encoder groups.
        maxlen: stored on the instance; not used elsewhere in this class.
        num_classes: number of output logits per frame.
        ksizes: kernel sizes of the Conv1DBlocks inside each group.
        dim: channel size of the Conv1DBlocks.
        cnn_dropout: dropout rate inside the Conv1DBlocks.
        dropout: dropout rate inside the Transformer encoders.
        inp_dimen: accepted but not used by this class.
        std_noise: standard deviation of the GaussianNoise layer on the input.
        end_drop: dropout rate applied before the classifier.
        norm: when True, apply BatchNormalization after the positional embedding.
    """
    def __init__(
        self,
        num_hid=64,
        num_head=2,
        num_feed_forward=128,
        num_blocks=3,
        maxlen=110,
        num_classes=10,
        ksizes=[11, 5, 3],
        dim=256,
        cnn_dropout=0.4,
        dropout=0.15,
        inp_dimen=200,
        std_noise=0.1,
        end_drop=0.2,
        norm=True
    ):
        super().__init__()
        self.maxlen = maxlen
        self.num_classes = num_classes

        self.pos_emb = PositionalEmbedding(d_model=num_hid)

        # One encoder and one list of Conv1DBlocks per group.
        self.encoders = [TransformerEncoder(num_hid, num_head, num_feed_forward, rate=dropout) for _ in range(num_blocks)]
        self.cv_1d_blcs = [[Conv1DBlock(dim, ksizes[j], drop_rate=cnn_dropout) for j in range(len(ksizes))] for _ in range(num_blocks)]

        self.classifier = layers.Dense(num_classes)
        self.dns0 = tf.keras.layers.Dense(num_hid)
        self.cv_per_block = len(ksizes)
        self.num_blocs = num_blocks
        self.mask = tf.keras.layers.Masking(mask_value=0.0)
        self.pool1 = tf.keras.layers.MaxPooling1D()
        # NOTE(review): pool2 is created but never called; encode() uses pool1
        # for both pooling steps.
        self.pool2 = tf.keras.layers.MaxPooling1D()
        self.noise = tf.keras.layers.GaussianNoise(std_noise)
        self.btch_norm = layers.BatchNormalization()
        self.norm = norm
        self.drop_end=tf.keras.layers.Dropout(end_drop)

    def encode(self, source, training=False):
        """Map input frames to encoded features.

        `training` is accepted but not forwarded explicitly to the sub-layers.
        """
        x = self.noise(source)
        x = self.dns0(x)
        x = self.pos_emb(x)
        if self.norm:
            x = self.btch_norm(x)
        for j in range(self.num_blocs):
            for i in range(self.cv_per_block):
                x = self.cv_1d_blcs[j][i](x)
            x = self.encoders[j](x)
            # Max-pool after the groups with index 1 and 3 (when they exist).
            if j == 1 or j == 3:
                x = self.pool1(x)
        return x

    def call(self, inputs, training=False):
        """Return per-frame class logits for a batch of landmark sequences."""
        x = self.mask(inputs)
        #x = self.noise(x)
        x = self.encode(x, training)
        x = self.drop_end(x)
        return self.classifier(x)

In [None]:
class CTCLoss(keras.losses.Loss):
    """Keras loss wrapper around `classic_ctc_loss`.

    Expects labels of shape (per-replica batch, 64) padded with
    `pad_token_idx`, and logits of shape (per-replica batch, 75, 60).
    """

    def __init__(self, reduction=tf.keras.losses.Reduction.NONE, name='ctc_loss'):
        super().__init__(reduction=reduction, name=name)

    def call(self, y_true, y_pred):
        """Return the mean CTC loss over the per-replica batch."""
        per_replica = batch_size // strategy.num_replicas_in_sync
        y_true = tf.ensure_shape(y_true, (per_replica, 64))
        y_pred = tf.ensure_shape(y_pred, (per_replica, 75, 60))

        # The label length is the position of the first padding token.
        is_pad = y_true == pad_token_idx
        label_length = tf.cast(tf.argmax(is_pad, axis=-1), dtype="int32")
        # Every example uses all 75 output frames.
        logit_length = tf.cast(tf.ones([per_replica]) * 75, dtype="int32")

        per_example = classic_ctc_loss(
            labels=y_true,
            logits=y_pred,
            label_length=label_length,
            logit_length=logit_length,
            blank_index=pad_token_idx,
        )
        return tf.math.reduce_mean(per_example)

In [None]:
# Shape of the landmark tensor of the sample batch (the model input).
batch[0].shape

# Init model

In [None]:
# Build, compile and restore the model inside the distribution strategy scope.
with strategy.scope():
    model = CnnTransformer(
        num_hid=200,
        num_head=6,
        num_feed_forward=400,
        maxlen=160,
        num_blocks=6,
        # 60 logits per frame, matching the (.., 75, 60) shape asserted in CTCLoss.
        num_classes=60,
        dim=200,
        dropout=0.1,
        cnn_dropout=0.4,
        end_drop=0.1,
        inp_dimen=273,
        std_noise=0.005,
        norm=False
    )
    opt = tf.keras.optimizers.Adam(0.0001)

    model.compile(optimizer=opt, loss=CTCLoss())
    # Run one forward pass so the variables exist before weights are loaded.
    _ = model(batch[0])
    # NOTE(review): this file name ends in the letter "o" ("model_v_1_o.h5"),
    # while the save cell below writes "model_v_2_0.h5" with a zero — confirm
    # the checkpoint name.
    model.load_weights('/kaggle/working/with_max_pool/model_v_1_o.h5')

In [None]:
# Print the layer / parameter summary of the built model.
model.summary()

# Training

In [None]:
# Train for 20 epochs on the training pipeline; validation data and callbacks
# are currently disabled. The return value of fit() is kept in `historadd_lossy`.
historadd_lossy = model.fit(train_dataset,
                            #validation_data=val_dataset,
                            #callbacks=[],
                            epochs=20)

In [None]:
# Persist the trained weights to an HDF5 file.
model.save_weights('/kaggle/working/with_max_pool/model_v_2_0.h5')

# Save as TFLite

In [None]:
def remove_consecutive(tensor):
    """Collapse runs of equal consecutive values along axis 1.

    The first element of every run is kept. The earlier version compared each
    element with its successor via `tf.roll`, which wraps around: the last
    element was compared with the first and dropped whenever the two were
    equal (e.g. [a, b, a] became [a, b]).

    Args:
        tensor: 2-D tensor of shape (1, T).

    Returns:
        Tensor of shape (1, T') with consecutive duplicates removed.
    """
    # True wherever an element differs from its predecessor.
    changed = tf.not_equal(tensor[:, 1:], tensor[:, :-1])
    # The first element has no predecessor and is always kept.
    keep_first = tf.ones_like(tensor[:, :1], dtype=tf.bool)
    mask = tf.concat([keep_first, changed], axis=1)
    return tf.reshape(tf.boolean_mask(tensor, mask), [1, -1])

def remove_number(tensor, num):
    """Return the entries of `tensor` that differ from `num`, reshaped to (1, -1)."""
    keep = tf.not_equal(tensor, num)
    kept = tf.boolean_mask(tensor, keep)
    return tf.reshape(kept, [1, -1])

In [None]:
class PreprocessLayer(tf.keras.layers.Layer):
    """Inference-time copy of the preprocessing done in `decode_fn`.

    Normalises with MEAN/STD, replaces NaNs with 0, and brings the sequence to
    FRAME_LEN rows.

    NOTE(review): this overrides `__call__` rather than `call`, so the Keras
    Layer call machinery is bypassed. Left as is, since the exported graph
    depends on it — confirm before changing.
    """

    def __init__(self):
        super(PreprocessLayer, self).__init__()

    def __call__(self, x):
        x = (x - MEAN[None]) / STD[None]
        x = tf.where(tf.math.is_nan(x), tf.zeros_like(x), x)
        x = resize_pad(x)
        # Pin the static shape using the shared constants instead of the
        # hard-coded (300, 273), so it stays in sync with resize_pad and with
        # the len(SEL_COLS) used in the TFLite input signature.
        x = tf.reshape(x, (FRAME_LEN, len(SEL_COLS)))
        return x


class TFLiteModel(tf.Module):
    """Export wrapper: raw landmark frames in, one-hot character predictions out."""

    def __init__(self, model):
        super(TFLiteModel, self).__init__()

        # Load the feature generation and main models
        self.preprocess_layer = PreprocessLayer()
        self.model = model

    def decode(self, batch):
        """Greedy CTC decoding of a preprocessed batch of size 1.

        This method used to return the undefined name `res` and raised
        NameError when called. It now performs the same decoding that
        `__call__` did inline.

        Args:
            batch: tensor of shape (1, frames, features), already preprocessed.

        Returns:
            1-D int32 tensor of class indices with repeats collapsed and the
            blank class (59) removed.
        """
        preds = self.model(batch, training=False)
        idxs = tf.argmax(preds, -1)
        collapsed = remove_consecutive(idxs)
        # 59 is the blank / padding class used during training.
        return tf.cast(remove_number(collapsed, 59), tf.int32)[0]

    @tf.function(input_signature=[tf.TensorSpec(shape=[None, len(SEL_COLS)], dtype=tf.float32, name='inputs')])
    def __call__(self, inputs, training=False):
        """Return {'outputs': one-hot predictions} for one landmark sequence."""
        # Preprocess Data
        x = self.preprocess_layer(inputs)
        # Add the batch axis expected by the model and decode greedily.
        x = self.decode(x[None])

        # One-hot over 59 classes, i.e. the class indices below the blank (59).
        x = tf.one_hot(x, 59)
        return {'outputs': x}

# Smoke test: preprocessing and the export wrapper on an all-zero input.
pre = PreprocessLayer()
print(pre(tf.zeros((300, 273))).shape)
tflitemodel_base = TFLiteModel(model)
print(batch[0][0].shape)
tflitemodel_base(tf.zeros((300, 273)))["outputs"].shape

In [None]:
# Per-frame argmax predictions for the first training example, with the
# blank class (59) removed. Repeated indices are not collapsed here.
prd = tf.math.argmax(model(batch[0][0][None]), axis=-1)
tf.boolean_mask(prd, prd != 59)

In [None]:
# Map a hand-picked list of class indices back to characters.
[num_to_char[ch] for ch in [49, 40, 50, 46, 32, 32, 32]]

In [None]:
# Convert the export wrapper to a TFLite flatbuffer, restricted to built-in ops.
keras_model_converter = tf.lite.TFLiteConverter.from_keras_model(tflitemodel_base)
keras_model_converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS]

tflite_model = keras_model_converter.convert()
with open('model.tflite', 'wb') as f:
    f.write(tflite_model)

# Record the input columns, in model order, next to the model file.
infargs = {"selected_columns": SEL_COLS}

with open('inference_args.json', "w") as json_file:
    json.dump(infargs, json_file)

In [None]:
# Load the exported model and check that it exposes the expected signature.
interpreter = tf.lite.Interpreter("model.tflite")

REQUIRED_SIGNATURE = "serving_default"
REQUIRED_OUTPUT = "outputs"

with open("/kaggle/input/asl-fingerspelling/character_to_prediction_index.json", "r") as f:
    character_map = json.load(f)
rev_character_map = {j:i for i,j in character_map.items()}

found_signatures = list(interpreter.get_signature_list().keys())

if REQUIRED_SIGNATURE not in found_signatures:
    # KernelEvalException is not defined anywhere in this notebook, so the
    # guard itself raised NameError; raise a built-in exception instead.
    raise RuntimeError('Required input signature not found.')

# Smoke test on an all-zero sequence.
prediction_fn = interpreter.get_signature_runner(REQUIRED_SIGNATURE)
output = prediction_fn(inputs=tf.zeros((300, 273)))
prediction_str = "".join([rev_character_map.get(s, "") for s in np.argmax(output[REQUIRED_OUTPUT], axis=1)])
print(prediction_str)

In [None]:
import zipfile

def zip_files(file1, file2, output_zip):
    """Write the submission archive.

    Args:
        file1: path stored in the archive as 'inference_args.json'.
        file2: path stored in the archive as 'model.tflite'.
        output_zip: path of the zip file to create (overwritten if present).
    """
    members = ((file1, 'inference_args.json'), (file2, 'model.tflite'))
    with zipfile.ZipFile(output_zip, 'w') as archive:
        for src_path, arcname in members:
            archive.write(src_path, arcname)

# Bundle the inference arguments and the TFLite model into submission.zip.
file1 = '/kaggle/working/inference_args.json'
file2 = '/kaggle/working/model.tflite'
output_zip = '/kaggle/working/submission.zip'

zip_files(file1, file2, output_zip)

# Evaluate

In [None]:
import time
import json
from tqdm.auto import tqdm
import Levenshtein as Lev


# Read the column list back from the exported inference arguments. A context
# manager closes the handle; the bare json.load(open(...)) leaked it.
with open('/kaggle/working/inference_args.json') as f:
    SEL_FEATURES = json.load(f)['selected_columns']

def load_relevant_data_subset(pq_path):
    """Read only the SEL_FEATURES columns of the parquet file at `pq_path`."""
    return pd.read_parquet(pq_path, columns=SEL_FEATURES)

with open("/kaggle/input/asl-fingerspelling/character_to_prediction_index.json", "r") as f:
    character_map = json.load(f)
rev_character_map = {j:i for i,j in character_map.items()}


df = pd.read_csv('/kaggle/input/asl-fingerspelling/train.csv')

# Load the frames of the first training sequence as a sanity check.
idx = 0
sample = df.loc[idx]
loaded = load_relevant_data_subset('/kaggle/input/asl-fingerspelling/' + sample['path'])
# The parquet index holds the sequence id; keep only this sample's rows.
loaded = loaded[loaded.index==sample['sequence_id']].values
print(loaded.shape)
frames = loaded

def wer__(s1, s2):
    """Return the Levenshtein distance between `s1` and `s2` divided by the word count of `s1`.

    NOTE(review): the distance is computed on characters while the
    denominator counts whitespace-separated words, so this is neither a
    character error rate nor a word error rate — confirm the intended metric.
    """
    n_words = len(s1.split())
    return Lev.distance(s1, s2) / n_words

found_signatures = list(interpreter.get_signature_list().keys())

REQUIRED_SIGNATURE = 'serving_default'
REQUIRED_OUTPUT = 'outputs'
if REQUIRED_SIGNATURE not in found_signatures:
    # KernelEvalException is not defined anywhere in this notebook, so the
    # guard itself raised NameError; raise a built-in exception instead.
    raise RuntimeError('Required input signature not found.')

# Run the TFLite model on the frames of the sample sequence loaded above.
prediction_fn = interpreter.get_signature_runner(REQUIRED_SIGNATURE)
output_lite = prediction_fn(inputs=frames)
prediction_str = "".join([rev_character_map.get(s, "") for s in np.argmax(output_lite[REQUIRED_OUTPUT], axis=1)])
print(prediction_str)


st = time.time()
cnt = 0
total = 100
model_time = 0

levs = []

# Consecutive rows of train.csv frequently point at the same parquet file.
# Keep the most recently read file in memory instead of re-reading it for
# every sample.
_last_path, _last_frame = None, None

for i in tqdm(range(len(df.iloc[:total]))):
    sample = df.loc[i]
    pq_path = '/kaggle/input/asl-fingerspelling/' + sample['path']
    if pq_path != _last_path:
        _last_path, _last_frame = pq_path, load_relevant_data_subset(pq_path)
    loaded = _last_frame[_last_frame.index==sample['sequence_id']].values

    # Time only the interpreter call.
    md_st = time.time()
    output_ = prediction_fn(inputs=loaded)
    model_time += time.time() - md_st

    prediction_str = "".join([rev_character_map.get(s, "") for s in np.argmax(output_[REQUIRED_OUTPUT], axis=1)])
    cur_lev = wer__(sample['phrase'], prediction_str)

    levs.append(cur_lev)

# Average over the number of samples actually evaluated: train.csv may hold
# fewer than `total` rows. The max() guards against an empty frame.
n_done = max(len(levs), 1)

print(f'WER: {np.mean(levs):.5f}')
print(f'Mean time: {(time.time() - st)/n_done:.7f}')
print(f'Mean time only infer: {model_time/n_done:.7f}')