In [None]:
import gc
import json
import math
import pickle
import numpy as np
import pandas as pd
from tqdm import tqdm
import matplotlib.pyplot as plt
import tensorflow as tf
import tensorflow_addons as tfa

import Levenshtein as lev


from IPython.display import HTML
import matplotlib.animation as animation
from matplotlib.animation import FuncAnimation
import glob
import random

In [None]:
# Debug marker: prints the literal name of the training pre-processing function.
print("pre_process_fntrain")

In [None]:
# Competition input root: the character map JSON, train.csv and the
# train_landmarks parquet files are read from below this directory.
BASE_PATH="/kaggle/input/asl-fingerspelling/"
# Derived dataset root: the "mean_std/" .npy statistics and the "tfds/" TFRecord
# files are read from below this directory.
BASE_PATH_DATA = "/kaggle/input/aslfr-dataset-tfrecords/"

In [None]:
# Extra vocabulary entry used to right-pad phrases; the same index is handed to
# the CTC loss as its blank index.
pad_token = '^'
pad_token_idx = 59

# Character -> integer index table stored next to the competition data.
with open(BASE_PATH + "character_to_prediction_index.json", "r") as f:
    char_to_num = json.load(f)
char_to_num[pad_token] = pad_token_idx

# Reverse lookup: integer index -> character.
num_to_char = {index: char for char, index in char_to_num.items()}

# Training metadata; later cells read its file_id, sequence_id and phrase columns.
df = pd.read_csv(BASE_PATH + "train.csv")

# Face landmark ids kept for the lips (used to build the "*_face_<id>" column names).
LIP = [
    61, 185, 40, 39, 37, 0, 267, 269, 270, 409,
    291, 146, 91, 181, 84, 17, 314, 405, 321, 375,
    78, 191, 80, 81, 82, 13, 312, 311, 310, 415,
    95, 88, 178, 87, 14, 317, 402, 318, 324, 308,
]
# Pose landmark ids, split into the two sets used to build LPOSE_IDX_* / RPOSE_IDX_*.
LPOSE = [13, 15, 17, 19, 21]
RPOSE = [14, 16, 18, 20, 22]
POSE = LPOSE + RPOSE


def _axis_columns(axis):
    """Column names of one coordinate axis, ordered right hand, left hand, pose, face."""
    names = [f'{axis}_right_hand_{i}' for i in range(21)]
    names += [f'{axis}_left_hand_{i}' for i in range(21)]
    names += [f'{axis}_pose_{i}' for i in POSE]
    names += [f'{axis}_face_{i}' for i in LIP]
    return names


X = _axis_columns('x')
Y = _axis_columns('y')
Z = _axis_columns('z')

# Columns requested from the parquet files: all x columns, then all y, then all z.
SEL_COLS = X + Y + Z
FRAME_LEN = 128          # number of frames every sequence is padded / resized to
NUM_AXIS = 2             # channels per landmark assumed by the augmentation reshapes
MAX_PHRASE_LENGTH = 64   # phrases are right-padded to this many tokens


def _column_positions(axis, part, pose_ids=None):
    """Positions in SEL_COLS of the columns whose name contains both `part` and `axis`.

    When `pose_ids` is given, a column is kept only if the integer formed by its
    last two characters is one of `pose_ids`.
    """
    positions = []
    for position, column in enumerate(SEL_COLS):
        if part not in column or axis not in column:
            continue
        if pose_ids is not None and int(column[-2:]) not in pose_ids:
            continue
        positions.append(position)
    return positions


LIP_IDX_X   = _column_positions("x", "face")
RHAND_IDX_X = _column_positions("x", "right")
LHAND_IDX_X = _column_positions("x", "left")
RPOSE_IDX_X = _column_positions("x", "pose", RPOSE)
LPOSE_IDX_X = _column_positions("x", "pose", LPOSE)

LIP_IDX_Y   = _column_positions("y", "face")
RHAND_IDX_Y = _column_positions("y", "right")
LHAND_IDX_Y = _column_positions("y", "left")
RPOSE_IDX_Y = _column_positions("y", "pose", RPOSE)
LPOSE_IDX_Y = _column_positions("y", "pose", LPOSE)

LIP_IDX_Z   = _column_positions("z", "face")
RHAND_IDX_Z = _column_positions("z", "right")
LHAND_IDX_Z = _column_positions("z", "left")
RPOSE_IDX_Z = _column_positions("z", "pose", RPOSE)
LPOSE_IDX_Z = _column_positions("z", "pose", LPOSE)

# Normalisation statistics loaded from .npy files. pre_process1 and
# pre_processtrain subtract the *M array and divide by the *S array of the
# matching landmark group (RH/LH = right/left hand, RP/LP = right/left pose,
# LIP = lips).
# NOTE(review): array shapes are not visible here; they must broadcast against
# (frames, landmarks, 3) tensors -- confirm.
RHM = np.load(BASE_PATH_DATA+"mean_std/rh_mean.npy")
LHM = np.load(BASE_PATH_DATA+"mean_std/lh_mean.npy")
RPM = np.load(BASE_PATH_DATA+"mean_std/rp_mean.npy")
LPM = np.load(BASE_PATH_DATA+"mean_std/lp_mean.npy")
LIPM = np.load(BASE_PATH_DATA+"mean_std/lip_mean.npy")

RHS = np.load(BASE_PATH_DATA+"mean_std/rh_std.npy")
LHS = np.load(BASE_PATH_DATA+"mean_std/lh_std.npy")
RPS = np.load(BASE_PATH_DATA+"mean_std/rp_std.npy")
LPS = np.load(BASE_PATH_DATA+"mean_std/lp_std.npy")
LIPS = np.load(BASE_PATH_DATA+"mean_std/lip_std.npy")


# Landmark-axis positions of each group inside the 92-landmark tensor obtained by
# concatenating [lip, rhand, lhand, rpose, lpose] (40 + 21 + 21 + 5 + 5).
LIP_IDX = [*range(40)]
RHAND_IDX = [*range(40, 61)]
LHAND_IDX = [*range(61, 82)]
RPOSE_IDX = [*range(82, 87)]
LPOSE_IDX = [*range(87, 92)]

# ANIMATE

In [None]:
# Shared figure/axes: plot_frame draws into `ax`, animate_frames animates `fig`.
fig, ax = plt.subplots()
# Pairs of landmark indices (0..20) that plot_frame joins with a line segment.
edges = [(0,1),(1,2),(2,3),(3,4),(0,5),(0,17),(5,6),(6,7),(7,8),(5,9),(9,10),(10,11),(11,12),
         (9,13),(13,14),(14,15),(15,16),(13,17),(17,18),(18,19),(19,20)]

def plot_frame(frame, edges=[], idxs=[]):
    """Draw one frame of 21 landmarks on the shared module-level axes ``ax``.

    Args:
        frame: flat sequence; elements 0..20 are read as x values and the
            remaining elements as y values.
        edges: index pairs; a line is drawn between the two landmarks of each pair.
        idxs: text label per landmark; when empty, landmarks are labelled 0..n-1.
    """
    xs, ys = frame[0:21], frame[21:]
    ax.clear()

    ax.scatter(xs, ys, color='dodgerblue')
    labels = idxs if len(idxs) != 0 else list(range(len(xs)))

    for k in range(len(xs)):
        ax.text(xs[k], ys[k], labels[k])

    for edge in edges:
        start, end = edge[0], edge[1]
        ax.plot([xs[start], xs[end]], [ys[start], ys[end]], color='salmon')

    # Remove ticks and tick labels on both axes.
    for clear_axis in (ax.set_xticks, ax.set_yticks, ax.set_xticklabels, ax.set_yticklabels):
        clear_axis([])
    plt.show()
    
    
def animate_frames(frames, edges=[], idxs=[]):
    """Animate a sequence of frames with plot_frame and return it as notebook HTML.

    Args:
        frames: iterable of frames; each item is passed to ``plot_frame``.
        edges: landmark index pairs forwarded to ``plot_frame``.
        idxs: landmark labels forwarded to ``plot_frame`` (previously accepted
            but silently ignored); an empty value keeps the default 0..n-1 labels.

    Returns:
        ``IPython.display.HTML`` wrapping the JavaScript animation (100 ms per frame).
    """
    anim = FuncAnimation(
        fig,
        lambda frame: plot_frame(frame, edges, idxs),
        frames=frames,
        interval=100,
    )
    return HTML(anim.to_jshtml())

In [None]:
def load_relevant_data_subset(pq_path):
    """Read only the SEL_COLS columns of the parquet file at ``pq_path`` into a DataFrame."""
    landmarks = pd.read_parquet(pq_path, columns=SEL_COLS)
    return landmarks

# Take the first landmark file referenced by train.csv and build its parquet path.
file_id = df.file_id.iloc[0]
inpdir = BASE_PATH+"train_landmarks"
pqfile = f"{inpdir}/{file_id}.parquet"
# Metadata rows that belong to this file, and the file's selected landmark columns.
seq_refs = df.loc[df.file_id == file_id]
seqs = load_relevant_data_subset(pqfile)

# Sample sequence used further down to probe the pre-processing output shape:
# the rows of `seqs` whose index equals the first sequence_id, plus its phrase.
seq_id = seq_refs.sequence_id.iloc[0]
frames = seqs.iloc[seqs.index == seq_id]
phrase = str(df.loc[df.sequence_id == seq_id].phrase.iloc[0])

In [None]:
@tf.function()
def resize_pad(x):
    """Bring the leading axis of a rank-3 tensor to exactly FRAME_LEN entries.

    Shorter inputs are padded at the end of axis 0 with NaN; inputs with
    FRAME_LEN or more entries go through ``tf.image.resize`` with target size
    (FRAME_LEN, size of axis 1).
    """
    n_frames = tf.shape(x)[0]
    if n_frames < FRAME_LEN:
        missing = FRAME_LEN - n_frames
        x = tf.pad(x, [[0, missing], [0, 0], [0, 0]], constant_values=float("NaN"))
    else:
        target_size = (FRAME_LEN, tf.shape(x)[1])
        x = tf.image.resize(x, target_size)
    return x

@tf.function(jit_compile=True)
def pre_process0(x):
    """Split a landmark table into per-group xyz tensors and drop hand-less frames.

    Args:
        x: 2-D array-like indexed as (frame, column). Columns are addressed
            through the ``*_IDX_X`` / ``*_IDX_Y`` / ``*_IDX_Z`` position lists
            that were derived from SEL_COLS.

    Returns:
        Tuple ``(lip, rhand, lhand, rpose, lpose)`` of tensors shaped
        (kept_frames, group_landmarks, 3). A frame is kept only when the sum of
        its hand coordinates (NaN counted as 0) is non-zero.
    """
    def xyz(idx_x, idx_y, idx_z):
        # Gather the three coordinate column sets and join them on a new last axis.
        xs = tf.gather(x, idx_x, axis=1)[..., tf.newaxis]
        ys = tf.gather(x, idx_y, axis=1)[..., tf.newaxis]
        zs = tf.gather(x, idx_z, axis=1)[..., tf.newaxis]
        return tf.concat([xs, ys, zs], axis=-1)

    lip = xyz(LIP_IDX_X, LIP_IDX_Y, LIP_IDX_Z)
    rhand = xyz(RHAND_IDX_X, RHAND_IDX_Y, RHAND_IDX_Z)
    lhand = xyz(LHAND_IDX_X, LHAND_IDX_Y, LHAND_IDX_Z)
    rpose = xyz(RPOSE_IDX_X, RPOSE_IDX_Y, RPOSE_IDX_Z)
    lpose = xyz(LPOSE_IDX_X, LPOSE_IDX_Y, LPOSE_IDX_Z)

    # Per-frame sum over both hands with NaN treated as 0; a zero sum marks the
    # frame for removal.
    both_hands = tf.concat([rhand, lhand], axis=1)
    both_hands = tf.where(tf.math.is_nan(both_hands), 0.0, both_hands)
    keep = tf.math.not_equal(tf.reduce_sum(both_hands, axis=[1, 2]), 0.0)

    return lip[keep], rhand[keep], lhand[keep], rpose[keep], lpose[keep]


@tf.function()
def pre_process1(lip, rhand, lhand, rpose, lpose):
    """Normalise, pad/resize and flatten the landmark groups (inference/validation path).

    Each group is standardised with its module-level mean/std arrays and brought
    to FRAME_LEN frames with ``resize_pad`` (group by group). The groups are then
    concatenated along the landmark axis in the order lip, rhand, lhand, rpose,
    lpose; only the first two coordinate channels (x, y) are kept.

    Args:
        lip, rhand, lhand, rpose, lpose: tensors shaped (frames, landmarks, 3),
            as returned by ``pre_process0`` / ``decode_fn``.

    Returns:
        Tensor shaped (FRAME_LEN, total_landmarks * 2), laid out per landmark as
        [x, y], with NaN replaced by 0.0.
    """
    lip   = resize_pad((lip - LIPM) / LIPS)
    rhand = resize_pad((rhand - RHM) / RHS)
    lhand = resize_pad((lhand - LHM) / LHS)
    rpose = resize_pad((rpose - RPM) / RPS)
    lpose = resize_pad((lpose - LPM) / LPS)

    x = tf.concat([lip, rhand, lhand, rpose, lpose], axis=1)

    # Keep the x and y channels only. The previous flatten -> reshape ->
    # per-group gather -> re-concat sequence reproduced exactly this slice,
    # because the group index ranges are contiguous and in concat order.
    x = x[:, :, :2]

    s = tf.shape(x)
    x = tf.reshape(x, (s[0], s[1]*s[2]))
    x = tf.where(tf.math.is_nan(x), 0.0, x)

    return x


# Run both pre-processing stages on the sample sequence loaded above and record
# the resulting tensor shape as the model input shape.
pre0 = pre_process0(frames)
pre1 = pre_process1(*pre0)
INPUT_SHAPE = list(pre1.shape)
print(INPUT_SHAPE)

In [None]:
def interp1d_(x, target_len, method='random'):
    """Resize the leading axis of a rank-3 tensor to ``target_len`` with tf.image.resize.

    Args:
        x: tensor whose axis 0 is resized; the size of axis 1 is kept.
        target_len: requested length; values below 1 are raised to 1.
        method: a ``tf.image.resize`` method name, or ``'random'`` to pick
            'bilinear' with probability 0.33 and otherwise 'bicubic' or
            'nearest' with equal probability.

    Returns:
        The resized tensor.
    """
    target_len = tf.maximum(1,target_len)
    new_size = (target_len, tf.shape(x)[1])
    if method == 'random':
        if tf.random.uniform(()) < 0.33:
            x = tf.image.resize(x, new_size, 'bilinear')
        else:
            if tf.random.uniform(()) < 0.5:
                x = tf.image.resize(x, new_size, 'bicubic')
            else:
                x = tf.image.resize(x, new_size, 'nearest')
    else:
        x = tf.image.resize(x, new_size, method)
    return x


def resample(x, rate):
    """Resize axis 0 of ``x`` to ``int(rate * current_length)`` via ``interp1d_``.

    The interpolation method is left at ``interp1d_``'s default ('random').
    """
    n_frames = tf.cast(tf.shape(x)[0], tf.float32)
    target_len = tf.cast(rate * n_frames, tf.int32)
    return interp1d_(x, target_len)

# Crop window size, same value as FRAME_LEN.
MAX_LEN=FRAME_LEN
# Module-level window length read by temporal_crop and pre_processtrain.
length=MAX_LEN
def temporal_crop(x,offset):
    """Return the slice of ``x`` that starts at ``offset`` and spans at most ``length`` items.

    ``length`` is the module-level crop window size.
    """
    stop = offset + length
    return x[offset:stop]

@tf.function()
def pre_processtrain(lip, rhand, lhand, rpose, lpose):
    """Training-time pre-processing: random resample, random crop, normalise, flatten.

    All five groups are resampled in time with one shared rate drawn uniformly
    from [0.8, 1.2), cropped with one shared random offset to at most ``length``
    frames, standardised with their mean/std arrays and concatenated in the
    order lip, rhand, lhand, rpose, lpose. Only the x and y channels are kept.

    The crop offset bound is derived from the *resampled* length. It used to be
    derived from the original length, so after a down-sampling draw the window
    could start beyond ``resampled_length - length`` and return fewer frames
    than were available.

    Args:
        lip, rhand, lhand, rpose, lpose: tensors shaped (frames, landmarks, 3)
            that share the same number of frames.

    Returns:
        Tensor shaped (cropped_frames, total_landmarks * 2), laid out per
        landmark as [x, y]. NaN values are NOT replaced here and the frame count
        is not padded; ``augment`` does both afterwards.
    """
    rate = tf.random.uniform((), 0.8, 1.2)

    lip = resample(lip, rate)
    rhand = resample(rhand, rate)
    lhand = resample(lhand, rate)
    rpose = resample(rpose, rate)
    lpose = resample(lpose, rate)

    # Offset is drawn from [0, clip(resampled_len - length, 1, length)).
    l = tf.shape(lip)[0]
    offset = tf.random.uniform((), 0, tf.clip_by_value(l-length,1,length), dtype=tf.int32)

    lip   = (temporal_crop(lip, offset) - LIPM) / LIPS
    rhand = (temporal_crop(rhand, offset) - RHM) / RHS
    lhand = (temporal_crop(lhand, offset) - LHM) / LHS
    rpose = (temporal_crop(rpose, offset) - RPM) / RPS
    lpose = (temporal_crop(lpose, offset) - LPM) / LPS

    x = tf.concat([lip, rhand, lhand, rpose, lpose], axis=1)

    # Keep x and y only. The previous flatten -> reshape -> per-group gather ->
    # re-concat sequence reproduced exactly this slice.
    x = x[:, :, :2]

    s = tf.shape(x)
    x = tf.reshape(x, (s[0], s[1]*s[2]))
    return x

# AUGMENTATIONS

In [None]:
# Bounds of the scalar offset added by random_shift.
MINSHIFT, MAXSHIFT = -1, 1
# rotate_xyz draws its angle (in degrees) from [-ANGLE_DEGREE_RANGE, ANGLE_DEGREE_RANGE).
ANGLE_DEGREE_RANGE = 45
# Bounds of the shear factors drawn by random_x_shear / random_y_shear.
MIN_SHEAR, MAX_SHEAR = -0.2, 0.2


def _index_spans(*spans):
    """Concatenate ``range(start, stop)`` for every (start, stop) pair into one list."""
    return [i for start, stop in spans for i in range(start, stop)]


# Flat column positions per axis. Not referenced by the other cells shown here.
X_idx_after_preprocess = _index_spans((0, 40), (120, 141), (183, 204), (246, 251), (261, 266))
Y_idx_after_preprocess = _index_spans((40, 80), (141, 162), (204, 225), (251, 256), (266, 271))
Z_idx_after_preprocess = _index_spans((80, 120), (162, 183), (225, 246), (256, 261), (271, 276))

# Landmark-axis positions of each group inside the 92-landmark layout
# [lip, rhand, lhand, rpose, lpose].
LIP_IDX = _index_spans((0, 40))
RHAND_IDX = _index_spans((40, 61))
LHAND_IDX = _index_spans((61, 82))
RPOSE_IDX = _index_spans((82, 87))
LPOSE_IDX = _index_spans((87, 92))

#FLIP
@tf.function()
def random_x_flip(x):
    """With probability 0.5, negate the x channel of every landmark.

    The input is viewed as (frames, 92, NUM_AXIS); channel 0 is multiplied by -1
    and channel 1 is kept. Landmark groups are not swapped. The per-group
    gather/re-concat of the earlier version was an identity reorder and has been
    removed.

    Args:
        x: tensor reshapeable to (-1, 92, NUM_AXIS).

    Returns:
        Tensor shaped (frames, 92 * 2) when flipped, otherwise ``x`` unchanged.
    """
    if tf.random.uniform([])<0.5:
        a = tf.reshape(x,(-1,92,NUM_AXIS))
        # 0:1 / 1:2 slices keep the channel axis so the parts can be re-joined.
        a = tf.concat([a[:, :, 0:1] * -1, a[:, :, 1:2]], axis=-1)
        s = tf.shape(a)
        x = tf.reshape(a, (s[0], s[1]*s[2]))
    return x




#SHIFT
@tf.function()
def random_shift(x):
    """Add one random scalar drawn uniformly from [MINSHIFT, MAXSHIFT) to every element of ``x``."""
    delta = tf.random.uniform([], minval=MINSHIFT, maxval=MAXSHIFT)
    return x + delta



#ROTATE
@tf.function()
def rotate_xyz(x):
    """Rotate the (x, y) coordinates of all landmarks by one random angle.

    The angle is drawn uniformly from [-ANGLE_DEGREE_RANGE, ANGLE_DEGREE_RANGE)
    degrees. Each landmark's row vector [x, y] is right-multiplied by
    [[cos, -sin], [sin, cos]]. The same matrix was previously applied to each
    landmark group separately and the groups re-concatenated in their original
    order; a single matmul over all 92 landmarks computes the same thing.

    Args:
        x: tensor reshapeable to (-1, 92, NUM_AXIS).

    Returns:
        Tensor shaped (frames, 92 * 2).
    """
    angle=tf.random.uniform([],minval=-ANGLE_DEGREE_RANGE,maxval=ANGLE_DEGREE_RANGE)
    radian = angle/180*np.pi

    rotation_matrix = tf.stack([(tf.cos(radian), -tf.sin(radian)), (tf.sin(radian), tf.cos(radian))], axis=0)

    a = tf.reshape(x,(-1,92,NUM_AXIS))
    # Only channels 0 (x) and 1 (y) take part in the rotation.
    rotated = a[:, :, :2] @ rotation_matrix

    s = tf.shape(rotated)
    x = tf.reshape(rotated, (s[0], s[1]*s[2]))
    return x


#SHEAR
@tf.function()
def random_x_shear(x):
    """Shear the landmarks by adding a random multiple of x to y.

    Computes ``y' = y + s * x`` with ``s`` drawn uniformly from
    [MIN_SHEAR, MAX_SHEAR); x is left unchanged. Note that, despite the function
    name, it is the y coordinate that is modified. The unused second random draw
    and the identity gather/re-concat of the earlier version have been removed.

    Args:
        x: tensor reshapeable to (-1, 92, NUM_AXIS).

    Returns:
        Tensor shaped (frames, 92 * 2).
    """
    a = tf.reshape(x,(-1,92,NUM_AXIS))

    # 0:1 / 1:2 slices keep the channel axis so the parts can be re-joined.
    X0 = a[:, :, 0:1]
    Y0 = a[:, :, 1:2]

    sy = tf.random.uniform([],minval=MIN_SHEAR,maxval=MAX_SHEAR)

    sheared = tf.concat([X0, Y0 + sy*X0], axis=-1)

    s = tf.shape(sheared)
    x = tf.reshape(sheared, (s[0], s[1]*s[2]))
    return x
@tf.function()
def random_y_shear(x):
    """Shear the landmarks by adding a random multiple of y to x.

    Computes ``x' = x + s * y`` with ``s`` drawn uniformly from
    [MIN_SHEAR, MAX_SHEAR); y is left unchanged. Note that, despite the function
    name, it is the x coordinate that is modified. The unused second random draw
    and the identity gather/re-concat of the earlier version have been removed.

    Args:
        x: tensor reshapeable to (-1, 92, NUM_AXIS).

    Returns:
        Tensor shaped (frames, 92 * 2).
    """
    a = tf.reshape(x,(-1,92,NUM_AXIS))

    # 0:1 / 1:2 slices keep the channel axis so the parts can be re-joined.
    X0 = a[:, :, 0:1]
    Y0 = a[:, :, 1:2]

    sx = tf.random.uniform([],minval=MIN_SHEAR,maxval=MAX_SHEAR)

    sheared = tf.concat([X0 + sx*Y0, Y0], axis=-1)

    s = tf.shape(sheared)
    x = tf.reshape(sheared, (s[0], s[1]*s[2]))
    return x


@tf.function()
def temporal_mask(x, size=(0.2,0.4), mask_value=float(0)):
    """With probability 0.75, overwrite one contiguous run of frames with ``mask_value``.

    The run length is a fraction of the frame count drawn uniformly from
    ``size`` (truncated to an integer), and the start offset is drawn so that
    the run stays inside the sequence. The identity gather/re-concat of the
    earlier version has been removed.

    Args:
        x: tensor reshapeable to (-1, 92, NUM_AXIS).
        size: (low, high) bounds of the masked fraction of frames.
        mask_value: value written into the masked frames.

    Returns:
        Tensor shaped (frames, 92 * 2) when masking was applied, otherwise ``x``
        unchanged.
    """
    if tf.random.uniform(())<0.75:

        a = tf.reshape(x,(-1,92,NUM_AXIS))
        # Frame count taken from the reshaped view, which is the tensor being updated.
        l = tf.shape(a)[0]

        mask_size = tf.random.uniform((), *size)
        mask_size = tf.cast(tf.cast(l, tf.float32) * mask_size, tf.int32)
        mask_offset = tf.random.uniform((), 0, tf.clip_by_value(l-mask_size,1,l), dtype=tf.int32)
        a = tf.tensor_scatter_nd_update(a,tf.range(mask_offset, mask_offset+mask_size)[...,None],tf.fill([mask_size,92,NUM_AXIS],mask_value))

        # Output carries channels 0 and 1 only, as before.
        a = a[:, :, :2]
        s = tf.shape(a)
        x = tf.reshape(a, (s[0], s[1]*s[2]))
    return x

@tf.function()
def spatial_mask(x):
    """With probability 0.75, zero every landmark that falls inside a random square.

    The square's lower corner (offset_x, offset_y) is drawn uniformly from
    [0, 1) per axis and its side from [0.2, 0.4). A landmark is masked (both
    channels set to 0.0) when its x and y both lie strictly inside the square.
    The identity gather/re-concat of the earlier version has been removed.

    Args:
        x: tensor reshapeable to (-1, 92, NUM_AXIS).

    Returns:
        Tensor shaped (frames, 92 * 2) when masking was applied, otherwise ``x``
        unchanged.
    """
    if tf.random.uniform(())<0.75:
        a = tf.reshape(x,(-1,92,NUM_AXIS))

        size=(0.2,0.4)
        mask_value=float(0)
        mask_offset_y = tf.random.uniform(())
        mask_offset_x = tf.random.uniform(())
        mask_size = tf.random.uniform((), *size)
        mask_x = (mask_offset_x<a[...,0]) & (a[...,0] < mask_offset_x + mask_size)
        mask_y = (mask_offset_y<a[...,1]) & (a[...,1] < mask_offset_y + mask_size)
        mask = mask_x & mask_y
        a = tf.where(mask[...,None], mask_value, a)

        # Output carries channels 0 and 1 only, as before.
        a = a[:, :, :2]
        s = tf.shape(a)
        x = tf.reshape(a, (s[0], s[1]*s[2]))

    return x
    
    
@tf.function()
def augment(x,y):
    """Apply the training augmentations to one sample, then pad/resize to FRAME_LEN.

    Order: temporal mask, spatial mask, rotation, x flip, shift, both shears.
    Afterwards each landmark group is passed through ``resize_pad`` on its own,
    the groups are re-joined in the order lip, rhand, lhand, rpose, lpose, and
    NaN values are replaced by 0.0.

    Args:
        x: feature tensor reshapeable to (-1, 92, NUM_AXIS).
        y: label; returned untouched.

    Returns:
        Tuple ``(features, y)`` with features shaped (FRAME_LEN, 92 * 2).
    """
    for transform in (
        temporal_mask,    # temporal
        spatial_mask,     # spatial
        rotate_xyz,       # rotate
        random_x_flip,    # flip
        random_shift,     # shift
        random_x_shear,   # shear
        random_y_shear,
    ):
        x = transform(x)

    # (frames, 92, 2): channels 0 and 1 of every landmark.
    xy = tf.reshape(x, (-1, 92, NUM_AXIS))[:, :, :2]

    # resize_pad is applied per group, exactly as before.
    lip = resize_pad(tf.gather(xy, LIP_IDX, axis=1))
    rhand = resize_pad(tf.gather(xy, RHAND_IDX, axis=1))
    lhand = resize_pad(tf.gather(xy, LHAND_IDX, axis=1))
    rpose = resize_pad(tf.gather(xy, RPOSE_IDX, axis=1))
    lpose = resize_pad(tf.gather(xy, LPOSE_IDX, axis=1))

    merged = tf.concat([lip, rhand, lhand, rpose, lpose], axis=1)
    dims = tf.shape(merged)
    flat = tf.reshape(merged, (dims[0], dims[1]*dims[2]))
    flat = tf.where(tf.math.is_nan(flat), 0.0, flat)
    return flat, y

In [None]:
def decode_fn(record_bytes):
    """Parse one serialized example into five landmark tensors and the phrase.

    Args:
        record_bytes: serialized example holding variable-length float features
            "lip", "rhand", "lhand", "rpose", "lpose" and a variable-length
            int64 feature "phrase".

    Returns:
        Tuple ``(lip, rhand, lhand, rpose, lpose, phrase)``; the landmark
        tensors are reshaped to (-1, n_landmarks, 3) with n_landmarks = 40, 21,
        21, 5, 5 and the phrase is a dense 1-D int64 tensor.
    """
    part_sizes = (("lip", 40), ("rhand", 21), ("lhand", 21), ("rpose", 5), ("lpose", 5))

    schema = {key: tf.io.VarLenFeature(tf.float32) for key, _ in part_sizes}
    schema["phrase"] = tf.io.VarLenFeature(tf.int64)
    parsed = tf.io.parse_single_example(record_bytes, schema)

    parts = []
    for key, n_landmarks in part_sizes:
        dense = tf.sparse.to_dense(parsed[key])
        parts.append(tf.reshape(dense, (-1, n_landmarks, 3)))
    lip, rhand, lhand, rpose, lpose = parts

    phrase = tf.sparse.to_dense(parsed["phrase"])
    return lip, rhand, lhand, rpose, lpose, phrase

def pre_process_fntrain(lip, rhand, lhand, rpose, lpose, phrase):
    """Training map function: right-pad the phrase and run ``pre_processtrain``.

    The phrase is padded with ``pad_token_idx`` up to MAX_PHRASE_LENGTH tokens.

    Returns:
        Tuple ``(features, padded_phrase)``.
    """
    n_missing = MAX_PHRASE_LENGTH - tf.shape(phrase)[0]
    padded_phrase = tf.pad(phrase, [[0, n_missing]], constant_values=pad_token_idx)
    features = pre_processtrain(lip, rhand, lhand, rpose, lpose)
    return features, padded_phrase

def pre_process_fn(lip, rhand, lhand, rpose, lpose, phrase):
    """Validation map function: right-pad the phrase and run ``pre_process1``.

    The phrase is padded with ``pad_token_idx`` up to MAX_PHRASE_LENGTH tokens.

    Returns:
        Tuple ``(features, padded_phrase)``.
    """
    n_missing = MAX_PHRASE_LENGTH - tf.shape(phrase)[0]
    padded_phrase = tf.pad(phrase, [[0, n_missing]], constant_values=pad_token_idx)
    features = pre_process1(lip, rhand, lhand, rpose, lpose)
    return features, padded_phrase
    

# Batch sizes (previously assigned twice in this cell with the same values).
train_batch_size = 128
val_batch_size = 128

tffiles=glob.glob(BASE_PATH_DATA+"tfds/*")
val_len = 1

# Hold out the shard named "5414471.tfrecord" for validation; every other shard
# is used for training, in shuffled order.
val_files=[i for i in tffiles if i[len(BASE_PATH_DATA+"tfds/"):]=="5414471.tfrecord"]
train_files=[i for i in tffiles if i not in val_files]
random.shuffle(train_files)

# Training pipeline: shuffle records, decode, random resample/crop + normalise,
# augment, batch.
train_dataset = (
    tf.data.TFRecordDataset(train_files)
    .shuffle(2000)
    .prefetch(tf.data.AUTOTUNE)
    .map(decode_fn, num_parallel_calls=tf.data.AUTOTUNE)
    .map(pre_process_fntrain, num_parallel_calls=tf.data.AUTOTUNE)
    .map(augment)
    .batch(train_batch_size)
    .prefetch(tf.data.AUTOTUNE)
)

# Validation pipeline: deterministic pre-processing only. `val_files` is already
# a list of paths, so it is passed directly instead of being wrapped in a
# second list.
val_dataset = (
    tf.data.TFRecordDataset(val_files)
    .prefetch(tf.data.AUTOTUNE)
    .map(decode_fn, num_parallel_calls=tf.data.AUTOTUNE)
    .map(pre_process_fn, num_parallel_calls=tf.data.AUTOTUNE)
    .batch(val_batch_size)
    .prefetch(tf.data.AUTOTUNE)
)

# Sanity check: shapes of the first validation batch (features, phrases).
batch = next(iter(val_dataset))
batch[0].shape, batch[1].shape

In [None]:
# Lists the files of a second TFRecord directory. `tffiles1` is not referenced
# by any other cell in the visible part of the notebook.
tffiles1=glob.glob("/kaggle/input/makingdf-with/tfds1/*")

In [None]:
# Number of TFRecord files used for training.
len(train_files)

In [None]:
# Bare string expression: the cell only echoes this path, nothing is assigned or read.
"/kaggle/input/aslfr-preprocess-dataset-tfrecords-mean-std-mine/tfds/33432165.tfrecord"

In [None]:
# Visual check of the augmented training data. An identical cell follows this
# one in the notebook; since the pipeline is random, each run shows a different sample.
# Pull a single batch from the training pipeline (the loop stops after the first one).
for example,phrase in train_dataset:
    print(example.shape)
    break
# Sample 7 of the batch, viewed as (frames, 92 landmarks, NUM_AXIS).
x=example[7]
print(x)
x=tf.reshape(x,(-1,92,NUM_AXIS))
# First 20 frames of the landmarks selected by RHAND_IDX: channel 0, then channel 1.
# NOTE: this rebinds the module-level names X, Y, frames and phrase defined by
# earlier cells (X / Y were the column-name lists).
X=tf.gather(x[:,:,0],RHAND_IDX,axis=1)[0:20]
Y=tf.gather(x[:,:,1],RHAND_IDX,axis=1)[0:20]
# plot_frame expects 21 x values followed by 21 y values per frame.
frames = tf.concat([X,Y],axis=1)
animate_frames(frames,edges)

In [None]:
# Visual check of the augmented training data. This cell is identical to the
# one before it; since the pipeline is random, each run shows a different sample.
# Pull a single batch from the training pipeline (the loop stops after the first one).
for example,phrase in train_dataset:
    print(example.shape)
    break
# Sample 7 of the batch, viewed as (frames, 92 landmarks, NUM_AXIS).
x=example[7]
print(x)
x=tf.reshape(x,(-1,92,NUM_AXIS))
# First 20 frames of the landmarks selected by RHAND_IDX: channel 0, then channel 1.
# NOTE: this rebinds the module-level names X, Y, frames and phrase defined by
# earlier cells (X / Y were the column-name lists).
X=tf.gather(x[:,:,0],RHAND_IDX,axis=1)[0:20]
Y=tf.gather(x[:,:,1],RHAND_IDX,axis=1)[0:20]
# plot_frame expects 21 x values followed by 21 y values per frame.
frames = tf.concat([X,Y],axis=1)
animate_frames(frames,edges)

# MODEL

In [None]:
#Channel Attention
class ECA(tf.keras.layers.Layer):
    """Channel-attention gate: rescales every channel by a learned weight in (0, 1).

    The input is average-pooled over axis 1 (respecting the mask), a bias-free
    1-D convolution with one filter is run across the channel axis, and the
    sigmoid of the result multiplies the input channel-wise.

    Args:
        kernel_size: width of the convolution applied across channels.
    """

    def __init__(self, kernel_size=5, **kwargs):
        super().__init__(**kwargs)
        self.supports_masking = True
        self.kernel_size = kernel_size
        self.conv = tf.keras.layers.Conv1D(
            1,
            kernel_size=kernel_size,
            strides=1,
            padding="same",
            use_bias=False,
        )

    def call(self, inputs, mask=None):
        # (batch, channels): masked mean over axis 1.
        pooled = tf.keras.layers.GlobalAveragePooling1D()(inputs, mask=mask)
        # Treat the channel axis as the convolution's sequence axis.
        gate = self.conv(tf.expand_dims(pooled, -1))
        gate = tf.nn.sigmoid(tf.squeeze(gate, -1))
        # Broadcast the per-channel gate over axis 1.
        return inputs * gate[:, None, :]

# Causal depthwise 1D convolution. A depthwise convolution applies a separate filter to each input channel.
# This is different from a standard convolution, where every filter combines all input channels.
# Depthwise convolutions are computationally efficient and can be
# effective in capturing fine-grained features.
class CausalDWConv1D(tf.keras.layers.Layer):
    """Depthwise 1-D convolution preceded by left-only zero padding.

    ``dilation_rate * (kernel_size - 1)`` zeros are prepended on axis 1 and the
    convolution uses 'valid' padding with stride 1, so the output has as many
    steps as the input.

    Args:
        kernel_size: depthwise kernel width.
        dilation_rate: dilation of the depthwise kernel.
        use_bias: whether the depthwise convolution has a bias.
        depthwise_initializer: initializer for the depthwise kernel.
        name: layer name; also the prefix of the sub-layer names
            ('<name>_pad', '<name>_dwconv').
    """

    def __init__(self,
        kernel_size=17,
        dilation_rate=1,
        use_bias=False,
        depthwise_initializer='glorot_uniform',
        name='', **kwargs):
        super().__init__(name=name,**kwargs)
        left_padding = dilation_rate*(kernel_size-1)
        self.causal_pad = tf.keras.layers.ZeroPadding1D((left_padding,0),name=name + '_pad')
        self.dw_conv = tf.keras.layers.DepthwiseConv1D(
            kernel_size,
            strides=1,
            dilation_rate=dilation_rate,
            padding='valid',
            use_bias=use_bias,
            depthwise_initializer=depthwise_initializer,
            name=name + '_dwconv',
        )

        self.supports_masking = True

    def call(self, inputs):
        padded = self.causal_pad(inputs)
        return self.dw_conv(padded)

def Conv1DBlock(channel_size,
          kernel_size,
          dilation_rate=1,
          drop_rate=0.0,
          expand_ratio=2,
          se_ratio=0.25,
          activation='swish',
          name=None):
    """Build a convolutional block and return a function that applies it.

    Layers, in order: Dense expansion (``expand_ratio`` x input channels, with
    ``activation``), causal depthwise convolution, batch normalisation, ECA
    channel attention, Dense projection to ``channel_size``, optional dropout,
    and a residual add when the input already has ``channel_size`` channels.

    Args:
        channel_size: number of output channels.
        kernel_size: width of the depthwise convolution kernel.
        dilation_rate: dilation of the depthwise convolution.
        drop_rate: dropout rate; the dropout layer is skipped when it is 0.
        expand_ratio: channel multiplier of the expansion Dense layer.
        se_ratio: accepted for interface compatibility; not used in the body.
        activation: activation of the expansion Dense layer.
        name: prefix for the layer names; a unique id is generated when None.

    Returns:
        Callable mapping an input tensor to the block output.
    """
    if name is None:
        name = str(tf.keras.backend.get_uid("mbblock"))

    def apply(inputs):
        in_channels = tf.keras.backend.int_shape(inputs)[-1]
        residual = inputs

        # Point-wise expansion.
        hidden = tf.keras.layers.Dense(
            in_channels * expand_ratio,
            use_bias=True,
            activation=activation,
            name=name + '_expand_conv')(inputs)

        # Depthwise convolution over axis 1.
        hidden = CausalDWConv1D(
            kernel_size,
            dilation_rate=dilation_rate,
            use_bias=False,
            name=name + '_dwconv')(hidden)

        hidden = tf.keras.layers.BatchNormalization(momentum=0.95, name=name + '_bn')(hidden)
        hidden = ECA()(hidden)

        # Point-wise projection back to channel_size.
        out = tf.keras.layers.Dense(
            channel_size,
            use_bias=True,
            name=name + '_project_conv')(hidden)

        if drop_rate > 0:
            # noise_shape (None,1,1): one keep/drop decision per sample.
            out = tf.keras.layers.Dropout(drop_rate, noise_shape=(None,1,1), name=name + '_drop')(out)

        if in_channels == channel_size:
            out = tf.keras.layers.add([out, residual], name=name + '_add')
        return out

    return apply

class MultiHeadSelfAttention(tf.keras.layers.Layer):
    """Multi-head self-attention over axis 1 of a (batch, steps, dim) input.

    A single bias-free Dense layer produces queries, keys and values; attention
    logits are scaled by ``dim ** -0.5`` (the full model width, not the per-head
    width) and masked positions are excluded in the softmax.

    Args:
        dim: model width; must be divisible by ``num_heads``.
        num_heads: number of attention heads.
        dropout: dropout rate applied to the attention weights.

    Raises:
        ValueError: if ``dim`` is not divisible by ``num_heads``. Without this
            check the reshape/split in ``call`` either fail with an opaque
            shape error or silently mix sequence positions.
    """

    def __init__(self, dim=256, num_heads=4, dropout=0, **kwargs):
        super().__init__(**kwargs)
        if dim % num_heads != 0:
            raise ValueError(
                f"dim ({dim}) must be divisible by num_heads ({num_heads})"
            )
        self.dim = dim
        self.scale = self.dim ** -0.5
        self.num_heads = num_heads
        self.qkv = tf.keras.layers.Dense(3 * dim, use_bias=False)
        self.drop1 = tf.keras.layers.Dropout(dropout)
        self.proj = tf.keras.layers.Dense(dim, use_bias=False)
        self.supports_masking = True

    def call(self, inputs, mask=None):
        # (batch, steps, 3*dim) -> (batch, heads, steps, 3*dim/heads)
        qkv = self.qkv(inputs)
        qkv = tf.keras.layers.Permute((2, 1, 3))(tf.keras.layers.Reshape((-1, self.num_heads, self.dim * 3 // self.num_heads))(qkv))
        q, k, v = tf.split(qkv, [self.dim // self.num_heads] * 3, axis=-1)

        attn = tf.matmul(q, k, transpose_b=True) * self.scale

        if mask is not None:
            # Broadcast the (batch, steps) mask over heads and query positions.
            mask = mask[:, None, None, :]

        attn = tf.keras.layers.Softmax(axis=-1)(attn, mask=mask)
        attn = self.drop1(attn)

        # (batch, heads, steps, dim/heads) -> (batch, steps, dim)
        x = attn @ v
        x = tf.keras.layers.Reshape((-1, self.dim))(tf.keras.layers.Permute((2, 1, 3))(x))
        x = self.proj(x)
        return x


def TransformerBlock(dim=256, num_heads=6, expand=4, attn_dropout=0.2, drop_rate=0.2, activation='swish'):
    """Build a pre-norm transformer block and return a function that applies it.

    Sub-block 1: LayerNorm -> multi-head self-attention -> dropout -> residual add.
    Sub-block 2: LayerNorm -> Dense(dim * expand, activation) -> Dense(dim)
    -> dropout -> residual add. Both Dense layers are bias-free.

    Args:
        dim: model width.
        num_heads: number of attention heads.
        expand: width multiplier of the feed-forward hidden layer.
        attn_dropout: dropout rate on the attention weights.
        drop_rate: rate of the two residual-branch dropouts.
        activation: activation of the feed-forward hidden layer.

    Returns:
        Callable mapping an input tensor to the block output.
    """
    def apply(inputs):
        # Attention sub-block.
        attn_branch = tf.keras.layers.LayerNormalization(epsilon=1e-6)(inputs)
        attn_branch = MultiHeadSelfAttention(dim=dim,num_heads=num_heads,dropout=attn_dropout)(attn_branch)
        attn_branch = tf.keras.layers.Dropout(drop_rate, noise_shape=(None,1,1))(attn_branch)
        attn_out = tf.keras.layers.Add()([inputs, attn_branch])

        # Feed-forward sub-block.
        ffn_branch = tf.keras.layers.LayerNormalization(epsilon=1e-6)(attn_out)
        ffn_branch = tf.keras.layers.Dense(dim*expand, use_bias=False, activation=activation)(ffn_branch)
        ffn_branch = tf.keras.layers.Dense(dim, use_bias=False)(ffn_branch)
        ffn_branch = tf.keras.layers.Dropout(drop_rate, noise_shape=(None,1,1))(ffn_branch)
        return tf.keras.layers.Add()([attn_out, ffn_branch])
    return apply

def positional_encoding(maxlen, num_hid):
    """Sinusoidal position table with sine columns followed by cosine columns.

    For position p and column i in [0, num_hid/2) the angle is
    ``p / 10000 ** (i / (num_hid/2))``; the result concatenates sin(angle) and
    cos(angle) along the last axis.

    Args:
        maxlen: number of positions (rows).
        num_hid: requested encoding width; num_hid/2 sine and cosine columns each.

    Returns:
        float32 tensor with ``maxlen`` rows.
    """
    half_width = num_hid/2
    steps = tf.range(maxlen, dtype = tf.float32)[..., tf.newaxis]
    fractions = tf.range(half_width, dtype = tf.float32)[np.newaxis, :]/half_width
    rates = tf.math.divide(1, tf.math.pow(tf.cast(10000, tf.float32), fractions))
    angles = tf.linalg.matmul(steps, rates)
    return tf.concat([tf.math.sin(angles), tf.math.cos(angles)], axis=-1)
    
class LateDropout(tf.keras.layers.Layer):
    """Dropout that is bypassed until `start_step` training calls have been counted.

    Args:
        rate: dropout rate forwarded to the wrapped `Dropout` layer.
        noise_shape: forwarded to the wrapped `Dropout` layer.
        start_step: counter value from which dropout becomes active.
    """

    def __init__(self, rate, noise_shape=None, start_step=0, **kwargs):
        super().__init__(**kwargs)
        self.supports_masking = True
        self.rate = rate
        self.start_step = start_step
        self.dropout = tf.keras.layers.Dropout(rate, noise_shape=noise_shape)

    def build(self, input_shape):
        super().build(input_shape)
        # Non-trainable int64 counter of training-mode calls.
        self._train_counter = tf.Variable(
            0,
            dtype="int64",
            aggregation=tf.VariableAggregation.ONLY_FIRST_REPLICA,
            trainable=False,
        )

    def call(self, inputs, training=False):
        def _passthrough():
            return inputs

        def _dropped():
            return self.dropout(inputs, training=training)

        still_warming_up = self._train_counter < self.start_step
        outputs = tf.cond(still_warming_up, _passthrough, _dropped)
        # The counter only advances on training-mode calls.
        if training:
            self._train_counter.assign_add(1)
        return outputs

In [None]:
def CTCLoss(labels, logits):
    """Batch-mean CTC loss with `pad_token_idx` used as both padding and blank.

    Args:
        labels: integer label ids padded with `pad_token_idx`.
        logits: batch-major logits; axis 1 is used as the time axis.

    Returns:
        Scalar tensor: mean of the per-example CTC losses.
    """
    logit_shape = tf.shape(logits)
    batch_size, num_frames = logit_shape[0], logit_shape[1]

    # Label length = number of non-padding entries in each row.
    is_real_token = tf.cast(labels != pad_token_idx, tf.int32)
    label_length = tf.reduce_sum(is_real_token, axis=-1)
    # Every example is treated as using the full time axis.
    logit_length = tf.ones(batch_size, dtype=tf.int32) * num_frames

    per_example = tf.nn.ctc_loss(
        labels=labels,
        logits=logits,
        label_length=label_length,
        logit_length=logit_length,
        blank_index=pad_token_idx,
        logits_time_major=False,
    )
    return tf.reduce_mean(per_example)

In [None]:
"""inp = tf.keras.Input(INPUT_SHAPE)

x = tf.keras.layers.Masking(mask_value=0.0)(inp)
x = tf.keras.layers.Dense(dim, use_bias=False,name='stem_conv')(x) + positional_encoding(INPUT_SHAPE[0], dim)
pe = tf.cast(positional_encoding(INPUT_SHAPE[0], dim), dtype=x.dtype)
x = x + pe
x = tf.keras.layers.BatchNormalization(momentum=0.95,name='stem_bn')(x)

for i in range(NUM_BLOCKS):
    x = Conv1DBlock(dim, 11, drop_rate=DROP_RATE)(x)
    x = Conv1DBlock(dim,  5, drop_rate=DROP_RATE)(x)
    x = Conv1DBlock(dim,  3, drop_rate=DROP_RATE)(x)
    x = TransformerBlock(dim, expand=2)(x)


x = tf.keras.layers.Dense(dim*2,activation='relu',name='top_conv')(x)
x = tf.keras.layers.Dropout(0.8)(x)
x = tf.keras.layers.Dense(len(char_to_num))(x)

model = tf.keras.Model(inp, x)

loss = CTCLoss

# Adam Optimizer
optimizer = tfa.optimizers.RectifiedAdam(sma_threshold=4)
optimizer = tfa.optimizers.Lookahead(optimizer, sync_period=5)

model.compile(loss=loss, optimizer=optimizer)

return model"""

In [None]:
# Scratch check: print six values starting at 1 and decreasing by k each time.
k = 0.059
j = 1
for i in range(6):
    print(j)
    j -= k

In [None]:
# Scratch arithmetic; presumably 92 selected landmarks x 2 axes -- TODO confirm.
92*2

In [None]:
# Shape of one preprocessed example; `example` is created in an earlier cell.
INPUT_SHAPE=example[0].shape

DROP_RATE=0.4  # drop_rate handed to every Conv1DBlock in get_model
NUM_BLOCKS=3  # number of (3 x Conv1DBlock + TransformerBlock) stages in get_model


def get_model(dim = 384):
    """Build and compile the Conv1D + transformer network for CTC training.

    The input shape is the notebook-level `INPUT_SHAPE`. The network has a
    dense stem with added positional encoding and batch norm, `NUM_BLOCKS`
    stages of three `Conv1DBlock`s (kernel sizes 11, 5, 3) plus one
    `TransformerBlock`, and a dense head with `len(char_to_num)` outputs.

    Args:
        dim: channel width used throughout the network.

    Returns:
        Model compiled with `CTCLoss` and Lookahead(RectifiedAdam).
    """
    inputs = tf.keras.Input(INPUT_SHAPE)

    # Stem: mask all-zero frames, project to `dim`, add positions, normalise.
    features = tf.keras.layers.Masking(mask_value=0.0)(inputs)
    features = tf.keras.layers.Dense(dim, use_bias=False, name='stem_conv')(features)
    position_table = tf.cast(positional_encoding(INPUT_SHAPE[0], dim), dtype=features.dtype)
    features = features + position_table
    features = tf.keras.layers.BatchNormalization(momentum=0.95, name='stem_bn')(features)

    # Body: layer creation order is kept so saved checkpoints still line up.
    for _ in range(NUM_BLOCKS):
        for kernel_size in (11, 5, 3):
            features = Conv1DBlock(dim, kernel_size, drop_rate=DROP_RATE)(features)
        features = TransformerBlock(dim, expand=2)(features)

    # Head: per-frame class scores.
    features = tf.keras.layers.Dense(dim*2, activation='relu', name='top_conv')(features)
    features = tf.keras.layers.Dropout(0.8)(features)
    logits = tf.keras.layers.Dense(len(char_to_num))(features)

    network = tf.keras.Model(inputs, logits)

    # Rectified Adam wrapped in Lookahead.
    base_optimizer = tfa.optimizers.RectifiedAdam(sma_threshold=4)
    wrapped_optimizer = tfa.optimizers.Lookahead(base_optimizer, sync_period=5)
    network.compile(loss=CTCLoss, optimizer=wrapped_optimizer)

    return network

# Reset Keras' global state before building the networks.
tf.keras.backend.clear_session()

# Three identically built networks; checkpoints are loaded into them in a later cell.
# Each is run once on `batch[0]` (a batch produced by an earlier cell).
model1 = get_model()
model1(batch[0])

model2 = get_model()
model2(batch[0])

model3 = get_model()
model3(batch[0])

# Fourth network: later receives the averaged weights and is the one summarised here.
model = get_model()
model(batch[0])
model.summary()

In [None]:
def num_to_char_fn(y):
    """Translate every id in `y` through `num_to_char`; unknown ids become ""."""
    chars = []
    for token_id in y:
        chars.append(num_to_char.get(token_id, ""))
    return chars

@tf.function()
def decode_phrase(pred):
    """Greedy CTC decoding of a single sequence of per-frame class scores.

    Takes the argmax of every frame, collapses each run of identical
    consecutive ids to one id, then removes the blank id `pad_token_idx`.

    The last frame is treated as the end of its run. Previously only frames
    that differ from their successor were kept, so the final run was always
    dropped (ids `[a, a, b, b]` decoded to `[a]` instead of `[a, b]`).

    Args:
        pred: 2-D tensor with one row of class scores per frame.

    Returns:
        1-D int64 tensor of decoded ids; may be empty.
    """
    x = tf.argmax(pred, axis=1)
    # A frame ends a run when the following frame carries a different id.
    diff = tf.not_equal(x[:-1], x[1:])
    # The final frame always ends its run. argmax ids are never negative, so
    # this comparison is all-True with the shape of x[-1:] (empty for empty
    # input, which keeps the empty case valid).
    last_ends_run = tf.not_equal(x[-1:], -1)
    run_ends = tf.concat([diff, last_ends_run], axis=0)
    adjacent_indices = tf.where(run_ends)[:, 0]
    x = tf.gather(x, adjacent_indices)
    # Drop CTC blanks after collapsing the runs.
    mask = x != pad_token_idx
    x = tf.boolean_mask(x, mask, axis=0)
    return x

# A utility function to decode the output of the network
def decode_batch_predictions(pred):
    """Decode each element of `pred` with `decode_phrase` and join its characters.

    Args:
        pred: iterable of per-sequence score tensors (e.g. one batch of outputs).

    Returns:
        List with one decoded string per element of `pred`.
    """
    return [
        "".join(num_to_char_fn(decode_phrase(sample).numpy()))
        for sample in pred
    ]

In [None]:
# Character map from the competition data and its inverse (index -> character).
with open ("/kaggle/input/asl-fingerspelling/character_to_prediction_index.json", "r") as f:
    character_map = json.load(f)
rev_character_map = {index: char for char, index in character_map.items()}

# A callback class to output a few transcriptions during training
class CallbackEval(tf.keras.callbacks.Callback):
    """Saves a checkpoint and prints transcriptions plus edit-distance totals.

    At the end of every epoch the weights are saved to `model<epoch+1>.h5`,
    every batch of `dataset` is decoded, up to 32 target/prediction pairs are
    printed, and the summed and normalized Levenshtein scores are reported.

    Args:
        dataset: iterable of `(features, labels)` batches to evaluate on.
    """

    def __init__(self, dataset):
        super().__init__()
        self.dataset = dataset

    def on_epoch_end(self, epoch: int, logs=None):
        # Use the model attached to the callback when there is one; when this
        # hook is called by hand none is attached, so fall back to the
        # notebook-level `model`.
        net = getattr(self, "model", None)
        if net is None:
            net = model

        net.save_weights("model"+str(epoch+1)+".h5")

        predictions = []
        targets = []
        for batch in self.dataset:
            X, y = batch
            predictions.extend(decode_batch_predictions(net(X)))
            for label in y:
                # Ids missing from the character map (e.g. padding) are dropped.
                targets.append("".join([rev_character_map.get(int(s),"") for s in label]))

        # One edit distance per sample, reused for both the listing and the totals.
        distances = [lev.distance(t, p) for t, p in zip(targets, predictions)]

        print("-" * 100)
        # Show at most 32 samples; fewer when the dataset is smaller.
        for target, prediction, dist in list(zip(targets, predictions, distances))[:32]:
            print(f"Target    : {target}")
            print(f"Prediction: {prediction}, len: {len(prediction)}, LEV: {dist}")
            print("")

        total_chars = sum(len(t) for t in targets)
        total_dist = sum(distances)
        # No target characters means the normalized score is undefined.
        normalized = (total_chars - total_dist)/total_chars if total_chars else float("nan")

        print("")
        print("LEVDIST Total:",total_dist)
        print("LEVDIST Normalized:",normalized)
        print("")

# Callback function to check transcription on the val set.
# Only the first batch of `val_dataset` is used (`take(1)`).
validation_callback = CallbackEval(val_dataset.take(1))

In [None]:
N_EPOCHS = 600  # length of LR_SCHEDULE and default num_training_steps of lrfn
N_WARMUP_EPOCHS = 10  # number of warmup steps passed to lrfn
LR_MAX = 1e-3  # peak learning rate passed to lrfn
WD_RATIO = 0.05  # default wd_ratio of WeightDecayCallback
WARMUP_METHOD = "exp"  # 'log' selects the base-10 warmup in lrfn, anything else the base-2 warmup

In [None]:
def lrfn(current_step, num_warmup_steps, lr_max, num_cycles=0.50, num_training_steps=N_EPOCHS):
    """Learning rate for `current_step`: exponential warmup, then cosine decay.

    Args:
        current_step: zero-based step (epoch) index.
        num_warmup_steps: steps before the cosine phase starts.
        lr_max: learning rate reached at the end of warmup.
        num_cycles: number of cosine cycles over the decay phase.
        num_training_steps: total number of steps of the schedule.

    Returns:
        The learning rate as a float.
    """
    if current_step >= num_warmup_steps:
        # Cosine phase, clamped at zero.
        decay_steps = float(max(1, num_training_steps - num_warmup_steps))
        progress = float(current_step - num_warmup_steps) / decay_steps
        cosine = 0.5 * (1.0 + math.cos(math.pi * float(num_cycles) * 2.0 * progress))
        return max(0.0, cosine) * lr_max

    # Warmup phase: grow by a factor of 10 ('log') or 2 per step up to lr_max.
    steps_left = num_warmup_steps - current_step
    if WARMUP_METHOD == 'log':
        return lr_max * 0.10 ** steps_left
    return lr_max * 2 ** -steps_left
    
def plot_lr_schedule(lr_schedule, epochs):
    """Plot a learning-rate schedule and annotate selected epochs.

    Every epoch is annotated when `epochs <= 40`; otherwise every fifth entry
    and the last epoch are.

    Fixes relative to the previous version: the last-epoch test used `is` on
    integers, which is an identity check and is unreliable for large values,
    so the final point was not annotated; the first entry was compared with
    `lr_schedule[-1]` through index wrap-around; the loop variable shadowed
    the tick array.

    Args:
        lr_schedule: non-empty list of learning rates, one per epoch (a list,
            because it is concatenated with `[None]` for plotting).
        epochs: number of epochs shown on the x axis.
    """
    plt.figure(figsize=(20, 10))
    # Pad with None on both sides so entry i of the schedule is drawn at x = i + 1.
    plt.plot([None] + lr_schedule + [None])
    # X Labels
    ticks = np.arange(1, epochs + 1)
    tick_labels = [i if epochs <= 40 or i % 5 == 0 or i == 1 else None for i in range(1, epochs + 1)]
    plt.xlim([1, epochs])
    plt.xticks(ticks, tick_labels) # set tick step to 1 and let x axis start at 1

    lr_high = max(lr_schedule)
    lr_low = min(lr_schedule)

    # Increase y-limit for better readability
    plt.ylim([0, lr_high * 1.1])

    # Title
    schedule_info = f'start: {lr_schedule[0]:.1E}, max: {lr_high:.1E}, final: {lr_schedule[-1]:.1E}'
    plt.title(f'Step Learning Rate Schedule, {schedule_info}', size=18, pad=12)

    # Plot Learning Rates
    last_idx = len(lr_schedule) - 1
    offset_y = (lr_high - lr_low) * 0.02
    for idx, val in enumerate(lr_schedule):
        if epochs <= 40 or idx % 5 == 0 or idx == epochs - 1:
            if idx == 0:
                # The first point has no predecessor to compare with.
                ha = 'right'
            elif idx < last_idx and lr_schedule[idx - 1] < val:
                # Rising segment: put the label left of the marker.
                ha = 'right'
            else:
                ha = 'left'
            plt.plot(idx + 1, val, 'o', color='black');
            plt.annotate(f'{val:.1E}', xy=(idx + 1, val + offset_y), size=12, ha=ha)

    plt.xlabel('Epoch', size=16, labelpad=5)
    plt.ylabel('Learning Rate', size=16, labelpad=5)
    plt.grid()
    plt.show()

# Learning rate for encoder
LR_SCHEDULE = [
    lrfn(epoch, num_warmup_steps=N_WARMUP_EPOCHS, lr_max=LR_MAX, num_cycles=0.50)
    for epoch in range(N_EPOCHS)
]
# Plot Learning Rate Schedule
plot_lr_schedule(LR_SCHEDULE, epochs=N_EPOCHS)
# Learning Rate Callback: looks up the precomputed value for each epoch index
lr_callback = tf.keras.callbacks.LearningRateScheduler(lambda epoch: LR_SCHEDULE[epoch], verbose=0)

# Custom callback to update weight decay with learning rate
class WeightDecayCallback(tf.keras.callbacks.Callback):
    """Sets the optimizer's weight decay to `learning_rate * wd_ratio` each epoch.

    Args:
        wd_ratio: factor applied to the current learning rate.
    """

    def __init__(self, wd_ratio=WD_RATIO):
        # Initialise the Keras base class so its bookkeeping attributes exist.
        super().__init__()
        self.step_counter = 0  # kept for compatibility; not read by this class
        self.wd_ratio = wd_ratio

    def on_epoch_begin(self, epoch, logs=None):
        # Use the model attached to the callback when there is one, otherwise
        # the notebook-level `model`.
        net = getattr(self, "model", None)
        if net is None:
            net = model
        optimizer = net.optimizer
        optimizer.weight_decay = optimizer.learning_rate * self.wd_ratio
        print(f'learning rate: {optimizer.learning_rate.numpy():.2e}, weight decay: {optimizer.weight_decay.numpy():.2e}')

In [None]:
class earylstopatepoch(tf.keras.callbacks.Callback):
    """Stops training once a given 1-based epoch number has been completed.

    Args:
        stop_epoch: training stops at the end of the first epoch for which
            `epoch + 1 >= stop_epoch`. Defaults to 500.
    """

    def __init__(self, stop_epoch=500):
        super().__init__()
        self.stop_epoch = stop_epoch

    # `logs` defaults to None rather than a shared mutable dict.
    def on_epoch_end(self, epoch, logs=None):
        if epoch+1>=self.stop_epoch:
            self.model.stop_training = True
esep=earylstopatepoch()

In [None]:
# Load three checkpoints into the identically built networks.
model1.load_weights("/kaggle/input/xy-128frame-supplemental/model880.h5")
model2.load_weights("/kaggle/input/xy-128frame-supplemental/model900.h5")
model3.load_weights("/kaggle/input/xy-128frame-supplemental/model700.h5")

models = [model1,model2,model3]

weights = [model_t.get_weights() for model_t in models]

# Element-wise mean of each weight array across the checkpoints. Stacking and
# reducing over the new leading axis also works for 0-d weights, which the
# earlier row-by-row zip could not iterate.
new_weights = [
    np.mean(np.stack(layer_weights), axis=0)
    for layer_weights in zip(*weights)
]

model.set_weights(new_weights)
# Manual evaluation of the averaged model (epoch argument 0, logs argument 1).
validation_callback.on_epoch_end(0,1)
models=[model]

In [None]:
"""path="/kaggle/input/xy-128frame-supplemental/model900.h5"
model.load_weights(path)
models=[model]
validation_callback.on_epoch_end(0,1)"""

In [None]:
"""history = model.fit(
    train_dataset,
    validation_data=val_dataset,
    epochs=N_EPOCHS,
    initial_epoch=150,
    callbacks=[
        validation_callback,
        lr_callback,
        WeightDecayCallback(),
        esep
    ]
)
models=[model]"""

In [None]:
@tf.function()
def pre_process1xyz(lip, rhand, lhand, rpose, lpose):
    """Standardise, resize/pad and flatten the five landmark groups.

    Each group is shifted and scaled with its notebook-level statistics
    (`LIPM`/`LIPS`, `RHM`/`RHS`, `LHM`/`LHS`, `RPM`/`RPS`, `LPM`/`LPS`) and
    passed through `resize_pad`. The groups are concatenated along axis 1,
    the last two axes are merged and NaNs are replaced by 0.

    Returns:
        2-D tensor with one flattened feature row per entry of axis 0.
    """
    lip_n = resize_pad((lip - LIPM) / LIPS)
    rhand_n = resize_pad((rhand - RHM) / RHS)
    lhand_n = resize_pad((lhand - LHM) / LHS)
    rpose_n = resize_pad((rpose - RPM) / RPS)
    lpose_n = resize_pad((lpose - LPM) / LPS)

    stacked = tf.concat([lip_n, rhand_n, lhand_n, rpose_n, lpose_n], axis=1)
    dims = tf.shape(stacked)
    # Merge the last two axes into a single feature axis.
    flat = tf.reshape(stacked, (dims[0], dims[1]*dims[2]))
    return tf.where(tf.math.is_nan(flat), 0.0, flat)

# Same values as the earlier configuration cell, re-assigned here.
INPUT_SHAPE=example[0].shape

DROP_RATE=0.4  # drop_rate handed to every Conv1DBlock in get_model
NUM_BLOCKS=3  # number of (3 x Conv1DBlock + TransformerBlock) stages in get_model


def get_model(dim = 384, input_shape=(128, 276)):
    """Build and compile the xyz variant of the Conv1D + transformer network.

    NOTE: this definition replaces any `get_model` defined earlier in the
    notebook once the cell has run.

    The input layer and the positional-encoding table are both sized from
    `input_shape`. Previously the input was hard-coded to (128, 276) while
    the table length came from the global `INPUT_SHAPE[0]`, which only worked
    while the two happened to agree.

    Args:
        dim: channel width used throughout the network.
        input_shape: (frames, features) of one example; defaults to (128, 276).

    Returns:
        Model compiled with `CTCLoss` and Lookahead(RectifiedAdam).
    """
    inp = tf.keras.Input(input_shape)

    # Stem: mask all-zero frames, project to `dim`, add positions, normalise.
    x = tf.keras.layers.Masking(mask_value=0.0)(inp)
    x = tf.keras.layers.Dense(dim, use_bias=False, name='stem_conv')(x)

    # One positional row per input frame.
    pe = tf.cast(positional_encoding(input_shape[0], dim), dtype=x.dtype)
    x = x + pe

    x = tf.keras.layers.BatchNormalization(momentum=0.95,name='stem_bn')(x)

    for i in range(NUM_BLOCKS):
        x = Conv1DBlock(dim, 11, drop_rate=DROP_RATE)(x)
        x = Conv1DBlock(dim,  5, drop_rate=DROP_RATE)(x)
        x = Conv1DBlock(dim,  3, drop_rate=DROP_RATE)(x)
        x = TransformerBlock(dim, expand=2)(x)

    # Head: per-frame class scores.
    x = tf.keras.layers.Dense(dim*2,activation='relu',name='top_conv')(x)
    x = tf.keras.layers.Dropout(0.8)(x)
    x = tf.keras.layers.Dense(len(char_to_num))(x)

    model = tf.keras.Model(inp, x)

    loss = CTCLoss

    # Rectified Adam wrapped in Lookahead.
    optimizer = tfa.optimizers.RectifiedAdam(sma_threshold=4)
    optimizer = tfa.optimizers.Lookahead(optimizer, sync_period=5)

    model.compile(loss=loss, optimizer=optimizer)

    return model

# Reset Keras' global state before building the xyz networks.
tf.keras.backend.clear_session()

# Random input with the xyz feature layout, used to run each network once.
b=np.random.random((1,128,276))

modelxyz_1 = get_model()
modelxyz_1(b)

modelxyz_2 = get_model()
modelxyz_2(b)


modelxyz = get_model()
modelxyz(b)
modelxyz.summary()

modelxyz_1.load_weights("/kaggle/input/xy-128frame-supplemental/xyzmodel650.h5")

modelxyz_2.load_weights("/kaggle/input/xy-128frame-supplemental/xyzmodel880.h5")

#model3.load_weights("/kaggle/input/xy-128frame-supplemental/model700.h5")

models = [modelxyz_1,modelxyz_2]

weights = [model_t.get_weights() for model_t in models]

# Element-wise mean of each weight array across the checkpoints. Stacking and
# reducing over the new leading axis also works for 0-d weights, which the
# earlier row-by-row zip could not iterate.
new_weights = [
    np.mean(np.stack(layer_weights), axis=0)
    for layer_weights in zip(*weights)
]

modelxyz.set_weights(new_weights)

# Final ensemble: the previously prepared `model` first, the averaged xyz model second.
models=[model,modelxyz]

In [None]:
# Sanity check: show which model objects make up the ensemble list.
print(models)

In [None]:
class TFLiteModel(tf.Module):
    """Inference wrapper: raw landmark frames in, one-hot decoded character ids out.

    `model` must be indexable: `self.models[0]` is fed the `pre_process1`
    features and `self.models[1]` the `pre_process1xyz` features.
    """
    def __init__(self, model):
        super(TFLiteModel, self).__init__()
        self.models = model
    
    @tf.function(input_signature=[tf.TensorSpec(shape=[None, len(SEL_COLS)], dtype=tf.float32, name='inputs')])
    def __call__(self, inputs, training=False):
        """Decode one sequence of frames.

        Args:
            inputs: float32 tensor of shape (frames, len(SEL_COLS)).
            training: not used in the body.

        Returns:
            Dict with key 'outputs': one-hot rows (depth 59) of the decoded ids.
        """
        # Preprocess Data
        x = tf.cast(inputs, tf.float32)
        x = x[None]
        # A zero-frame input is replaced by a single all-zero frame.
        x = tf.cond(tf.shape(x)[1] == 0, lambda: tf.zeros((1, 1, len(SEL_COLS))), lambda: tf.identity(x))
        x = x[0]
        # The result of pre_process0 is unpacked as the arguments of both feature functions.
        x = pre_process0(x)
        
        x1 = pre_process1(*x)
        x2 = pre_process1xyz(*x)
        
        # Fix the static shapes and add a batch axis of size 1.
        x1 = tf.reshape(x1, INPUT_SHAPE)
        x1 = x1[None]
        
        x2 = tf.reshape(x2, (128,276))
        x2 = x2[None]
        
        x1  =self.models[0](x1)
        x2  =self.models[1](x2)
        
        # Stack both outputs along the batch axis ...
        x = tf.concat([x1,x2],axis=0)
        
    
        # ... and keep the element-wise maximum of the two models' scores.
        x = tf.reduce_max(x, axis=0, keepdims=True)
        
        
        x = x[0]
        x = decode_phrase(x)
        # An empty decoding is replaced by a single id 0 so the output is never empty.
        x = tf.cond(tf.shape(x)[0] == 0, lambda: tf.zeros(1, tf.int64), lambda: tf.identity(x))
        x = tf.one_hot(x, 59)
        return {'outputs': x}

# `models` is the two-element ensemble list assembled in an earlier cell.
tflitemodel_base = TFLiteModel(models)

In [None]:
# Distinct parquet file ids; the first 5% of them are used as the evaluation split.
pqfiles = df.file_id.unique()
val_len = int(0.05 * len(pqfiles))
def create_data_gen(file_ids, y_mul=1):
    """Build a generator function that yields `(frames, phrase)` pairs.

    For every file id the parquet file `{inpdir}/{file_id}.parquet` is read
    with `load_relevant_data_subset` and the sequences listed for that file
    in `df` are emitted one by one.

    Args:
        file_ids: iterable of file ids to read.
        y_mul: a sequence is kept only when `y_mul * len(phrase)` is strictly
            smaller than the larger of the right-hand and left-hand counts of
            frames whose hand x-coordinates contain no NaN.

    Returns:
        Zero-argument generator function yielding `(ndarray, str)` tuples.
    """
    def gen():
        for file_id in file_ids:
            pqfile = f"{inpdir}/{file_id}.parquet"
            seq_refs = df.loc[df.file_id == file_id]
            seqs = load_relevant_data_subset(pqfile)

            # Take the phrase from the per-file rows instead of rescanning the
            # whole `df` for every sequence id.
            for seq_id, phrase in zip(seq_refs.sequence_id, seq_refs.phrase):
                x = seqs.iloc[seqs.index == seq_id].to_numpy()
                y = str(phrase)

                # Frames in which every x-coordinate of the hand is present.
                r_nonan = np.sum(np.sum(np.isnan(x[:, RHAND_IDX_X]), axis = 1) == 0)
                l_nonan = np.sum(np.sum(np.isnan(x[:, LHAND_IDX_X]), axis = 1) == 0)
                no_nan = max(r_nonan, l_nonan)

                if y_mul*len(y)<no_nan:
                    yield x, y
    return gen

# Evaluation stream over the first `val_len` files; y_mul=0 keeps every
# sequence that has at least one frame with a complete hand.
test_dataset = tf.data.Dataset.from_generator(
    create_data_gen(pqfiles[:val_len], 0),
    output_signature=(
        tf.TensorSpec(shape=(None, len(SEL_COLS)), dtype=tf.float32),
        tf.TensorSpec(shape=(), dtype=tf.string),
    ),
).prefetch(buffer_size=2000)

# Grab the first sample after skipping 100 and run the wrapper on it once.
for frame, target in test_dataset.skip(100).take(10):
    break
tflitemodel_base(frame)

In [None]:
# Convert the inference wrapper to a TFLite flatbuffer.
keras_model_converter = tf.lite.TFLiteConverter.from_keras_model(tflitemodel_base)
# Only TFLite builtin ops are allowed; the TF-select fallback is left disabled.
keras_model_converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS]#, tf.lite.OpsSet.SELECT_TF_OPS]
# Default optimizations with float16 as the supported type.
keras_model_converter.optimizations = [tf.lite.Optimize.DEFAULT]
keras_model_converter.target_spec.supported_types = [tf.float16]
tflite_model = keras_model_converter.convert()
with open('model.tflite', 'wb') as f:
    f.write(tflite_model)
    
# Record the input columns the model expects, in order.
with open('inference_args.json', "w") as f:
    json.dump({"selected_columns" : SEL_COLS}, f)
    
# Bundle both files into the submission archive.
!zip submission.zip  './model.tflite' './inference_args.json'

In [None]:
# Re-read the column list from the exported file; this overwrites the global SEL_COLS.
with open ("inference_args.json", "r") as f:
    SEL_COLS = json.load(f)["selected_columns"]
    
def load_relevant_data_subset(pq_path):
    """Read the parquet file at `pq_path`, keeping only the `SEL_COLS` columns."""
    selected = pd.read_parquet(pq_path, columns=SEL_COLS)
    return selected

def create_data_gen(file_ids, y_mul=1):
    """Build a generator function that yields `(frames, phrase)` pairs.

    For every file id the parquet file `{inpdir}/{file_id}.parquet` is read
    with `load_relevant_data_subset` and the sequences listed for that file
    in `df` are emitted one by one.

    Args:
        file_ids: iterable of file ids to read.
        y_mul: a sequence is kept only when `y_mul * len(phrase)` is strictly
            smaller than the larger of the right-hand and left-hand counts of
            frames whose hand x-coordinates contain no NaN.

    Returns:
        Zero-argument generator function yielding `(ndarray, str)` tuples.
    """
    def gen():
        for file_id in file_ids:
            pqfile = f"{inpdir}/{file_id}.parquet"
            seq_refs = df.loc[df.file_id == file_id]
            seqs = load_relevant_data_subset(pqfile)

            # Take the phrase from the per-file rows instead of rescanning the
            # whole `df` for every sequence id.
            for seq_id, phrase in zip(seq_refs.sequence_id, seq_refs.phrase):
                x = seqs.iloc[seqs.index == seq_id].to_numpy()
                y = str(phrase)

                # Frames in which every x-coordinate of the hand is present.
                r_nonan = np.sum(np.sum(np.isnan(x[:, RHAND_IDX_X]), axis = 1) == 0)
                l_nonan = np.sum(np.sum(np.isnan(x[:, LHAND_IDX_X]), axis = 1) == 0)
                no_nan = max(r_nonan, l_nonan)

                if y_mul*len(y)<no_nan:
                    yield x, y
    return gen

# Distinct parquet file ids; the first 5% of them form the evaluation split.
pqfiles = df.file_id.unique()
val_len = int(0.05 * len(pqfiles))

# Stream of (frames, phrase) pairs from that split; y_mul=0 is passed to the generator factory.
test_dataset = tf.data.Dataset.from_generator(create_data_gen(pqfiles[:val_len], 0),
    output_signature=(tf.TensorSpec(shape=(None, len(SEL_COLS)), dtype=tf.float32), tf.TensorSpec(shape=(), dtype=tf.string))
).prefetch(buffer_size=2000)

In [None]:
# Load the exported flatbuffer and check it through the TFLite interpreter.
interpreter = tf.lite.Interpreter("model.tflite")

REQUIRED_SIGNATURE = "serving_default"  # signature requested from the interpreter
REQUIRED_OUTPUT = "outputs"  # key of the prediction tensor in the runner's result

with open (BASE_PATH+"character_to_prediction_index.json", "r") as f:
    character_map = json.load(f)
# Inverse map: prediction index -> character.
rev_character_map = {j:i for i,j in character_map.items()}

prediction_fn = interpreter.get_signature_runner(REQUIRED_SIGNATURE)

# Decode ten samples (after skipping 100) and print each next to its target.
for frame, target in test_dataset.skip(100).take(10):
    output = prediction_fn(inputs=frame)
    # argmax over axis 1 turns the one-hot rows back into character ids.
    prediction_str = "".join([rev_character_map.get(s, "") for s in np.argmax(output[REQUIRED_OUTPUT], axis=1)])
    target = target.numpy().decode("utf-8")
    print("pred =", prediction_str, "; target =", target,"; lev =",lev.distance(prediction_str,target))

In [None]:
%%timeit -n 10
# Times one interpreter call on `frame`, the last sample left over from the previous cell.
output = prediction_fn(inputs=frame)

In [None]:
from Levenshtein import distance

# Per-sample score (len(target) - edit distance) / len(target) over up to 1000 samples.
scores = []
j=1  # running number of the progress lines printed below
for i, (frame, target) in tqdm(enumerate(test_dataset.take(1000))):
    output = prediction_fn(inputs=frame)
    # argmax over axis 1 turns the one-hot rows back into character ids.
    prediction_str = "".join([rev_character_map.get(s, "") for s in np.argmax(output[REQUIRED_OUTPUT], axis=1)])
    target = target.numpy().decode("utf-8")
    score = (len(target) - distance(prediction_str, target)) / len(target)
    scores.append(score)
    # Print the running mean every 50 samples.
    if i % 50 == 0:
        print(j," ",np.sum(scores) / len(scores))
        j=j+1
    
scores = np.array(scores)
print("Mean : ",np.sum(scores) / len(scores))