In [None]:
!pip install tensorflow_addons

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [None]:
import tensorflow as tf
from tensorflow import keras
import numpy as np
import pandas as pd
import random
from types import SimpleNamespace
#from wandb.keras import WandbMetricsLogger, WandbModelCheckpoint
import matplotlib.pyplot as plt
import os
import tensorflow_addons as tfa
import math
import tensorflow_probability as tfp


TensorFlow Addons (TFA) has ended development and introduction of new features.
TFA has entered a minimal maintenance and release mode until a planned end of life in May 2024.
Please modify downstream libraries to take dependencies from other repositories in our TensorFlow community (e.g. Keras, Keras-CV, and Keras-NLP). 

For more information see: https://github.com/tensorflow/addons/issues/2807 



In [None]:
def seed_everything(seed=0):
    """Seed every random number generator this notebook draws from.

    Args:
        seed: Integer seed shared by Python's `random`, NumPy and TensorFlow.

    Note:
        `PYTHONHASHSEED` is read at interpreter start-up, so exporting it here
        only influences processes started afterwards, not the running kernel.
    """
    os.environ['PYTHONHASHSEED'] = str(seed)
    random.seed(seed)
    np.random.seed(seed)
    tf.random.set_seed(seed)

seed_everything()

In [None]:
# Try to locate a TPU; a ValueError from the resolver is treated as "no TPU available".
try:
    tpu = tf.distribute.cluster_resolver.TPUClusterResolver()
    print('Running on TPU ', tpu.master())
except ValueError:
    tpu = None

# With a TPU: connect, initialise it and build a TPUStrategy.
# Without one: fall back to whatever strategy is currently active.
if tpu:
    tf.config.experimental_connect_to_cluster(tpu)
    tf.tpu.experimental.initialize_tpu_system(tpu)
    strategy = tf.distribute.TPUStrategy(tpu)
else:
    strategy = tf.distribute.get_strategy()

# AUTO is handed to tf.data calls (parallel reads/maps, prefetch) further down.
AUTO = tf.data.experimental.AUTOTUNE
# REPLICAS scales the batch size and learning rates in the config cell.
REPLICAS = strategy.num_replicas_in_sync
print("REPLICAS: ", REPLICAS)

Running on TPU  grpc://10.71.107.202:8470
REPLICAS:  8


In [None]:
# Experiment configuration, built in one place.
# Batch size and learning rates grow linearly with the number of replicas;
# the precision settings depend on whether a TPU was found.
config = SimpleNamespace(
    project='sign_language_recognition',
    name='islr_transformer_pretraining',
    num_folds=4,
    batch_size=8 * REPLICAS,
    lr_max_ph1=1.25e-5 * REPLICAS,
    lr_max=1.25e-5 * REPLICAS,
    warmup_steps=5,
    rate=0.8,
    max_frames=537,            # encoder inputs are truncated/padded to this many frames
    embed_dim=384,
    num_heads=4,
    num_blocks=1,
    attention_dropout=0.1,
    dropout=0.2,
    embed_dropout=0.3,
    label_smoothing=0.0,
    phase1_epochs=100,
    epochs=100,
    wandb=True,
    wd_ratio=0.01,
    mixed_dtype='mixed_bfloat16' if tpu else 'mixed_float16',
    dtype=tf.bfloat16 if tpu else tf.float16,
)

In [None]:
# Activate the mixed-precision policy selected in `config.mixed_dtype`
# ('mixed_bfloat16' when a TPU was found, 'mixed_float16' otherwise).
keras.mixed_precision.set_global_policy(config.mixed_dtype)

In [None]:
# Cloud Storage location that is globbed for `*.tfrec` shards in the next cell.
GCP = 'gs://kds-43ba30170ce0a373bcf241ac6c08eac590e0bd136768facb719906d2'

GCP

'gs://kds-43ba30170ce0a373bcf241ac6c08eac590e0bd136768facb719906d2'

In [None]:
# Collect every TFRecord shard under GCP; the bare expression displays how many were found.
files = tf.io.gfile.glob(f'{GCP}/*.tfrec')
len(files)

90

In [None]:
# Index tables into the 543-landmark axis of a frame tensor.
# REYE, LEYE, NOSE, SLIP and SPOSE are consumed by the decode functions through
# tf.gather(frames, <table>, axis=1). Together with the two 21-point hand slices they
# give 20 + 16 + 16 + 4 + 21 + 21 (+ 8 pose) = 98 (106) landmarks per frame.
# NOTE(review): the anatomical meaning of each index is implied by the names only —
# confirm against the landmark extractor that produced the records.
REYE = [
    33, 7, 163, 144, 145, 153, 154, 155, 133,
    246, 161, 160, 159, 158, 157, 173,
]
LEYE = [
    263, 249, 390, 373, 374, 380, 381, 382, 362,
    466, 388, 387, 386, 385, 384, 398,
]
NOSE=[
    1,2,98,327
]
SLIP = [
    78, 95, 88, 178, 87, 14, 317, 402, 318, 324, 308,
    191, 80, 81, 82, 13, 312, 311, 310, 415,
]
# Pose indices are written relative to the pose block and shifted by 489,
# giving [500, 502, 504, 501, 503, 505, 512, 513].
SPOSE = (np.array([
    11,13,15,12,14,16,23,24,
])+489).tolist()


# The three tensors below are not referenced anywhere in the visible part of the notebook.
# lip_landmarks: 40 indices; its last 20 entries are the same set as SLIP.
lip_landmarks = tf.constant([61, 185, 40, 39, 37,  0, 267, 269, 270, 409,
                 291,146, 91,181, 84, 17, 314, 405, 321, 375, 
                 78, 191, 80, 81, 82, 13, 312, 311, 310, 415, 
                 95, 88, 178, 87, 14,317, 402, 318, 324, 308])
# spose: the same eight values as SPOSE, listed in a different order.
spose = tf.constant([
            504, 502, 500, 501, 503, 505, 512, 513])
# triu_index: the values equal r*21 + c for 0 <= r < c <= 20, i.e. the strictly
# upper-triangular positions of a row-major flattened 21x21 matrix.
# Presumably used to pick unique pairs from pairwise_distance(..., num_points=21) — TODO confirm.
triu_index = tf.constant([
            1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13,
            14, 15, 16, 17, 18, 19, 20, 23, 24, 25, 26, 27, 28,
            29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41,
            45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57,
            58, 59, 60, 61, 62, 67, 68, 69, 70, 71, 72, 73, 74,
            75, 76, 77, 78, 79, 80, 81, 82, 83, 89, 90, 91, 92,
            93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 111,
            112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124,
            125, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144,
            145, 146, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, 165,
            166, 167, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187,
            188, 199, 200, 201, 202, 203, 204, 205, 206, 207, 208, 209, 221,
            222, 223, 224, 225, 226, 227, 228, 229, 230, 243, 244, 245, 246,
            247, 248, 249, 250, 251, 265, 266, 267, 268, 269, 270, 271, 272,
            287, 288, 289, 290, 291, 292, 293, 309, 310, 311, 312, 313, 314,
            331, 332, 333, 334, 335, 353, 354, 355, 356, 375, 376, 377, 397,
            398, 419,
        ])

In [None]:
def do_hflip_hand(lhand, rhand):
    """Mirror both hands along x and exchange their roles.

    Args:
        lhand: Rank-3 tensor whose last axis holds (x, y).
        rhand: Rank-3 tensor whose last axis holds (x, y).

    Returns:
        `(new_left, new_right)`: the mirrored right hand comes first and the
        mirrored left hand second, because a mirror swaps the two sides.
    """
    mirrored_left = tf.stack([-lhand[:, :, 0], lhand[:, :, 1]], axis=-1)
    mirrored_right = tf.stack([-rhand[:, :, 0], rhand[:, :, 1]], axis=-1)
    return mirrored_right, mirrored_left

def do_hflip_spose(spose):
    """Negate the x coordinate of the pose landmarks.

    The landmark order is kept as is; left/right pose points are not exchanged.

    Args:
        spose: Rank-3 tensor whose last axis holds (x, y).

    Returns:
        Tensor of the same layout with x multiplied by -1.
    """
    x_coord, y_coord = spose[:, :, 0], spose[:, :, 1]
    return tf.stack([-x_coord, y_coord], axis=-1)

def do_hflip_slip(slip):
    """Negate the x coordinate of the lip landmarks.

    The landmark order is kept as is; no left/right re-indexing is done.

    Args:
        slip: Rank-3 tensor whose last axis holds (x, y).

    Returns:
        Tensor of the same layout with x multiplied by -1.
    """
    x_coord, y_coord = slip[:, :, 0], slip[:, :, 1]
    return tf.stack([-x_coord, y_coord], axis=-1)

def do_hflip_eye(leye, reye):
    """Mirror both eyes along x and exchange their roles.

    Args:
        leye: Rank-3 tensor whose last axis holds (x, y).
        reye: Rank-3 tensor whose last axis holds (x, y).

    Returns:
        `(new_left, new_right)`: the mirrored right eye first, the mirrored
        left eye second.
    """
    mirrored_left = tf.stack([-leye[:, :, 0], leye[:, :, 1]], axis=-1)
    mirrored_right = tf.stack([-reye[:, :, 0], reye[:, :, 1]], axis=-1)
    return mirrored_right, mirrored_left
def do_hflip_nose(nose):
    """Negate the x coordinate of the nose landmarks.

    Args:
        nose: Rank-3 tensor whose last axis holds (x, y).

    Returns:
        Tensor of the same layout with x multiplied by -1.
    """
    x_coord, y_coord = nose[:, :, 0], nose[:, :, 1]
    return tf.stack([-x_coord, y_coord], axis=-1)

In [None]:
AUTO = tf.data.AUTOTUNE

def decode_tfrecord(tfrecord, train=True):
    """Decode one serialized example into encoder/decoder inputs and letter targets.

    The landmark tensor is standardised, optionally augmented (only when
    `train` is true) and flattened to `(frames, 106 * 4)`: x/y positions of 106
    selected landmarks followed by their frame-to-frame differences.

    All random draws use `tf.random`, so they are re-sampled for every example.
    Python's `random` module would be evaluated a single time while the
    `tf.data` map function is traced, turning the "random" factor into a constant.

    Branches guarded by `> 1.0` are switched off: `tf.random.uniform` samples
    from [0, 1), so the comparison is never true. Lower the threshold to enable one.

    Args:
        tfrecord: Scalar string tensor holding one serialized example.
        train: Python bool; augmentations are only applied when true.

    Returns:
        `(inputs, targets)` where `inputs` is a dict with `'encoder_input'`
        (float32, `(frames, 424)`, at most `config.max_frames` rows; NaN values
        are not removed here) and `'decoder_input'` (int32 ids with 28
        prepended), and `targets` is `(ids with 26 appended, first participant id)`.
    """
    feature_dict = {
        'frames': tf.io.FixedLenFeature([], dtype=tf.string),
        'mean': tf.io.FixedLenFeature([], dtype=tf.string),
        'std': tf.io.FixedLenFeature([], dtype=tf.string),
        'latters': tf.io.FixedLenSequenceFeature([], dtype=tf.int64, allow_missing=True),
        'participant': tf.io.FixedLenSequenceFeature([], dtype=tf.int64, allow_missing=True),
        'sign': tf.io.FixedLenFeature(shape=[], dtype=tf.int64)
    }

    record = tf.io.parse_single_example(tfrecord, features=feature_dict)
    frames = tf.io.parse_tensor(record['frames'], out_type=tf.float64)
    frames = tf.cast(frames, tf.float32)

    # (frames, 543 landmarks, 3 coordinates) -> keep the first two coordinates.
    frames = tf.reshape(frames, [-1, 543, 3])
    frames = frames[:, :, :2]

    # Disabled: random per-frame shift of all landmarks.
    if tf.random.uniform([], minval=0, maxval=1) > 1.0 and train:
        frames = frames + tf.random.uniform([tf.shape(frames)[0], 1, 2], -0.05, 0.05)

    # Standardise with the mean/std of every non-NaN coordinate of the clip.
    not_nan = frames[~tf.math.is_nan(frames)]
    frames = frames - tf.reduce_mean(not_nan, axis=0, keepdims=True)
    frames = frames / tf.math.reduce_std(not_nan, axis=0, keepdims=True)

    # Temporal resampling by a factor in [0.5, 1.5). At least two frames are
    # required so the interpolation grid is non-degenerate and the new length is >= 1.
    if tf.random.uniform([], minval=0, maxval=1) > 0.5 and train and tf.shape(frames)[0] > 1:
        speed = tf.random.uniform([], 0.5, 1.5)
        last_index = tf.cast(tf.shape(frames)[0] - 1, tf.float32)
        new_length = tf.cast(tf.cast(tf.shape(frames)[0], tf.float32) * speed, tf.int32)
        frames = tfp.math.interp_regular_1d_grid(
            tf.linspace(0., last_index, new_length),
            0., last_index, frames,
            axis=0)

    # Trim 10% of the frames from both ends.
    if tf.shape(frames)[0] > 9:
        if tf.random.uniform([], minval=0, maxval=1) > 0.5 and train:
            margin = tf.cast(tf.cast(tf.shape(frames)[0], tf.float32) * 0.1, tf.int32)
            frames = frames[margin:-margin]

    # Rarely (1%) keep only the first half of the clip.
    if tf.shape(frames)[0] > 9:
        if tf.random.uniform([], minval=0, maxval=1) > 0.99 and train:
            half = tf.cast(tf.cast(tf.shape(frames)[0], tf.float32) * 0.5, tf.int32)
            frames = frames[:-half]

    lip = tf.gather(frames, SLIP, axis=1)
    right_hands = frames[:, 468:489]
    left_hands = frames[:, 522:543]
    pose = tf.gather(frames, SPOSE, axis=1)
    leye = tf.gather(frames, LEYE, axis=1)
    reye = tf.gather(frames, REYE, axis=1)
    nose = tf.gather(frames, NOSE, axis=1)

    # Replace about 1% of the individual hand coordinates with NaN.
    if tf.random.uniform([], minval=0, maxval=1) > 0.5 and train:
        nan = tf.zeros_like(left_hands) / 0
        prob = tf.random.uniform(tf.shape(left_hands), 0., 1.)
        left_hands = tf.where(prob < 0.01, nan, left_hands)
        nan = tf.zeros_like(right_hands) / 0
        prob = tf.random.uniform(tf.shape(right_hands), 0., 1.)
        right_hands = tf.where(prob < 0.01, nan, right_hands)

    # Horizontal mirror of every landmark group.
    if tf.random.uniform([], minval=0, maxval=1) > 0.5 and train:
        left_hands, right_hands = do_hflip_hand(left_hands, right_hands)
        pose = do_hflip_spose(pose)
        leye, reye = do_hflip_eye(leye, reye)
        lip = do_hflip_slip(lip)
        nose = do_hflip_nose(nose)

    # Disabled: rotate both hands by up to +-10 degrees in the x/y plane.
    if tf.random.uniform([], minval=0, maxval=1) > 1.0 and train:
        radian = tf.random.uniform([], -10., 10.) / 180. * math.pi
        cos_a = tf.math.cos(radian)
        sin_a = tf.math.sin(radian)
        rotator = tf.reshape(tf.stack([cos_a, -sin_a, sin_a, cos_a]), (2, 2))
        right_hands = tf.linalg.matmul(right_hands, rotator, transpose_b=True)
        left_hands = tf.linalg.matmul(left_hands, rotator, transpose_b=True)

    # Disabled: small Gaussian jitter on the hands.
    if tf.random.uniform([], minval=0, maxval=1) > 1.0 and train:
        right_hands = right_hands + tf.random.normal(shape=tf.shape(right_hands), stddev=0.001)
        left_hands = left_hands + tf.random.normal(shape=tf.shape(left_hands), stddev=0.001)

    # Disabled: random rescaling of hands and/or lips.
    if tf.random.uniform([], minval=0, maxval=1) > 1.0 and train:
        if tf.random.uniform([], minval=0, maxval=1) > 0.5:
            right_hands = tf.random.uniform([], 0.7, 1.3) * right_hands
            left_hands = tf.random.uniform([], 0.7, 1.3) * left_hands
        elif tf.random.uniform([], minval=0, maxval=1) > 0.5 and train:
            lip = tf.random.uniform([], 0.7, 1.3) * lip
        else:
            shared_scale = tf.random.uniform([], 0.7, 1.3)
            right_hands = right_hands * shared_scale
            left_hands = left_hands * shared_scale
            lip = lip * shared_scale

    # Disabled: zero out either both hands or lips + pose.
    if tf.random.uniform([], minval=0, maxval=1) > 1.0 and train:
        if tf.random.uniform([], minval=0, maxval=1) > 0.5:
            right_hands = tf.zeros_like(right_hands)
            left_hands = tf.zeros_like(left_hands)
        else:
            lip = tf.zeros_like(lip)
            pose = tf.zeros_like(pose)

    # Frame-level hand removal: every frame independently loses both hands (set to
    # NaN) with probability `drop_rate`, i.e. what `remove(drop_rate)` does per frame.
    # NOTE(review): the rate range mirrors decode_tfrecord1; confirm it is the intended one.
    if tf.random.uniform([], minval=0, maxval=1) > 0.8 and train:
        drop_rate = tf.random.uniform([], 0.1, 0.5)
        drop_draw = tf.random.uniform([tf.shape(left_hands)[0]], 0., 1.)
        drop_mask = (drop_draw <= drop_rate)[:, tf.newaxis, tf.newaxis]
        left_hands = tf.where(drop_mask, tf.zeros_like(left_hands) / 0, left_hands)
        right_hands = tf.where(drop_mask, tf.zeros_like(right_hands) / 0, right_hands)

    # 20 + 16 + 16 + 4 + 21 + 21 + 8 = 106 landmarks per frame.
    landmark = tf.concat(
        [lip, leye, reye, nose, left_hands, right_hands, pose],
        axis=1)

    # Difference to the following frame; the last frame gets zeros.
    motion = tf.pad(landmark[:-1] - landmark[1:], [[0, 1], [0, 0], [0, 0]])

    landmark = tf.concat([
        tf.reshape(landmark, (-1, 106*2)),
        tf.reshape(motion, (-1, 106*2)),
        ],
        axis=-1)
    landmark = landmark[:config.max_frames]

    letters = tf.cast(record['latters'], tf.int32)
    decoder_input = tf.concat([[28], letters], axis=0)
    decoder_target = tf.concat([letters, [26]], axis=0)
    participant = tf.cast(record['participant'], tf.int32)[0]
    return {'encoder_input': landmark, 'decoder_input': decoder_input,}, (decoder_target, participant)

def get_dataset(files, val=False):
    """Build the batched letter-sequence dataset from TFRecord shards.

    The raw serialized records are cached *before* decoding. Caching after the
    random map and the shuffle would replay the first epoch's augmentations and
    ordering forever; with the cache in front, both are re-drawn every epoch.

    Args:
        files: List of TFRecord paths.
        val: When true, examples are decoded without augmentation and not shuffled.

    Returns:
        A `tf.data.Dataset` of `(inputs, targets)` batches of size
        `config.batch_size`; incomplete final batches are dropped.
    """
    ds = tf.data.TFRecordDataset(files, num_parallel_reads=AUTO)
    ds = ds.cache()
    ds = ds.map(lambda x: decode_tfrecord(x, train=not val), num_parallel_calls=AUTO)
    if not val:
        ds = ds.shuffle(1000)
    ds = ds.padded_batch(
        config.batch_size,
        padded_shapes=({"encoder_input": (config.max_frames, 106*4), "decoder_input": [13],}, ([13], ())),
        # 27 pads the token sequences (and the participant slot, which is scalar and never padded).
        padding_values=({"encoder_input": 0., "decoder_input": 27, }, (27, 27)),
        drop_remainder=True)
    ds = ds.prefetch(AUTO)
    return ds

In [None]:
def filter(x, y, cond=True):
    """Predicate on the last element of `y`.

    Args:
        x: Unused; present so the function matches an `(inputs, targets)` call.
        y: Indexable targets; only `y[-1]` is inspected.
        cond: When true, select elements whose `y[-1]` equals 1. When false,
            select elements whose `y[-1]` is none of 20, 19, 18, 17 or 1.

    Returns:
        The result of the comparison(s).
    """
    last = y[-1]
    if not cond:
        return (last != 20 and last != 19 and last != 18
                and last != 17 and last != 1)
    return last == 1

In [None]:

def decode_tfrecord1(tfrecord, train=True, p_ids=None):
    """Decode one serialized example into model inputs and a one-hot sign target.

    The landmark tensor is standardised, optionally augmented (only when
    `train` is true) and flattened to `(frames, 98 * 4)`: x/y positions of 98
    selected landmarks (lips, eyes, nose, both hands) followed by their
    frame-to-frame differences. Pose landmarks are not part of this variant.

    All random draws use `tf.random`, so they are re-sampled for every example.
    Python's `random` module would be evaluated a single time while the
    `tf.data` map function is traced, turning the "random" factor into a constant.

    Branches guarded by `> 1.0` are switched off: `tf.random.uniform` samples
    from [0, 1), so the comparison is never true. Lower the threshold to enable one.

    Args:
        tfrecord: Scalar string tensor holding one serialized example.
        train: Python bool; augmentations are only applied when true.
        p_ids: Optional non-empty Python list of participant ids. When given,
            the second target is the number of list entries equal to this
            example's participant id instead of the id itself.

    Returns:
        `(inputs, targets)` where `inputs` is a dict with `'encoder_input'`
        (float32, `(frames, 392)`, at most `config.max_frames` rows; NaN values
        are not removed here) and `'decoder_input'` (int32 ids with 28
        prepended), and `targets` is `(one-hot sign of depth 250, participant label)`.
    """
    feature_dict = {
        'frames': tf.io.FixedLenFeature([], dtype=tf.string),
        'mean': tf.io.FixedLenFeature([], dtype=tf.string),
        'std': tf.io.FixedLenFeature([], dtype=tf.string),
        'latters': tf.io.FixedLenSequenceFeature([], dtype=tf.int64, allow_missing=True),
        'participant': tf.io.FixedLenSequenceFeature([], dtype=tf.int64, allow_missing=True),
        'sign': tf.io.FixedLenFeature(shape=[], dtype=tf.int64)
    }

    record = tf.io.parse_single_example(tfrecord, features=feature_dict)
    frames = tf.io.parse_tensor(record['frames'], out_type=tf.float64)
    frames = tf.cast(frames, tf.float32)

    # (frames, 543 landmarks, 3 coordinates) -> keep the first two coordinates.
    frames = tf.reshape(frames, [-1, 543, 3])
    frames = frames[:, :, :2]

    # Disabled: random per-frame shift of all landmarks.
    if tf.random.uniform([], minval=0, maxval=1) > 1.0 and train:
        frames = frames + tf.random.uniform([tf.shape(frames)[0], 1, 2], -0.05, 0.05)

    # Standardise with the mean/std of every non-NaN coordinate of the clip.
    not_nan = frames[~tf.math.is_nan(frames)]
    frames = frames - tf.reduce_mean(not_nan, axis=0, keepdims=True)
    frames = frames / tf.math.reduce_std(not_nan, axis=0, keepdims=True)

    # Temporal resampling by a factor in [0.5, 1.5). At least two frames are
    # required so the interpolation grid is non-degenerate and the new length is >= 1.
    if tf.random.uniform([], minval=0, maxval=1) > 0.5 and train and tf.shape(frames)[0] > 1:
        speed = tf.random.uniform([], 0.5, 1.5)
        last_index = tf.cast(tf.shape(frames)[0] - 1, tf.float32)
        new_length = tf.cast(tf.cast(tf.shape(frames)[0], tf.float32) * speed, tf.int32)
        frames = tfp.math.interp_regular_1d_grid(
            tf.linspace(0., last_index, new_length),
            0., last_index, frames,
            axis=0)

    # Trim 10% of the frames from both ends.
    if tf.shape(frames)[0] > 9:
        if tf.random.uniform([], minval=0, maxval=1) > 0.5 and train:
            margin = tf.cast(tf.cast(tf.shape(frames)[0], tf.float32) * 0.1, tf.int32)
            frames = frames[margin:-margin]

    # Occasionally (5%) cut 10-50% of the clip from its end or its start.
    if tf.shape(frames)[0] > 9:
        if tf.random.uniform([], minval=0, maxval=1) > 0.95 and train:
            crop_fraction = tf.random.uniform([], 0.1, 0.5)
            crop = tf.cast(tf.cast(tf.shape(frames)[0], tf.float32) * crop_fraction, tf.int32)
            # Guard against a zero crop: `frames[:-0]` would be empty.
            crop = tf.maximum(crop, 1)
            if tf.random.uniform([], minval=0, maxval=1) > 0.5 and train:
                frames = frames[:-crop]
            else:
                frames = frames[crop:]

    lip = tf.gather(frames, SLIP, axis=1)
    right_hands = frames[:, 468:489]
    left_hands = frames[:, 522:543]
    leye = tf.gather(frames, LEYE, axis=1)
    reye = tf.gather(frames, REYE, axis=1)
    nose = tf.gather(frames, NOSE, axis=1)

    # Replace about 1% of the individual hand coordinates with NaN.
    if tf.random.uniform([], minval=0, maxval=1) > 0.5 and train:
        nan = tf.zeros_like(left_hands) / 0
        prob = tf.random.uniform(tf.shape(left_hands), 0., 1.)
        left_hands = tf.where(prob < 0.01, nan, left_hands)
        nan = tf.zeros_like(right_hands) / 0
        prob = tf.random.uniform(tf.shape(right_hands), 0., 1.)
        right_hands = tf.where(prob < 0.01, nan, right_hands)

    # Horizontal mirror of every landmark group that reaches the output.
    if tf.random.uniform([], minval=0, maxval=1) > 0.5 and train:
        left_hands, right_hands = do_hflip_hand(left_hands, right_hands)
        leye, reye = do_hflip_eye(leye, reye)
        lip = do_hflip_slip(lip)
        nose = do_hflip_nose(nose)

    # Disabled: rotate both hands by up to +-10 degrees in the x/y plane.
    if tf.random.uniform([], minval=0, maxval=1) > 1.0 and train:
        radian = tf.random.uniform([], -10., 10.) / 180. * math.pi
        cos_a = tf.math.cos(radian)
        sin_a = tf.math.sin(radian)
        rotator = tf.reshape(tf.stack([cos_a, -sin_a, sin_a, cos_a]), (2, 2))
        right_hands = tf.linalg.matmul(right_hands, rotator, transpose_b=True)
        left_hands = tf.linalg.matmul(left_hands, rotator, transpose_b=True)

    # Disabled: small Gaussian jitter on the hands.
    if tf.random.uniform([], minval=0, maxval=1) > 1.0 and train:
        right_hands = right_hands + tf.random.normal(shape=tf.shape(right_hands), stddev=0.001)
        left_hands = left_hands + tf.random.normal(shape=tf.shape(left_hands), stddev=0.001)

    # Disabled: random rescaling of hands and/or lips.
    if tf.random.uniform([], minval=0, maxval=1) > 1.0 and train:
        if tf.random.uniform([], minval=0, maxval=1) > 0.5:
            right_hands = tf.random.uniform([], 0.7, 1.3) * right_hands
            left_hands = tf.random.uniform([], 0.7, 1.3) * left_hands
        elif tf.random.uniform([], minval=0, maxval=1) > 0.5 and train:
            lip = tf.random.uniform([], 0.7, 1.3) * lip
        else:
            shared_scale = tf.random.uniform([], 0.7, 1.3)
            right_hands = right_hands * shared_scale
            left_hands = left_hands * shared_scale
            lip = lip * shared_scale

    # Disabled: zero out either both hands or the lips.
    if tf.random.uniform([], minval=0, maxval=1) > 1.0 and train:
        if tf.random.uniform([], minval=0, maxval=1) > 0.5:
            right_hands = tf.zeros_like(right_hands)
            left_hands = tf.zeros_like(left_hands)
        else:
            lip = tf.zeros_like(lip)

    # Per-frame amount of finite hand signal from BOTH hands, clipped to [0, 1]
    # and averaged over the clip.
    left_finite = tf.where(tf.math.is_finite(left_hands), left_hands, tf.zeros_like(left_hands))
    right_finite = tf.where(tf.math.is_finite(right_hands), right_hands, tf.zeros_like(right_hands))
    hand_signal = tf.clip_by_value(
        tf.reduce_sum(tf.abs(left_finite), [-1, -2]) + tf.reduce_sum(tf.abs(right_finite), [-1, -2]),
        0, 1)
    hand_coverage = tf.reduce_mean(hand_signal)

    # Frame-level hand removal, only for clips where hands are almost always present:
    # every frame independently loses both hands (set to NaN) with probability
    # `drop_rate`, i.e. what `remove(drop_rate)` does per frame.
    if tf.random.uniform([], minval=0, maxval=1) > 0.95 and train and hand_coverage > 0.95:
        drop_rate = tf.random.uniform([], 0.1, 0.5)
        drop_draw = tf.random.uniform([tf.shape(left_hands)[0]], 0., 1.)
        drop_mask = (drop_draw <= drop_rate)[:, tf.newaxis, tf.newaxis]
        left_hands = tf.where(drop_mask, tf.zeros_like(left_hands) / 0, left_hands)
        right_hands = tf.where(drop_mask, tf.zeros_like(right_hands) / 0, right_hands)

    # 20 + 16 + 16 + 4 + 21 + 21 = 98 landmarks per frame.
    landmark = tf.concat(
        [lip, leye, reye, nose, left_hands, right_hands],
        axis=1)

    # Difference to the following frame; the last frame gets zeros.
    motion = tf.pad(landmark[:-1] - landmark[1:], [[0, 1], [0, 0], [0, 0]])

    landmark = tf.concat([
        tf.reshape(landmark, (-1, 98*2)),
        tf.reshape(motion, (-1, 98*2)),
        ],
        axis=-1)
    landmark = landmark[:config.max_frames]

    letters = tf.cast(record['latters'], tf.int32)
    decoder_input = tf.concat([[28], letters], axis=0)
    sign = tf.one_hot(record['sign'], depth=250)

    participant = tf.cast(record['participant'], tf.int32)[0]
    if p_ids:
        # 0 where a listed id equals this participant, 1 elsewhere; count the matches.
        mismatch = tf.clip_by_value(tf.abs(tf.convert_to_tensor(p_ids) - participant), 0, 1)
        participant_label = tf.reduce_sum(1 - mismatch)
    else:
        participant_label = participant

    return {'encoder_input': landmark, 'decoder_input': decoder_input, }, (sign, participant_label)

def get_dataset1(files, val=False, p_ids=None, cond=True):
    """Build the batched sign-classification dataset from TFRecord shards.

    The raw serialized records are cached *before* decoding. Caching after the
    random map and the shuffle would replay the first epoch's augmentations and
    ordering forever; with the cache in front, both are re-drawn every epoch.

    Args:
        files: List of TFRecord paths.
        val: When true, examples are decoded without augmentation and not shuffled.
        p_ids: Forwarded to `decode_tfrecord1`.
        cond: Currently unused; kept for the disabled participant filter.

    Returns:
        A `tf.data.Dataset` of `(inputs, targets)` batches of size
        `config.batch_size`; incomplete final batches are dropped.
    """
    ds = tf.data.TFRecordDataset(files, num_parallel_reads=AUTO)
    ds = ds.cache()
    ds = ds.map(lambda x: decode_tfrecord1(x, train=not val, p_ids=p_ids), num_parallel_calls=AUTO)

    if not val:
        ds = ds.shuffle(100000)
    ds = ds.padded_batch(
        config.batch_size,
        padded_shapes=({"encoder_input": (config.max_frames, 98*4), "decoder_input": [13]}, ([250], ())),
        padding_values=({"encoder_input": 0., "decoder_input": 27, }, (0., 0)),
        drop_remainder=True)
    ds = ds.prefetch(AUTO)
    return ds

In [None]:
def pairwise_distance(x, num_points=None):
  """Euclidean distance between every ordered pair of 2-D points in each frame.

  Args:
    x: Tensor that can be reshaped to `(batch, steps, num_points, 2)`, where
      `steps` is taken from `tf.shape(x)[1]`.
    num_points: Number of 2-D points per frame (required despite the default).

  Returns:
    Tensor of shape `(batch, steps, num_points * num_points)`; entry
    `i * num_points + j` is the distance between points `i` and `j`.
  """
  steps = tf.shape(x)[1]
  as_rows = tf.reshape(x, (-1, steps, num_points, 1, 2))
  as_cols = tf.reshape(x, (-1, steps, 1, num_points, 2))
  squared = tf.reduce_sum(((as_rows - as_cols) ** 2), -1)
  return tf.reshape(tf.math.sqrt(squared), (-1, steps, num_points*num_points))

In [None]:
TAU = 2 * np.pi
def acos(x, margin=1e-5):
    """Polynomial stand-in for arccos(), which TFLite does not support.

    Args:
        x: Tensor of cosines; values are clipped to `[margin - 1, 1 - margin]`.
        margin: Distance kept from the end points -1 and 1.

    Returns:
        Tensor with the approximate arc cosine of `x`.
    """
    clipped = tf.clip_by_value(x, margin - 1., 1. - margin)
    magnitude = tf.abs(clipped)

    # sqrt(1 - |x|) times a cubic in |x|, evaluated in Horner form.
    cubic = magnitude * (magnitude * (magnitude * -0.0200752 + 0.0759031) - 0.2126757) + 1.5707963
    positive_branch = tf.sqrt(1. - magnitude) * cubic

    # For negative input use acos(-x) = pi - acos(x).
    negative_branch = TAU / 2. - positive_branch
    return tf.where(clipped >= 0., positive_branch, negative_branch)

In [None]:
def pairwise_angle(landmarks, num_points=None):
    """Angle between every pair of landmark directions measured from point 0.

    Each point is turned into the unit vector from the frame's first point to
    it (zero-length vectors stay zero), and the angle between every pair of
    those vectors is computed with `acos`. Diagonal entries are forced to 0.

    Args:
        landmarks: Tensor that can be reshaped to `(batch, steps, num_points, 2)`,
            where `steps` is taken from `tf.shape(landmarks)[1]`.
        num_points: Number of 2-D points per frame (required despite the default).

    Returns:
        Tensor of shape `(batch, steps, num_points * num_points)`.
    """
    steps = tf.shape(landmarks)[1]
    points = tf.reshape(landmarks, (-1, steps, num_points, 2))
    offsets = points - points[:, :, 0:1, :]
    lengths = tf.norm(offsets, axis=-1, keepdims=True)
    directions = tf.where(tf.equal(lengths, 0), tf.zeros_like(offsets), offsets / lengths)
    cosines = tf.matmul(directions, directions, transpose_b=True)
    angles = acos(tf.clip_by_value(cosines, -1.0, 1.0))
    on_diagonal = tf.cast(tf.one_hot(tf.range(num_points), depth=num_points), tf.bool)
    angles = tf.where(on_diagonal, tf.zeros_like(angles), angles)
    return tf.reshape(angles, (-1, steps, num_points*num_points))

In [None]:
def remove(rate):
  """Create a callback that blanks both hands of one element with probability `rate`.

  Args:
    rate: Threshold compared against a uniform draw from [0, 1).

  Returns:
    A function taking a 14-element sequence `x`. It returns a 14-tuple in which
    the first two entries are either passed through (draw > rate) or replaced
    by NaN tensors shaped like `x[0]`; the other twelve entries are unchanged.
  """
  def function(x):
    lhands, rhands = x[0], x[1]
    untouched = (x[2], x[3], x[4], x[5], x[6], x[7], x[8], x[9], x[10], x[11], x[12], x[13])
    if tf.random.uniform([], minval=0, maxval=1)>rate:
      return (lhands, rhands) + untouched
    else:
      # 0 / 0 yields NaN with the shape and dtype of the left-hand tensor.
      blank = tf.zeros_like(lhands)/tf.zeros_like(lhands)
      return (blank, blank) + untouched
  return function

In [None]:
# Only the sign-classification pipeline is built; the letter-sequence pipeline stays disabled.
#ds = get_dataset(files)
ds1 = get_dataset1(files)

In [None]:
# Pull a single batch to check that the input pipeline runs; `i` keeps that batch.
for i in ds1:
    break

In [None]:
def positional_encoding(length, depth):
  """Sinusoidal position table with all sines first, then all cosines.

  Args:
    length: Number of positions (rows).
    depth: Total number of channels; half are sines and half cosines.

  Returns:
    float32 tensor of shape `(length, depth)` for even `depth`.
  """
  half_depth = depth/2

  position_column = np.arange(length)[:, np.newaxis]                  # (length, 1)
  channel_fraction = np.arange(half_depth)[np.newaxis, :]/half_depth  # (1, depth/2)

  # Frequencies fall geometrically from 1 towards 1/10000 across the channels.
  angles = position_column * (1 / (10000**channel_fraction))          # (length, depth/2)

  table = np.concatenate([np.sin(angles), np.cos(angles)], axis=-1)
  return tf.cast(table, dtype=tf.float32)

In [None]:
def sinusoids(length, channels, max_timescale=10000.):
    """Sinusoidal position table whose timescales span 1 to `max_timescale`.

    Args:
        length: Number of positions (rows).
        channels: Total number of channels; `channels // 2` sines followed by
            `channels // 2` cosines.
        max_timescale: Largest timescale, reached by the last channel of each half.

    Returns:
        float32 tensor of shape `(length, 2 * (channels // 2))`.
    """
    half = channels // 2
    log_step = tf.math.log(max_timescale) / (half - 1)
    inverse_timescales = tf.math.exp(-log_step * tf.range(half, dtype=tf.float32))
    positions = tf.range(length, dtype=tf.float32)
    phase = positions[:, tf.newaxis] * inverse_timescales[tf.newaxis, :]
    return tf.concat([tf.math.sin(phase), tf.math.cos(phase)], axis=1)

In [None]:
class MultiHeadAttention(keras.layers.Layer):
    """Multi-head self-attention with an additive mask and a fused QKV projection.

    Args:
        embed_dim: Width of the input and output features.
        num_heads: Number of heads; each head gets `embed_dim // num_heads` channels.
        dropout: Rate of the `SpatialDropout1D` applied to the projected output.
    """
    def __init__(self, embed_dim=768, num_heads=12, dropout=0.1, **kwargs):
        super().__init__(**kwargs)
        self.embed_dim = embed_dim
        self.num_heads = num_heads
        # Remembered so get_config() can reproduce the constructor call.
        self.dropout_rate = dropout
        self.head_dim = self.embed_dim // self.num_heads
        self.scale = (1/self.head_dim**0.5)
        self.in_proj = keras.layers.Dense(self.embed_dim*3,   )
        self.out_proj = keras.layers.Dense(self.embed_dim,  )
        self.dropout = keras.layers.SpatialDropout1D(dropout)

    def call(self, hidden_state, attention_mask):
        """Attend over `hidden_state`.

        Args:
            hidden_state: Tensor of shape `(batch, steps, embed_dim)`.
            attention_mask: Tensor added to the attention logits of shape
                `(batch, heads, query, key)` before the softmax; it must
                broadcast to that shape.

        Returns:
            Tensor of shape `(batch, steps, embed_dim)`.
        """
        shape = tf.shape(hidden_state)
        qkv = self.in_proj(hidden_state)
        # Per head, the channels are laid out as [query | key | value].
        qkv = tf.reshape(qkv, [-1, shape[1], self.num_heads, self.head_dim*3])
        query, key, value = tf.split(qkv, 3, axis=-1)
        attn_weights = tf.einsum("bthc,bshc->bhts", query, key) * self.scale
        attn_weights = attn_weights + attention_mask
        attn_weights = tf.nn.softmax(attn_weights)
        attn_output = tf.einsum("bhts,bshc->bthc", attn_weights, value)
        attn_output = tf.reshape(attn_output, (-1, shape[1], self.embed_dim))
        output = self.out_proj(attn_output)
        return self.dropout(output)

    def get_config(self):
        """Return the base layer config plus every constructor argument."""
        base_config = super().get_config()
        base_config.update({
            "embed_dim": self.embed_dim,
            'num_heads': self.num_heads,
            'dropout': self.dropout_rate,
        })
        return base_config


class MultiHeadAttention1(keras.layers.Layer):
    """Multi-head self-attention without a mask, using a fused QKV projection.

    Args:
        embed_dim: Width of the input and output features.
        num_heads: Number of heads; each head gets `embed_dim // num_heads` channels.
        dropout: Rate of the `SpatialDropout1D` applied to the projected output.
    """
    def __init__(self, embed_dim=768, num_heads=12, dropout=0.1, **kwargs):
        super().__init__(**kwargs)
        self.embed_dim = embed_dim
        self.num_heads = num_heads
        # Remembered so get_config() can reproduce the constructor call.
        self.dropout_rate = dropout
        self.head_dim = self.embed_dim // self.num_heads
        self.scale = (1/self.head_dim**0.5)
        self.in_proj = keras.layers.Dense(self.embed_dim*3,   )
        self.out_proj = keras.layers.Dense(self.embed_dim,  )
        self.dropout = keras.layers.SpatialDropout1D(dropout)

    def call(self, hidden_state):
        """Attend over `hidden_state` of shape `(batch, steps, embed_dim)`.

        Returns:
            Tensor of shape `(batch, steps, embed_dim)`.
        """
        shape = tf.shape(hidden_state)
        qkv = self.in_proj(hidden_state)
        # Per head, the channels are laid out as [query | key | value].
        qkv = tf.reshape(qkv, [-1, shape[1], self.num_heads, self.head_dim*3])
        query, key, value = tf.split(qkv, 3, axis=-1)
        attn_weights = tf.einsum("bthc,bshc->bhts", query, key) * self.scale
        attn_weights = tf.nn.softmax(attn_weights)
        attn_output = tf.einsum("bhts,bshc->bthc", attn_weights, value)
        attn_output = tf.reshape(attn_output, (-1, shape[1], self.embed_dim))
        output = self.out_proj(attn_output)
        return self.dropout(output)

    def get_config(self):
        """Return the base layer config plus every constructor argument."""
        base_config = super().get_config()
        base_config.update({
            "embed_dim": self.embed_dim,
            'num_heads': self.num_heads,
            'dropout': self.dropout_rate,
        })
        return base_config

class MLPLayer(keras.layers.Layer):
    """Pre-norm feed-forward block with a residual connection.

    LayerNorm -> Dense(4 * embed_dim) -> ReLU -> Dropout -> Dense(embed_dim)
    -> Dropout, added back onto the input.
    """
    def __init__(self, embed_dim=512, dropout=0.2, **kwargs):
        super().__init__(**kwargs)
        self.layer_norm = keras.layers.LayerNormalization(epsilon=1e-5)
        self.fc1 = keras.layers.Dense(embed_dim*4,  )
        self.fc2 = keras.layers.Dense(embed_dim,   )
        self.dropout = keras.layers.Dropout(dropout)
        self.dropout1 = keras.layers.Dropout(dropout)

    def call(self, inputs, attention_mask=None):
        """Apply the block; `attention_mask` is accepted but ignored."""
        hidden = self.fc1(self.layer_norm(inputs))
        hidden = self.dropout1(tf.nn.relu(hidden))
        hidden = self.dropout(self.fc2(hidden))
        return hidden + inputs


class EncoderLayer(keras.layers.Layer):
    """Transformer encoder block: pre-norm self-attention followed by an MLP block.

    `layer_norm2` is created but not used in `call`; `MLPLayer` carries its own
    normalisation.
    """
    def __init__(self, embed_dim, num_heads, attention_dropout, dropout, **kwargs):
        super().__init__(**kwargs)
        self.layer_norm1 = keras.layers.LayerNormalization(epsilon=1e-5)
        self.attn = MultiHeadAttention(embed_dim, num_heads, attention_dropout)
        self.layer_norm2 = keras.layers.LayerNormalization(epsilon=1e-5)
        self.mlp = MLPLayer(embed_dim, dropout)

    def call(self, inputs, attention_mask=None):
        """Run attention with a residual connection, then the residual MLP block."""
        attended = self.attn(self.layer_norm1(inputs), attention_mask)
        return self.mlp(attended + inputs)




class MultiHeadCrossAttention(keras.layers.Layer):
    """Multi-head attention whose queries and keys/values come from different sequences.

    Args:
        embed_dim: Width of the query features and of the output.
        num_heads: Number of heads; each head gets `embed_dim // num_heads` channels.
        dropout: Rate of the `SpatialDropout1D` applied to the projected output.
    """
    def __init__(self, embed_dim=128, num_heads=3, dropout=0.1, **kwargs):
        super().__init__(**kwargs)
        self.embed_dim = embed_dim
        self.num_heads = num_heads
        self.head_dim = self.embed_dim // self.num_heads
        self.scale = (1/self.head_dim**0.5)
        self.query = keras.layers.Dense(self.embed_dim)
        self.key_value = keras.layers.Dense(self.embed_dim*2, )
        self.out_proj = keras.layers.Dense(self.embed_dim,  )
        self.dropout = keras.layers.SpatialDropout1D(dropout)

    def call(self, hidden_state, encoded_hidden_states):
        """Let `hidden_state` attend over `encoded_hidden_states`.

        Args:
            hidden_state: Query sequence, `(batch, target_steps, features)`.
            encoded_hidden_states: Memory sequence, `(batch, source_steps, features)`.

        Returns:
            Tensor of shape `(batch, target_steps, embed_dim)`.
        """
        target_steps = tf.shape(hidden_state)[1]
        queries = tf.reshape(
            self.query(hidden_state),
            [-1, target_steps, self.num_heads, self.head_dim])

        # Keys and values share one projection; per head the layout is [key | value].
        fused = self.key_value(encoded_hidden_states)
        fused = tf.reshape(fused, [-1, tf.shape(fused)[1], self.num_heads, self.head_dim*2])
        keys, values = tf.split(fused, 2, axis=-1)

        scores = tf.einsum("bthc,bshc->bhts", queries, keys) * self.scale
        weights = tf.nn.softmax(scores)
        context = tf.einsum("bhts,bshc->bthc", weights, values)
        context = tf.reshape(context, (-1, target_steps, self.embed_dim))
        return self.dropout(self.out_proj(context))



class DecoderLayer(keras.layers.Layer):
    """Decoder block: cross-attention over the encoder output, then a feed-forward net.

    Both sub-blocks are pre-norm with residual connections. `attnself` and
    `layer_norm4` are created but not used in `call`, so the decoder tokens do
    not attend to each other here.
    """
    def __init__(self, embed_dim, num_heads, attention_dropout, dropout, **kwargs):
        super().__init__(**kwargs)
        self.layer_norm1 = keras.layers.LayerNormalization(epsilon=1e-5)
        self.attnself = MultiHeadAttention1(embed_dim, num_heads, attention_dropout)
        self.attn = MultiHeadCrossAttention(embed_dim, num_heads, attention_dropout)
        self.layer_norm2 = keras.layers.LayerNormalization(epsilon=1e-5)
        self.layer_norm3 = keras.layers.LayerNormalization(epsilon=1e-5)
        self.fc1 = keras.layers.Dense(embed_dim*4,  )
        self.fc2 = keras.layers.Dense(embed_dim, )
        self.dropout = keras.layers.Dropout(dropout)
        self.layer_norm4 = keras.layers.LayerNormalization(epsilon=1e-5)

    def call(self, hidden_state, encoded_hidden_state):
        """Update `hidden_state` using `encoded_hidden_state` as attention memory."""
        memory = self.layer_norm1(encoded_hidden_state)

        # Cross-attention sub-block.
        attended = self.attn(self.layer_norm2(hidden_state), memory)
        after_attention = hidden_state + attended

        # Feed-forward sub-block.
        hidden = self.fc1(self.layer_norm3(after_attention))
        hidden = self.fc2(tf.nn.relu(hidden))
        hidden = self.dropout(hidden)
        return hidden + after_attention

In [None]:
class Embedding(keras.layers.Layer):
    """Two-layer projection of flattened landmark features to `embed_dim` channels.

    Dense(2 * embed_dim) -> LayerNorm -> ReLU -> Dropout -> Dense(embed_dim)
    -> LayerNorm -> ReLU -> Dropout.

    All widths follow the `embed_dim` argument, so the layer stays consistent
    when it is built with a width other than the global `config.embed_dim`.
    The dropout rates still come from `config.embed_dropout`.

    `act`, `act1`, `mlp` and `noise` are created but not used in `call`.

    Args:
        embed_dim: Output width of the embedding.
    """
    def __init__(self, embed_dim, **kwargs):
        super().__init__(**kwargs)
        self.fc = keras.layers.Dense(embed_dim*2, )
        self.ln = keras.layers.LayerNormalization(epsilon=1e-5)
        self.fc1 = keras.layers.Dense(embed_dim, )
        self.ln1 = keras.layers.LayerNormalization(epsilon=1e-5)
        self.act = tf.keras.layers.PReLU()
        self.act1 = tf.keras.layers.PReLU()
        self.dr = keras.layers.Dropout(config.embed_dropout)
        self.dr1 = keras.layers.Dropout(config.embed_dropout)
        self.mlp = MLPLayer(embed_dim, config.dropout)
        self.noise = keras.layers.GaussianNoise(0.5)

    def call(self, inputs):
        """Project `inputs` (features on the last axis) to `embed_dim` channels."""
        x = self.fc(inputs)
        x = self.ln(x)
        x = tf.nn.relu(x)
        x = self.dr(x)
        x = self.fc1(x)
        x = self.ln1(x)
        x = tf.nn.relu(x)
        x = self.dr1(x)
        return x

In [None]:
class Encoder(keras.layers.Layer):
  """Embeds the input sequence, adds a positional table and runs the encoder stack.

  Args:
    length: Number of positions passed to ``positional_encoding``.
    embed_dim: Channel width of the embedding and of every encoder block.
    num_heads: Forwarded to each ``EncoderLayer``.
    num_blocks: Number of stacked ``EncoderLayer`` blocks.
    attention_dropout: Forwarded to each ``EncoderLayer``.
    dropout: Forwarded to each ``EncoderLayer``.
    **kwargs: Forwarded to ``keras.layers.Layer``.
  """

  def __init__(self, length, embed_dim, num_heads, num_blocks, attention_dropout=0.1, dropout=0.1, **kwargs):
    super().__init__(**kwargs)
    # Trainable table initialised from positional_encoding(length, embed_dim).
    self.pos_embedding = tf.Variable(positional_encoding(length, embed_dim, ), trainable=True, name='pos_embedding')
    self.embed = Embedding(embed_dim)
    self.layers = [EncoderLayer(embed_dim, num_heads, attention_dropout, dropout) for i in range(num_blocks)]
    self.ln = keras.layers.LayerNormalization(epsilon=1e-5)

  def call(self, inputs, mask):
    """Return the layer-normalised output of the encoder stack.

    ``mask`` is handed unchanged to every encoder block.
    """
    x = self.embed(inputs)
    x = x + tf.cast(self.pos_embedding, x.dtype)
    for block in self.layers:
      x = block(x, mask)
    return self.ln(x)



class Decoder(keras.layers.Layer):
  """Token decoder whose output projection reuses the token-embedding matrix.

  The embedding table has 29 rows and the learned positional table has 13
  rows; both sizes are fixed in ``__init__``.

  Args:
    embed_dim: Channel width of the embeddings and decoder blocks.
    num_heads: Forwarded to each ``DecoderLayer``.
    num_blocks: Number of stacked ``DecoderLayer`` blocks.
    attention_dropout: Forwarded to each ``DecoderLayer``.
    dropout: Forwarded to each ``DecoderLayer``.
    **kwargs: Forwarded to ``keras.layers.Layer``.
  """

  def __init__(self, embed_dim, num_heads, num_blocks, attention_dropout=0.1, dropout=0.1, **kwargs):
    super().__init__(**kwargs)
    self.token_embedding = keras.layers.Embedding(29, embed_dim)
    self.pos_embedding = tf.Variable(tf.zeros((13, embed_dim)), trainable=True)
    self.layers = [
        DecoderLayer(embed_dim, num_heads, attention_dropout, dropout)
        for _ in range(num_blocks)
    ]
    self.ln = keras.layers.LayerNormalization(epsilon=1e-5)
    # Created here but not applied in call().
    self.drop_path = tfa.layers.StochasticDepth(1.0)

  def call(self, inputs, encoder_state):
    """Return logits over the 29 token ids for every position of ``inputs``."""
    seq_len = tf.shape(inputs)[1]
    tokens = self.token_embedding(inputs)
    # Only the first `seq_len` rows of the positional table are used.
    positions = tf.cast(self.pos_embedding, tokens.dtype)[:seq_len]
    hidden = tokens + positions
    for layer in self.layers:
      hidden = layer(hidden, encoder_state)

    hidden = self.ln(hidden)
    # Project with the transposed embedding table; the leading axis lets it
    # broadcast over the batch.
    table = tf.transpose(self.token_embedding.weights[0], [1, 0])[None, :, :]
    return hidden @ tf.cast(table, hidden.dtype)



In [None]:
class Discriminator(keras.layers.Layer):
  """Classifies a masked mean-pool of the encoder states into 21 classes.

  Args:
    name: Layer name, forwarded to ``keras.layers.Layer``.
    **kwargs: Forwarded to ``keras.layers.Layer``.
  """

  def __init__(self, name='discriminator', **kwargs):
    # Forward `name` explicitly: it is captured by the signature, so it never
    # reaches **kwargs and would otherwise be silently dropped.
    super().__init__(name=name, **kwargs)
    self.mlp = [MLPLayer(config.embed_dim) for i in range(1)]
    self.mlp1 = MLPLayer(config.embed_dim)
    self.dense = keras.layers.Dense(21, activation='softmax')

  def call(self, encoder_state, mask):
    """Return softmax class probabilities for each sequence.

    Args:
      encoder_state: Per-frame features; axis 1 is pooled away.
      mask: Integer tensor, 1 for frames to exclude from the pool and 0 for
        frames to keep.
    """
    x = encoder_state
    for layer in self.mlp:
      x = layer(x)
    keep = tf.cast(1 - mask, x.dtype)
    # Mean over the kept frames only; the epsilon guards an all-masked row.
    x = x * keep[:, :, None]
    x = tf.reduce_sum(x, axis=1)/(tf.reduce_sum(keep, axis=1)[:, None]+1e-11)
    x = self.mlp1(x)
    x = self.dense(x)
    return x

In [None]:
class ISLTransformer(keras.Model):
    """Functional encoder/decoder model with an auxiliary discriminator head.

    The graph is assembled inside ``__init__`` and handed to the functional
    ``keras.Model`` constructor. It has two outputs: the softmax token
    probabilities from the last greedy-decoding step, and the discriminator
    output. ``train_step`` performs two optimizer updates per batch.
    """
    def __init__(self, length, embed_dim, num_heads, num_blocks, attention_dropout, dropout, name='model'):
        """Build the functional graph and create the two loss trackers.

        Args:
            length: Forwarded to ``Encoder``.
            embed_dim: Forwarded to ``Encoder`` and ``Decoder``.
            num_heads: Forwarded to ``Encoder`` and ``Decoder``.
            num_blocks: Forwarded to ``Encoder`` and ``Decoder``.
            attention_dropout: Forwarded to ``Encoder`` and ``Decoder``.
            dropout: Forwarded to ``Encoder`` and ``Decoder``.
            name: Model name.
        """
        encoder_input = keras.Input(shape=(config.max_frames, 98*4), name='encoder_input')
        decoder_input = keras.Input(shape=(13), name='decoder_input')
        
        # Channel ranges of the input that are treated as separate landmark groups.
        lip = encoder_input[:,:,:20*2]
        leye = encoder_input[:,:,20*2:36*2]
        reye = encoder_input[:,:,36*2:52*2]
        lhand = encoder_input[:,:,56*2:77*2]
        rhand = encoder_input[:,:,77*2:98*2]
        #pose = encoder_input[:,:,98*2:106*2]
        #lhand = tf.concat([lhand, pose], axis=-1)
        #rhand = tf.concat([rhand, pose], axis=-1)

        # pairwise_distance / pairwise_angle are defined elsewhere in the notebook;
        # the second argument is presumably the number of points in the group — TODO confirm.
        lip_d = pairwise_distance(lip, 20)
        leye_d = pairwise_distance(leye, 16)
        reye_d = pairwise_distance(reye, 16)
        ld = pairwise_distance(lhand, 21)
        rd = pairwise_distance(rhand, 21)
        #pd = pairwise_distance(pose, 8)
        # NOTE(review): `a` is not referenced again in this method.
        a = pairwise_distance(encoder_input[:,:,:98*2],98)
        langle = pairwise_angle(lhand, 21)
        rangle = pairwise_angle(rhand, 21)
        #pangle = pairwise_angle(pose, 8)
        # Zero the angle features of every frame whose mean angle is not below 1.47.
        langle = langle * tf.cast(tf.reduce_mean(langle,axis=-1) < 1.47, tf.float32)[:,:,None]
        rangle = rangle * tf.cast(tf.reduce_mean(rangle,axis=-1) < 1.47, tf.float32)[:,:,None]
        #pangle = pangle * tf.cast(tf.reduce_mean(pangle,axis=-1) < 1.47, tf.float32)[:,:,None]

        x = tf.concat([encoder_input, lip_d, leye_d, reye_d, ld, rd, langle, rangle], axis=-1)#
        # Non-finite entries are replaced by tf.zeros_like(x)*-999, which is still zero.
        x = tf.where(tf.math.is_finite(x), x, tf.zeros_like(x)*-999)
        # mask is 1 for frames whose features are all zero after the replacement above, else 0.
        mask = tf.cast(tf.reduce_mean(tf.abs(x), axis=-1)==0, tf.int32)
        # -1000 at masked frames and 0 elsewhere, with two singleton axes inserted;
        # presumably an additive attention bias — confirm against EncoderLayer.
        mask_expanded = tf.cast(mask * -1000, config.dtype)[:,None,None,:]
        #mask_expanded = tf.concat([mask_expanded[:,:,:,:1], mask_expanded], axis=-1)
        #x = tf.reshape(encoder_input, (-1, tf.shape(encoder_input)[1], 106*4))
        
        encoder_state = Encoder(length, embed_dim, num_heads, num_blocks, attention_dropout, dropout)(x, mask_expanded)
        decoder = Decoder(embed_dim, num_heads, num_blocks, attention_dropout, dropout)
        probab = keras.layers.Softmax(dtype=tf.float32)

        # Per-sample constant columns holding the special token ids 28, 27 and 26.
        start = tf.tile(tf.ones((1,1), tf.int32), (tf.shape(encoder_input)[0],1))*28
        padding = tf.tile(tf.ones((1,1), tf.int32), (tf.shape(encoder_input)[0],1))*27
        end = tf.tile(tf.ones((1,1), tf.int32), (tf.shape(encoder_input)[0],1))*26
        # Greedy decoding unrolled for 13 steps: the whole prefix is re-decoded each
        # step and the argmax at its last position is appended.
        x = start
        for i in range(13):
          output = decoder(x, encoder_state)
          prediction = probab(output)
          
          prediction_id = tf.cast(tf.argmax(prediction[:,-1:], axis=-1), tf.int32)
          # Once the previous token is `end` or `padding`, force `padding` from then on.
          prediction_id = tf.where(x[:,-1:]==end, padding, tf.cast(prediction_id, tf.int32))
          prediction_id = tf.where(x[:,-1:]==padding, padding, tf.cast(prediction_id, tf.int32))
          x = tf.concat([x, prediction_id], axis=-1)
          
        # `x` is reused here for the discriminator output.
        x = Discriminator(name='discriminator')(encoder_state, mask)
        # Outputs: `prediction` from the final loop iteration, and the discriminator output.
        # NOTE(review): decoder_input is declared as a model input but nothing in the
        # graph built above reads it.
        super().__init__([encoder_input, decoder_input], [prediction, x], name=name)
        self.loss_tracker = keras.metrics.Mean(name='loss')
        self.dis_loss_tracker = keras.metrics.Mean(name='discriminator_loss')

        
    def train_step(self, data):
        """Run two optimizer updates on one batch and return the metric results.

        Update 1 minimises the discriminator loss over all trainable variables.
        Update 2 minimises ``masked_loss`` with the last layer frozen.
        ``cls_loss`` is not defined in this class; it is read from the enclosing
        (notebook-global) scope.
        """
        x, y = data
        
        # Update 1: loss on the second model output only.
        with tf.GradientTape() as tape:
            y_pred = self(x, training=True)[1] 
            disc_loss = cls_loss(y[1], y_pred)
            disc_loss = tf.reduce_mean(disc_loss)
       
        trainable_vars = self.trainable_variables
        # NOTE(review): leftover debug output.
        print(len(trainable_vars))
        gradients = tape.gradient(disc_loss, trainable_vars)
        self.optimizer.apply_gradients(zip(gradients, trainable_vars))

        # Update 2: second forward pass; only `loss` is differentiated below.
        with tf.GradientTape() as tape:
            y_pred = self(x, training=True)  
            loss = masked_loss(y[0], y_pred[0])
            # NOTE(review): loss1 is computed but never used, and `loss = loss` is a no-op.
            loss1 = cls_loss(y[1], y_pred[1])
            loss1 = tf.reduce_mean(loss1)
            loss = loss
       
        # Freeze the last layer (presumably the Discriminator — confirm) so that
        # trainable_variables excludes its weights for this update.
        for layer in self.layers[:-1]:
            layer.trainable = True
        self.layers[-1].trainable = False
        trainable_vars = self.trainable_variables
        gradients = tape.gradient(loss, trainable_vars)
        self.optimizer.apply_gradients(zip(gradients, trainable_vars))
        
        # Restore the flag changed above.
        self.layers[-1].trainable = True

        self.compiled_metrics.update_state(y[0], y_pred[0])
        self.loss_tracker.update_state(loss)
        self.dis_loss_tracker.update_state(disc_loss)
        result = {m.name: m.result() for m in self.metrics}
        result[self.loss_tracker.name] = self.loss_tracker.result()
        result[self.dis_loss_tracker.name] = self.dis_loss_tracker.result()
        return result

In [None]:
import math
def lrfn(current_step, num_warmup_steps, lr_max, num_cycles=0.50, rate=0.8, steps=None):
    """Learning rate for ``current_step``: exponential warm-up, then cosine decay.

    Args:
        current_step: Zero-based step (epoch) index.
        num_warmup_steps: Length of the warm-up phase. During warm-up the rate
            is ``lr_max * 0.2 ** (num_warmup_steps - current_step)``.
        lr_max: Peak learning rate, reached at ``current_step == num_warmup_steps``.
        num_cycles: Number of cosine cycles over the decay phase; 0.5 is a
            single half-wave from ``lr_max`` down to 0.
        rate: Unused; kept so existing call sites keep working.
        steps: Total number of steps in the schedule. Required as soon as
            ``current_step`` reaches ``num_warmup_steps``.

    Returns:
        The learning rate for ``current_step``.

    Raises:
        ValueError: If the decay phase is reached while ``steps`` is None.
    """
    if current_step < num_warmup_steps:
        return lr_max * 0.20 ** (num_warmup_steps - current_step)

    if steps is None:
        # Fail with a clear message instead of a TypeError from `None - int`.
        raise ValueError("lrfn: `steps` is required once warm-up has finished")

    decay_steps = max(1, steps - num_warmup_steps)
    progress = float(current_step - num_warmup_steps) / float(decay_steps)
    return max(0.0, 0.5 * (1.0 + math.cos(math.pi * float(num_cycles) * 2.0 * progress))) * lr_max

In [None]:
# Phase-1 learning rates, one per epoch; the callback looks them up by epoch index.
LR_SCHEDULE = [
    lrfn(epoch, config.warmup_steps, lr_max=config.lr_max_ph1, rate=config.rate, steps=config.phase1_epochs)
    for epoch in range(config.phase1_epochs)
]
lr_callback = tf.keras.callbacks.LearningRateScheduler(lambda epoch: LR_SCHEDULE[epoch], verbose=0)

In [None]:
class CustomSchedule(tf.keras.optimizers.schedules.LearningRateSchedule):
  """Linear warm-up followed by inverse-square-root decay.

  The rate at ``step`` is
  ``rsqrt(d_model) * min(rsqrt(step), step * warmup_steps ** -1.5)``.

  Args:
    d_model: Model width used to scale the schedule.
    warmup_steps: Step at which the two terms of the ``min`` meet.
  """

  def __init__(self, d_model, warmup_steps=2100):
    super().__init__()

    # Keep the raw value so get_config() stays serialisable.
    self._d_model_config = d_model
    self.d_model = tf.cast(d_model, tf.float32)

    self.warmup_steps = warmup_steps

  def __call__(self, step):
    step = tf.cast(step, dtype=tf.float32)
    arg1 = tf.math.rsqrt(step)
    arg2 = step * (self.warmup_steps ** -1.5)

    return tf.math.rsqrt(self.d_model) * tf.math.minimum(arg1, arg2)

  def get_config(self):
    # Return every constructor argument as a plain value so that
    # from_config() can rebuild an identical schedule.
    return {'d_model': self._d_model_config, 'warmup_steps': self.warmup_steps}

In [None]:
def masked_accuracy(label, pred):
    """Fraction of non-padding positions (label != 27) whose argmax matches the label.

    ``pred`` is arg-maxed over axis 2.
    """
    predicted_ids = tf.argmax(pred, axis=2)
    label = tf.cast(label, predicted_ids.dtype)

    valid = label != 27
    correct = (label == predicted_ids) & valid

    n_correct = tf.reduce_sum(tf.cast(correct, dtype=tf.float32))
    n_valid = tf.reduce_sum(tf.cast(valid, dtype=tf.float32))
    return n_correct/n_valid

def unique(x):
    """Return the int32 index output of ``tf.unique(x)`` (element 1 of its result), not the unique values."""
    _, positions = tf.unique(x, out_idx=tf.int32)
    return positions
def diff(x):
    """Return, twice, the values of ``tf.sets.difference(pred, label)``.

    ``x`` is a ``(pred, label)`` pair; each gets a leading axis before the
    set difference is taken.
    """
    pred, label = x
    extra = tf.sets.difference(pred[None, :], label[None, :]).values
    return extra, extra
def masked_loss(label, pred):
    """Mean sparse categorical cross-entropy over non-padding positions.

    Positions whose label equals 27 are excluded from both the sum and the
    normaliser.

    Args:
        label: Integer token ids.
        pred: Per-position class probabilities.

    Returns:
        Scalar loss.
    """
    mask = label != 27
    loss_object = keras.losses.SparseCategoricalCrossentropy(reduction='none')
    loss = loss_object(label, pred)

    # The former `diff` term was computed and then discarded, so it is gone;
    # the returned value is unchanged.
    mask = tf.cast(mask, dtype=loss.dtype)
    loss *= mask
    return tf.reduce_sum(loss)/tf.reduce_sum(mask)
masked_loss.name = 'masked_loss'
def non_match(label, pred):
    """One plus the batch-mean count of predicted tokens absent from the label row.

    For every non-padding position (label != 27), the arg-maxed prediction
    counts when it equals none of the tokens in that sample's label sequence.
    """
    valid = label != 27

    pred_ids = tf.cast(tf.argmax(pred, axis=2), tf.float32)
    label_ids = tf.cast(label, pred_ids.dtype)
    valid = tf.cast(valid, dtype=pred_ids.dtype)

    # differs[b, i, j] is 0 where prediction i equals label j, otherwise 1.
    differs = tf.clip_by_value(tf.abs(pred_ids[:, :, None] - label_ids[:, None, :]), 0, 1)
    # hits[b, i] is 1 when prediction i equals at least one label token.
    hits = tf.clip_by_value(tf.reduce_sum(1 - differs, axis=-1), 0, 1)
    missing = (1 - hits) * valid

    per_sample = tf.reduce_sum(missing, axis=-1)
    return tf.reduce_mean(per_sample, axis=-1) + 1
def get_model(use_sam=True):
    """Build and compile the pre-training ``ISLTransformer`` from ``config``.

    Args:
        use_sam: When true, wrap the model in Keras' experimental
            ``SharpnessAwareMinimization`` before compiling.

    Returns:
        The compiled model. The optimizer is Lookahead around RectifiedAdam
        with a fixed learning rate of 1e-4 and zero weight decay. No ``loss``
        is passed to ``compile``.
    """
    model = ISLTransformer(
        length=config.max_frames,
        embed_dim=config.embed_dim,
        num_heads=config.num_heads,
        num_blocks=config.num_blocks,
        attention_dropout=config.attention_dropout,
        dropout=config.dropout
        )

    # The CustomSchedule instance previously built here was never handed to
    # the optimizer, so it has been removed.
    if use_sam:
        model = keras.models.experimental.SharpnessAwareMinimization(model)
    model.compile(
        optimizer=tfa.optimizers.Lookahead(tfa.optimizers.RectifiedAdam(learning_rate=1e-4, weight_decay=0.)),
        metrics=[masked_accuracy, non_match],
        jit_compile=True)

    return model

In [None]:
# Build the pre-training model without the SAM wrapper, write its current
# weights to model.h5 and print the layer summary.
model = get_model(use_sam=False)
model.save_weights('model.h5')
model.summary()

Model: "model"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 encoder_input (InputLayer)     [(None, 537, 392)]   0           []                               
                                                                                                  
 tf.__operators__.getitem_3 (Sl  (None, 537, 42)     0           ['encoder_input[0][0]']          
 icingOpLambda)                                                                                   
                                                                                                  
 tf.__operators__.getitem_4 (Sl  (None, 537, 42)     0           ['encoder_input[0][0]']          
 icingOpLambda)                                                                                   
                                                                                              

In [None]:
class WeightDecayCallback(tf.keras.callbacks.Callback):
    """Sets the optimizer's weight decay to ``learning_rate * wd_ratio`` at the start of every epoch.

    Args:
        wd_ratio: Multiplier applied to the current learning rate.
    """

    def __init__(self, wd_ratio=config.wd_ratio):
        # Initialise the base Callback state (e.g. self.model) as Keras expects.
        super().__init__()
        self.step_counter = 0
        self.wd_ratio = wd_ratio

    def on_epoch_begin(self, epoch, logs=None):
        # Use the model Keras attached to this callback rather than the
        # notebook-global `model`, which may not be the one being trained.
        optimizer = self.model.optimizer
        optimizer.weight_decay = optimizer.learning_rate * self.wd_ratio
        print(f'learning rate: {optimizer.learning_rate.numpy():.2e}, weight decay: {optimizer.weight_decay.numpy():.2e}')

In [None]:
class MultiHeadSoftmax(keras.layers.Layer):
  """Blend of several 250-way softmax heads, with random head skipping in training.

  Starting from the first head, each further head is folded in as
  ``(head + running) / 2``. In training, a head is skipped whenever a uniform
  draw exceeds ``probabs``, and dropout is re-applied to the inputs before
  every head.

  Args:
    probabs: Threshold for the uniform draw; a head is kept when the draw
      does not exceed it.
    num_heads: Number of softmax heads.
    **kwargs: Forwarded to ``keras.layers.Layer``.
  """

  def __init__(self, probabs, num_heads=100, **kwargs):
    super().__init__(**kwargs)
    self.probabs = probabs
    # Was spelled `droput`, while call() read `self.dropout`.
    self.dropout = keras.layers.Dropout(.4)
    self.layers = [keras.layers.Dense(250, activation='softmax', dtype=tf.float32) for i in range(num_heads)]

  def call(self, inputs, training=None):
    x = self.layers[0](inputs)
    if training:
      for layer in self.layers[1:]:
        toss = tf.random.uniform([], minval=0., maxval=1.0)
        inputs = self.dropout(inputs, training=training)
        previous = x
        blended = (layer(inputs) + previous) / 2.
        # tf.cond needs callables for both branches; they are invoked inside
        # this call, so capturing the loop locals is safe.
        x = tf.cond(tf.greater(toss, self.probabs), lambda: previous, lambda: blended)
      return x

    for layer in self.layers[1:]:
      x = (layer(inputs) + x) / 2.0
    return x

In [None]:
class SelfAttentionPooling(keras.layers.Layer):
    """Pools a sequence into one vector using learned per-frame attention weights.

    A ``Dense(1)`` scores every frame, the scores (plus an additive mask) are
    soft-maxed over the frame axis, and the weighted sum of the inputs is
    scaled by ``1 / config.embed_dim``.
    """

    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        self.fc = keras.layers.Dense(1)
        self.scale = (1/config.embed_dim)

    def call(self, inputs, mask):
        """Return the attention-weighted sum of ``inputs`` over axis 1.

        Args:
            inputs: Per-frame features with frames on axis 1.
            mask: Additive bias broadcastable to the frame scores, e.g. a large
                negative value at frames to ignore.
        """
        scores = self.fc(inputs)  # last axis has size 1
        # Normalise across frames (axis 1). The default axis=-1 has size 1
        # here and would turn every weight into exactly 1.
        weights = tf.nn.softmax(scores + mask, axis=1)
        x = tf.reduce_sum(inputs*weights*tf.cast(self.scale, weights.dtype), axis=1)
        return x

In [None]:
class Discriminator1(keras.layers.Layer):
  """Five stacked ``MLPLayer`` blocks followed by a 21-way float32 softmax.

  Args:
    name: Layer name, forwarded to ``keras.layers.Layer``.
    **kwargs: Forwarded to ``keras.layers.Layer``.
  """

  def __init__(self, name='discriminator', **kwargs):
    # Forward `name` explicitly: it is captured by the signature, so it never
    # reaches **kwargs and would otherwise be silently dropped.
    super().__init__(name=name, **kwargs)
    self.mlp = [MLPLayer(config.embed_dim) for i in range(5)]
    self.dense = keras.layers.Dense(21, activation='softmax', dtype=tf.float32)

  def call(self, input):
    """Return class probabilities for ``input``."""
    x = input
    for layer in self.mlp:
      x = layer(x)
    x = self.dense(x)
    return x

In [None]:
class GRL(keras.layers.Layer):
    """Identity in the forward pass; multiplies the incoming gradient by ``-alpha`` in the backward pass.

    Args:
        alpha: Factor whose negation scales the gradient.
        **kwargs: Forwarded to ``keras.layers.Layer``.
    """

    def __init__(self, alpha, **kwargs):
        super().__init__(**kwargs)
        self.alpha = alpha

    @tf.custom_gradient
    def custom_ops(self, x):
        """Return ``x`` unchanged together with its custom gradient function."""
        forward = tf.identity(x)

        def reversed_gradient(upstream):
            return tf.cast(-self.alpha, upstream.dtype) * upstream

        return forward, reversed_gradient

    def call(self, inputs):
        return self.custom_ops(inputs)

In [None]:

class ISLRModel(keras.Model):
    """250-way classifier built on an intermediate output of a pre-trained model.

    The backbone is the wrapped model cut at ``model.layers[-157]``. Its output
    is masked, pooled with ``SelfAttentionPooling``, layer-normalised, passed
    through dropout and classified by a float32 softmax layer named ``main``.

    Args:
        model: A built Keras model whose inputs are reused as this model's inputs.
    """

    def __init__(self, model):
        inputs = model.inputs
        # Non-finite entries become -999; mask is 1 only for frames that are
        # entirely zero after that replacement.
        # NOTE(review): the pre-training model replaces non-finite values with
        # zero before building its mask, so the two masks can differ on
        # non-finite frames — confirm which is intended.
        inputs_ = tf.where(tf.math.is_finite(inputs[0]), inputs[0], tf.ones_like(inputs[0])*-999)
        mask = tf.cast(tf.reduce_mean(tf.abs(inputs_), axis=-1)==0, tf.int32)
        mask_expanded = tf.cast(mask * -1000, config.dtype)[:,:,None]

        # NOTE(review): -157 is a hard-coded position that depends on the exact
        # layer count of the wrapped model.
        backbone = keras.Model(inputs=inputs, outputs=model.layers[-157].output)
        encoder_state = backbone(inputs)

        x = encoder_state * tf.cast((1-mask[:,:,None]), encoder_state.dtype)
        x = SelfAttentionPooling()(x, mask_expanded)
        x = keras.layers.LayerNormalization(epsilon=1e-5)(x)
        x = keras.layers.Dropout(.4)(x)
        output = keras.layers.Dense(250, activation='softmax', dtype=tf.float32, name='main')(x)

        super().__init__(inputs, output)

        self.loss_tracker = keras.metrics.Mean(name='loss')
        self.main_loss = keras.losses.CategoricalCrossentropy(label_smoothing=0.75, reduction='none')

    def test_step(self, data):
        """Evaluate one batch; ``y[0]`` holds the labels, as in ``train_step``."""
        x, y = data
        y_pred = self(x, training=False)
        # The model has a single output, so y_pred is the full batch of
        # predictions; indexing it with [0] would select only the first sample.
        loss = tf.reduce_mean(self.main_loss(y[0], y_pred))

        self.compiled_metrics.update_state(y[0], y_pred)
        self.loss_tracker.update_state(loss)

        result = {m.name: m.result() for m in self.metrics}
        result[self.loss_tracker.name] = self.loss_tracker.result()
        return result

    def train_step(self, data):
        """Run one optimizer update on ``self.main_loss`` and return the metric results."""
        x, y = data
        with tf.GradientTape() as tape:
            y_pred = self(x, training=True)
            loss = self.main_loss(y[0], y_pred)
            loss = tf.reduce_mean(loss)

        trainable_vars = self.trainable_variables
        gradients = tape.gradient(loss, trainable_vars)
        self.optimizer.apply_gradients(zip(gradients, trainable_vars))

        self.compiled_metrics.update_state(y[0], y_pred)
        self.loss_tracker.update_state(loss)
        result = {m.name: m.result() for m in self.metrics}
        result[self.loss_tracker.name] = self.loss_tracker.result()
        return result
        
    

In [None]:
# Write weights.h5 only when val_loss improves (lower is better).
checkpoint_cb = keras.callbacks.ModelCheckpoint(
    'weights.h5',
    monitor='val_loss',
    mode='min',
    save_best_only=True,
    save_weights_only=True,
)

In [None]:
# Phase-1 (pre-training) cross-validation loop.
# NOTE(review): the `break` on the first line of the body exits immediately, so
# nothing below it runs. In particular `i` and `cls_loss` are not assigned by
# this cell, although later cells and ISLTransformer.train_step read them.
for fold in range(config.num_folds):
    break
    # Files are split on the second '_'-separated field of the base name, modulo 5.
    # NOTE(review): eval() is applied to text taken from a file name; int() would
    # be safer if the field is always a plain integer — confirm the naming scheme.
    train_files = [file for file in files if eval(file.split('/')[-1].split('_')[1])%5. not in [fold]]
    val_files = [file for file in files if eval(file.split('/')[-1].split('_')[1])%5.== fold]
    train_ds = get_dataset(train_files)
    val_ds = get_dataset(val_files, val=True)
    # Leaves the first validation batch in `i`.
    for i in val_ds:
      break
    # Read as a global by ISLTransformer.train_step.
    cls_loss = keras.losses.SparseCategoricalCrossentropy(reduction='none')
    with strategy.scope():
        model = get_model(use_sam=False)
    #a = model(i[0])
    model.fit(train_ds, epochs=config.phase1_epochs, validation_data=val_ds, callbacks=[lr_callback, checkpoint_cb])
    # Only the first fold is trained.
    break

In [None]:
# Rebuild the pre-training model, call it once on a batch and wrap it in the
# classifier.
# NOTE(review): `i` is only assigned inside the cross-validation loop, which
# currently exits on its first statement; on a fresh kernel this raises
# NameError unless `i` is defined elsewhere.
model = get_model(use_sam=False)
a = model(i[0]) 
n_model = ISLRModel(model)


In [None]:
# Phase-2 learning rates, one per epoch; this rebinds the LR_SCHEDULE and
# lr_callback names used for phase 1.
LR_SCHEDULE = [
    lrfn(epoch, config.warmup_steps, lr_max=config.lr_max, rate=config.rate, steps=config.epochs)
    for epoch in range(config.epochs)
]
lr_callback = tf.keras.callbacks.LearningRateScheduler(lambda epoch: LR_SCHEDULE[epoch], verbose=0)

In [None]:
# Halve the learning rate after 3 epochs without improvement of the monitored
# accuracy, down to a floor of 1e-6.
# The keyword was misspelled `model='min'`, which the callback ignored. The
# monitored quantity is an accuracy, so improvement means it goes up, hence 'max'.
lr_cb = keras.callbacks.ReduceLROnPlateau(monitor='val_main_accuracy',
                                  factor=0.5,
                                  patience=3,
                                  mode='max',
                                  min_lr=0.000001
                                  )

In [None]:
# Fine-tuning loop for the 250-way classifier. The trailing `break` stops after
# the first fold.
for fold in range(config.num_folds):
    # Files are split on the second '_'-separated field of the base name, modulo 5.
    # NOTE(review): eval() is applied to text taken from a file name; int() would
    # be safer if the field is always a plain integer — confirm the naming scheme.
    # NOTE(review): the modulus is 5 while the loop runs over config.num_folds.
    train_files = [file for file in files if eval(file.split('/')[-1].split('_')[1])%5. not in [fold]]
    val_files = [file for file in files if eval(file.split('/')[-1].split('_')[1])%5.== fold]
    
    train_ds = get_dataset1(train_files, val=False, cond=False) #p_ids=[ 4,  5, 10, 11, 15])
    val_ds = get_dataset1(val_files, val=True, cond=True) #p_ids=[ 4,  5, 10, 11, 15])
    # Rebinds the notebook-level checkpoint_cb with a per-fold file name.
    # NOTE(review): the compiled metric below is named 'accuracy'; confirm that
    # the validation logs really contain 'val_main_accuracy'.
    checkpoint_cb = keras.callbacks.ModelCheckpoint(f'weights_fold{fold}.h5',
                                monitor='val_main_accuracy',
                                save_best_only=True,
                                save_weights_only=True,
                                mode='max')
    # Cosine decay with restarts, starting at 1e-4 with a first period of 1141*100 steps.
    sched = tf.keras.optimizers.schedules.CosineDecayRestarts(
        1e-4,
        1141*100,
        t_mul=1.5,
        m_mul=1.0,
        alpha=0.0005,
        name=None
        )
    
    with strategy.scope():
        model = get_model(use_sam=False)
        # NOTE(review): `i` is not assigned in this cell; it must already exist
        # in the kernel.
        a = model(i[0])
        n_model = ISLRModel(model)
        
        # NOTE(review): ISLRModel's train_step/test_step compute the loss from
        # self.main_loss, so the `loss=` given here is presumably unused — confirm.
        n_model.compile(loss=keras.losses.CategoricalCrossentropy(label_smoothing=0.75),
                      #optimizer=tfa.optimizers.Lookahead(tfa.optimizers.RectifiedAdam(learning_rate=1e-4,)),
                                                         #slow_step_size=0.9,
                                                         #sync_period=10),
                      optimizer=keras.optimizers.Adam(learning_rate=sched,),
                      metrics=[keras.metrics.TopKCategoricalAccuracy(1, name='accuracy'),
                               keras.metrics.TopKCategoricalAccuracy(2, name='top2'),
                               keras.metrics.TopKCategoricalAccuracy(5, name='top5'),
                               keras.metrics.TopKCategoricalAccuracy(10, name='top10')],
                        jit_compile=True)
    
    n_model.fit(train_ds, epochs=200, validation_data=val_ds, callbacks=[checkpoint_cb])
    break

Epoch 1/200
Tensor("categorical_crossentropy/weighted_loss/Mul:0", shape=(8,), dtype=float32)
29
Tensor("categorical_crossentropy/weighted_loss/Mul:0", shape=(8,), dtype=float32)
29
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
Epoch 13/200
Epoch 14/200
Epoch 15/200
Epoch 16/200
Epoch 17/200
Epoch 18/200
Epoch 19/200
Epoch 20/200
Epoch 21/200
Epoch 22/200
Epoch 23/200
Epoch 24/200
Epoch 25/200
Epoch 26/200
Epoch 27/200
Epoch 28/200
Epoch 29/200
Epoch 30/200
Epoch 31/200
Epoch 32/200
Epoch 33/200
Epoch 34/200
Epoch 35/200
Epoch 36/200
Epoch 37/200
Epoch 38/200
Epoch 39/200
Epoch 40/200
Epoch 41/200
Epoch 42/200
Epoch 43/200
Epoch 44/200
Epoch 45/200
Epoch 46/200
Epoch 47/200
Epoch 48/200
Epoch 49/200
Epoch 50/200
Epoch 51/200
Epoch 52/200
Epoch 53/200
Epoch 54/200
Epoch 55/200
Epoch 56/200
Epoch 57/200
Epoch 58/200
Epoch 59/200
Epoch 60/200
Epoch 61/200
Epoch 62/200
Epoch 63/200
Epoch 64/200
Epoch 6

In [None]:
# Mount Google Drive at /content/drive (Colab only).
from google.colab import drive
drive.mount('/content/drive')

In [None]:
# Copy the fold-0 checkpoint into the mounted Drive folder.
!cp /content/weights_fold0.h5 /content/drive/MyDrive