In [1]:
#%pip install -q keras_nlp
#%pip install -q tflite_runtime

#import gc
normalized_length = 10

utils

In [2]:
import math
import time
import tensorflow as tf
import numpy as np
import pandas as pd
import os
from os.path import join as pjoin

ROWS_PER_FRAME = 543  # number of landmarks per frame

def load_relevant_data_subset(pq_path):
    data_columns = ['x', 'y', 'z']
    data = pd.read_parquet(pq_path, columns=data_columns)
    n_frames = int(len(data) / ROWS_PER_FRAME)
    data = data.values.reshape(n_frames, ROWS_PER_FRAME, len(data_columns))
    return data.astype(np.float32)

def tf_get_features(ftensor):
    def feat_wrapper(ftensor):
        return load_relevant_data_subset(ftensor.numpy().decode('utf-8'))
    return tf.py_function(
        feat_wrapper,
        [ftensor],
        Tout=tf.float32
    )

def tf_nan_mean(x, axis=0, keepdims=True):
    return (tf.reduce_sum(
        tf.where(tf.math.is_nan(x), tf.zeros_like(x), x), 
        axis=axis, keepdims=keepdims) 
        / tf.reduce_sum(
            tf.where(
                tf.math.is_nan(x), tf.zeros_like(x), tf.ones_like(x)), 
            axis=axis, keepdims=keepdims))

def tf_nan_std(x, axis=0, keepdims=True):
    d = x - tf_nan_mean(x, axis=axis, keepdims=keepdims)
    return tf.math.sqrt(tf_nan_mean(d * d, axis=axis, keepdims=keepdims))

def asMinutes(s):
    m = math.floor(s / 60)
    s -= m * 60
    return "%dm %ds" % (m, s)

def timeSpent(since):
    now = time.time()
    s = now - since
    return asMinutes(s)

class TimeLimitCallback(tf.keras.callbacks.Callback):
    def __init__(self, start_time, max_duration_hours=8, max_duration_minutes=30):
        super(TimeLimitCallback, self).__init__()
        self.start_time = start_time
        self.max_duration_seconds = max_duration_hours * 3600 + max_duration_minutes * 60

    def on_train_batch_end(self, batch, logs=None):
        elapsed_time = time.time() - self.start_time
        if elapsed_time > self.max_duration_seconds:
            self.model.stop_training = True
            print(f"Training stopped: time limit of {self.max_duration_seconds/3600:.1f} hours exceeded")

@tf.autograph.experimental.do_not_convert
def detuple(v, l, g, pid, sid):
    return (v, l)

@tf.autograph.experimental.do_not_convert
def ensure(shape):
    return lambda x : tf.ensure_shape(x, shape)

def scheduler(epoch, lr):
    if epoch < 8:
        return lr
    else:
        return lr * tf.math.exp(-0.1)
    
def create_load_tfrecords(new=False):
    idx = 0
    while f"run_{idx}" in os.listdir('.'):
        idx +=1

    if new:
        path = f"run_{idx}"
        print(f"Results will be saved at path {path}")
        os.makedirs(path)
        # !mkdir $path

    else:
        path = f"run_{idx-1}"
        print(f"Using path {path}")
    
    return pjoin(path, 'full_cv.tfrecord')

Datasets

In [3]:
from os.path import join as pjoin
from os.path import exists
import os
from sklearn.model_selection import StratifiedGroupKFold, StratifiedKFold
import pandas as pd
import tensorflow as tf
from tqdm import tqdm
import numpy as np
import json

# from utils import tf_get_features

try :
    BASE_DIR = "/kaggle/input/asl-signs/"
    df = pd.read_csv(pjoin(BASE_DIR, "train.csv"))
except :
    BASE_DIR = "asl-signs"
    df = pd.read_csv(pjoin(BASE_DIR, "train.csv"))

path2label = dict(zip(df.path, df.sign))
label2int = json.load(open(pjoin(BASE_DIR, "sign_to_prediction_index_map.json"), 'rb'))

# KFold
def get_KFold_dataset(preprocessing, shape=(None, 122), path='untitled', n_splits=7):
    dir_path = path
    path = pjoin(path, 'full_cv.tfrecord')

    def parse_function(example_proto):
        feature_description = {
            # 'vector': tf.io.FixedLenSequenceFeature(shape=(164), dtype=tf.float32, allow_missing=True), #useful when using padded batches
            'vector': tf.io.FixedLenFeature(shape=(), dtype=tf.string),
            'label': tf.io.FixedLenFeature(shape=(), dtype=tf.int64),
            'group': tf.io.FixedLenFeature(shape=(), dtype=tf.int64),
            'pid': tf.io.FixedLenFeature(shape=(), dtype=tf.int64),
            'sid': tf.io.FixedLenFeature(shape=(), dtype=tf.int64)
        }
        parsed_example = tf.io.parse_single_example(example_proto, feature_description)
        vector = tf.io.parse_tensor(parsed_example['vector'], out_type=tf.float32)
        vector = tf.expand_dims(vector, 0)
        vector = tf.RaggedTensor.from_tensor(vector, ragged_rank=1)
        vector = tf.squeeze(vector, axis=0)
        return vector, parsed_example['label'], parsed_example['group'], parsed_example['pid'], parsed_example['sid']

    if exists(dir_path):
        print(f"Reloading dataset from path {path}")
    else:
        print(f"Dataset will be saved at path {path}")
        os.makedirs(dir_path)

        ds = tf.data.TFRecordDataset(path)
        X_ds = tf.data.Dataset.from_tensor_slices(
            BASE_DIR + "/" + df.path.values
            ).map(tf_get_features)
        y_ds = tf.data.Dataset.from_tensor_slices(
            df.sign.map(label2int).values.reshape(-1,1)
            )
        
        # Perform stratisfied kfold split
        sgkf = StratifiedGroupKFold(n_splits=n_splits, random_state=42, shuffle=True)

        fold2id = dict()
        for fold_idx, (index_train, index_valid) in enumerate(sgkf.split(df.path, df.sign, df.participant_id)):
            fold2id[fold_idx] = np.unique(df.participant_id.values[index_valid])
            
        id2fold = dict()
        for k,v in fold2id.items():
            for vv in v:
                id2fold[vv] = k

        g_ds = tf.data.Dataset.from_tensor_slices(
            df.participant_id.map(id2fold).values.reshape(-1, 1)
        )
        pid_ds = tf.data.Dataset.from_tensor_slices(
            df.participant_id.values
        )
        sid_ds = tf.data.Dataset.from_tensor_slices(
            df.sequence_id.values
        )
        
        with tf.io.TFRecordWriter(path) as writer:
            zipper = zip(X_ds.map(lambda x: tf.ensure_shape(x, (None, 543, 3))).map(preprocessing).map(lambda x: tf.ensure_shape(x, shape)), y_ds, g_ds, pid_ds, sid_ds)
            for example in zipper:
                X, y, g, pid, sid = example
                serialized_X = tf.io.serialize_tensor(X).numpy()
                feature = {
                    'vector': tf.train.Feature(bytes_list=tf.train.BytesList(value=[serialized_X])),
                    'label': tf.train.Feature(int64_list=tf.train.Int64List(value=y.numpy().flatten())),
                    'group': tf.train.Feature(int64_list=tf.train.Int64List(value=g.numpy().flatten())),
                    'pid': tf.train.Feature(int64_list=tf.train.Int64List(value=pid.numpy().flatten())),
                    'sid': tf.train.Feature(int64_list=tf.train.Int64List(value=sid.numpy().flatten())),
                }
                example_proto = tf.train.Example(features=tf.train.Features(feature=feature))
                writer.write(example_proto.SerializeToString())

    ds = tf.data.TFRecordDataset(path)
    ds = ds.map(parse_function).prefetch(buffer_size=tf.data.experimental.AUTOTUNE)
    ds = ds.map(
        lambda d, l, g, pid, sid: (tf.ensure_shape(d, shape), l, g, pid, sid)
        )
    
    return ds

preprocess_models

In [4]:
import tensorflow as tf
tfkl = tf.keras.layers
# from utils import *

lipsUpperOuter = [61, 185, 40, 39, 37, 0, 267, 269, 270, 409, 291]
lipsLowerOuter = [146, 91, 181, 84, 17, 314, 405, 321, 375, 291]
lipsUpperInner = [78, 191, 80, 81, 82, 13, 312, 311, 310, 415, 308]
lipsLowerInner = [78, 95, 88, 178, 87, 14, 317, 402, 318, 324, 308]
lips_idx = list(set(lipsUpperOuter + lipsLowerOuter + lipsUpperInner + lipsLowerInner))
lips_idx = tf.constant(lips_idx, dtype=tf.int32)

# Rq: possible d'avoir des timesteps hand incomplets ? Avec par exemple quelques landmarks de hand NaN sur ce timestep ? -> nanmean

class Preprocess(tf.keras.layers.Layer):
    def __init__(self, normalized_length):
        super(Preprocess, self).__init__()
        self.normalized_length = normalized_length

    def call(self, frames):
        frames = frames[:, :, :2] #drop z axis

        # Hands
        lh_frames, rh_frames = frames[:, 468:489], frames[:, 522:]

        lh_frames_x, lh_frames_y = lh_frames[:, :, 0], lh_frames[:, :, 1]
        rh_frames_x, rh_frames_y = rh_frames[:, :, 0], rh_frames[:, :, 1]
        lh_frames, rh_frames = tf.stack([lh_frames_x, 1-lh_frames_y], axis=-1), tf.stack([1-rh_frames_x, 1-rh_frames_y], axis=-1)

        hand = tf.stack([lh_frames, rh_frames], axis=0)
        hand = tf.where(tf.math.is_nan(hand), tf.zeros_like(tf.math.is_nan(hand), dtype=tf.float32), hand)
        hand = tf.reduce_sum(hand, axis=0)

        handsNanMask = tf.cast(tf.reduce_sum(hand, axis=[1, 2]), tf.bool) ## drops timestep having no hand data
        hand = tf.boolean_mask(hand, handsNanMask, axis=0)

        # Pose
        # pose = frames[:, 489:522]

        # Lips
        lips = tf.gather(frames, lips_idx, axis=1)
        lips = tf.boolean_mask(lips, handsNanMask, axis=0)
        lips = tf.where(tf.math.is_nan(lips), tf_nan_mean(lips), lips)
        lips = tf.where(tf.math.is_nan(lips), tf.zeros_like(lips), lips)

        # return tfkl.Flatten()(tf.concat([hand, lips], axis=1))

        # Time reduction ?
        raw_data = tf.concat([hand, lips], axis=1)
        source_length = tf.shape(raw_data)[0]
        normalized_idx = tf.linspace(0.0, tf.cast(source_length-1, tf.float32), self.normalized_length+1)
        # normalized_idx = tf.cast(tf.round(normalized_idx, tf.int32), tf.int32)
        normalized_idx = tf.cast(normalized_idx, tf.int32)

        sampled = list()
        for idx in range(self.normalized_length):
            start, end = normalized_idx[idx], normalized_idx[idx+1]
            # end = end + tf.cast(tf.equal(start, end), tf.int32) # avoid null length
            sample = raw_data[start:end]
            # sample = tf.concat([tf_nan_mean(sample), tf_nan_std(sample)], axis=1) #changer ordre ici
            sample = tf_nan_mean(sample)
            sampled.append(sample)
        sampled = tf.concat(sampled, axis=0)

        #tmp
        sampled = tfkl.Flatten()(sampled)

        sampled = tf.where(tf.math.is_nan(sampled), tf.zeros_like(tf.math.is_nan(sampled), dtype=tf.float32), sampled)
        sampledNanMask = tf.cast(tf.reduce_sum(sampled, axis=1), tf.bool)
        sampled = tf.boolean_mask(sampled, sampledNanMask, axis=0)
        return sampled

models

In [5]:
from keras_nlp.layers.transformer_encoder import TransformerEncoder
# from keras_nlp.layers import SinePositionEncoding
import tensorflow as tf
tfkl = tf.keras.layers

def get_model(
        hp, 
        input_shape=(10, (2*(21 + 40)))
        ):
    
    inputs = tf.keras.Input(input_shape, dtype=tf.float32)
    
    vector = tfkl.Bidirectional(tfkl.GRU(hp['gru1'], return_sequences=True))(inputs) #hp['gru1']=96 first test
    vector = tfkl.BatchNormalization()(vector)
    vector = tfkl.Activation('gelu')(vector)
    
    for _ in range(1):
        vector = TransformerEncoder(intermediate_dim=hp['ff_dim'], num_heads=hp['nhead'], dropout=hp['input_dropout'])(vector) #hp['input_dropout']=0.3 first test
        #hp['nhead']=12, hp['ff_dim']=160
    vector = tfkl.Bidirectional(tfkl.GRU(hp['gru2']))(vector) #hp['gru2']=96 first test
    vector = tfkl.Dropout(hp['output_dropout'])(vector) #hp['output_dropout']=0.2 first test

    output = tfkl.Dense(250, activation="softmax")(vector)
    model = tf.keras.Model(inputs=inputs, outputs=output)
    return model

rest

In [6]:
folds = 3
path = "preprocessed_datasets/testkaggle/"

ds = get_KFold_dataset(Preprocess(normalized_length), (None, 122), path, folds) #tmp
hp = {
    "gru1": 80,
    'nhead': 16,
    'ff_dim': 192,
    'input_dropout': 0.2,
    'gru2': 128,
    'output_dropout': 0.3
}

get_model(hp).summary()

Dataset will be saved at path preprocessed_datasets/testkaggle/full_cv.tfrecord
Instructions for updating:
Lambda fuctions will be no more assumed to be used in the statement where they are used, or at least in the same block. https://github.com/tensorflow/tensorflow/issues/56089
Cause: could not parse the source code of <function get_KFold_dataset.<locals>.<lambda> at 0x103c141f0>: found multiple definitions with identical signatures at the location. This error may be avoided by defining each lambda on a single line and with unique argument names. The matching definitions were:
Match 0:
lambda x: tf.ensure_shape(x, shape)

Match 1:
lambda x: tf.ensure_shape(x, (None, 543, 3))

Cause: could not parse the source code of <function get_KFold_dataset.<locals>.<lambda> at 0x103c141f0>: found multiple definitions with identical signatures at the location. This error may be avoided by defining each lambda on a single line and with unique argument names. The matching definitions were:
Match 0:

2023-04-02 16:01:54.660810: W tensorflow/tsl/platform/profile_utils/cpu_utils.cc:128] Failed to get CPU frequency: 0 Hz


Model: "model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_1 (InputLayer)        [(None, 10, 122)]         0         
                                                                 
 bidirectional (Bidirectiona  (None, 10, 160)          97920     
 l)                                                              
                                                                 
 batch_normalization (BatchN  (None, 10, 160)          640       
 ormalization)                                                   
                                                                 
 activation (Activation)     (None, 10, 160)           0         
                                                                 
 transformer_encoder (Transf  (None, 10, 160)          165472    
 ormerEncoder)                                                   
                                                             

In [9]:
from tqdm.keras import TqdmCallback

val_accs = list()  
for fold_idx in range(folds):
    start = time.time()
    print(f'\n            *** Fold {fold_idx} ***\n')
    train_ds = ds.filter(lambda v, l, g, pid, sid: g != fold_idx).map(detuple).padded_batch(16)
    valid_ds = ds.filter(lambda v, l, g, pid, sid: g == fold_idx).map(detuple).padded_batch(16)
    model = get_model(hp)

    lr = 1e-3

    model.compile(
        tf.keras.optimizers.Adam(learning_rate=lr),
        tf.keras.losses.SparseCategoricalCrossentropy(from_logits=False),
        metrics=[
            tf.keras.metrics.SparseCategoricalAccuracy(),
        ]
    )
    
    hist = model.fit(
        x=train_ds,
        epochs=100,
        verbose=1,
       callbacks=[
            #TqdmCallback(verbose=0),
            #TimeLimitCallback(start, 1, 10),
            TimeLimitCallback(start, 2, 30),
            tf.keras.callbacks.LearningRateScheduler(scheduler),
            tf.keras.callbacks.ModelCheckpoint(pjoin(path.split('/')[0], f"model_{fold_idx}"), 
                save_best_only=True, 
                save_weights_only=True,
                restore_best_weights=True, 
                monitor="val_sparse_categorical_accuracy", mode="max"),
            tf.keras.callbacks.EarlyStopping(patience=10, monitor="val_sparse_categorical_accuracy", mode="max", restore_best_weights=True)
            ],
        validation_data=valid_ds,
        validation_freq=1,
        workers=2,
        use_multiprocessing=True
    )

    best_acc = max(hist.history['val_sparse_categorical_accuracy'])
    print("Best acc fold", fold_idx, ":\n ->", 100*round(best_acc, 4), "%")
    val_accs.append(
        best_acc
    )
    # break

print("Bagged final valid acc score:")
bagged_score = 100*np.round(np.array(val_accs).mean(), 4)
print(bagged_score, "%")


            *** Fold 0 ***

Epoch 1/100
Epoch 2/100
 388/3899 [=>............................] - ETA: 55s - loss: 2.7203 - sparse_categorical_accuracy: 0.3487

In [None]:
del df
#del train_ds
#del valid_ds
#del ds
#del model
gc.collect()

In [None]:
def get_inference_model():
    inputs = tf.keras.Input((543, 3), dtype=tf.float32, name="inputs")
    
    frames = inputs[:, :, :2] #drop z axis

    # Hands
    lh_frames, rh_frames = frames[:, 468:489], frames[:, 522:]

    lh_frames_x, lh_frames_y = lh_frames[:, :, 0], lh_frames[:, :, 1]
    rh_frames_x, rh_frames_y = rh_frames[:, :, 0], rh_frames[:, :, 1]
    lh_frames, rh_frames = tf.stack([lh_frames_x, 1-lh_frames_y], axis=-1), tf.stack([1-rh_frames_x, 1-rh_frames_y], axis=-1)

    hand = tf.stack([lh_frames, rh_frames], axis=0)
    hand = tf.where(tf.math.is_nan(hand), tf.zeros_like(tf.math.is_nan(hand), dtype=tf.float32), hand)
    hand = tf.reduce_sum(hand, axis=0)

    handsNanMask = tf.cast(tf.reduce_sum(hand, axis=[1, 2]), tf.bool) ## drops timestep having no hand data
    hand = tf.boolean_mask(hand, handsNanMask, axis=0)

    # Pose
    # pose = frames[:, 489:522]

    # Lips
    lips = tf.gather(frames, lips_idx, axis=1)
    lips = tf.boolean_mask(lips, handsNanMask, axis=0)
    lips = tf.where(tf.math.is_nan(lips), tf_nan_mean(lips), lips)
    lips = tf.where(tf.math.is_nan(lips), tf.zeros_like(lips), lips)

    # return tfkl.Flatten()(tf.concat([hand, lips], axis=1))

    # Time reduction ?
    raw_data = tf.concat([hand, lips], axis=1)
    source_length = tf.shape(raw_data)[0]
    normalized_idx = tf.linspace(0.0, tf.cast(source_length-1, tf.float32), normalized_length+1)
    # normalized_idx = tf.cast(tf.round(normalized_idx, tf.int32), tf.int32)
    normalized_idx = tf.cast(normalized_idx, tf.int32)

    sampled = list()
    for e,idx in enumerate(range(normalized_length)):
        start, end = normalized_idx[idx], normalized_idx[idx+1]
        if e==0:
            end = end + tf.cast(tf.equal(start, end), tf.int32) # avoid null length
        sample = raw_data[start:end]
        # sample = tf.concat([tf_nan_mean(sample), tf_nan_std(sample)], axis=1) #changer ordre ici
        sample = tf_nan_mean(sample)
        sampled.append(sample)
    sampled = tf.concat(sampled, axis=0)

    #tmp
    sampled = tfkl.Flatten()(sampled)

    sampled = tf.where(tf.math.is_nan(sampled), tf.zeros_like(tf.math.is_nan(sampled), dtype=tf.float32), sampled)
    sampledNanMask = tf.cast(tf.reduce_sum(sampled, axis=1), tf.bool)
    sampled = tf.boolean_mask(sampled, sampledNanMask, axis=0)
    
    pad_length = 10 - tf.shape(sampled)[0]
    zero = tf.zeros_like(sampled[:1])
    pad = tf.repeat(zero, pad_length, axis=0)
    sampled = tf.concat([sampled, pad], axis=0)
    
    diffs = tf.expand_dims(sampled, 0)
    #diffs = next(iter(tf.data.Dataset.from_tensor_slices(diffs).padded_batch(1, (10, 122))))

    models = [get_model(hp) for _ in range(folds)]
    #for fold_idx in range(folds):
    #    models[fold_idx].load_weights(pjoin(path.split('/')[0], f"model_{fold_idx}"))
    outputs = [model(diffs) for model in models]
    vector = tf.reduce_mean(outputs, axis=0) #average the models

    output = tfkl.Activation(activation="linear", name="outputs")(vector)
    inference_model = tf.keras.Model(inputs=inputs, outputs=output) 
    inference_model.compile(loss=tf.keras.losses.SparseCategoricalCrossentropy(), metrics=["accuracy"])
    return inference_model

inference_model = get_inference_model()
#inference_model.summary()
params = inference_model.count_params()
print(params/1e6)

In [None]:
converter = tf.lite.TFLiteConverter.from_keras_model(inference_model)
tflite_model = converter.convert()
model_path = "model.tflite"
# Save the model.
with open(model_path, 'wb') as f:
    f.write(tflite_model)

In [None]:
!zip submission.zip $model_path

In [None]:
BASE_DIR = "/kaggle/input/asl-signs/"
train_df = pd.read_csv(pjoin(BASE_DIR, "train.csv"))

path2label = dict(zip(train_df.path, train_df.sign))
label2int = json.load(open(pjoin(BASE_DIR, "sign_to_prediction_index_map.json"), 'rb'))

int2label = {v:k for k,v in label2int.items()}

import tflite_runtime.interpreter as tflite
interpreter = tflite.Interpreter(model_path)
found_signatures = list(interpreter.get_signature_list().keys())
prediction_fn = interpreter.get_signature_runner("serving_default")
#for i in range(len(train_df)):
for i in range(50):
    frames = load_relevant_data_subset(f'/kaggle/input/asl-signs/{train_df.iloc[i].path}')
    output = prediction_fn(inputs=frames)
    sign = np.argmax(output["outputs"])
    print(f"Predicted label: {int2label[sign]}, Actual Label: {train_df.iloc[i].sign} (shape {frames.shape})")