In [1]:
#!/usr/bin/env python
# coding: utf-8
import os
import datetime
import json
from tqdm import tqdm

import numpy as np
import pandas as pd

import tensorflow as tf
from sklearn.model_selection import train_test_split
from data_preprocess import get_key_frames_by_cluster

import joblib
import matplotlib.pyplot as plt

2023-03-10 12:56:22.610847: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2023-03-10 12:56:25.300932: E tensorflow/stream_executor/cuda/cuda_blas.cc:2981] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2023-03-10 12:56:29.263717: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: :/home/maxpow/miniconda3/envs/tf/lib/:/home/maxpow/miniconda3/envs/tf/lib/
2023-03-10 12:56:29.263803: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_pl

## Constants

In [19]:
# Hyperparams
# `mirror` toggles the training-set augmentation that appends an x-negated copy of the data.
mirror = True
# Every sequence is resized to this many frames before being fed to the model.
MAX_FRAMES = 15
MAX_SEQ_LENGTH = MAX_FRAMES
# Landmarks per frame and coordinates kept per landmark; a frame is flattened
# to NUM_FEATURES values laid out as (x0, y0, x1, y1, ...).
N_PTS = 543
N_DIMS = 2
NUM_FEATURES = N_PTS*N_DIMS

# Half-open [start, end) landmark index ranges inside the N_PTS landmarks of a frame.
START_FACE, END_FACE = (0, 468)
START_LHAND, END_LHAND = (468, 489)
START_POSE, END_POSE = (489, 522)
START_RHAND, END_RHAND = (522, 543)
# Landmark ids used for the lips input of the model (presumably face-mesh indices — TODO confirm).
LIPS_PTS = [61, 185, 40, 39, 37, 0, 267, 269, 270, 409, 291, 78, 191, 80, 81, 82, 13, 312, 311, 310, 415, 308, 95, 88, 178, 87, 14, 317, 402, 318, 324, 146, 91, 181, 84, 17, 314, 405, 321, 375]

# Training schedule: early-stopping patience, batch size and epoch cap.
PATIENCE = 16
BATCH_SIZE = 128
EPOCHS = 500

# Initial learning rate and the ReduceLROnPlateau settings.
LR_START = 0.001
REDUCE_LR_PATIENCE = 4
REDUCE_LR_FACTOR = 0.2

# Cache files for the pre-processed features and labels.
X_npy_fname = f'X-all-{MAX_FRAMES:02}_frames_key_resize_bilinear.npy'
y_npy_fname = f'y.npy'

COMP = os.environ.get('COMP_NAME', '?')
# A notebook has no real __file__; the current working directory is used in its place.
__file__ = os.path.abspath('')
# Parent directory of the working directory.
MODEL_DIR = '/'.join((__file__).split('/')[:-1])
print(f'{MODEL_DIR=}')
METRIC_STR = '_xx_val_acc-'
# Text after METRIC_STR in MODEL_DIR. NOTE(review): when METRIC_STR is absent this is
# the whole MODEL_DIR path (slashes included) — confirm that is intended.
MODEL_NAME = MODEL_DIR.split(METRIC_STR)[-1]

# Human-readable run identifier used in file names, TensorBoard log dirs and Slack messages.
# The optional '-mirror' segment is parenthesised on purpose: adjacent string literals are
# concatenated *before* a conditional expression is evaluated, so an unparenthesised
# `'...' '-mirror' if mirror else '' '...'` would make the if/else choose between
# "everything before" and "everything after" instead of toggling a single segment.
model_details = (
    f'{MODEL_NAME}'
    f'-key_frames_all_resize_bilinear'
    f'-{MAX_FRAMES:02}_frames'
    f'-{N_PTS}_pts_per_frame'
    f'-{N_DIMS}_dims'
    + ('-mirror' if mirror else '')
    + f'-{BATCH_SIZE}_batch_size'
)

DATA_ROOT = '../data/'
DF_TRAIN =  f'{DATA_ROOT}train.csv'

# Training index table; the cells below read its `sign` and `path` columns.
train = pd.read_csv(DF_TRAIN)
# Load the sign -> class-index map directly from DATA_ROOT. The `read_dict` helper and
# the `CFG` class are only defined in a later cell, so calling them here raises
# NameError on a fresh kernel (Restart & Run All).
with open(os.path.expanduser(f"{DATA_ROOT}sign_to_prediction_index_map.json"), "r") as f:
    label_index = json.load(f)
# Inverse map: class index -> sign name, used to decode predictions.
index_label = {label_index[key]: key for key in label_index}
# Integer class label per row; an unknown sign raises KeyError rather than yielding NaN.
train["label"] = train["sign"].map(lambda sign: label_index[sign])

# NOTE(review): this overrides the MODEL_DIR derived from the working directory above
# with a machine-specific absolute path.
MODEL_DIR='/home/maxpow/Workspace/kaggle-asl/kaggle-asl-signs'


## Data pre-processing

## Function Definitions

In [20]:
class CFG:
    """Notebook-wide settings, read as plain class attributes (never instantiated here)."""

    # --- dataset layout ---
    # Prefix prepended to every relative path read from the training table.
    data_path = DATA_ROOT
    # Rows that make up one frame in a landmark parquet file.
    rows_per_frame = 543
    # Number of target classes (presumably the number of distinct signs — TODO confirm).
    num_classes = 250

    # --- run-mode switches ---
    # Gates the feature-cache loading/building cell.
    is_training = True
    # Not read anywhere in the visible cells.
    quick_experiment = False
    use_aggregation_dataset = True

def load_relevant_data_subset_with_imputation(pq_path):
    """Read the x/y/z columns of a landmark parquet file, with NaN replaced by 0.

    Args:
        pq_path: Path of the parquet file to read.

    Returns:
        float32 array of shape (n_frames, CFG.rows_per_frame, 3), where
        n_frames is the row count divided by CFG.rows_per_frame (rounded down).
    """
    coord_cols = ['x', 'y', 'z']
    table = pd.read_parquet(pq_path, columns=coord_cols)
    # Missing coordinates are imputed with zero.
    table = table.replace(np.nan, 0)
    frame_count = len(table) // CFG.rows_per_frame
    stacked = table.values.reshape(frame_count, CFG.rows_per_frame, len(coord_cols))
    return stacked.astype(np.float32)

def load_relevant_data_subset(pq_path):
    """Read the x/y/z columns of a landmark parquet file, leaving NaN values in place.

    Args:
        pq_path: Path of the parquet file to read.

    Returns:
        float32 array of shape (n_frames, CFG.rows_per_frame, 3), where
        n_frames is the row count divided by CFG.rows_per_frame (rounded down).
    """
    coord_cols = ['x', 'y', 'z']
    table = pd.read_parquet(pq_path, columns=coord_cols)
    frame_count = len(table) // CFG.rows_per_frame
    stacked = table.values.reshape(frame_count, CFG.rows_per_frame, len(coord_cols))
    return stacked.astype(np.float32)

def read_dict(file_path):
    """Parse a JSON file and return the decoded object.

    Args:
        file_path: Path to the JSON file; a leading `~` is expanded to the home directory.

    Returns:
        Whatever `json.load` produces for the file's contents.
    """
    with open(os.path.expanduser(file_path), "r") as handle:
        return json.load(handle)


## Start pre-processing here

In [None]:
if CFG.is_training:
    try:
        # Fast path: reuse the features/labels cached by a previous run.
        X = np.load(X_npy_fname)
        y = np.load(y_npy_fname)
    except (OSError, ValueError, EOFError) as cache_error:
        # Only a missing/unreadable cache triggers a rebuild. A bare `except:` would also
        # swallow KeyboardInterrupt and genuine bugs, silently starting a very long loop.
        print(f'Feature cache not usable ({cache_error!r}); rebuilding from parquet files')
        X = np.zeros((len(train), MAX_FRAMES, NUM_FEATURES))
        y = np.zeros((len(train),))
        for i in tqdm(range(len(train))):
            row = train.iloc[i]
            data = load_relevant_data_subset_with_imputation(f'{CFG.data_path}{row.path}')
            ## Frame Aggregation
            # The (n_frames, N_PTS, N_DIMS) sequence is treated as an image and resized
            # along the frame axis, so every sample ends up with exactly MAX_FRAMES frames.
            data_key_frames = tf.image.resize(
                data[:,:,:N_DIMS],
                size=(MAX_FRAMES, N_PTS),
                method='bilinear', #DEFAULT
            )
            # Flatten each frame to (x0, y0, x1, y1, ...) -> NUM_FEATURES columns.
            X[i] = tf.reshape(
                tensor=data_key_frames,
                shape=(MAX_FRAMES, NUM_FEATURES),
            )
            y[i] = row.label
        # Cache the result so later runs can skip the loop above.
        np.save(X_npy_fname, X)
        np.save(y_npy_fname, y)

    print(X.shape, y.shape)

# Hold out 20% of the samples for validation, stratified by label; fixed seed for repeatability.
X_train, X_val, y_train, y_val = train_test_split(
    X, y,
    test_size=0.2,
    random_state=27,
    stratify=y,
)

# Double the training data by appending a copy whose x coordinates are negated.
if mirror:
    # Features are flattened per frame as (x0, y0, x1, y1, ...), so the x coordinates
    # live at every N_DIMS-th column starting at 0. Indexing the last axis with a plain
    # `0` would negate only the x of landmark 0 and leave the "mirrored" copy an
    # almost exact duplicate of the original.
    # NOTE(review): negation maps x to -x; the left/right hand slots are not swapped.
    # Confirm this is the intended augmentation.
    X_mirrored = X_train.copy()
    X_mirrored[:, :, 0::N_DIMS] *= -1
    X_train = np.concatenate([X_train, X_mirrored], axis=0)
    # Labels are unchanged by mirroring; repeat them in the same order.
    y_train = np.concatenate([y_train, y_train], axis=0)
    


## Model Definition

In [9]:
# # RNN Model 
# > https://keras.io/examples/vision/video_classification/

# Utility for our sequence model.
def _hand_lips_branch(hand_features, lips_features):
    """Build one hand+lips branch: two stacked GRUs followed by two Dense/BN/ReLU/Dropout stages."""
    branch = tf.keras.layers.Concatenate()([hand_features, lips_features])
    branch = tf.keras.layers.GRU(128, return_sequences=True)(branch)
    branch = tf.keras.layers.GRU(64)(branch)
    # FCN
    for units in (256, 128):
        branch = tf.keras.layers.Dense(units)(branch)
        branch = tf.keras.layers.BatchNormalization()(branch)
        branch = tf.keras.layers.Activation('relu')(branch)
        branch = tf.keras.layers.Dropout(0.2)(branch)
    return branch


def get_sequence_model(
    max_frames: int,
    num_features: int,
    n_classes: int = 250,
    lips_as_points: bool = False,
):
    """Build and compile the two-branch GRU classifier.

    The input is a (max_frames, num_features) sequence whose feature axis is the
    flattened landmark layout (x0, y0, x1, y1, ...). The left-hand and right-hand
    column ranges are each concatenated with a lips input and fed through their own
    GRU + dense branch; the two branches are then merged into a softmax head.

    Args:
        max_frames: Number of frames per input sequence.
        num_features: Flattened features per frame. The column slices below assume
            the N_PTS * N_DIMS layout.
        n_classes: Size of the softmax output.
        lips_as_points: When False (default, legacy behaviour) LIPS_PTS are used
            directly as column indices of the flattened feature axis. Because that axis
            interleaves x and y, this selects len(LIPS_PTS) columns that are not the
            lips landmarks' coordinates. When True, every coordinate of each LIPS_PTS
            landmark is gathered (len(LIPS_PTS) * N_DIMS columns). The default is kept
            so weights trained with the legacy layout still load; models built with
            True are not weight-compatible with models built with False.

    Returns:
        A compiled `tf.keras.Model` (sparse categorical cross-entropy loss, Nadam
        optimizer at LR_START, accuracy metric).
    """
    frame_features_input = tf.keras.Input((max_frames, num_features))

    if lips_as_points:
        lips_columns = [pt * N_DIMS + dim for pt in LIPS_PTS for dim in range(N_DIMS)]
    else:
        lips_columns = list(LIPS_PTS)

    # Data's dimensions were flattened so need to get the relevant pieces.
    # `output_shape` is the per-sample shape of what each lambda really returns:
    # (frames, selected columns), built from the `max_frames` argument.
    input_lhand = tf.keras.layers.Lambda(
        lambda x: x[:, :, START_LHAND*N_DIMS:END_LHAND*N_DIMS],
        output_shape=(max_frames, (END_LHAND - START_LHAND) * N_DIMS),
    )(frame_features_input)
    input_rhand = tf.keras.layers.Lambda(
        lambda x: x[:, :, START_RHAND*N_DIMS:END_RHAND*N_DIMS],
        output_shape=(max_frames, (END_RHAND - START_RHAND) * N_DIMS),
    )(frame_features_input)
    input_lips = tf.keras.layers.Lambda(
        lambda x: tf.gather(x, lips_columns, axis=2),
        output_shape=(max_frames, len(lips_columns)),
    )(frame_features_input)

    ## RNN
    # Layers are created left branch first, then right branch, then the head; keep this
    # order so the model's layer ordering and auto-generated names stay stable.
    ## lhand + lips
    l = _hand_lips_branch(input_lhand, input_lips)
    ## rhand + lips
    r = _hand_lips_branch(input_rhand, input_lips)

    # Merge both hands and classify.
    concat_hands = tf.keras.layers.Concatenate()([l, r])
    x = tf.keras.layers.Dense(128)(concat_hands)
    x = tf.keras.layers.BatchNormalization()(x)
    x = tf.keras.layers.Activation('relu')(x)
    x = tf.keras.layers.Dropout(0.2)(x)

    x = tf.keras.layers.Dense(64, activation='relu')(x)
    x = tf.keras.layers.Dense(64)(x)
    x = tf.keras.layers.BatchNormalization()(x)
    x = tf.keras.layers.Activation('relu')(x)
    x = tf.keras.layers.Dropout(0.2)(x)

    x = tf.keras.layers.Flatten()(x)
    output = tf.keras.layers.Dense(n_classes, activation='softmax')(x)

    rnn_model = tf.keras.Model([frame_features_input,], output)

    rnn_model.compile(
        loss='sparse_categorical_crossentropy',
        optimizer=tf.keras.optimizers.Nadam(
            learning_rate=LR_START,
        ),
        metrics=['accuracy'],
    )
    return rnn_model


In [10]:
model = get_sequence_model(max_frames=MAX_FRAMES, num_features=NUM_FEATURES)
# `summary()` prints the table itself and returns None, so wrapping it in print()
# only appends a stray "None" line to the output.
model.summary()

2023-03-10 13:04:06.622085: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:980] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2023-03-10 13:04:07.227940: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:980] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2023-03-10 13:04:07.228261: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:980] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2023-03-10 13:04:07.235676: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags

Model: "model"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_1 (InputLayer)           [(None, 15, 1086)]   0           []                               
                                                                                                  
 lambda (Lambda)                (None, 15, 42)       0           ['input_1[0][0]']                
                                                                                                  
 lambda_2 (Lambda)              (None, 15, 40)       0           ['input_1[0][0]']                
                                                                                                  
 lambda_1 (Lambda)              (None, 15, 42)       0           ['input_1[0][0]']                
                                                                                              

## Model Training

### Function definitions

In [None]:
def run_experiment(
    model,
    train_data,
    train_labels,
    validation_data = None,
    validation_split: float = 0.2,
    model_path: str = 'temp',
    epochs: int = 10,
    batch_size: int = 128,
    monitor_metric: str = 'val_accuracy',
    patience: int = 6,
):
    """Fit `model` with checkpointing, early stopping, TensorBoard, LR reduction and Slack updates.

    Args:
        model: Compiled Keras model to train (trained in place).
        train_data: Passed to `model.fit` as `x`.
        train_labels: Passed to `model.fit` as `y`.
        validation_data: Optional validation set passed through to `model.fit`.
            When None, `validation_split` is used instead.
        validation_split: Fraction of the training data held out for validation
            when `validation_data` is None.
        model_path: Where ModelCheckpoint writes the best weights.
        epochs: Maximum number of epochs.
        batch_size: Mini-batch size.
        monitor_metric: Metric watched by EarlyStopping.
        patience: EarlyStopping patience, in epochs.

    Returns:
        Tuple `(model, history)` where `history` is the object returned by `model.fit`.

    Raises:
        KeyError: If the SLACK_BOT_TOKEN environment variable is not set.
    """
    # Project-local module, imported lazily so it is only required when training runs.
    from Slack import SlackCallback
    slack_callback = SlackCallback(
        token=os.environ['SLACK_BOT_TOKEN'],
        start_message=(
            # Report the epoch budget this call actually uses, not the global constant.
            f'Model starting on {COMP=} for EPOCHS={epochs}\n'
            f'{model_details=}'
        ),
    )

    callbacks = [
        # NOTE(review): no `monitor` is given, so the checkpoint uses the Keras default
        # rather than `monitor_metric`; the saved "best" weights may differ from the ones
        # EarlyStopping restores. Confirm which is intended.
        tf.keras.callbacks.ModelCheckpoint(
            model_path,
            save_weights_only=True,
            save_best_only=True,
        ),
        tf.keras.callbacks.EarlyStopping(
            monitor=monitor_metric,
            patience=patience,
            restore_best_weights=True,
        ),
        tf.keras.callbacks.TensorBoard(
            log_dir=(
                'logs/fit/'
                + datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
                + f'-{model_details}'
            ),
            histogram_freq=1,
        ),
        tf.keras.callbacks.ReduceLROnPlateau(
            monitor='val_loss',
            factor=REDUCE_LR_FACTOR,
            patience=REDUCE_LR_PATIENCE,
        ),
        slack_callback,
    ]

    fit_params = dict(
        x=train_data,
        y=train_labels,
        epochs=epochs,
        batch_size=batch_size,
    )
    # Compare against None explicitly: truthiness is ambiguous (or raises) for
    # array-like validation data.
    if validation_data is not None:
        fit_params['validation_data'] = validation_data
    else:
        fit_params['validation_split'] = validation_split

    history = model.fit(
        **fit_params,
        callbacks=callbacks,
    )

    return model, history

### Training starts here

In [None]:
# ## Experiment
# Append the parameter count once; the guard keeps the cell idempotent when re-run.
params_suffix = f'-{model.count_params()}_model_params'
if not model_details.endswith(params_suffix):
    model_details += params_suffix
model, history = run_experiment(
    model=model,
    train_data=(X_train,),
    train_labels=y_train,
    validation_data=(X_val, y_val),
    model_path=f'{MODEL_DIR}/model-{model_details}.h5',
    batch_size=BATCH_SIZE,
    epochs=EPOCHS,
    patience=PATIENCE,
)

joblib.dump(history, f'{MODEL_DIR}/history-{model_details}.gz')


# Plot Results
# One panel per metric. Labels are set through each Axes object: the pyplot functions
# (plt.title, plt.xlabel, ...) only affect the *current* axes, which after
# plt.subplots is the last panel, so the accuracy panel would stay unlabelled.
fig, (ax_acc, ax_loss) = plt.subplots(ncols=2, figsize=(16,12))

for ax, metric in ((ax_acc, 'accuracy'), (ax_loss, 'loss')):
    ax.plot(history.history[metric])
    ax.plot(history.history[f'val_{metric}'])
    ax.set_title(f'model {metric}')
    ax.set_ylabel(metric)
    ax.set_xlabel('epoch')
    ax.legend(['train', 'test'], loc='upper left')

fig.savefig(f'{MODEL_DIR}/acc_loss-{model_details}.png')
# Final validation metrics of the model returned by run_experiment.
results = model.evaluate(X_val, y_val, batch_size=BATCH_SIZE)
val_loss, val_acc = results
print(f'{val_loss=}')
print(f'{val_acc*100:2.0f}_val_acc')

## Generate TFlite Model for Submission

In [54]:
# Load model weights
# The checkpoint name is hard-coded (not derived from `model_details`) and is a bare
# file name, so it is resolved relative to the current working directory.
# NOTE(review): the weights must match the architecture `model` was built with;
# confirm this file belongs to the current model definition.
model_weights_path = 'model-15_key_frames_all_resize_bilinear-rnn-gru_x2_per_hand-lhand_lipsFCN-rhand_lipsFCN-FCN-key_frames_all_resize_bilinear-15_frames-543_pts_per_frame-2_dims-mirror-401786_model_params.h5'
model.load_weights(model_weights_path)
model.summary()

Model: "model"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_1 (InputLayer)           [(None, 15, 1086)]   0           []                               
                                                                                                  
 lambda (Lambda)                (None, 15, 42)       0           ['input_1[0][0]']                
                                                                                                  
 lambda_2 (Lambda)              (None, 15, 40)       0           ['input_1[0][0]']                
                                                                                                  
 lambda_1 (Lambda)              (None, 15, 42)       0           ['input_1[0][0]']                
                                                                                              

In [100]:
class FeatureGen(tf.keras.layers.Layer):
    """Pre-processing layer that turns raw landmark frames into the model's input tensor."""

    def __init__(self):
        super().__init__()

    def call(self, x_in):
        """Resize a landmark sequence to 15 frames and flatten each frame.

        Args:
            x_in: Tensor of shape (n, 543, 3) holding n frames of landmarks.

        Returns:
            Tensor of shape (1, 15, 543*2): NaNs replaced by zero, the first two
            coordinates kept, bilinearly resized to 15 frames and flattened per frame.
        """
        n_landmarks = 543
        n_coords = 2
        n_key_frames = 15

        # Zero out missing landmarks before interpolating.
        cleaned = tf.where(tf.math.is_nan(x_in), tf.zeros_like(x_in), x_in)

        # Treat the sequence as an image so the frame axis is resized to n_key_frames.
        key_frames = tf.image.resize(
            images=cleaned[:, :, :n_coords],
            size=(n_key_frames, n_landmarks),
            method='bilinear',
        )

        frame_count = key_frames.shape[0]
        row_width = key_frames.shape[1] * key_frames.shape[2]
        flattened = tf.reshape(tensor=key_frames, shape=(frame_count, row_width))

        # Add the leading batch dimension.
        return tf.expand_dims(flattened, axis=0)

# Smoke check on the first training sample.
print(FeatureGen()(load_relevant_data_subset(f'{CFG.data_path}{train.iloc[0].path}')))

tf.Tensor(
[[[ 0.49620017  0.38049233  0.4951877  ...  0.3944272   0.39395487
    0.3962465 ]
  [ 0.49900684  0.37970504  0.49199703 ...  0.3782073   0.43454036
    0.3804755 ]
  [ 0.50687355  0.37971362  0.49867222 ...  0.3828612   0.4434531
    0.38227323]
  ...
  [ 0.5368115   0.37657106  0.53786635 ...  0.          0.
    0.        ]
  [ 0.5357693   0.37589476  0.53675437 ...  0.          0.
    0.        ]
  [ 0.5362242   0.37557045  0.5357279  ...  0.4721193  -0.02332834
    0.45985666]]], shape=(1, 15, 1086), dtype=float32)


In [101]:
class TFLiteModel(tf.Module):
    """
    Wrapper module that chains, in order:
        - the FeatureGen pre-processing layer
        - the ASL classification model
    """

    def __init__(self, asl_model):
        """
        Store the classifier and create the pre-processing layer.

        Args:
            asl_model: Model applied to the pre-processed features.
        """
        super().__init__()

        self.prep_inputs = FeatureGen()
        self.asl_model   = asl_model

    @tf.function(
        input_signature=[
            tf.TensorSpec(shape=[None, 543, 3], dtype=tf.float32, name='inputs'),
        ]
    )
    def __call__(self, inputs):
        """
        Run pre-processing followed by the classifier.

        Args:
            inputs: Input tensor with shape [num_frames, 543, 3].

        Returns:
            A dictionary with a single key 'outputs' holding the classifier's output tensor.
        """
        features = self.prep_inputs(tf.cast(inputs, dtype=tf.float32))
        return {'outputs': self.asl_model(features)}


In [95]:
# Wrap the trained model and run it end-to-end on the first training sample.
tflite_keras_model = TFLiteModel(asl_model=model)
first_sample_frames = load_relevant_data_subset(f'{CFG.data_path}{train.iloc[0].path}')
prediction = tflite_keras_model(first_sample_frames)["outputs"]

# Predicted sign name, followed by the ground-truth sign for comparison.
predicted_index = tf.argmax(prediction, axis=1).numpy()[0]
print(index_label[predicted_index])
print(f'{train.iloc[0].sign}')

(1, 15, 1086)
blow
blow


In [99]:
# Convert the wrapped (pre-processing + classifier) module to a TFLite flatbuffer.
# NOTE(review): `tflite_keras_model` is a tf.Module subclass, not a tf.keras.Model;
# confirm `from_keras_model` is the intended converter entry point for it.
keras_model_converter = tf.lite.TFLiteConverter.from_keras_model(tflite_keras_model)
# `tflite_model` is the serialized model; it is written to disk in a later cell.
tflite_model = keras_model_converter.convert()

(1, 15, 1086)




INFO:tensorflow:Assets written to: /tmp/tmpdteiafun/assets


INFO:tensorflow:Assets written to: /tmp/tmpdteiafun/assets
2023-03-10 14:21:33.548512: W tensorflow/compiler/mlir/lite/python/tf_tfl_flatbuffer_helpers.cc:362] Ignored output_format.
2023-03-10 14:21:33.548532: W tensorflow/compiler/mlir/lite/python/tf_tfl_flatbuffer_helpers.cc:365] Ignored drop_control_dependency.
2023-03-10 14:21:33.549033: I tensorflow/cc/saved_model/reader.cc:45] Reading SavedModel from: /tmp/tmpdteiafun
2023-03-10 14:21:33.569074: I tensorflow/cc/saved_model/reader.cc:89] Reading meta graph with tags { serve }
2023-03-10 14:21:33.569093: I tensorflow/cc/saved_model/reader.cc:130] Reading SavedModel debug info (if present) from: /tmp/tmpdteiafun
2023-03-10 14:21:33.665172: I tensorflow/compiler/mlir/mlir_graph_optimization_pass.cc:354] MLIR V1 optimization pass is not enabled
2023-03-10 14:21:33.700031: I tensorflow/cc/saved_model/loader.cc:229] Restoring SavedModel bundle.
2023-03-10 14:21:33.869552: I tensorflow/cc/saved_model/loader.cc:213] Running initializatio

In [103]:
# Persist the converted model next to the notebook.
model_path = "model.tflite"
with open(model_path, 'wb') as tflite_file:
    tflite_file.write(tflite_model)

In [106]:
import tflite_runtime.interpreter as tflite

# Smoke-test the exported file with the standalone TFLite runtime.
interpreter = tflite.Interpreter(model_path)
found_signatures = list(interpreter.get_signature_list().keys())
# Fail with a readable message if the export did not produce the expected signature.
assert "serving_default" in found_signatures, f'{found_signatures=}'
prediction_fn = interpreter.get_signature_runner("serving_default")

# Run the first rows of the training table through the interpreter and count how many
# predictions match the stored label, so the loop verifies more than "does not crash".
n_checked = 100
n_correct = 0
for i in tqdm(range(n_checked)):
    row = train.iloc[i]
    frames = load_relevant_data_subset(f'{CFG.data_path}{row.path}')
    output = prediction_fn(inputs=frames)
    sign = np.argmax(output["outputs"])
    n_correct += int(sign == row.label)
print(f'TFLite predictions matching the label: {n_correct}/{n_checked}')

100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1000/1000 [00:11<00:00, 86.50it/s]


In [None]:
import zipfile

# Use the archive as a context manager so it is closed deterministically, which is
# when the zip's central directory is written, instead of relying on garbage collection.
with zipfile.ZipFile('submission.zip', mode='w') as submission_archive:
    submission_archive.write('model.tflite')

In [None]:
# Kaggle-only variant of the export: write the converted model under /kaggle/working
# and zip it with the shell `zip` tool (IPython `!` escape).
# NOTE(review): assumes /kaggle/working/models/ already exists; open() does not create it.
with open('/kaggle/working/models/model.tflite', 'wb') as f:
    f.write(tflite_model)
# NOTE(review): this targets the same 'submission.zip' name as the zipfile-based cell
# above — confirm only one of the two cells is meant to run in a given environment.
!zip submission.zip /kaggle/working/models/model.tflite