This notebook implements a GRU-based classifier: it writes `common_func.py`, `trainer.py`, and `config.yaml` to disk and then runs the trainer.


In [1]:
try:
    import ipdb
except:
    !pip install -q --upgrade tensorflow-io
    !pip install tflite-runtime
    !pip install ipdb
    !pip install wandb
    !pip install hydra-core
    !mkdir models
    !cp -r /kaggle/input/gislr-extended-train-dataframe/extended_train.csv ./
    !cp -r /kaggle/input/asl-signs/train_landmark_files/16069/1004211348.parquet .

[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
tensorflow 2.11.0 requires protobuf<3.20,>=3.9.2, but you have protobuf 3.20.3 which is incompatible.
tensorflow-transform 1.12.0 requires pyarrow<7,>=6, but you have pyarrow 5.0.0 which is incompatible.
tensorflow-serving-api 2.11.0 requires protobuf<3.20,>=3.9.2, but you have protobuf 3.20.3 which is incompatible.[0m[31m
[0mCollecting tflite-runtime
  Downloading tflite_runtime-2.11.0-cp37-cp37m-manylinux2014_x86_64.whl (2.5 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.5/2.5 MB[0m [31m40.9 MB/s[0m eta [36m0:00:00[0ma [36m0:00:01[0m
Installing collected packages: tflite-runtime
Successfully installed tflite-runtime-2.11.0
[0mCollecting ipdb
  Downloading ipdb-0.13.13-py3-none-any.whl (12 kB)
Installing collected packages: ipdb
Successfully installed ipdb-0.13.13
[0mCollec

In [2]:
%%writefile common_func.py

import json
import os
import warnings

from sklearn import metrics

# Silence all Python warnings for the rest of the process.
warnings.filterwarnings("ignore")
# These variables are set before `import tensorflow` further down.
# NOTE(review): presumably so TensorFlow picks them up when it is imported — confirm.
os.environ["TF_DETERMINISTIC_OPS"] = "1"
os.environ["TF_CUDNN_DETERMINISTIC"] = "1"
os.environ["TF_CPP_MIN_LOG_LEVEL"] = "2"
import random
from pathlib import Path
import numpy as np
import pandas as pd
import tensorflow as tf
import tensorflow_addons as tfa
from sklearn.model_selection import StratifiedGroupKFold
from tqdm.notebook import tqdm
from wandb.keras import WandbCallback, WandbMetricsLogger

# Directory where model checkpoints and the exported TFLite file are written.
SAVE_DIR = "./models/"
# SAVE_DIR = "/kaggle/working/models/"

# Pick the data locations depending on whether the Kaggle input mount is present.
_ON_KAGGLE = Path("/kaggle/input/asl-signs/").exists()
DATA_DIR, ROOT_PATH, CSV_PATH = (
    ("/kaggle/input/asl-signs/", "/kaggle/input/islr-external-data/", "./")
    if _ON_KAGGLE
    else (
        "/scratch/smart_data/islr_data/",
        "/scratch/smart_data/",
        "/scratch/smart_data/islr_data/",
    )
)

# Fixed locations (not switched by environment).
PARQ_PATH = "/kaggle/input/asl-signs/"
NUMPY_PATH = "/scratch/smart_data/numpy_files/"

# Derived paths.
LANDMARK_FILES_DIR = ROOT_PATH + "train_landmark_files"
TRAIN_FILE = CSV_PATH + "extended_new.csv"

# Number of landmark rows stored per frame in the parquet files.
ROWS_PER_FRAME = 543

# Data Generation ###################################################################################

def tf_nan_mean(x, axis=0):
    """Mean of `x` along `axis`, ignoring NaN entries.

    NaNs contribute 0 to the sum and 0 to the element count. A slice that is
    entirely NaN therefore yields 0 / 0 (NaN).
    """
    nan_mask = tf.math.is_nan(x)
    zeros = tf.zeros_like(x)
    total = tf.reduce_sum(tf.where(nan_mask, zeros, x), axis=axis)
    count = tf.reduce_sum(tf.where(nan_mask, zeros, tf.ones_like(x)), axis=axis)
    return total / count

def tf_nan_std(x, axis=0):
    """Population standard deviation of `x` along `axis`, ignoring NaN entries.

    Computed as sqrt(nan-mean of squared deviations from the nan-mean).
    """
    centered = x - tf_nan_mean(x, axis=axis)
    variance = tf_nan_mean(centered * centered, axis=axis)
    return tf.math.sqrt(variance)

def flatten_means_and_stds(x, axis=0):
    """Summarise `x` by its NaN-aware mean and std and flatten to one row.

    The statistics are always reduced over axis 0; the `axis` argument is
    accepted but not used by the computation. The result is reshaped to
    (1, INPUT_SHAPE[1] * 2) and any non-finite value is replaced by 0.
    """
    stats = tf.concat([tf_nan_mean(x, axis=0), tf_nan_std(x, axis=0)], axis=0)
    flat = tf.reshape(stats, (1, INPUT_SHAPE[1]*2))
    return tf.where(tf.math.is_finite(flat), flat, tf.zeros_like(flat))

class FeatureGen(tf.keras.layers.Layer):
    """Turns one raw landmark sequence into a single flat feature row.

    The output concatenates, along axis 1:
      * mean/std summaries of SEGMENTS consecutive chunks of the sequence,
      * a mean/std summary of the whole sequence,
      * the sequence resized to NUM_FRAMES frames and flattened.
    Each summary has width INPUT_SHAPE[1] * 2 and the resized part has width
    INPUT_SHAPE[0] * INPUT_SHAPE[1] (see the reshape calls below).
    """

    def __init__(self):
        super(FeatureGen, self).__init__()
    
    def call(self, x_in):
        """Build the feature row for `x_in`.

        NOTE(review): assumes `x_in` is (n_frames, 543, 3), matching the input
        signature declared on TFLiteModel.__call__ — confirm for other callers.
        """
#         print(right_hand_percentage(x))
#         x_list = [tf.expand_dims(tf_nan_mean(x_in[:, av_set[0]:av_set[0]+av_set[1], :], axis=1), axis=1) for av_set in averaging_sets]
#         x_list.append(tf.gather(x_in, point_landmarks, axis=1))
#         x = tf.concat(x_list, 1)
        # Keep only the selected landmarks (lips + both hands) on axis 1.
        x = tf.gather(x_in, point_landmarks, axis=1)

        # Pad axis 0 until its length is divisible by SEGMENTS so tf.split
        # succeeds. Each iteration adds at most one frame: at the end on even
        # iterations, at the start on odd ones, using SYMMETRIC padding.
        x_padded = x
        for i in range(SEGMENTS):
            p0 = tf.where( ((tf.shape(x_padded)[0] % SEGMENTS) > 0) & ((i % 2) != 0) , 1, 0)
            p1 = tf.where( ((tf.shape(x_padded)[0] % SEGMENTS) > 0) & ((i % 2) == 0) , 1, 0)
            paddings = [[p0, p1], [0, 0], [0, 0]]
            x_padded = tf.pad(x_padded, paddings, mode="SYMMETRIC")
        # One mean/std summary per segment.
        x_list = tf.split(x_padded, SEGMENTS)
        x_list = [flatten_means_and_stds(_x, axis=0) for _x in x_list]

        # Summary over the whole (unpadded) sequence.
        x_list.append(flatten_means_and_stds(x, axis=0))
        
        ## Resize to [NUM_FRAMES, LANDMARKS]; axis 1 already has LANDMARKS entries after the gather.
        ## Non-finite values are first replaced with the NaN-aware mean over axis 0.
        x = tf.image.resize(tf.where(tf.math.is_finite(x), x, tf_nan_mean(x, axis=0)), [NUM_FRAMES, LANDMARKS])
        x = tf.reshape(x, (1, INPUT_SHAPE[0]*INPUT_SHAPE[1]))
        # Any NaN that is still present is zeroed.
        x = tf.where(tf.math.is_nan(x), tf.zeros_like(x), x)
        x_list.append(x)
        x = tf.concat(x_list, axis=1)
        return x

def convert_row(row, right_handed=True):
    """Load one sample's parquet file and convert it into a feature vector.

    Args:
        row: An ``(index, Series)`` pair as yielded by ``DataFrame.iterrows()``;
            the Series is read for its ``path`` and ``label`` attributes.
        right_handed: Not used by the current implementation.

    Returns:
        Tuple ``(features, label)`` where ``features`` is a NumPy array.
    """
    # NOTE(review): the dataset directory is hard-coded here instead of using
    # the DATA_DIR / PARQ_PATH constants defined at the top of the module.
    x = load_relevant_data_subset(os.path.join("/kaggle/input/asl-signs", row[1].path))
    # NOTE(review): `feature_converter` is not defined in the visible part of
    # this file — presumably a FeatureGen instance; confirm before calling.
    x = feature_converter(tf.convert_to_tensor(x)).cpu().numpy()
    return x, row[1].label

def convert_and_save_data():
    """Convert every row of TRAIN_FILE into features and save them as .npy files.

    Writes ``feature_data.npy`` (one feature row per sample) and
    ``feature_labels.npy`` (one label per sample) to the working directory.

    NOTE(review): `label_map`, `QUICK_TEST` and `QUICK_LIMIT` are not defined in
    the visible part of this file — confirm they exist before calling.
    """
    df = pd.read_csv(TRAIN_FILE)
    df['label'] = df['sign'].map(label_map)
    total = df.shape[0]
    if QUICK_TEST:
        # Quick mode processes only the first QUICK_LIMIT rows.
        total = QUICK_LIMIT
    # Row width: flattened resized frames plus (SEGMENTS + 1) mean/std summaries.
    npdata = np.zeros((total, INPUT_SHAPE[0]*INPUT_SHAPE[1] + (SEGMENTS+1)*INPUT_SHAPE[1]*2))
    nplabels = np.zeros(total)
    for i, row in tqdm(enumerate(df.iterrows()), total=total):
        (x,y) = convert_row(row)
        npdata[i,:] = x
        nplabels[i] = y
        if QUICK_TEST and i == QUICK_LIMIT - 1:
            break
    
    np.save("feature_data.npy", npdata)
    np.save("feature_labels.npy", nplabels)
    

def right_hand_percentage(x):
    """Fraction of non-NaN hand values that belong to the right hand.

    Counts the non-NaN entries of the right-hand and left-hand landmark slices
    (gathered on axis 1) and returns right / (left + right). If neither hand
    has any non-NaN value the result is 0 / 0.
    """

    def _count_observed(landmark_ids):
        # Number of non-NaN scalars among the selected landmarks.
        coords = tf.gather(x, landmark_ids, axis=1)
        observed = tf.where(tf.math.is_nan(coords), tf.zeros_like(coords), tf.ones_like(coords))
        return tf.reduce_sum(observed)

    right_count = _count_observed(right_hand_landmarks)
    left_count = _count_observed(left_hand_landmarks)
    return right_count / (left_count+right_count)


# Data Functions ###################################################################################


def prepare_main_csv(seed, num_splits, csv_path=CSV_PATH):
    """Return the training dataframe with a `fold` column, building it if needed.

    If ``{csv_path}/extended_new.csv`` exists it is loaded and returned as is.
    Otherwise ``extended_train.csv`` is read, joined with the sign -> index map,
    shuffled with `seed`, split into `num_splits` folds stratified on
    "<hand>_<sign index>" and grouped by participant, written to
    ``extended_new.csv`` and returned.
    """
    cached_csv = f"{csv_path}/extended_new.csv"
    if Path(cached_csv).exists():
        return pd.read_csv(cached_csv).reset_index(drop=True)

    frame = pd.read_csv(f"{csv_path}/extended_train.csv").reset_index(drop=True)

    # sign name -> prediction index, as a two-column table for the merge.
    sign_table = (
        pd.DataFrame.from_dict(read_json_file(), orient="index")
        .reset_index()
        .rename(columns={"index": "sign", 0: "sign_val"})
    )

    frame = pd.merge(frame, sign_table, on="sign")
    frame = frame.sample(frac=1.0, random_state=seed).reset_index(drop=True)

    # `hand` starts as the participant id and is mapped through `di` (0 = left, 1 = right).
    frame["hand"] = frame["participant_id"]
    frame = frame.replace({"hand": di})
    frame["fold_split"] = frame["hand"].astype("str") + "_" + frame["sign_val"].astype("str")

    # Splitting the data based on (hand & sign), grouped by participant_id
    splitter = StratifiedGroupKFold(n_splits=num_splits)
    frame["fold"] = -1
    print("Splitting Data ------------------------------------>")
    fold_iter = splitter.split(frame.index, frame.fold_split, frame.participant_id)
    for fold_id, (_, holdout_idx) in enumerate(fold_iter):
        frame.loc[holdout_idx, "fold"] = fold_id
        print(f"fold {fold_id} --> {len(holdout_idx)}")
        print(frame.loc[holdout_idx].participant_id.value_counts())

    frame.to_csv(cached_csv, index=False)
    return frame


def get_data(fold_num, cfg):
    """Load the precomputed features and split them into train / validation.

    Args:
        fold_num: Fold whose rows become the validation set.
        cfg: Mapping read for 'FLAG_DROP_Z', 'SEED' and 'NUM_SPLITS'.

    Returns:
        (train_df, train_x, train_y, val_x, val_y)

    NOTE(review): rows are selected from the feature arrays by the positional
    index of the dataframe returned by prepare_main_csv, so this assumes both
    share the same row order — confirm against how feature_data.npy was built.
    """
    print("Data Loading ----->")
    # Alternative inputs: f"{ROOT_PATH}23_nonorm_feature_data.npy" / f"{ROOT_PATH}23_nonorm_feature_labels.npy"
    features_all = np.load(f"{ROOT_PATH}feature_data.npy").astype(np.float32)
    labels_all = np.load(f"{ROOT_PATH}feature_labels.npy").astype(np.uint8)

    print(features_all.shape, labels_all.shape)

    if cfg['FLAG_DROP_Z']:
        # View the flat rows as triples, keep the first two of each triple, flatten again.
        n_samples = features_all.shape[0]
        as_triples = features_all.reshape(n_samples, -1, 3)
        features_all = as_triples[:, :, 0:2].reshape(n_samples, -1)
        print(features_all.shape, labels_all.shape)

    # Fold assignment comes from the (cached) main CSV.
    train_df = prepare_main_csv(cfg['SEED'], cfg['NUM_SPLITS'])
    in_val_fold = train_df.fold == fold_num
    train_rows = train_df.index[~in_val_fold].to_numpy()
    val_rows = train_df.index[in_val_fold].to_numpy()

    train_x, train_y = features_all[train_rows], labels_all[train_rows]
    val_x, val_y = features_all[val_rows], labels_all[val_rows]

    print(train_df[train_df.sequence_id == 1004211348])
    return train_df, train_x, train_y, val_x, val_y


# Utils ###################################################################################

def get_input_shape(num_frames, landmarks, flag_drop_z):
    """Return the (frames, features-per-frame) shape of the sequence input.

    Args:
        num_frames: Number of frames per sample.
        landmarks: Number of landmarks per frame.
        flag_drop_z: When truthy each landmark contributes 2 coordinates,
            otherwise 3.

    Returns:
        Tuple ``(num_frames, landmarks * coords_per_landmark)``.
    """
    num_coords = 2 if flag_drop_z else 3
    return (num_frames, landmarks * num_coords)


def seed_it_all(seed=42):
    """Attempt to be Reproducible

    Clears the Keras session, sets PYTHONHASHSEED, seeds Python's `random`,
    NumPy and TensorFlow with `seed`, and enables TensorFlow op determinism.
    """
    tf.keras.backend.clear_session()
    os.environ["PYTHONHASHSEED"] = str(seed)
    seeders = (
        random.seed,
        np.random.seed,
        tf.random.set_seed,
        tf.keras.utils.set_random_seed,
    )
    for apply_seed in seeders:
        apply_seed(seed)
    tf.config.experimental.enable_op_determinism()


def read_json_file(file_path=f"{DATA_DIR}/sign_to_prediction_index_map.json"):
    """Parse the JSON file at `file_path` and return its content.

    The default path is built from DATA_DIR once, when the function is defined.
    """
    with open(file_path, "r") as handle:
        return json.load(handle)


def load_relevant_data_subset(pq_path):
    """Read the x/y/z columns of a landmark parquet file as a float32 array.

    Returns an array of shape (n_frames, ROWS_PER_FRAME, 3), where n_frames is
    the row count divided by ROWS_PER_FRAME.
    """
    coord_cols = ["x", "y", "z"]
    table = pd.read_parquet(pq_path, columns=coord_cols)
    frame_count = int(len(table) / ROWS_PER_FRAME)
    stacked = table.values.reshape(frame_count, ROWS_PER_FRAME, len(coord_cols))
    return stacked.astype(np.float32)


def load_npz(f):
    """Return the array stored under the key "data" in an .npz archive.

    Args:
        f: Path or file object accepted by ``np.load`` that refers to an
            ``.npz`` archive.

    Returns:
        The array saved under ``"data"``.

    The archive is opened in a ``with`` block so its file handle is closed
    before returning; the array is read into memory when it is indexed.
    """
    with np.load(f) as archive:
        return archive["data"]


def uniform_soup(model=None):
    """Average the weights of all checkpoints in SAVE_DIR and save the result.

    Every ``*.h5`` file in SAVE_DIR (except a previously written
    ``uniform_soup.h5``) is loaded into `model`; the element-wise mean of each
    weight tensor across checkpoints is then assigned to the model and saved to
    ``{SAVE_DIR}/uniform_soup.h5``.

    Args:
        model: Model whose architecture matches the checkpoints. When omitted,
            the original behaviour is kept: the Keras session is cleared and
            ``get_model()`` is called.
            NOTE(review): `get_model` is not defined in this module and the
            trainer's `get_model` requires a `cfg` argument, so callers should
            pass `model` explicitly.

    Raises:
        FileNotFoundError: If SAVE_DIR contains no checkpoint to average.
    """
    if model is None:
        tf.keras.backend.clear_session()
        model = get_model()

    # Skip the output of an earlier run so it is not averaged back in.
    model_paths = sorted(
        path for path in Path(SAVE_DIR).glob("*.h5") if path.name != "uniform_soup.h5"
    )
    if not model_paths:
        raise FileNotFoundError(f"No .h5 checkpoints found in {SAVE_DIR}")

    soups = []
    ## Iterating Over all models
    for path in tqdm(model_paths):
        ## loading model weights
        print(f"### Loading {path}")
        model.load_weights(str(path))

        ## Adding model weights in soup list
        soups.append([np.array(weights) for weights in model.weights])

    ## Averaging each weight tensor separately: the tensors have different
    ## shapes, so they cannot be stacked into a single regular array.
    mean_soup = [np.mean(per_weight, axis=0) for per_weight in zip(*soups)]

    ## Replacing model's weight with Uniform Soup Weights
    for w1, w2 in zip(model.weights, mean_soup):
        tf.keras.backend.set_value(w1, w2)

    model.save_weights(f"{SAVE_DIR}/uniform_soup.h5")


class TFLiteModel(tf.Module):
    """
    Inference wrapper meant for TensorFlow Lite export. It chains:
        – FeatureGen, which turns raw landmarks into the flat feature row
        – the trained ASL model
    """

    def __init__(self, asl_model):
        """
        Stores `asl_model` and creates the FeatureGen preprocessing layer.
        """
        super().__init__()

        self.prep_inputs = FeatureGen()
        self.asl_model = asl_model

    @tf.function(
        input_signature=[
            tf.TensorSpec(shape=[None, 543, 3], dtype=tf.float32, name="inputs")
        ]
    )
    def __call__(self, inputs):
        """
        Runs preprocessing followed by the ASL model on one landmark sequence.

        Args:
            inputs: float32 tensor of shape [None, 543, 3] (see the input
                signature); FeatureGen reduces it to a single feature row.

        Returns:
            A dictionary with the single key 'outputs' holding row 0 of the
            model's output.
        """
        features = self.prep_inputs(tf.cast(inputs, dtype=tf.float32))
        predictions = self.asl_model(features)

        return {"outputs": predictions[0, :]}


# Metrics ###################################################################################

# Class ids passed as `labels=` to the averaged sklearn metrics below.
label_ls = list(range(0, 250))
# Averaging mode used for f1 / precision / recall.
AVG_TYPE = "weighted"


def get_metrics(labels, preds_class):
    """Compute, print and return overall classification metrics.

    Args:
        labels: Ground-truth class ids.
        preds_class: Predicted class ids, aligned with `labels`.

    Returns:
        Dict with the keys "accuracy", "f1_score", "precision" and "recall".
        (The original only printed the values; returning them as well lets
        callers log or compare them.)
    """
    # Shared settings for the averaged metrics; zero_division=0 scores classes
    # without any prediction/label as 0 instead of emitting a warning.
    averaged_kwargs = dict(labels=label_ls, zero_division=0, average=AVG_TYPE)
    metric_dict = {
        "accuracy": metrics.accuracy_score(labels, preds_class),
        "f1_score": metrics.f1_score(labels, preds_class, **averaged_kwargs),
        "precision": metrics.precision_score(labels, preds_class, **averaged_kwargs),
        "recall": metrics.recall_score(labels, preds_class, **averaged_kwargs),
        # 'roc': metrics.roc_auc_score(labels, preds_softmax, average='macro', multi_class='ovo', labels=label_ls),
    }
    for k, v in metric_dict.items():
        print(k, v)
    return metric_dict


def _safe_ratio(numerator, denominator):
    """Return numerator / denominator as a float, or 0.0 when the denominator is 0."""
    return float(numerator) / float(denominator) if denominator else 0.0


def compute_evaluation_metrics(model, data_x, data_y, decoder):
    """
    Computes per-class evaluation metrics for the given model and prints them,
    sorted by F1 score (best first).

    Args:
    - model: The trained model to evaluate (must provide `predict`).
    - data_x: The input data to evaluate the model on.
    - data_y: The target class ids for `data_x`.
    - decoder: A mapping from class id to a readable class name.

    Ratios with a zero denominator are reported as 0.0, matching the
    `zero_division=0` convention used in `get_metrics`.
    """
    # Compute the predicted classes and confusion matrix
    batch_size = 1024
    y_pred = model.predict(data_x, batch_size=batch_size)
    print(y_pred.shape)
    y_pred_classes = tf.cast(np.argmax(y_pred, axis=1), tf.uint8)
    confusion_mtx = tf.math.confusion_matrix(data_y, y_pred_classes).numpy()

    # Compute the evaluation metrics by class
    total = confusion_mtx.sum()
    num_classes = confusion_mtx.shape[0]
    classwise_performance = {}
    for i in range(num_classes):
        tp = confusion_mtx[i, i]
        fp = confusion_mtx[:, i].sum() - tp
        fn = confusion_mtx[i, :].sum() - tp
        # True negatives are all samples that are neither tp, fp nor fn for class i.
        tn = total - (tp + fp + fn)

        precision = _safe_ratio(tp, tp + fp)
        recall = _safe_ratio(tp, tp + fn)
        classwise_performance[i] = dict(
            accuracy=_safe_ratio(tp + tn, tp + fp + tn + fn),
            precision=precision,
            recall=recall,
            f1_score=_safe_ratio(2 * precision * recall, precision + recall),
        )

    # Sort the classwise performance by f1_score and print the results
    classwise_performance = dict(
        sorted(
            classwise_performance.items(), key=lambda x: x[1]["f1_score"], reverse=True
        )
    )
    print("\n\n... CLASSWISE CONFUSION MATRIX... \n")
    for i, perf in classwise_performance.items():
        print(
            f"Class {i:<3}  ({decoder[i]:^13})  -->  Accuracy: {perf['accuracy']:.2f}, Precision: {perf['precision']:.2f}, Recall: {perf['recall']:.2f}, F1 Score: {perf['f1_score']:.2f}"
        )


# Model Utils ################################################################################

# Constant initial value for the bias of the classification layer.
output_bias = tf.keras.initializers.Constant(1.0 / 250.0)


class MSD(tf.keras.layers.Layer):
    """Multi-sample dropout head.

    The input goes through five dropout layers with rates
    MSD_DROPOUT - 0.2, - 0.1, + 0, + 0.1 and + 0.2; each result is fed to the
    same Dense layer and the five outputs are averaged.

    Args:
        units: Output width of the shared Dense layer.
        fold_num: Added to the fixed dropout seeds of the "normal" variant.
        cfg: Mapping read for "MSD_DROPOUT" and "MSD_DROP_TYPE"
            ("normal" or "gaussian").

    Raises:
        ValueError: If cfg["MSD_DROP_TYPE"] is neither "normal" nor "gaussian".
            (Previously the layer was built without dropouts and only failed
            later, inside `call`.)
    """

    def __init__(
        self,
        units,
        fold_num,
        cfg,
        **kwargs,
    ):
        super().__init__(**kwargs)

        self.lin = tf.keras.layers.Dense(
            units,
            activation=None,
            use_bias=True,
            bias_initializer=output_bias,
            # kernel_regularizer=R.l2(WEIGHT_REGULARIZE)
        )

        rate_dropout = cfg["MSD_DROPOUT"]
        drop_type = cfg["MSD_DROP_TYPE"]
        if drop_type == "normal":
            self.dropouts = [
                tf.keras.layers.Dropout((rate_dropout - 0.2), seed=135 + fold_num),
                tf.keras.layers.Dropout((rate_dropout - 0.1), seed=690 + fold_num),
                tf.keras.layers.Dropout((rate_dropout), seed=275 + fold_num),
                tf.keras.layers.Dropout((rate_dropout + 0.1), seed=348 + fold_num),
                tf.keras.layers.Dropout((rate_dropout + 0.2), seed=861 + fold_num),
            ]
        elif drop_type == "gaussian":
            self.dropouts = [
                tf.keras.layers.GaussianDropout((rate_dropout - 0.2)),
                tf.keras.layers.GaussianDropout((rate_dropout - 0.1)),
                tf.keras.layers.GaussianDropout(rate_dropout),
                tf.keras.layers.GaussianDropout((rate_dropout + 0.1)),
                tf.keras.layers.GaussianDropout((rate_dropout + 0.2)),
            ]
        else:
            raise ValueError(
                f"Unsupported MSD_DROP_TYPE {drop_type!r}; expected 'normal' or 'gaussian'"
            )

    def call(self, inputs):
        """Return the average of the Dense outputs over the five dropout branches."""
        out = None
        for drop in self.dropouts:
            branch = self.lin(drop(inputs)) / 5.0
            out = branch if out is None else out + branch
        return out


class ResidualBlock(tf.keras.layers.Layer):
    """Dense -> BatchNormalization -> GELU, followed by optional Dropout.

    The block itself adds no skip connection; callers combine its output with
    the input where needed.

    Args:
        units: Output width of the Dense layer.
        dropout: Dropout rate; 0 disables the dropout layer entirely.
    """

    def __init__(self, units, dropout):
        super().__init__()
        self.linear = tf.keras.layers.Dense(units)
        self.bn = tf.keras.layers.BatchNormalization()
        self.act = tf.keras.layers.Activation("gelu")
        self.flag_use_drop = dropout != 0
        if self.flag_use_drop:
            self.drop = tf.keras.layers.Dropout(dropout)

    def call(self, x):
        hidden = self.act(self.bn(self.linear(x)))
        if not self.flag_use_drop:
            return hidden
        return self.drop(hidden)


class GRUModel(tf.keras.layers.Layer):
    """Stack of GRU layers that reduces a sequence to its final state.

    The stack always contains a first GRU (no dropout, returns sequences) and a
    last GRU (with dropout, returns only the final output). When
    `num_blocks` > 2, `num_blocks - 2` additional sequence-returning GRU layers
    are inserted between them; any value <= 2 gives just the two fixed layers.

    Args:
        units: Number of units of every GRU layer.
        dropout: Dropout rate of the middle and last GRU layers.
        num_blocks: Total number of GRU layers requested.
    """

    def __init__(self, units, dropout, num_blocks):
        super().__init__()
        self.start_gru = tf.keras.layers.GRU(
            units=units, dropout=0.0, return_sequences=True
        )
        self.end_gru = tf.keras.layers.GRU(
            units=units, dropout=dropout, return_sequences=False
        )

        num_middle = num_blocks - 2
        if num_middle > 0:
            # One independent layer per block. The previous code multiplied a
            # single layer object by an int, which raises a TypeError.
            self.gru_blocks = [
                tf.keras.layers.GRU(units=units, dropout=dropout, return_sequences=True)
                for _ in range(num_middle)
            ]
            self.flag_use_gru_blocks = True
        else:
            self.flag_use_gru_blocks = False

    def call(self, x):
        x = self.start_gru(x)
        if self.flag_use_gru_blocks:
            for blk in self.gru_blocks:
                x = blk(x)
        x = self.end_gru(x)
        return x


def model_utils(cfg, fold_num):
    """Create the metrics, callbacks and optimizer used to train one fold.

    Args:
        cfg: Mapping read for "TARGET_METRIC", "FLAG_WANDB" and "LR".
        fold_num: Fold id, used in the checkpoint file name.

    Returns:
        (metric_ls, cb_list, opt)
    """
    target_metric = cfg["TARGET_METRIC"]

    metric_ls = [
        tf.keras.metrics.SparseCategoricalAccuracy(),
        tf.keras.metrics.SparseTopKCategoricalAccuracy(k=5),
    ]

    # Stop after 5 epochs without improvement and roll back to the best weights.
    early_stopping = tf.keras.callbacks.EarlyStopping(
        patience=5,
        restore_best_weights=True,
        verbose=1,
        monitor=target_metric,
    )
    # Multiply the learning rate by 0.8 after 2 stagnating epochs (default monitor).
    lr_schedule = tf.keras.callbacks.ReduceLROnPlateau(patience=2, factor=0.8, verbose=1)
    # Keep only the weights of the epoch with the highest target metric.
    checkpoint = tf.keras.callbacks.ModelCheckpoint(
        f"{SAVE_DIR}/best_acc_{fold_num}.h5",
        monitor=target_metric,
        verbose=0,
        save_best_only=True,
        save_weights_only=True,
        mode="max",
        save_freq="epoch",
    )
    cb_list = [early_stopping, lr_schedule, checkpoint]

    if cfg["FLAG_WANDB"]:
        # Alternative logger: WandbMetricsLogger()
        cb_list.append(
            WandbCallback(
                monitor=target_metric,
                log_weights=False,
                log_evaluation=False,
                save_model=False,
            )
        )

    opt = tfa.optimizers.AdamW(weight_decay=0, learning_rate=cfg["LR"])
    # opt = tf.keras.optimizers.Adam(learning_rate=LR)
    # opt = tfa.optimizers.RectifiedAdam(learning_rate=LR)
    # opt = tfa.optimizers.Lookahead(opt, sync_period=5)

    return metric_ls, cb_list, opt


######################################################################################################################

# Landmark row indices selected for the lips.
# (This list used to be defined twice with identical content; one copy is kept.)
lip_landmarks = [
    61, 185, 40, 39, 37, 0, 267, 269, 270, 409,
    291, 146, 91, 181, 84, 17, 314, 405, 321, 375,
    78, 191, 80, 81, 82, 13, 312, 311, 310, 415,
    95, 88, 178, 87, 14, 317, 402, 318, 324, 308,
]

# Analyzing Handedness
left_handed_signer = [
    16069,
    32319,
    36257,
    22343,
    27610,
    61333,
    34503,
    55372,
    37055,
]  # both_hands_signer-> 37055
right_handed_signer = [
    26734,
    28656,
    25571,
    62590,
    29302,
    49445,
    53618,
    18796,
    4718,
    2044,
    37779,
    30680,
]

# participant_id -> handedness code (0 = left-handed list, 1 = right-handed list).
di = {
    **{participant: 0 for participant in left_handed_signer},
    **{participant: 1 for participant in right_handed_signer},
}

# Row ranges of the two hands inside a frame (21 landmarks each).
left_hand_landmarks = list(range(468, 468 + 21))
right_hand_landmarks = list(range(522, 522 + 21))

averaging_sets = [
    [0, 468],
    [489, 33],
]  ## average over the entire face, and the entire 'pose'

# All landmark rows fed to the model: lips, then left hand, then right hand.
point_landmarks = [*lip_landmarks, *left_hand_landmarks, *right_hand_landmarks]

LANDMARKS = len(point_landmarks) #+ len(averaging_sets)

# Fixed  ##################################################################################

FLAG_DROP_Z = False
ROWS_PER_FRAME = 543
NUM_FRAMES = 15
INPUT_SHAPE = get_input_shape(NUM_FRAMES, LANDMARKS, FLAG_DROP_Z)
SEGMENTS = 3
# Width of the mean/std summary part: (SEGMENTS + 1) summaries of mean and std.
NUM_BASE_FEATS = (SEGMENTS + 1) * INPUT_SHAPE[1] * 2
# Full feature-row width: summaries followed by the flattened resized frames.
FLAT_FRAME_SHAPE = NUM_BASE_FEATS + (INPUT_SHAPE[0] * INPUT_SHAPE[1])
# prediction index -> sign name (inverse of the JSON sign map).
decoder = {v: k for k, v in read_json_file().items()}

Writing common_func.py


In [3]:
%%writefile trainer.py

import gc
import os
import pprint
import warnings
import wandb
import hydra
from omegaconf import DictConfig
from zipfile import ZipFile
# tflite_runtime is optional: without it the exported model is still written,
# only the post-export sanity check with the TFLite interpreter is skipped.
try:
    import tflite_runtime.interpreter as tflite
    FLAG_INTERPRET = True
except Exception:
    # Stay best-effort for any import-time failure, but no longer swallow
    # KeyboardInterrupt / SystemExit as a bare `except:` would.
    FLAG_INTERPRET = False
    print("TFlite Interpretation not possible")
# Silence all Python warnings for the rest of the process.
warnings.filterwarnings("ignore")
# Set before `import tensorflow` further down.
os.environ["TF_DETERMINISTIC_OPS"] = "1"
os.environ["TF_CUDNN_DETERMINISTIC"] = "1"
os.environ["TF_CPP_MIN_LOG_LEVEL"] = "2"
import time
import numpy as np
import tensorflow as tf
from common_func import *
from tensorflow.keras.utils import plot_model

# Seed everything with the default seed at import time and start the wall clock
# used for the "Total Time" report at the end of the script.
seed_it_all()
start_time = time.time()

# Flags  ##################################################################################
# Opt-in switch for float16 mixed precision plus XLA JIT compilation.
# (Replaces a dead `if False:` branch that referenced the undefined name
# `mixed_precision`.)
FLAG_MIXED_PRECISION = False
if FLAG_MIXED_PRECISION:
    tf.keras.mixed_precision.set_global_policy("mixed_float16")
    tf.config.optimizer.set_jit(True)

# Model  ###################################################################################


def get_model(
    cfg,
    fold_num=0,
    n_labels=250,
    flat_frame_len=FLAT_FRAME_SHAPE,
    flag_model_summary=False,
    flag_with_cb_list=False,
):
    """Build and compile the GRU classifier.

    The flat input row is split into the first NUM_BASE_FEATS summary features
    and the remaining values, which are reshaped to
    (NUM_FRAMES, INPUT_SHAPE[1]) and fed to the GRU stack.

    Args:
        cfg: Mapping with the model / training hyper-parameters.
        fold_num: Fold id forwarded to the MSD head and to `model_utils`.
        n_labels: Number of output classes.
        flat_frame_len: Length of the flat input row.
        flag_model_summary: Print `model.summary()` when True.
        flag_with_cb_list: Also return the training callbacks when True.

    Returns:
        The compiled model, or ``(model, cb_list)`` if `flag_with_cb_list`.
    """
    print("Model Loading ----->")
    _inputs = tf.keras.layers.Input(shape=(flat_frame_len,))

    # Split the flat row: summary statistics first, per-frame values after.
    base_feats = _inputs[:, :NUM_BASE_FEATS]
    frame_seq = tf.reshape(_inputs[:, NUM_BASE_FEATS:], (-1, NUM_FRAMES, INPUT_SHAPE[1]))

    # Sequence encoder
    gru_stack = GRUModel(
        cfg["NUM_GRU_UNITS"], cfg["RATE_GRU_DROPOUT"], cfg["NUM_GRU_BLOCKS"]
    )
    gru_out = gru_stack(frame_seq)

    # Optionally append the summary statistics to the GRU output.
    if cfg['FLAG_CONCAT_FEATS']:
        x = tf.keras.layers.concatenate([gru_out, base_feats], axis=1)
    else:
        x = gru_out
    print("Concatenate Shape", x.shape)

    # Two dense blocks; the second one is added back onto its own input.
    x = ResidualBlock(cfg["NUM_RESIDUAL_UNITS"], 0.25)(x)
    x = x + ResidualBlock(cfg["NUM_RESIDUAL_UNITS"], 0.0)(x)

    # Multi-sample dropout head, softmax computed in float32.
    logits = MSD(units=n_labels, fold_num=fold_num, cfg=cfg)(x)
    _outputs = tf.keras.layers.Softmax(dtype="float32")(logits)

    # Build the model
    model = tf.keras.models.Model(inputs=_inputs, outputs=_outputs)
    metric_ls, cb_list, opt = model_utils(cfg, fold_num)
    model.compile(opt, "sparse_categorical_crossentropy", metrics=metric_ls)

    if flag_model_summary:
        model.summary()

    return (model, cb_list) if flag_with_cb_list else model


def tflite_conversion(model):
    """Export `model` (wrapped with preprocessing) to TFLite and zip it.

    Writes ``{SAVE_DIR}/model.tflite`` and ``submission.zip``. When
    tflite_runtime is available (FLAG_INTERPRET), the exported file is loaded
    with the TFLite interpreter and the prediction for the demo sample is
    printed.

    Args:
        model: Trained Keras model that consumes the FeatureGen output.
    """
    demo_input = load_relevant_data_subset('1004211348.parquet')

    # TFLite Conversion
    tflite_keras_model = TFLiteModel(model)
    # The wrapper is invoked once on the demo sample before conversion, as the
    # original code did. NOTE(review): presumably needed to trace the function
    # ahead of conversion — confirm before removing.
    tflite_keras_model(demo_input)

    keras_model_converter = tf.lite.TFLiteConverter.from_keras_model(tflite_keras_model)
    tflite_model = keras_model_converter.convert()

    tf_lite_model_path = f'{SAVE_DIR}/model.tflite'
    with open(tf_lite_model_path, 'wb') as f:
        f.write(tflite_model)

    # Close the archive explicitly so it is finalised before the function
    # continues, instead of relying on garbage collection.
    # NOTE(review): the file is stored under its relative path inside the zip;
    # confirm that this is the layout the submission format expects.
    with ZipFile('submission.zip', mode='w') as archive:
        archive.write(tf_lite_model_path)

    if FLAG_INTERPRET:
        interpreter = tflite.Interpreter(tf_lite_model_path)
        prediction_fn = interpreter.get_signature_runner("serving_default")
        output = prediction_fn(inputs=demo_input)
        sign = np.argmax(output["outputs"])

        print("PRED : ", decoder[sign])
        # print("GT   : ", train_df.sign[0])

    
@hydra.main(version_base=None, config_path="./", config_name="config")
def my_app(cfg: DictConfig):
    """Train and evaluate the configured folds, then optionally export to TFLite.

    For every fold in [FOLD_START, FOLD_END] the data is loaded, a fresh model
    is built, trained and evaluated; out-of-fold predictions are accumulated
    and overall metrics are printed at the end. If FLAG_GEN_TFLITE is set, the
    model of the last trained fold is exported.

    Args:
        cfg: Hydra config; the settings are read from its "CFG" node.
    """
    print("*" * 75)
    config = dict(cfg["CFG"])
    seed_it_all(config["SEED"])
    # The module-level constant wins over the value in the YAML file.
    config["FLAG_DROP_Z"] = FLAG_DROP_Z
    if config["FLAG_DEBUG"]:
        # Debug runs are short and never log to Weights & Biases.
        config["NUM_EPOCHS"] = 3
        config["FLAG_WANDB"] = False
    else:
        config["FLAG_WANDB"] = bool(config["FLAG_WANDB"])

    pprint.pprint(config)
    true = np.array([])
    oof = np.array([])
    # Stays None when the fold range is empty, so the code after the loop
    # never touches an unbound name.
    model = None
    for fold_cnt, fold_num in enumerate(
        range(config["FOLD_START"], config["FOLD_END"]+1)
    ):
        print("#" * 25)
        print(f"### Fold {fold_num}")
        if config["FLAG_WANDB"]:
            wandb.init(project="isle_analysis", group=config["DESCRIPTION"])

        # Re-seed per fold so each fold is reproducible on its own.
        seed_it_all(config["SEED"] + fold_num)
        train_df, train_x, train_y, val_x, val_y = get_data(
            fold_num, config
        )

        # The model summary is printed for the first processed fold only.
        model, cb_list = get_model(
            config, fold_num, flag_with_cb_list=True, flag_model_summary=(fold_cnt == 0)
        )

        plot_model(model, expand_nested=False)

        model.fit(
            train_x,
            train_y,
            validation_data=(val_x, val_y),
            verbose=2,
            epochs=config["NUM_EPOCHS"],
            callbacks=cb_list,
            batch_size=config["BATCH_SIZE"],
            workers=8,
        )

        # Collect out-of-fold class predictions for the overall metrics.
        oof_p = model.predict(val_x, batch_size=config["BATCH_SIZE"], verbose=2)
        oof_p = np.argmax(oof_p, axis=1)
        true = np.concatenate([true, val_y])
        oof = np.concatenate([oof, oof_p])

        print("#" * 25)
        print(f"### Evaluation Metrics")
        model.evaluate(val_x, val_y)

        # The per-class report is printed for the last configured fold only.
        if fold_num == config["FOLD_END"]:
            compute_evaluation_metrics(model, val_x, val_y, decoder=decoder)

        del train_df, train_x, train_y, val_x, val_y
        gc.collect()

        if config["FLAG_WANDB"]:
            wandb.finish()

    if model is None:
        print("No folds were trained (FOLD_START > FOLD_END); nothing to evaluate.")
        return

    # PRINT OVERALL RESULTS
    print("#" * 25)
    print(f"Overall Metrics")
    get_metrics(true, oof)
    tf.keras.backend.clear_session()

    if config["FLAG_GEN_TFLITE"]:
        tflite_conversion(model)
        
    
# Script entry point: run the Hydra-wrapped training app, then report the
# elapsed time measured from the module-level `start_time`.
if __name__ == "__main__":
    # NOTE(review): `cfg` is not used afterwards.
    cfg = my_app()
    print("Total Time: ", time.time() - start_time)

    # Archived, commented-out experiment kept for reference (never executed).
    # Dilated Convolutions
    # conv_1 = tf.keras.layers.Conv1D(5, 1, strides=1, activation='silu')(x_conv)
    # conv_3 = tf.keras.layers.Conv1D(5, 1, strides=3, activation='silu')(x_conv)
    # conv_5 = tf.keras.layers.Conv1D(5, 1, strides=5, activation='silu')(x_conv)
    # conv_15 = tf.keras.layers.Conv1D(5, 1, strides=15, activation='silu')(x_conv)
    # conv_out = tf.keras.layers.concatenate([conv_1, conv_3, conv_5, conv_15], axis=1)
    # conv_out = tf.reshape(conv_out, (-1, conv_out.shape[1] * conv_out.shape[2]))


Writing trainer.py


In [4]:
%%writefile config.yaml
CFG:
    # General Params  ##########################################################################

    DESCRIPTION: initial trials
    LR: 6e-4
    BATCH_SIZE: 512 # 512
    NUM_EPOCHS: 100
    NUM_SPLITS: 7
    TARGET_METRIC: "val_sparse_categorical_accuracy"

    FOLD_START: 1
    FOLD_END: 1
    
    SEED: 42
    
    # Flags  ##################################################################################
    
    FLAG_DROP_Z: False # NOTE: trainer.py overwrites this value with the FLAG_DROP_Z constant from common_func.py
    FLAG_GEN_TFLITE: True
#     FLAG_GEN_TFLITE: False
    
    FLAG_CONCAT_FEATS: False # Use mean and standard deviation information captured in the dataset


    FLAG_DEBUG: False
#     FLAG_WANDB: True
    FLAG_WANDB: False
    # FLAG_DEBUG: True

    # Model  ##################################################################################

    # NUM_RESIDUAL_UNITS: 128
    NUM_RESIDUAL_UNITS: 1024

    # NUM_GRU_UNITS: 128
    NUM_GRU_UNITS: 512
    RATE_GRU_DROPOUT: 0.5
    NUM_GRU_BLOCKS: 1 # values below 2 behave like 2: GRUModel always builds a start and an end GRU layer

    MSD_DROP_TYPE: "normal"
    MSD_DROPOUT: 0.5

Writing config.yaml


In [None]:
!python3 trainer.py

***************************************************************************
{'BATCH_SIZE': 512,
 'DESCRIPTION': 'initial trials',
 'FLAG_CONCAT_FEATS': False,
 'FLAG_DEBUG': False,
 'FLAG_DROP_Z': False,
 'FLAG_GEN_TFLITE': True,
 'FLAG_WANDB': False,
 'FOLD_END': 1,
 'FOLD_START': 1,
 'LR': 0.0006,
 'MSD_DROPOUT': 0.5,
 'MSD_DROP_TYPE': 'normal',
 'NUM_EPOCHS': 100,
 'NUM_GRU_BLOCKS': 1,
 'NUM_GRU_UNITS': 512,
 'NUM_RESIDUAL_UNITS': 1024,
 'NUM_SPLITS': 7,
 'RATE_GRU_DROPOUT': 0.5,
 'SEED': 42,
 'TARGET_METRIC': 'val_sparse_categorical_accuracy'}
#########################
### Fold 1
Data Loading ----->
(94477, 5658) (94477,)
Splitting Data ------------------------------------>
fold 0 --> 13920
49445    4968
22343    4677
27610    4275
Name: participant_id, dtype: int64
fold 1 --> 13417
36257    4896
53618    4656
25571    3865
Name: participant_id, dtype: int64
fold 2 --> 13072
16069    4848
29302    4722
18796    3502
Name: participant_id, dtype: int64
fold 3 --> 14026
61333    4900
