[Reference](https://www.kaggle.com/code/ritvik1909/tabular-resnet)

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers as L
from tensorflow_addons.activations import sparsemax

from sklearn.impute import SimpleImputer
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.model_selection import StratifiedKFold
import joblib

# Allow up to 300 columns to be shown when a frame is displayed.
pd.set_option('display.max_columns', 300)

# Training data. Duplicates are detected on every column except the first
# (presumably the row id -- confirm against the CSV layout).
data = pd.read_csv('./data/train.csv')
dedup_columns = data.columns[1:]
data = data.drop_duplicates(subset=dedup_columns).reset_index(drop=True)
print(data.shape)
data.head()

# Test data: everything except the row id is a model feature.
test = pd.read_csv('./data/test.csv')
print(test.shape)
X_test = test.drop(columns=['row_id'])

# Features and one-hot encoded targets (one column per distinct target value).
X = data.drop(columns=['row_id', 'target'])
y = pd.get_dummies(data['target'])

# Shuffled 5-fold splitter with a fixed seed.
skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=0)

class DataConfig:
    """Describe which dataset columns are numeric and which are categorical.

    Attributes set by ``__init__``:
        NUMERIC_FEATURE_NAMES: list of numeric column names (a copy of the
            iterable that was passed in).
        CATEGORICAL_FEATURES_WITH_VOCABULARY: the mapping of categorical
            column name -> vocabulary, stored as passed in.
        CATEGORICAL_FEATURE_NAMES: list of that mapping's keys.
        FEATURE_NAMES: numeric names followed by categorical names.
    """

    def __init__(self, numeric_feature_names, categorical_features_with_vocabulary):
        """Store the feature description.

        Args:
            numeric_feature_names: iterable of numeric column names (list,
                tuple, pandas ``Index``, ...).
            categorical_features_with_vocabulary: mapping of categorical
                column name to its vocabulary.
        """
        # Copy into a real list so that `+` below is always list concatenation
        # (a tuple would raise TypeError, a pandas Index would add element-wise)
        # and later mutation of the caller's sequence cannot change the config.
        self.NUMERIC_FEATURE_NAMES = list(numeric_feature_names)
        self.CATEGORICAL_FEATURES_WITH_VOCABULARY = categorical_features_with_vocabulary
        self.CATEGORICAL_FEATURE_NAMES = list(self.CATEGORICAL_FEATURES_WITH_VOCABULARY.keys())
        self.FEATURE_NAMES = self.NUMERIC_FEATURE_NAMES + self.CATEGORICAL_FEATURE_NAMES

from tensorflow.data import Dataset


class DataLoader:
    """Build batched ``tf.data`` datasets from pandas objects."""

    @classmethod
    def from_df(cls, X, y=None, batch_size=1024):
        """Wrap a feature frame, and optionally targets, in a batched ``Dataset``.

        Every column of ``X`` becomes one entry of a dict keyed by the column
        name. When ``y`` is given the dataset is sliced from the pair
        ``(feature dict, targets)``; otherwise from the feature dict alone.

        Args:
            X: frame whose columns are the features.
            y: optional targets exposing ``.values``; ``None`` for inference.
            batch_size: number of rows per batch.

        Returns:
            The result of ``Dataset.from_tensor_slices(...).batch(batch_size)``.
        """
        feature_columns = {}
        for column in X.columns:
            feature_columns[column] = X[column].values.tolist()

        if y is None:
            slices = feature_columns
        else:
            slices = (feature_columns, y.values.tolist())

        return Dataset.from_tensor_slices(slices).batch(batch_size)

def get_inputs(config):
    """Create one scalar Keras ``Input`` per feature listed in ``config``.

    Args:
        config: object exposing ``FEATURE_NAMES`` and ``NUMERIC_FEATURE_NAMES``.

    Returns:
        Dict mapping each feature name to an ``Input`` of shape ``()``. Names
        found in ``NUMERIC_FEATURE_NAMES`` get ``tf.float32``; all others get
        ``tf.string``.
    """
    model_inputs = {}
    for feature_name in config.FEATURE_NAMES:
        if feature_name in config.NUMERIC_FEATURE_NAMES:
            input_dtype = tf.float32
        else:
            input_dtype = tf.string
        model_inputs[feature_name] = L.Input(
            name=feature_name,
            shape=(),
            dtype=input_dtype,
        )
    return model_inputs

def encode_inputs(inputs, config, use_embeddings=False, embedding_dim=32, prefix="", concat_features=False):
    """Encode raw model inputs into tensors ready for the network body.

    Features named in ``config.CATEGORICAL_FEATURE_NAMES`` go through a
    ``StringLookup`` built from their vocabulary; with ``use_embeddings`` the
    resulting indices are then fed to an ``Embedding`` layer, otherwise the
    input is reshaped to ``(1,)`` and the lookup runs in ``"binary"`` mode.
    Every other feature is only reshaped to ``(1,)``.

    Args:
        inputs: mapping of feature name -> Keras input.
        config: object exposing ``CATEGORICAL_FEATURE_NAMES`` and
            ``CATEGORICAL_FEATURES_WITH_VOCABULARY``.
        use_embeddings: choose embeddings over the ``"binary"`` lookup output.
        embedding_dim: output size of each embedding layer.
        prefix: string prepended to every layer name created here.
        concat_features: when truthy, return a single concatenated tensor.

    Returns:
        The concatenation of categorical then numeric encodings when
        ``concat_features`` is truthy, else the tuple
        ``(categorical_encodings, numeric_encodings)`` of two lists.
    """
    categorical_encoded = []
    numeric_encoded = []
    lookup_mode = "int" if use_embeddings else "binary"

    for feature_name in inputs:
        feature_input = inputs[feature_name]

        # Non-categorical features only need a trailing axis.
        if feature_name not in config.CATEGORICAL_FEATURE_NAMES:
            to_column = L.Reshape((1,), name=f"{prefix}{feature_name}_reshape")
            numeric_encoded.append(to_column(feature_input))
            continue

        vocabulary = config.CATEGORICAL_FEATURES_WITH_VOCABULARY[feature_name]
        lookup = L.StringLookup(
            vocabulary=vocabulary,
            mask_token=None,
            num_oov_indices=0,
            output_mode=lookup_mode,
            name=f"{prefix}{feature_name}_lookup",
        )

        if not use_embeddings:
            # The "binary" lookup is applied to the reshaped input.
            to_column = L.Reshape((1,), name=f"{prefix}{feature_name}_reshape")
            categorical_encoded.append(lookup(to_column(feature_input)))
            continue

        # Integer indices from the lookup feed a per-feature embedding table.
        token_ids = lookup(feature_input)
        embedding = L.Embedding(
            input_dim=len(vocabulary),
            output_dim=embedding_dim,
            name=f"{prefix}{feature_name}_embeddings",
        )
        categorical_encoded.append(embedding(token_ids))

    if not concat_features:
        return (categorical_encoded, numeric_encoded)

    merge = L.Concatenate(name=f"{prefix}inputs_concatenate")
    return merge(categorical_encoded + numeric_encoded)

class TabularResNetConfig:
    """Plain holder for the hyper-parameters of the tabular ResNet.

    Each constructor argument is stored unchanged on an upper-case attribute:
    ``NUM_OUT``, ``OUT_ACTIVATION``, ``HIDDEN_UNITS``, ``DROPOUT_RATE``,
    ``USE_EMBEDDINGS`` and ``EMBEDDING_DIM``.
    """

    def __init__(
        self,
        num_outputs,
        out_activation,
        hidden_units,
        dropout_rate=0.3,
        use_embeddings=True,
        embedding_dim=32,
    ):
        """Record the hyper-parameters without validating or copying them."""
        # Categorical encoding options.
        self.EMBEDDING_DIM = embedding_dim
        self.USE_EMBEDDINGS = use_embeddings

        # Hidden stack.
        self.DROPOUT_RATE = dropout_rate
        self.HIDDEN_UNITS = hidden_units

        # Output head.
        self.OUT_ACTIVATION = out_activation
        self.NUM_OUT = num_outputs

def skip_connection(inputs, units, i, dropout_rate):
    """Run two Dense -> BatchNorm -> ReLU -> Dropout stages and add their outputs.

    The second stage consumes the output of the first, and the returned tensor
    is the element-wise sum of both stage outputs (not of the raw ``inputs``).

    Args:
        inputs: tensor entering the block.
        units: width of both Dense layers.
        i: zero-based block index; layer names use ``i + 1``.
        dropout_rate: rate given to both Dropout layers.

    Returns:
        Output of the ``Add`` layer named ``skip_{i+1}``.
    """
    block_name = f"block_{i+1}"

    def run_stage(tensor, stage):
        # One Dense -> BatchNorm -> ReLU -> Dropout pass with stage-specific names.
        tensor = L.Dense(units, name=f"{block_name}_{stage}_dense")(tensor)
        tensor = L.BatchNormalization(name=f"{block_name}_{stage}_b_norm")(tensor)
        tensor = L.ReLU(name=f"{block_name}_{stage}_relu")(tensor)
        return L.Dropout(dropout_rate, name=f"{block_name}_{stage}_dropout")(tensor)

    first_stage = run_stage(inputs, 1)
    second_stage = run_stage(first_stage, 2)
    return L.Add(name=f"skip_{i+1}")([first_stage, second_stage])


class TabularResNet:
    """Factory for a residual-style MLP over tabular inputs."""

    @classmethod
    def from_config(cls, data_config, model_config, name):
        """Assemble an uncompiled Keras model from data and model configs.

        The model takes the inputs from ``get_inputs(data_config)``, encodes
        and concatenates them with ``encode_inputs``, applies one
        ``skip_connection`` block per entry of ``model_config.HIDDEN_UNITS``
        and finishes with a Dense layer named ``"outputs"``.

        Args:
            data_config: feature description passed to ``get_inputs`` and
                ``encode_inputs``.
            model_config: object exposing ``EMBEDDING_DIM``, ``HIDDEN_UNITS``,
                ``DROPOUT_RATE``, ``NUM_OUT``, ``OUT_ACTIVATION`` and,
                optionally, ``USE_EMBEDDINGS``.
            name: name given to the ``keras.Model``.

        Returns:
            The constructed ``keras.Model``.
        """
        inputs = get_inputs(data_config)
        features = encode_inputs(
            inputs,
            data_config,
            # Honour the configured flag instead of hard-coding True; configs
            # that lack the attribute keep the previous behaviour (embeddings).
            use_embeddings=getattr(model_config, "USE_EMBEDDINGS", True),
            embedding_dim=model_config.EMBEDDING_DIM,
            prefix="",
            concat_features=True,
        )
        for i, units in enumerate(model_config.HIDDEN_UNITS):
            features = skip_connection(features, units, i, model_config.DROPOUT_RATE)
        outputs = L.Dense(
            units=model_config.NUM_OUT,
            activation=model_config.OUT_ACTIVATION,
            name="outputs",
        )(features)
        model = keras.Model(inputs=inputs, outputs=outputs, name=name)
        return model

# Every feature column is declared numeric; there are no categorical vocabularies.
numeric_columns = X.columns.tolist()
data_config = DataConfig(
    numeric_feature_names=numeric_columns,
    categorical_features_with_vocabulary={},
)

# One softmax output per one-hot target column, three hidden blocks.
model_config = TabularResNetConfig(
    num_outputs=len(y.columns),
    out_activation='softmax',
    hidden_units=[128, 64, 32],
)

# Untrained instance built only to print the architecture.
blank_model = TabularResNet.from_config(data_config, model_config, name='tab_resnet')
blank_model.summary()

# Upper bound on training epochs per fold.
MAX_EPOCHS = 50


def get_callbacks():
    """Return a new list of training callbacks.

    New callback objects are created on every call, so each ``fit`` run can be
    handed its own instances.

    Returns:
        A list with an ``EarlyStopping`` callback (``min_delta=1e-4``,
        ``patience=3``, best weights restored) followed by a
        ``ReduceLROnPlateau`` callback (``patience=2``).
    """
    return [
        keras.callbacks.EarlyStopping(min_delta=1e-4, patience=3, verbose=1, restore_best_weights=True),
        keras.callbacks.ReduceLROnPlateau(patience=2, verbose=1),
    ]

# Test-set predictions, one array per fold.
preds = []

for fold, (train_index, valid_index) in enumerate(skf.split(X, data['target'])):
    # Slice features and one-hot targets for this fold.
    X_train, y_train = X.iloc[train_index], y.iloc[train_index]
    X_valid, y_valid = X.iloc[valid_index], y.iloc[valid_index]

    # Fit the scaler on the training part only, then apply it to all three
    # frames. The scaled test frame is kept in `x_test` so the unscaled
    # `X_test` stays available for the next fold.
    scaler = StandardScaler()
    scaler.fit(X_train)
    X_train, X_valid, x_test = (
        pd.DataFrame(scaler.transform(frame), columns=X.columns)
        for frame in (X_train, X_valid, X_test)
    )

    data_train = DataLoader.from_df(X_train, y_train, batch_size=512)
    data_valid = DataLoader.from_df(X_valid, y_valid, batch_size=512)
    data_test = DataLoader.from_df(x_test, batch_size=512)

    # A fresh model is built and trained for every fold.
    model = TabularResNet.from_config(data_config, model_config, name=f'tab_resnet_fold_{fold}')
    model.compile(loss='categorical_crossentropy', optimizer="adam", metrics=['accuracy'])
    model.fit(
        data_train,
        validation_data=data_valid,
        callbacks=get_callbacks(),
        epochs=MAX_EPOCHS,
    )

    fold_predictions = model.predict(data_test)
    preds.append(fold_predictions)

submissions = pd.read_csv('./data/sample_submission.csv')

# Average the per-fold prediction arrays, label the columns with the one-hot
# target columns, and take the best-scoring column label for each row.
fold_average = np.array(preds).mean(axis=0)
class_scores = pd.DataFrame(fold_average, columns=y.columns)
submissions['target'] = class_scores.idxmax(axis=1).values.tolist()

submissions.to_csv('preds.csv', index=False)