# Feature Tokenizer Transformer
Featured in the paper [Revisiting Deep Learning Models for Tabular Data (2021, June)](https://arxiv.org/abs/2106.11959), Feature Tokenizer Transformer is a simple adaptation of the Transformer architecture for the tabular domain. In a nutshell, Feature Tokenizer Transformer transforms all features (categorical and numerical) to embeddings and applies a stack of Transformer layers to the embeddings. Thus, every Transformer layer operates on the feature level of one object.

In this notebook we will be implementing Feature Tokenizer Transformer using TensorFlow 2 from scratch.

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers as L
from tensorflow_addons.activations import sparsemax
from tensorflow.data import Dataset
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.model_selection import StratifiedKFold
import joblib

# Allow up to 300 columns to be shown when a DataFrame is displayed.
pd.set_option("display.max_columns", 300)

# Data
Loading the train and test csv files into `pandas.DataFrame` and splitting the columns as features and target.

We will be using Stratified K folds as our local cross validation.

In [2]:
# Read the training set, then drop rows that repeat in every column except
# the first one (`row_id`) and renumber the index from zero.
data = (
    pd.read_csv('../input/tabular-playground-series-feb-2022/train.csv')
    .pipe(lambda frame: frame.drop_duplicates(subset=frame.columns[1:]))
    .reset_index(drop=True)
)
print(data.shape)
data.head()

(123993, 288)


Unnamed: 0,row_id,A0T0G0C10,A0T0G1C9,A0T0G2C8,A0T0G3C7,A0T0G4C6,A0T0G5C5,A0T0G6C4,A0T0G7C3,A0T0G8C2,A0T0G9C1,A0T0G10C0,A0T1G0C9,A0T1G1C8,A0T1G2C7,A0T1G3C6,A0T1G4C5,A0T1G5C4,A0T1G6C3,A0T1G7C2,A0T1G8C1,A0T1G9C0,A0T2G0C8,A0T2G1C7,A0T2G2C6,A0T2G3C5,A0T2G4C4,A0T2G5C3,A0T2G6C2,A0T2G7C1,A0T2G8C0,A0T3G0C7,A0T3G1C6,A0T3G2C5,A0T3G3C4,A0T3G4C3,A0T3G5C2,A0T3G6C1,A0T3G7C0,A0T4G0C6,A0T4G1C5,A0T4G2C4,A0T4G3C3,A0T4G4C2,A0T4G5C1,A0T4G6C0,A0T5G0C5,A0T5G1C4,A0T5G2C3,A0T5G3C2,A0T5G4C1,A0T5G5C0,A0T6G0C4,A0T6G1C3,A0T6G2C2,A0T6G3C1,A0T6G4C0,A0T7G0C3,A0T7G1C2,A0T7G2C1,A0T7G3C0,A0T8G0C2,A0T8G1C1,A0T8G2C0,A0T9G0C1,A0T9G1C0,A0T10G0C0,A1T0G0C9,A1T0G1C8,A1T0G2C7,A1T0G3C6,A1T0G4C5,A1T0G5C4,A1T0G6C3,A1T0G7C2,A1T0G8C1,A1T0G9C0,A1T1G0C8,A1T1G1C7,A1T1G2C6,A1T1G3C5,A1T1G4C4,A1T1G5C3,A1T1G6C2,A1T1G7C1,A1T1G8C0,A1T2G0C7,A1T2G1C6,A1T2G2C5,A1T2G3C4,A1T2G4C3,A1T2G5C2,A1T2G6C1,A1T2G7C0,A1T3G0C6,A1T3G1C5,A1T3G2C4,A1T3G3C3,A1T3G4C2,A1T3G5C1,A1T3G6C0,A1T4G0C5,A1T4G1C4,A1T4G2C3,A1T4G3C2,A1T4G4C1,A1T4G5C0,A1T5G0C4,A1T5G1C3,A1T5G2C2,A1T5G3C1,A1T5G4C0,A1T6G0C3,A1T6G1C2,A1T6G2C1,A1T6G3C0,A1T7G0C2,A1T7G1C1,A1T7G2C0,A1T8G0C1,A1T8G1C0,A1T9G0C0,A2T0G0C8,A2T0G1C7,A2T0G2C6,A2T0G3C5,A2T0G4C4,A2T0G5C3,A2T0G6C2,A2T0G7C1,A2T0G8C0,A2T1G0C7,A2T1G1C6,A2T1G2C5,A2T1G3C4,A2T1G4C3,A2T1G5C2,A2T1G6C1,A2T1G7C0,A2T2G0C6,A2T2G1C5,A2T2G2C4,A2T2G3C3,A2T2G4C2,A2T2G5C1,A2T2G6C0,A2T3G0C5,A2T3G1C4,A2T3G2C3,A2T3G3C2,A2T3G4C1,A2T3G5C0,A2T4G0C4,A2T4G1C3,A2T4G2C2,A2T4G3C1,A2T4G4C0,A2T5G0C3,A2T5G1C2,A2T5G2C1,A2T5G3C0,A2T6G0C2,A2T6G1C1,A2T6G2C0,A2T7G0C1,A2T7G1C0,A2T8G0C0,A3T0G0C7,A3T0G1C6,A3T0G2C5,A3T0G3C4,A3T0G4C3,A3T0G5C2,A3T0G6C1,A3T0G7C0,A3T1G0C6,A3T1G1C5,A3T1G2C4,A3T1G3C3,A3T1G4C2,A3T1G5C1,A3T1G6C0,A3T2G0C5,A3T2G1C4,A3T2G2C3,A3T2G3C2,A3T2G4C1,A3T2G5C0,A3T3G0C4,A3T3G1C3,A3T3G2C2,A3T3G3C1,A3T3G4C0,A3T4G0C3,A3T4G1C2,A3T4G2C1,A3T4G3C0,A3T5G0C2,A3T5G1C1,A3T5G2C0,A3T6G0C1,A3T6G1C0,A3T7G0C0,A4T0G0C6,A4T0G1C5,A4T0G2C4,A4T0G3C3,A4T0G4C2,A4T0G5C1,A4T0G6C0,A4T1G0C5,A4T1G1C4,A4T1G2C3,A4T1G3C2,A4T1G4C1,A4T1G5C0,A4T2G0C4,A4T2G1C3,A4T2G2C2,A4T2G3C1,A4T2G4C0
,A4T3G0C3,A4T3G1C2,A4T3G2C1,A4T3G3C0,A4T4G0C2,A4T4G1C1,A4T4G2C0,A4T5G0C1,A4T5G1C0,A4T6G0C0,A5T0G0C5,A5T0G1C4,A5T0G2C3,A5T0G3C2,A5T0G4C1,A5T0G5C0,A5T1G0C4,A5T1G1C3,A5T1G2C2,A5T1G3C1,A5T1G4C0,A5T2G0C3,A5T2G1C2,A5T2G2C1,A5T2G3C0,A5T3G0C2,A5T3G1C1,A5T3G2C0,A5T4G0C1,A5T4G1C0,A5T5G0C0,A6T0G0C4,A6T0G1C3,A6T0G2C2,A6T0G3C1,A6T0G4C0,A6T1G0C3,A6T1G1C2,A6T1G2C1,A6T1G3C0,A6T2G0C2,A6T2G1C1,A6T2G2C0,A6T3G0C1,A6T3G1C0,A6T4G0C0,A7T0G0C3,A7T0G1C2,A7T0G2C1,A7T0G3C0,A7T1G0C2,A7T1G1C1,A7T1G2C0,A7T2G0C1,A7T2G1C0,A7T3G0C0,A8T0G0C2,A8T0G1C1,A8T0G2C0,A8T1G0C1,A8T1G1C0,A8T2G0C0,A9T0G0C1,A9T0G1C0,A9T1G0C0,A10T0G0C0,target
0,0,-9.536743e-07,-1e-05,-4.3e-05,-0.000114,-0.0002,-0.00024,-0.0002,-0.000114,-4.3e-05,-1e-05,-9.536743e-07,-1e-05,-8.583069e-05,-0.000343,-0.000801,-0.001202,-0.001202,-0.000801,-0.000343,-8.6e-05,-1e-05,-4.291534e-05,-0.000343,-0.001202,-0.002403,-0.003004,-0.002403,-0.001202,-0.000343,-4.3e-05,-0.000114,-0.000801,-0.002403,-0.004005,-0.004005,-0.002403,-0.000801,-0.000114,-0.0002,-0.001202,-0.003004,-0.004005,-0.003004,-0.001202,-0.0002,-0.00024,-0.001202,-0.002403,-0.002403,-0.001202,-0.00024,-0.0002,-0.000801,-0.001202,-0.000801,-0.0002,-0.000114,-0.000343,-0.000343,-0.000114,-4.3e-05,-8.6e-05,-4.3e-05,-1e-05,-1e-05,-9.536743e-07,-9.536743e-06,-8.6e-05,-0.000343,-0.000801,-0.001202,-0.001202,-0.000801,-0.000343,-8.6e-05,-1e-05,-8.6e-05,-0.000687,-0.002403,-0.004807,-0.006008,-0.004807,-0.002403,-0.000687,-8.6e-05,-0.000343,-0.002403,-0.00721,-0.002016,-0.012016,-0.00721,0.017597,0.009657,-0.000801,-0.004807,0.007984,-0.006022,0.007984,-0.004807,-0.000801,-0.001202,0.003992,-0.012016,-0.012016,0.003992,0.008798,-0.001202,-0.004807,0.00279,-0.004807,-0.001202,-0.000801,0.007597,0.007597,-0.000801,-0.000343,-0.000687,-0.000343,-8.6e-05,-8.6e-05,-1e-05,-4.3e-05,-0.000343,-0.001202,-0.002403,-0.003004,-0.002403,-0.001202,-0.000343,-4.3e-05,-0.000343,-0.002403,-0.00721,-0.012016,-0.002016,-0.00721,-0.002403,-0.000343,0.008798,-0.00721,0.011976,-0.014033,0.021976,0.01279,-0.001202,-0.002403,-0.012016,-0.014033,-0.004033,-0.002016,-0.002403,-0.003004,0.007984,0.001976,0.017984,0.006996,-0.002403,0.01279,-0.00721,-0.002403,-0.001202,0.007597,0.008798,-0.000343,-0.000343,-4.3e-05,-0.000114,-0.000801,0.007597,0.015995,0.005995,0.007597,-0.000801,-0.000114,-0.000801,0.005193,-0.012016,0.023978,-0.012016,0.005193,-0.000801,-0.002403,-0.002016,-0.004033,-0.014033,-0.002016,0.007597,-0.004005,0.003978,0.005967,-0.006022,-0.004005,-0.004005,-0.002016,0.007984,0.015995,-0.002403,-0.004807,-0.002403,-0.000801,-0.000801,-0.000114,-0.0002,0.008798,0.006996,0.005995,-0.003004,-0.0
01202,-0.0002,-0.001202,-0.006008,-0.002016,0.007984,0.013992,-0.001202,-0.003004,-0.012016,0.001976,0.007984,-0.003004,-0.004005,-0.002016,0.017984,-0.004005,-0.003004,0.023992,-0.003004,-0.001202,-0.001202,-0.0002,-0.00024,-0.001202,0.017597,-0.002403,-0.001202,-0.00024,-0.001202,-0.004807,-0.00721,0.005193,-0.001202,-0.002403,-0.00721,0.00279,0.007597,0.007597,0.015193,-0.002403,0.008798,-0.001202,0.00976,-0.0002,-0.000801,-0.001202,-0.000801,-0.0002,-0.000801,-0.002403,-0.002403,-0.000801,-0.001202,-0.002403,-0.001202,-0.000801,-0.000801,-0.0002,-0.000114,-0.000343,-0.000343,-0.000114,-0.000343,-0.000687,-0.000343,-0.000343,-0.000343,-0.000114,-4.3e-05,-8.6e-05,-4.3e-05,-8.6e-05,-8.6e-05,-4.3e-05,-1e-05,-1e-05,-1e-05,-9.536743e-07,Streptococcus_pyogenes
1,1,-9.536743e-07,-1e-05,-4.3e-05,0.000886,-0.0002,0.00076,-0.0002,-0.000114,-4.3e-05,-1e-05,-9.536743e-07,-1e-05,-8.583069e-05,-0.000343,0.000199,-0.000202,0.001798,-0.000801,-0.000343,-8.6e-05,-1e-05,-4.291534e-05,-0.000343,0.001798,-0.000403,0.001996,0.003597,-0.001202,-0.000343,-4.3e-05,-0.000114,-0.000801,-0.000403,0.002995,0.002995,0.000597,0.001199,-0.000114,-0.0002,-0.001202,-0.002004,-5e-06,-4e-06,0.000798,-0.0002,-0.00024,0.002798,0.001597,0.000597,-0.000202,-0.00024,-0.0002,0.001199,-0.000202,0.000199,-0.0002,-0.000114,-0.000343,-0.000343,-0.000114,-4.3e-05,-8.6e-05,-4.3e-05,-1e-05,-1e-05,-9.536743e-07,-9.536743e-06,0.000914,-0.000343,0.000199,-0.001202,0.000798,0.001199,0.000657,-8.6e-05,-1e-05,-8.6e-05,-0.000687,0.000597,0.002193,-0.003008,0.003193,0.002597,-0.000687,-8.6e-05,-0.000343,0.000597,0.00079,-0.002016,0.001984,-0.00021,0.002597,-0.000343,0.000199,0.001193,-0.001016,0.000978,-0.003016,-0.000807,0.000199,-0.001202,0.001992,-0.003016,0.000984,0.001992,-0.001202,-0.000202,0.000193,-0.00221,-0.001807,-0.001202,-0.000801,-0.001403,0.001597,-0.000801,-0.000343,0.001313,0.000657,-8.6e-05,-8.6e-05,-1e-05,-4.3e-05,0.000657,-0.001202,0.001597,-0.002004,-0.000403,0.001798,-0.000343,-4.3e-05,-0.000343,0.000597,-0.00021,-0.002016,-1.6e-05,-0.00121,-0.000403,-0.000343,0.000798,-0.00121,0.007976,-0.001033,0.001976,-0.00221,-0.001202,0.000597,-0.005016,-0.001033,-0.002033,-0.001016,-0.002403,-0.001004,-0.005016,0.002976,0.008984,0.000996,-0.002403,-0.00121,-0.00021,0.000597,-0.000202,-0.001403,0.001798,-0.000343,0.000657,-4.3e-05,-0.000114,-0.000801,0.000597,0.001995,0.001995,0.001597,-0.000801,-0.000114,-0.000801,-0.000807,-0.004016,0.003978,-0.003016,0.000193,-0.000801,-0.000403,-0.004016,-0.004033,-3.3e-05,-0.004016,-0.000403,0.001995,-0.002022,0.000967,-0.003022,-5e-06,-0.003005,-0.005016,-0.003016,0.005995,0.000597,0.000193,0.001597,-0.000801,-0.000801,0.000886,-0.0002,0.001798,0.000996,-0.001005,-0.003004,-0.001202,0.0008,-0.000202,-0.003008,-1.6e-05,-1
.6e-05,0.005992,-0.000202,-0.002004,-0.002016,0.000976,0.002984,-0.002004,-5e-06,-0.004016,0.001984,-0.003005,-4e-06,0.000992,-0.003004,-0.000202,-0.001202,-0.0002,-0.00024,-0.000202,0.001597,-0.000403,0.003798,-0.00024,-0.001202,0.001193,-0.00421,-0.000807,-0.000202,0.000597,0.00379,-0.00021,0.000597,0.000597,-0.002807,0.000597,0.002798,0.001798,0.00076,-0.0002,-0.000801,-0.000202,-0.000801,0.0008,0.000199,0.003597,0.001597,-0.000801,-0.000202,-0.001403,0.000798,0.001199,0.001199,0.0008,-0.000114,-0.000343,-0.000343,-0.000114,-0.000343,0.001313,0.000657,0.001657,0.001657,0.000886,-4.3e-05,-8.6e-05,-4.3e-05,0.000914,0.000914,-4.3e-05,-1e-05,-1e-05,-1e-05,-9.536743e-07,Salmonella_enterica
2,2,-9.536743e-07,-2e-06,7e-06,0.000129,0.000268,0.00027,0.000243,0.000125,1e-06,-7e-06,4.632568e-08,-4e-06,1.693115e-07,8e-06,0.000366,0.000766,0.000937,0.000632,0.000101,-2.1e-05,-8e-06,8.465576e-08,-8e-05,0.000114,0.00071,0.001307,0.001065,0.000375,-1.7e-05,-1.7e-05,-1.4e-05,-9.4e-05,0.000183,0.000817,0.000968,0.000469,-7e-06,-1.7e-05,-3.1e-05,-4.9e-05,0.000245,0.000437,0.000258,-5e-06,-2.2e-05,-2.5e-05,8.3e-05,0.00033,0.000153,6.3e-05,-2.8e-05,3.1e-05,0.000157,0.000227,0.00014,2e-06,6e-05,0.000191,0.000133,2.6e-05,2.5e-05,0.0001,1.6e-05,4e-06,9e-06,-9.536743e-07,-5.367432e-07,-1.3e-05,0.000119,0.000604,0.000956,0.000892,0.000392,2.8e-05,-1.8e-05,-5e-06,-1e-05,-9.5e-05,0.000279,0.00165,0.002327,0.001573,0.000378,-7.3e-05,-2.6e-05,-0.000105,-0.000468,0.000588,0.001672,0.002073,0.000652,-0.000197,-8.9e-05,-0.000176,-0.000472,0.000311,0.00044,0.000435,-0.000382,-0.000155,-0.000133,-0.000423,7.2e-05,-8.2e-05,-0.000493,-0.000211,4.2e-05,0.000152,0.000131,-0.000143,-0.000164,0.000193,0.000416,0.000304,8.9e-05,0.000179,0.000297,8.4e-05,0.000123,9e-05,2.1e-05,-1e-05,-2.5e-05,0.000365,0.001105,0.001269,0.000726,3.1e-05,-6.3e-05,-7e-06,-8.8e-05,-0.000165,0.000774,0.002056,0.001407,0.000417,-0.000313,-8.4e-05,-0.000273,-0.000725,-0.0001,0.000122,-0.000102,-0.000707,-0.000276,-0.000544,-0.001238,-0.001388,-0.001945,-0.001202,-0.000434,-0.000448,-0.000957,-0.001513,-0.001352,-0.000515,-8.9e-05,-0.000119,-0.000158,-0.000285,0.000272,0.000342,0.000139,0.000213,0.000171,8.6e-05,-2.3e-05,1.8e-05,0.000398,0.00096,0.000709,0.000131,-5.7e-05,-9e-06,-0.000183,-0.000231,0.000335,0.000796,0.000231,-0.000591,-0.000178,-0.000527,-0.001606,-0.001762,-0.001832,-0.001317,-0.000524,-0.000785,-0.001951,-0.002587,-0.001926,-0.000689,-0.00044,-0.001386,-0.001142,-0.000507,-2.6e-05,-3e-05,-9.6e-05,0.000197,0.000142,0.000147,-3e-05,-3.5e-05,0.000282,0.0005,0.000202,-3.2e-05,-5.1e-05,-0.000179,-0.000324,-0.000137,6.9e-05,-0.000161,-0.000171,-0.000413,-0.001122,-0.001422,-0.00116,-0.000436,-0.00061
,-0.001299,-0.00115,-0.000605,-0.000149,-0.00049,-0.000183,0.000137,0.000168,0.000137,-2.8e-05,1.1e-05,0.000249,0.000258,9.6e-05,-1.5e-05,-0.000152,-0.000126,0.00025,0.000161,3.9e-05,-0.000221,-0.000326,-0.000205,8e-05,-0.000116,-8.5e-05,-3.1e-05,0.000146,0.000118,0.000156,3e-06,8e-05,0.000219,0.000225,1.7e-05,1.4e-05,0.000319,0.000355,0.000175,3e-05,0.000347,0.000229,0.000203,0.000238,0.00012,2.7e-05,9.7e-05,0.000159,7e-05,0.000138,0.000315,0.000214,0.000178,0.000296,0.000118,4.2e-05,8.4e-05,4.8e-05,8.1e-05,0.000106,7.2e-05,1e-05,8e-06,1.9e-05,1.046326e-06,Salmonella_enterica
3,3,4.632568e-08,-6e-06,1.2e-05,0.000245,0.000492,0.000522,0.000396,0.000197,-3e-06,-7e-06,-9.536743e-07,-1e-05,-1.783069e-05,6.8e-05,0.000722,0.001594,0.001913,0.001111,0.000247,-4.7e-05,-1e-05,-1.891534e-05,-0.000138,0.000161,0.001345,0.002526,0.002155,0.000754,-2.9e-05,-2.5e-05,-3.4e-05,-0.000179,0.000345,0.001427,0.001964,0.000861,-3.8e-05,-5e-05,-6.7e-05,-0.000137,0.000437,0.000987,0.000498,3e-06,-6.2e-05,-2.3e-05,8.9e-05,0.00053,0.000428,5.3e-05,-5e-05,6.7e-05,0.000316,0.00045,0.00018,4e-06,0.000111,0.000339,0.000198,5.6e-05,7.5e-05,0.000142,5.3e-05,2.1e-05,2.2e-05,-9.536743e-07,-8.536743e-06,-3.3e-05,0.000213,0.001123,0.001838,0.001725,0.000723,6.3e-05,-4.9e-05,-9e-06,-4.1e-05,-0.000194,0.000708,0.003112,0.004602,0.003092,0.000662,-0.000147,-5.7e-05,-0.000188,-0.000655,0.000784,0.003456,0.003771,0.001364,-0.000403,-0.000162,-0.000342,-0.000828,0.000437,0.001299,0.000558,-0.000663,-0.000304,-0.000376,-0.000507,-1.3e-05,-0.000186,-0.000787,-0.000353,-1e-05,0.000225,0.000198,-0.000193,-0.000285,0.000302,0.000735,0.000481,8e-05,0.000397,0.000582,0.000212,0.000244,0.000172,4.2e-05,-2e-05,-4e-06,0.000738,0.0021,0.002546,0.001449,0.000121,-8.9e-05,-2e-05,-0.000188,-0.000388,0.001525,0.00362,0.00344,0.000862,-0.000598,-0.00016,-0.000516,-0.001558,-0.00015,0.000439,-0.000553,-0.001339,-0.000573,-0.000909,-0.002356,-0.003248,-0.003088,-0.002446,-0.000983,-0.000763,-0.001795,-0.002776,-0.002388,-0.000972,-0.00012,-0.000187,-0.000591,-0.000435,0.000383,0.000632,0.000177,0.000419,0.000326,0.000148,-2.7e-05,1.2e-05,0.000789,0.001784,0.001499,0.000263,-0.000192,-4.5e-05,-0.000332,-0.000673,0.000544,0.001514,0.000323,-0.000884,-0.000316,-0.000965,-0.00271,-0.003186,-0.003082,-0.002416,-0.000924,-0.001328,-0.004096,-0.005403,-0.003819,-0.001369,-0.001011,-0.00252,-0.002501,-0.001056,-2.2e-05,-0.000134,-0.000242,0.000474,0.000286,0.000271,-5.4e-05,-4.1e-05,0.000548,0.000955,0.000445,-9.1e-05,-5.4e-05,-0.000399,-0.000615,-0.000265,9.5e-05,-0.000466,-0.000318,-0.000949,-0.002322
,-0.002791,-0.002042,-0.000733,-0.001078,-0.002585,-0.002304,-0.00094,-0.000372,-0.000835,-0.000421,0.000276,0.00023,0.000274,-5.2e-05,1.7e-05,0.000388,0.000541,0.00018,-4.4e-05,-0.000236,-0.000115,0.000304,0.000259,-4e-05,-0.000423,-0.000729,-0.000319,-1.8e-05,-0.000188,-0.000215,-7.5e-05,0.000284,0.000251,0.000296,-2e-05,0.000194,0.000399,0.000407,4e-05,5.8e-05,0.000505,0.000784,0.000296,0.000128,0.000835,0.000445,0.000401,0.000412,0.000246,4.7e-05,0.00016,0.000309,0.000129,0.000243,0.000597,0.000428,0.000363,0.000472,0.000197,6.8e-05,0.000151,0.0001,0.00018,0.000202,0.000153,2.1e-05,1.5e-05,4.6e-05,-9.536743e-07,Salmonella_enterica
4,4,-9.536743e-07,-1e-05,-4.3e-05,-0.000114,-0.0002,-0.00024,-0.0002,-0.000114,-4.3e-05,-1e-05,-9.536743e-07,-1e-05,-8.583069e-05,-0.000343,-0.000801,-0.001202,-0.001202,-0.000801,-0.000343,-8.6e-05,-1e-05,-4.291534e-05,-0.000343,-0.001202,-0.002403,-0.003004,-0.002403,-0.001202,-0.000343,-4.3e-05,-0.000114,-0.000801,-0.002403,-0.004005,-0.004005,-0.002403,-0.000801,-0.000114,-0.0002,-0.001202,-0.003004,-0.004005,-0.003004,-0.001202,-0.0002,-0.00024,-0.001202,-0.002403,-0.002403,-0.001202,-0.00024,-0.0002,-0.000801,-0.001202,-0.000801,-0.0002,-0.000114,-0.000343,-0.000343,-0.000114,-4.3e-05,-8.6e-05,-4.3e-05,-1e-05,-1e-05,-9.536743e-07,-9.536743e-06,-8.6e-05,-0.000343,-0.000801,-0.001202,-0.001202,-0.000801,-0.000343,-8.6e-05,-1e-05,-8.6e-05,-0.000687,-0.002403,-0.004807,-0.006008,-0.004807,-0.002403,-0.000687,-8.6e-05,-0.000343,-0.002403,-0.00721,-0.012016,-0.002016,-0.00721,-0.002403,-0.000343,-0.000801,-0.004807,-0.012016,-0.016022,-0.012016,-0.004807,-0.000801,-0.001202,-0.006008,-0.012016,-0.012016,-0.006008,-0.001202,-0.001202,0.005193,-0.00721,-0.004807,-0.001202,0.009199,0.007597,-0.002403,-0.000801,-0.000343,-0.000687,-0.000343,-8.6e-05,-8.6e-05,-1e-05,-4.3e-05,-0.000343,-0.001202,-0.002403,-0.003004,-0.002403,-0.001202,-0.000343,-4.3e-05,-0.000343,-0.002403,-0.00721,-0.012016,-0.012016,-0.00721,-0.002403,-0.000343,-0.001202,-0.00721,-0.018024,-0.024033,-0.018024,0.00279,-0.001202,-0.002403,-0.012016,-0.004033,-0.024033,-0.002016,-0.002403,-0.003004,-0.002016,0.001976,-0.012016,0.006996,0.007597,-0.00721,0.01279,-0.002403,0.028798,0.017597,0.008798,-0.000343,-0.000343,-4.3e-05,-0.000114,-0.000801,-0.002403,0.015995,0.005995,0.007597,-0.000801,-0.000114,-0.000801,0.015193,-0.012016,0.033978,-0.012016,0.005193,-0.000801,-0.002403,-0.002016,0.015967,-0.014033,-0.002016,0.007597,-0.004005,0.003978,0.025967,-0.006022,-0.004005,-0.004005,-0.012016,0.017984,0.015995,0.007597,-0.004807,0.007597,-0.000801,-0.000801,0.009886,-0.0002,0.008798,0.006996,0.005995,-0.0030
04,-0.001202,-0.0002,-0.001202,-0.006008,0.007984,0.007984,0.013992,-0.001202,-0.003004,-0.002016,0.001976,0.007984,-0.003004,-0.004005,0.007984,0.037984,-0.004005,-0.003004,0.013992,0.006996,-0.001202,0.008798,-0.0002,-0.00024,-0.001202,0.017597,-0.002403,-0.001202,-0.00024,-0.001202,-0.004807,0.00279,0.015193,-0.001202,0.007597,-0.00721,0.01279,0.007597,0.007597,0.015193,-0.002403,0.008798,-0.001202,0.00976,-0.0002,-0.000801,0.008798,-0.000801,-0.0002,-0.000801,-0.002403,-0.002403,-0.000801,-0.001202,-0.002403,0.008798,-0.000801,-0.000801,-0.0002,-0.000114,-0.000343,-0.000343,-0.000114,-0.000343,0.009313,-0.000343,-0.000343,0.009657,-0.000114,-4.3e-05,-8.6e-05,-4.3e-05,-8.6e-05,-8.6e-05,-4.3e-05,-1e-05,-1e-05,-1e-05,-9.536743e-07,Enterococcus_hirae


In [3]:
# Read the test set; the model only consumes the feature columns, so the
# identifier column is removed from the prediction frame.
test = pd.read_csv('../input/tabular-playground-series-feb-2022/test.csv')
print(test.shape)
X_test = test.drop(columns=['row_id'])

(100000, 287)


In [4]:
# Features: every column except the identifier and the label.
X = data.drop(columns=['row_id', 'target'])
# Label: one indicator column per distinct value of `target`.
y = pd.get_dummies(data['target'])

In [5]:
# Shuffled, stratified 5-fold splitter; the fixed seed keeps fold membership
# identical between runs.
skf = StratifiedKFold(
    n_splits=5,
    shuffle=True,
    random_state=0,
)

# Model
**Creating some data utility classes**:

`DataConfig` helps to segregate the features into numeric features and categorical features and maintain a vocabulary for the categorical ones.

`DataLoader` class creates `tf.data.Dataset` objects from `pandas.DataFrame` to ensure efficiency in the input pipeline to the model.

In [6]:
class DataConfig:
    """Describe which feature columns are numeric and which are categorical.

    Attributes:
        NUMERIC_FEATURE_NAMES: the ``numeric_feature_names`` argument, stored as given.
        CATEGORICAL_FEATURES_WITH_VOCABULARY: the mapping argument, stored as given.
        CATEGORICAL_FEATURE_NAMES: list of the keys of that mapping.
        FEATURE_NAMES: numeric names followed by categorical names.
    """

    def __init__(self, numeric_feature_names, categorical_features_with_vocabulary):
        categorical_names = list(categorical_features_with_vocabulary.keys())

        self.NUMERIC_FEATURE_NAMES = numeric_feature_names
        self.CATEGORICAL_FEATURES_WITH_VOCABULARY = categorical_features_with_vocabulary
        self.CATEGORICAL_FEATURE_NAMES = categorical_names
        # Numeric features always come first in the combined ordering.
        self.FEATURE_NAMES = numeric_feature_names + categorical_names
        
class DataLoader:
    """Factory for batched ``tf.data.Dataset`` objects built from tabular frames."""

    @classmethod
    def from_df(cls, X, y=None, batch_size=1024):
        """Build a batched dataset from a feature frame and optional targets.

        Args:
            X: frame-like object; each column becomes one entry of the feature
                dict, keyed by column name.
            y: optional targets. When given, dataset elements are
                ``(feature_dict, target)`` pairs; otherwise just ``feature_dict``.
            batch_size: number of rows per batch.

        Returns:
            The batched ``Dataset``.
        """
        # Column values are converted to plain Python lists before slicing.
        feature_columns = {name: X[name].values.tolist() for name in X.columns}
        if y is None:
            tensors = feature_columns
        else:
            tensors = (feature_columns, y.values.tolist())
        return Dataset.from_tensor_slices(tensors).batch(batch_size)

**Creating Input Layers and Feature Encoding Layers**

`get_inputs` returns a dictionary of Input Layers based on the data types of the feature columns mentioned in the `DataConfig` object.

`encode_inputs` applies StringLookup and Embedding Layer to the categorical features and Reshapes the Numeric Features in order to encode the inputs.

In [7]:
def get_inputs(config):
    """Create one scalar Keras ``Input`` per feature named in ``config``.

    Features listed in ``config.NUMERIC_FEATURE_NAMES`` get a ``tf.float32``
    input; every other feature gets a ``tf.string`` input.

    Returns:
        dict mapping feature name to its ``Input`` tensor, in
        ``config.FEATURE_NAMES`` order.
    """
    input_layers = {}
    for feature_name in config.FEATURE_NAMES:
        is_numeric = feature_name in config.NUMERIC_FEATURE_NAMES
        input_layers[feature_name] = L.Input(
            name=feature_name,
            shape=(),
            dtype=tf.float32 if is_numeric else tf.string,
        )
    return input_layers

def encode_inputs(inputs, config, use_embeddings=False, embedding_dim=32, prefix="", concat_features=False):
    """Encode raw model inputs into per-feature tensors.

    Categorical features go through a ``StringLookup`` built from their
    vocabulary: integer indices followed by an ``Embedding`` when
    ``use_embeddings`` is true, otherwise a "binary" lookup output on the
    reshaped input. Numeric features are only reshaped to ``(1,)``.

    Args:
        inputs: dict of feature name -> input tensor.
        config: object exposing ``CATEGORICAL_FEATURE_NAMES`` and
            ``CATEGORICAL_FEATURES_WITH_VOCABULARY``.
        use_embeddings: embed categorical features instead of binary-encoding them.
        embedding_dim: output width of each categorical embedding.
        prefix: string prepended to every layer name created here.
        concat_features: when true, return one concatenated tensor.

    Returns:
        Either the concatenation of categorical then numeric features, or the
        tuple ``(categorical_features, numeric_features)`` of lists.
    """
    categorical_encoded = []
    numeric_encoded = []

    for feature_name, feature_input in inputs.items():
        # Numeric path: nothing to learn here, just give the scalar a feature axis.
        if feature_name not in config.CATEGORICAL_FEATURE_NAMES:
            numeric_encoded.append(
                L.Reshape((1,), name=f"{prefix}{feature_name}_reshape")(feature_input)
            )
            continue

        vocabulary = config.CATEGORICAL_FEATURES_WITH_VOCABULARY[feature_name]
        lookup = L.StringLookup(
            vocabulary=vocabulary,
            mask_token=None,
            num_oov_indices=0,
            output_mode="int" if use_embeddings else "binary",
            name=f"{prefix}{feature_name}_lookup",
        )
        if use_embeddings:
            indices = lookup(feature_input)
            encoded = L.Embedding(
                input_dim=len(vocabulary),
                output_dim=embedding_dim,
                name=f"{prefix}{feature_name}_embeddings",
            )(indices)
        else:
            reshaped = L.Reshape((1,), name=f"{prefix}{feature_name}_reshape")(feature_input)
            encoded = lookup(reshaped)
        categorical_encoded.append(encoded)

    if concat_features:
        return L.Concatenate(name=f"{prefix}inputs_concatenate")(categorical_encoded + numeric_encoded)
    return (categorical_encoded, numeric_encoded)

**Defining Model Configurations**
* Number of Outputs
* Activation of the Output Layer
* Number of Transformer Blocks
* Number of heads in the Transformer Blocks
* Embedding Dimension for the features
* Dimension of the Dense Projections in the transformer blocks

In [8]:
class FeatureTokenizerTransformerConfig:
    """Hyper-parameters consumed when building the Feature Tokenizer Transformer.

    Attributes:
        NUM_OUT: number of units in the output layer.
        OUT_ACTIVATION: activation of the output layer.
        NUM_TRANSFORMER_BLOCKS: how many transformer blocks are stacked.
        NUM_HEADS: attention heads per transformer block.
        EMBEDDING_DIM: width of every feature embedding.
        DENSE_DIM: hidden width of the dense projection inside each block.
    """

    def __init__(
        self,
        num_outputs,
        out_activation,
        num_transformer_blocks=2,
        num_heads=8,
        embedding_dim=32,
        dense_dim=16,
    ):
        # Output head.
        self.NUM_OUT, self.OUT_ACTIVATION = num_outputs, out_activation
        # Transformer stack.
        self.NUM_TRANSFORMER_BLOCKS, self.NUM_HEADS = num_transformer_blocks, num_heads
        # Layer widths.
        self.EMBEDDING_DIM, self.DENSE_DIM = embedding_dim, dense_dim

**Defining a standard Transformer Block**

In [9]:
class TransformerBlock(L.Layer):
    """Self-attention followed by a two-layer dense projection.

    Each sub-layer is wrapped in a residual connection and the sum is layer
    normalised (normalisation is applied after the residual add).

    Args:
        embed_dim: width of the token embeddings; also used as the per-head
            ``key_dim`` of the attention layer and as the output width of the
            dense projection.
        dense_dim: hidden width of the dense projection.
        num_heads: number of attention heads.
        **kwargs: forwarded to ``keras.layers.Layer``.
    """

    def __init__(self, embed_dim, dense_dim, num_heads, **kwargs):
        super().__init__(**kwargs)
        self.embed_dim = embed_dim
        self.dense_dim = dense_dim
        self.num_heads = num_heads
        self.attention = L.MultiHeadAttention(num_heads=num_heads, key_dim=embed_dim)
        self.dense_proj = tf.keras.Sequential([L.Dense(dense_dim, activation="relu"), L.Dense(embed_dim)])
        self.layernorm1 = L.LayerNormalization()
        self.layernorm2 = L.LayerNormalization()

    def call(self, inputs, mask=None):
        """Apply the block to ``inputs``; ``mask`` (if any) restricts attention."""
        if mask is not None:
            # Insert a broadcast axis between the batch and key axes. The
            # original `mask[: tf.newaxis, :]` was a typo: it sliced `[:None, :]`
            # and left the mask's rank unchanged.
            # Assumes a 2-D (batch, tokens) mask - TODO confirm against callers.
            mask = mask[:, tf.newaxis, :]
        attention_output = self.attention(inputs, inputs, attention_mask=mask)
        proj_input = self.layernorm1(inputs + attention_output)
        proj_output = self.dense_proj(proj_input)
        return self.layernorm2(proj_input + proj_output)

    def get_config(self):
        """Return the constructor arguments so the layer can be serialised."""
        config = super().get_config()
        config.update(
            {
                "embed_dim": self.embed_dim,
                "dense_dim": self.dense_dim,
                "num_heads": self.num_heads,
            }
        )
        return config

**Defining the Model**
The model takes Input Layers and encodes the features using the functions defined above; the numerical features are then passed through a Dense layer with the same output dimension as the embeddings of the categorical features.

All the feature embeddings are then stacked and passed through a series of Transformer Blocks, followed by Global Max Pooling and the final output layer.

In [10]:
class FeatureTokenizerTransformer:
    """Builder for the Feature Tokenizer Transformer Keras model."""

    @classmethod
    def from_config(cls, data_config, model_config, name):
        """Assemble the model described by the two config objects.

        Every feature is turned into one embedding of width
        ``model_config.EMBEDDING_DIM`` (categorical features via lookup +
        embedding, numeric features via their own Dense layer). The embeddings
        are stacked along axis 1, passed through
        ``model_config.NUM_TRANSFORMER_BLOCKS`` transformer blocks, max-pooled
        over axis 1 and fed to the output Dense layer.

        Args:
            data_config: ``DataConfig`` naming the numeric/categorical features.
            model_config: ``FeatureTokenizerTransformerConfig`` with the
                architecture hyper-parameters.
            name: name given to the resulting ``keras.Model``.

        Returns:
            An uncompiled ``keras.Model`` taking a dict of per-feature inputs.
        """
        embedding_dim = model_config.EMBEDDING_DIM

        inputs = get_inputs(data_config)
        cat_features, num_features = encode_inputs(
            inputs,
            data_config,
            use_embeddings=True,
            embedding_dim=embedding_dim,
            prefix="",
            concat_features=False,
        )

        # Each numeric feature gets its own Dense layer projecting the scalar
        # to the shared embedding width.
        num_features = [
            L.Dense(embedding_dim, name=f"{feature_name}_embeddings")(feat)
            for feature_name, feat in zip(data_config.NUMERIC_FEATURE_NAMES, num_features)
        ]

        # Stack the embeddings along a new axis 1. The reshape width follows
        # the configured embedding size (it used to be hard-coded to 32, which
        # broke any other `embedding_dim`). FEATURE_NAMES is numeric-then-
        # categorical, matching the order of the concatenated list.
        features = L.Concatenate(axis=1, name="feature_embeddings_stack")(
            [
                L.Reshape((1, embedding_dim), name=f"{feat_name}_reshape_2")(feat)
                for feat, feat_name in zip((num_features + cat_features), data_config.FEATURE_NAMES)
            ]
        )

        for _ in range(model_config.NUM_TRANSFORMER_BLOCKS):
            features = TransformerBlock(
                embed_dim=embedding_dim,
                dense_dim=model_config.DENSE_DIM,
                num_heads=model_config.NUM_HEADS,
            )(features)
        # Collapse axis 1 by taking the maximum of each embedding channel.
        features = L.GlobalMaxPooling1D()(features)
        outputs = L.Dense(
            units=model_config.NUM_OUT,
            activation=model_config.OUT_ACTIVATION,
            name="outputs",
        )(features)
        model = keras.Model(inputs=inputs, outputs=outputs, name=name)
        return model

**Creating instances of the various classes defined so far**

In [11]:
# Every column of X is treated as numeric, so the categorical vocabulary
# mapping is left empty.
data_config = DataConfig(
    numeric_feature_names=list(X.columns),
    categorical_features_with_vocabulary={},
)
# One softmax output unit per one-hot target column.
model_config = FeatureTokenizerTransformerConfig(
    num_outputs=y.shape[1],
    out_activation='softmax',
)

In [12]:
# Build an untrained model once, only to inspect its architecture.
blank_model = FeatureTokenizerTransformer.from_config(
    data_config=data_config,
    model_config=model_config,
    name='ftt',
)
blank_model.summary()

2022-03-31 14:05:31.911963: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:937] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-03-31 14:05:32.048224: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:937] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-03-31 14:05:32.049350: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:937] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-03-31 14:05:32.051022: I tensorflow/core/platform/cpu_feature_guard.cc:142] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags

Model: "ftt"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
A0T0G0C10 (InputLayer)          [(None,)]            0                                            
__________________________________________________________________________________________________
A0T0G1C9 (InputLayer)           [(None,)]            0                                            
__________________________________________________________________________________________________
A0T0G2C8 (InputLayer)           [(None,)]            0                                            
__________________________________________________________________________________________________
A0T0G3C7 (InputLayer)           [(None,)]            0                                            
________________________________________________________________________________________________

In [13]:
MAX_EPOCHS = 50


def get_callbacks():
    """Return a new list of newly constructed training callbacks.

    The list contains an early-stopping callback (patience 3, restoring the
    best weights) and a reduce-LR-on-plateau callback (patience 2). Calling
    this once per `fit` gives each run its own callback objects.
    """
    early_stopping = keras.callbacks.EarlyStopping(
        min_delta=1e-4,
        patience=3,
        verbose=1,
        restore_best_weights=True,
    )
    reduce_lr = keras.callbacks.ReduceLROnPlateau(patience=2, verbose=1)
    return [early_stopping, reduce_lr]

# Training Loop

In [14]:
# Per-fold test-set predictions; averaged later to build the submission.
preds = []

for fold, (train_index, valid_index) in enumerate(skf.split(X, data['target'])):
    # A new model is built for every fold. Reset Keras' global state first so
    # that state left behind by earlier models does not accumulate in memory.
    keras.backend.clear_session()

    X_train, X_valid = X.iloc[train_index], X.iloc[valid_index]
    y_train, y_valid = y.iloc[train_index], y.iloc[valid_index]

    # Fit the scaler on the training part only, then apply the same transform
    # to the validation and test features.
    scaler = StandardScaler().fit(X_train)
    X_train = pd.DataFrame(scaler.transform(X_train), columns=X.columns)
    X_valid = pd.DataFrame(scaler.transform(X_valid), columns=X.columns)
    X_test_scaled = pd.DataFrame(scaler.transform(X_test), columns=X.columns)

    data_train = DataLoader.from_df(X_train, y_train, batch_size=512)
    data_valid = DataLoader.from_df(X_valid, y_valid, batch_size=512)
    data_test = DataLoader.from_df(X_test_scaled, batch_size=512)

    model = FeatureTokenizerTransformer.from_config(data_config, model_config, name=f'ftt_fold_{fold}')
    model.compile(
        loss='categorical_crossentropy', optimizer="adam", metrics=['accuracy']
    )
    model.fit(
        data_train, validation_data=data_valid, callbacks=get_callbacks(),
        epochs=MAX_EPOCHS
    )
    preds.append(model.predict(data_test))

Epoch 1/50


2022-03-31 14:10:37.033886: I tensorflow/compiler/mlir/mlir_graph_optimization_pass.cc:185] None of the MLIR Optimization Passes are enabled (registered 2)
2022-03-31 14:10:43.890460: I tensorflow/stream_executor/cuda/cuda_dnn.cc:369] Loaded cuDNN version 8005


Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50

Epoch 00015: ReduceLROnPlateau reducing learning rate to 0.00010000000474974513.
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50

Epoch 00026: ReduceLROnPlateau red

# Submission

In [15]:
submissions = pd.read_csv('../input/tabular-playground-series-feb-2022/sample_submission.csv')

# Average the per-fold model outputs, then label each row with the name of
# the target column that has the highest averaged value.
mean_fold_preds = np.stack(preds, axis=0).mean(axis=0)
predicted_labels = pd.DataFrame(mean_fold_preds, columns=y.columns).idxmax(axis=1)
submissions['target'] = predicted_labels.values.tolist()

submissions.to_csv('preds.csv', index=False)