In [1]:
from sklearn.datasets import make_classification

In [3]:
dataset = make_classification(
    n_samples=10_000,
    n_features=25,
    n_informative=10,
    n_classes=2
)
x,y = dataset

In [5]:
import pandas as pd
import numpy as np

np.set_printoptions(precision=2)

In [7]:
num_categories = 100
for i in range(5):
    x[:, i] = pd.cut(x[:, i], num_categories, labels=False)

In [8]:
x[0]

array([ 5.00e+01,  6.10e+01,  8.40e+01,  2.40e+01,  2.10e+01, -2.49e+00,
       -2.79e-01,  2.17e+00, -3.59e-01, -5.50e-01,  3.61e+00, -4.01e-01,
        1.92e+00,  2.76e+00, -4.46e-02, -6.29e-01, -4.53e-01,  9.99e-01,
       -6.61e+00,  4.66e+00, -7.02e-01,  2.29e-01, -3.84e-01, -1.09e+00,
        2.78e+00])

In [14]:
x_numeric = x[:, 5:]
x_cat = x[:, :5]

In [15]:
from sklearn.preprocessing import StandardScaler

ss = StandardScaler()
 
x_s = ss.fit_transform(x_numeric)

In [16]:
import tensorflow as tf

p = .1

numerics_inputs = tf.keras.layers.Input((20,), name='numeric_inputs')
cat_inputs = tf.keras.layers.Input((5,), name='cat_inputs')

In [17]:
def emb_sz_rule(n_cat):
    return min(600, round(1.6 * n_cat**0.56))

embedding_layer = tf.keras.layers.Embedding(
    num_categories,
    emb_sz_rule(num_categories),
    input_length=5
)
cat_inputs = embedding_layer(cat_inputs)
cat_inputs = tf.keras.layers.Flatten()(cat_inputs)

In [18]:
x = tf.keras.layers.Concatenate()([cat_inputs, numerics_inputs])

x = tf.keras.layers.Dropout(p)(x)
x = tf.keras.layers.Dense(100, activation='relu')(x)

x = tf.keras.layers.BatchNormalization()(x)
x = tf.keras.layers.Dropout(p)(x)
x = tf.keras.layers.Dense(20, activation='relu')(x)

x = tf.keras.layers.BatchNormalization()(x)
x = tf.keras.layers.Dropout(p)(x)
x = tf.keras.layers.Dense(10, activation='relu')(x)

x = tf.keras.layers.BatchNormalization()(x)
x = tf.keras.layers.Dropout(p)(x)
x = tf.keras.layers.Dense(1, activation='sigmoid', name='output')(x)

In [20]:
model = tf.keras.models.Model(
    inputs=[numerics_inputs, cat_inputs], outputs=x
)
model.compile(optimizer='rmsprop', loss=tf.keras.losses.binary_crossentropy, metrics=['accuracy'])

In [21]:
model.summary()

Model: "model"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_1 (InputLayer)           [(None, 105)]        0           []                               
                                                                                                  
 numeric_inputs (InputLayer)    [(None, 20)]         0           []                               
                                                                                                  
 concatenate (Concatenate)      (None, 125)          0           ['input_1[0][0]',                
                                                                  'numeric_inputs[0][0]']         
                                                                                                  
 dropout (Dropout)              (None, 125)          0           ['concatenate[1][0]']        