### Handling Mixed Data Types

Import our Libraries etc.

In [62]:
from types import SimpleNamespace
import pandas as pd
import numpy as np
np.set_printoptions(precision=3, suppress=True)
import tensorflow as tf
from tensorflow import keras
from keras import layers
from keras import backend

Import data and setup a namespace for our dataset

In [None]:
model_name = 'heart_health'           
hh = pd.read_csv('./input/Heart_health.csv')

# tensorflow is a bit more strict with column strings now
hh.columns = [c.replace("/","_") for c in list(hh.columns)]

# prefer namespace over extra top level variables
dset = SimpleNamespace(feats=None,labels=None,tdict=None)
dset.feats = hh.copy()
dset.labels = dset.feats.pop("Heart Attack")

To build the preprocessing model, start by building a set of symbolic tf.keras.Input objects, matching the names and data-types of the CSV columns.

In [None]:
def as_keras_inputs(*,items):
    inputs = {}
    for name,column in items:
        if column.dtype == object:
            inputs[name] = tf.keras.Input(shape=(1,),name=name,dtype=tf.string)
        else:
            inputs[name] = tf.keras.Input(shape=(1,),name=name,dtype=tf.float32)    
    
    return inputs

ktinputs = as_keras_inputs(items=dset.feats.items())

The first step in your preprocessing logic is to concatenate the numeric inputs together, and run them through a normalization layer:

In [None]:
def normalize_numeric_inputs(*,inputs,dataframe):
    normalizer = layers.Normalization()
    
    all_numeric_inputs = {name:input for name,input in inputs.items() if input.dtype==tf.float32}
    x = layers.Concatenate()(list(all_numeric_inputs.values()))
    normalizer.adapt(np.array(dataframe[all_numeric_inputs.keys()]))
    
    return normalizer(x)

ppinputs = [normalize_numeric_inputs(inputs=ktinputs,dataframe=hh)]

Now we need to convert our strings into float32 appropriate for the model

In [None]:
def strings_to_float32(*,inputs,feats):
    for name, input in inputs.items():
        if input.dtype == tf.float32:
            continue

        # For the string inputs use the tf.keras.layers.StringLookup function 
        # to map from strings to integer indices in a vocabulary. 
        lookup = layers.StringLookup(vocabulary=np.unique(feats[name]))

        # Next, use tf.keras.layers.CategoryEncoding to convert the indexes 
        # into float32 data appropriate for the model.
        # The default settings for the tf.keras.layers.CategoryEncoding layer 
        # create a one-hot vector for each input.
        one_hot_vector = layers.CategoryEncoding(num_tokens=lookup.vocabulary_size())
        
        return one_hot_vector(lookup(input))

ppinputs.append(strings_to_float32(inputs=ktinputs,feats=dset.feats))

With the collection of inputs and preprocessed_inputs, you can concatenate all the preprocessed inputs together, and build a model that handles the preprocessing:

In [None]:
preprocessed_inputs = layers.Concatenate()(ppinputs)
dset_preprocessing_model = tf.keras.Model(inputs=ktinputs,outputs=preprocessed_inputs)

Keras models don't automatically convert pandas DataFrames because it's not clear if it should be converted to one tensor or to a dictionary of tensors. 
So, convert it to a dictionary of tensors:

In [None]:
dset.tdict = {name:np.array(value) for name, value in dset.feats.items()}

Now build a model to train based on a sequential model from our 
preprocessing model and our keras inputs based on the panda dataframe

In [80]:
def build_model(*,preprocessing_head,inputs):
    seq_model = tf.keras.Sequential([
        layers.Dense(64),
        layers.Dense(1)
    ])
    result = seq_model(preprocessing_head(inputs))
    model = tf.keras.Model(inputs=inputs,outputs=result,name=model_name)
    model.compile(loss=tf.keras.losses.BinaryCrossentropy(from_logits=True),
                  optimizer = tf.keras.optimizers.Adam())
    return model
    
dset_model = build_model(preprocessing_head=dset_preprocessing_model,inputs=ktinputs)
dset_model.fit(
    x=dset.tdict,
    y=dset.labels,
    epochs=10)

dset_model.save(f'{dset_model.name}_test.keras')

Epoch 1/10
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 573us/step - loss: 0.6481
Epoch 2/10
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 580us/step - loss: 0.2762
Epoch 3/10
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 538us/step - loss: 0.1563
Epoch 4/10
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 568us/step - loss: 0.1005
Epoch 5/10
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 472us/step - loss: 0.0659
Epoch 6/10
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 463us/step - loss: 0.0500
Epoch 7/10
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 485us/step - loss: 0.0436
Epoch 8/10
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 425us/step - loss: 0.0303
Epoch 9/10
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 457us/step - loss: 0.0245
Epoch 10/10
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 461us/step - lo