<a href="https://colab.research.google.com/github/Jihaad2021/Paractical_Keras/blob/main/Structured_Data/01_Structured_datas_classification_with_FeatureSpace.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [16]:
# setup 
import pandas as pd 
import tensorflow as tf 
from tensorflow import keras

In [17]:
# Preparing the data: download the heart-disease CSV into a pandas DataFrame.
# The file is fetched over HTTPS (same host and path as before) so the
# download is encrypted and the server is authenticated.
file_url = "https://storage.googleapis.com/download.tensorflow.org/data/heart.csv"
dataframe = pd.read_csv(file_url)

In [18]:
# Sanity check: size of the loaded table as (rows, columns).
print(dataframe.shape)

(303, 14)


In [19]:
# Preview the first rows to see the column names and the kind of values they hold.
dataframe.head()

Unnamed: 0,age,sex,cp,trestbps,chol,fbs,restecg,thalach,exang,oldpeak,slope,ca,thal,target
0,63,1,1,145,233,1,2,150,0,2.3,3,0,fixed,0
1,67,1,4,160,286,0,2,108,1,1.5,2,3,normal,1
2,67,1,4,120,229,0,2,129,1,2.6,2,2,reversible,0
3,37,1,3,130,250,0,0,187,0,3.5,3,0,normal,0
4,41,0,2,130,204,0,2,172,0,1.4,1,0,normal,0


In [20]:
# Create the training and validation dataframes.
# A random 20% of the rows is held out for validation; the fixed random_state
# makes the split repeatable. The remaining rows form the training set.
val_dataframe = dataframe.sample(frac=0.2, random_state=1337)
train_dataframe = dataframe.drop(index=val_dataframe.index)

n_train, n_val = len(train_dataframe), len(val_dataframe)
print("Using %d samples for training and %d for validation" % (n_train, n_val))

Using 242 samples for training and 61 for validation


In [21]:
# Convert a dataframe into a tf.data.Dataset of (features, label) examples.
def dataframe_to_dataset(dataframe):
  """Build a shuffled `tf.data.Dataset` from a dataframe with a "target" column.

  Args:
    dataframe: pandas DataFrame holding the feature columns plus a "target"
      column. The caller's frame is not modified.

  Returns:
    A dataset yielding one `(features, label)` pair per row, where `features`
    is a dict keyed by column name. The dataset is shuffled with a buffer as
    large as the number of rows.
  """
  labels = dataframe["target"]
  feature_frame = dataframe.drop(columns=["target"])
  feature_columns = {
      column: feature_frame[column] for column in feature_frame.columns
  }
  examples = tf.data.Dataset.from_tensor_slices((feature_columns, labels))
  return examples.shuffle(buffer_size=len(feature_frame))

# Wrap both splits as tf.data datasets (training first, then validation).
train_ds, val_ds = (
    dataframe_to_dataset(split_frame)
    for split_frame in (train_dataframe, val_dataframe)
)

In [22]:
# Look at a single example: a dict of per-feature values and its label.
for example_features, example_label in train_ds.take(1):
  print("Input:", example_features)
  print("Target:", example_label)

Input: {'age': <tf.Tensor: shape=(), dtype=int64, numpy=58>, 'sex': <tf.Tensor: shape=(), dtype=int64, numpy=1>, 'cp': <tf.Tensor: shape=(), dtype=int64, numpy=4>, 'trestbps': <tf.Tensor: shape=(), dtype=int64, numpy=100>, 'chol': <tf.Tensor: shape=(), dtype=int64, numpy=234>, 'fbs': <tf.Tensor: shape=(), dtype=int64, numpy=0>, 'restecg': <tf.Tensor: shape=(), dtype=int64, numpy=0>, 'thalach': <tf.Tensor: shape=(), dtype=int64, numpy=156>, 'exang': <tf.Tensor: shape=(), dtype=int64, numpy=0>, 'oldpeak': <tf.Tensor: shape=(), dtype=float64, numpy=0.1>, 'slope': <tf.Tensor: shape=(), dtype=int64, numpy=1>, 'ca': <tf.Tensor: shape=(), dtype=int64, numpy=1>, 'thal': <tf.Tensor: shape=(), dtype=string, numpy=b'reversible'>}
Target: tf.Tensor(1, shape=(), dtype=int64)


In [23]:
# Group the examples into batches of 32.
# The datasets are rebuilt from their dataframes rather than re-batched in
# place (`train_ds = train_ds.batch(...)`), so running this cell more than once
# does not batch an already-batched dataset.
batch_size = 32
train_ds = dataframe_to_dataset(train_dataframe).batch(batch_size)
val_ds = dataframe_to_dataset(val_dataframe).batch(batch_size)

In [24]:
# Configuring a FeatureSpace
# Each input column is mapped to the preprocessing it should receive, using
# the string shorthand for the preprocessor type. Every feature column of the
# dataset is declared, including `thalach`.

from keras.utils import FeatureSpace

feature_space = FeatureSpace(
    features={
        # Categorical features encoded as integers
        "sex": "integer_categorical",
        "cp": "integer_categorical",
        "fbs": "integer_categorical",
        "restecg": "integer_categorical",
        "exang": "integer_categorical",
        "ca": "integer_categorical",
        # Categorical feature encoded as string
        "thal": "string_categorical",
        # Numerical feature to discretize
        "age": "float_discretized",
        # Numerical features to normalize
        "trestbps": "float_normalized",
        "chol": "float_normalized",
        "thalach": "float_normalized",
        "oldpeak": "float_normalized",
        "slope": "float_normalized",
    },
    # Feature crosses, both using the same crossing dimension.
    crosses=[("sex", "age"), ("thal", "ca")],
    crossing_dim=32,
    # Concatenate all encoded features into a single vector.
    output_mode="concat",
)

In [25]:
# Further customizing a FeatureSpace
# Each feature's preprocessor is configured explicitly through the
# FeatureSpace helper methods. Features are inserted in a fixed order:
# integer categoricals, `thal`, `age`, then the normalized numericals.
integer_categorical_columns = ("sex", "cp", "fbs", "restecg", "exang", "ca")
normalized_columns = ("trestbps", "chol", "thalach", "oldpeak", "slope")

feature_specs = {}
# Categorical features encoded as integers (num_oov_indices=0)
for column in integer_categorical_columns:
    feature_specs[column] = FeatureSpace.integer_categorical(num_oov_indices=0)
# Categorical feature encoded as string (num_oov_indices=0)
feature_specs["thal"] = FeatureSpace.string_categorical(num_oov_indices=0)
# Numerical feature to discretize into 30 bins
feature_specs["age"] = FeatureSpace.float_discretized(num_bins=30)
# Numerical features to normalize
for column in normalized_columns:
    feature_specs[column] = FeatureSpace.float_normalized()

# Feature crosses, each with its own crossing dimension.
feature_crosses = [
    FeatureSpace.cross(feature_names=("sex", "age"), crossing_dim=64),
    FeatureSpace.cross(feature_names=("thal", "ca"), crossing_dim=16),
]

feature_space = FeatureSpace(
    features=feature_specs,
    crosses=feature_crosses,
    output_mode="concat",
)


In [26]:
# Adapt the FeatureSpace to the training data.
# Only the feature dicts are passed to `adapt`, so the label is dropped from
# every (features, label) element first.
def _features_only(features, _label):
  return features

train_ds_with_no_labels = train_ds.map(_features_only)
feature_space.adapt(train_ds_with_no_labels)

In [27]:
# Run one training batch through the adapted FeatureSpace and report the
# shape and dtype of the encoded result.
for feature_batch, _ in train_ds.take(1):
  preprocessed_x = feature_space(feature_batch)
  print("preprocessed_x.shape:", preprocessed_x.shape)
  print("preprocessed_x.dtype:", preprocessed_x.dtype)
  

preprocessed_x.shape: (32, 138)
preprocessed_x.dtype: <dtype: 'float32'>


In [29]:
# Apply the FeatureSpace inside the tf.data pipeline so that preprocessing
# runs in parallel and batches are prefetched.
def _encode_batch(features, labels):
  """Encode a batch of raw features with the FeatureSpace; labels pass through."""
  return feature_space(features), labels

preprocessed_train_ds = train_ds.map(
    _encode_batch, num_parallel_calls=tf.data.AUTOTUNE
).prefetch(tf.data.AUTOTUNE)

preprocessed_val_ds = val_ds.map(
    _encode_batch, num_parallel_calls=tf.data.AUTOTUNE
).prefetch(tf.data.AUTOTUNE)


In [30]:
# Build a model

# Symbolic tensors provided by the FeatureSpace: the dict of raw inputs and
# the encoded feature vector derived from them.
dict_inputs = feature_space.get_inputs()
encoded_features = feature_space.get_encoded_features()

# Classifier head: one hidden layer with dropout, then a single sigmoid unit.
hidden = keras.layers.Dense(32, activation="relu")(encoded_features)
hidden = keras.layers.Dropout(0.5)(hidden)
predictions = keras.layers.Dense(1, activation="sigmoid")(hidden)

# Training model: takes already-encoded features as input.
training_model = keras.Model(inputs=encoded_features, outputs=predictions)
training_model.compile(
    optimizer="adam",
    loss="binary_crossentropy",
    metrics=["accuracy"],
)

# Inference model: takes the raw feature dict and shares the same output.
inference_model = keras.Model(inputs=dict_inputs, outputs=predictions)

In [31]:
# Train the model on the preprocessed batches for 20 epochs, evaluating on
# the preprocessed validation set.
training_model.fit(
    preprocessed_train_ds,
    epochs=20,
    validation_data=preprocessed_val_ds,
)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<keras.callbacks.History at 0x7f188cf4dba0>

In [32]:
# Inference on new data with the end-to-end model

# A single raw record keyed by feature name.
sample = {
    "age": 60,
    "sex": 1,
    "cp": 1,
    "trestbps": 145,
    "chol": 233,
    "fbs": 1,
    "restecg": 2,
    "thalach": 150,
    "exang": 0,
    "oldpeak": 2.3,
    "slope": 3,
    "ca": 0,
    "thal": "fixed",
}

# Wrap every value in a one-element list so each feature becomes a batch of size 1.
input_dict = {}
for feature_name, feature_value in sample.items():
  input_dict[feature_name] = tf.convert_to_tensor([feature_value])

predictions = inference_model.predict(input_dict)
disease_probability = predictions[0][0]

print(
    f"This particular patient had a {100 * disease_probability:.2f}% probability "
    "of having a heart disease, as evaluated by our model."
)

This particular patient had a 50.55% probability of having a heart disease, as evaluated by our model.
