In [133]:
import tensorflow as tf
import pandas as pd
from tensorflow import keras
from sklearn.metrics import classification_report
from tensorflow.keras.utils import FeatureSpace
import copy
import numpy as np

# Load dataset.
dftrain = pd.read_csv('https://storage.googleapis.com/tf-datasets/titanic/train.csv') # training data
dfeval = pd.read_csv('https://storage.googleapis.com/tf-datasets/titanic/eval.csv') # testing data
# y_train = dftrain.pop('survived')
# y_eval = dfeval.pop('survived')



In [134]:
test_df = dfeval.sample(frac=0.2, random_state=1337)
dfeval = dfeval.drop(test_df.index)
print(dftrain.shape)
print(
    "Using %d samples for training and %d for validation and %d for test"
    % (len(dftrain), len(dfeval), len(test_df))
)


(627, 10)
Using 627 samples for training and 211 for validation and 53 for test


In [135]:
dftrain.head()

Unnamed: 0,survived,sex,age,n_siblings_spouses,parch,fare,class,deck,embark_town,alone
0,0,male,22.0,1,0,7.25,Third,unknown,Southampton,n
1,1,female,38.0,1,0,71.2833,First,C,Cherbourg,n
2,1,female,26.0,0,0,7.925,Third,unknown,Southampton,y
3,1,female,35.0,1,0,53.1,First,C,Southampton,n
4,0,male,28.0,0,0,8.4583,Third,unknown,Queenstown,y


In [136]:
def dataframe_to_dataset(dataframe):
    dataframe = dataframe.copy()
    labels = dataframe.pop("survived")
    ds = tf.data.Dataset.from_tensor_slices((dict(dataframe), labels))
    # ds = ds.shuffle(buffer_size=len(dataframe))
    return ds


train_ds = dataframe_to_dataset(dftrain)
val_ds = dataframe_to_dataset(dfeval)
test_ds = dataframe_to_dataset(test_df)

In [137]:
for x, y in train_ds.take(1):
    print("Input:", x)
    print("Survived:", y)

Input: {'sex': <tf.Tensor: shape=(), dtype=string, numpy=b'male'>, 'age': <tf.Tensor: shape=(), dtype=float64, numpy=22.0>, 'n_siblings_spouses': <tf.Tensor: shape=(), dtype=int64, numpy=1>, 'parch': <tf.Tensor: shape=(), dtype=int64, numpy=0>, 'fare': <tf.Tensor: shape=(), dtype=float64, numpy=7.25>, 'class': <tf.Tensor: shape=(), dtype=string, numpy=b'Third'>, 'deck': <tf.Tensor: shape=(), dtype=string, numpy=b'unknown'>, 'embark_town': <tf.Tensor: shape=(), dtype=string, numpy=b'Southampton'>, 'alone': <tf.Tensor: shape=(), dtype=string, numpy=b'n'>}
Survived: tf.Tensor(0, shape=(), dtype=int64)


In [138]:
train_ds = train_ds.batch(32)
val_ds = val_ds.batch(32)
test_ds = test_ds.batch(32)

In [139]:
# feature_space = FeatureSpace(
#     features={
#     "sex": FeatureSpace.integer_categorical(num_oov_indices=0),
#     "age": FeatureSpace.float_discretized(num_bins=30),
#     "n_siblings_spouses": FeatureSpace.float_discretized(num_bins=3),
#     "parch": FeatureSpace.float_discretized(num_bins=3),
#     "fare": FeatureSpace.float_discretized(num_bins=3),
#     "class": FeatureSpace.integer_categorical(num_oov_indices=0),
#     "deck": FeatureSpace.integer_categorical(num_oov_indices=0),
#     "embark_town": FeatureSpace.integer_categorical(num_oov_indices=0),
#     "alone": FeatureSpace.integer_categorical(num_oov_indices=0),
#     },
#     # Our utility will one-hot encode all categorical
#     # features and concat all features into a single
#     # vector (one vector per sample).
#     output_mode="concat",
# )

# # Define a dictionary to map feature names to their type specification
# feature_names = {
#     "sex": "integer_categorical",
#     "age": "float_discretized",
#     "n_siblings_spouses": "float_normalized",
#     "parch": "float_normalized",
#     "fare": "float_normalized",
#     "class": "integer_categorical",
#     "deck": "integer_categorical",
#     "embark_town": "integer_categorical",
#     "alone": "integer_categorical",
# }

# feature_names = {
#     "sex": "integer_categorical",  # Assumes sex is a categorical feature with string values
#     "age": "float_discretized",    # Assumes age is a float feature that needs normalization
#     "n_siblings_spouses": "float_normalized",  # Assumes integer feature that needs normalization
#     "parch": "float_normalized",  # Assumes integer feature that needs normalization
#     "fare": "float_normalized",   # Assumes fare is a float feature that needs normalization
#     "class": "integer_categorical",  # Assumes class is a categorical feature with string values
#     "deck": "integer_categorical",   # Assumes deck is a categorical feature with string values
#     "embark_town": "integer_categorical",  # Assumes embark_town is a categorical feature with string values
#     "alone": "integer_categorical",   # Assumes alone is a categorical feature with string values
#     # "survived": "integer_categorical"  # Assumes survived is the target with integer values
# }

feature_space = FeatureSpace(
    features={
        "sex": FeatureSpace.string_categorical(),  # Assumes sex is a categorical feature with string values
        "age": FeatureSpace.float_normalized(),    # Assumes age is a float feature that needs normalization
        "n_siblings_spouses": FeatureSpace.integer_categorical(),  # Assumes integer feature that needs normalization
        "parch": FeatureSpace.integer_categorical(),  # Assumes integer feature that needs normalization
        "fare": FeatureSpace.float_normalized(),   # Assumes fare is a float feature that needs normalization
        "class": FeatureSpace.string_categorical(),  # Assumes class is a categorical feature with string values
        "deck": FeatureSpace.string_categorical(),   # Assumes deck is a categorical feature with string values
        "embark_town": FeatureSpace.string_categorical(),  # Assumes embark_town is a categorical feature with string values
        "alone": FeatureSpace.string_categorical(),   # Assumes alone is a categorical feature with string values
        # "survived": "integer_categorical"  # Assumes survived is the target with integer values
    },
    output_mode="concat",
)

In [140]:
train_ds_with_no_labels = train_ds.map(lambda x, _: x)
feature_space.adapt(train_ds_with_no_labels)


In [141]:
for x, _ in train_ds.take(1):
    preprocessed_x = feature_space(x)
    print("preprocessed_x.shape:", preprocessed_x.shape)
    print("preprocessed_x.dtype:", preprocessed_x.dtype)

preprocessed_x.shape: (32, 41)
preprocessed_x.dtype: <dtype: 'float32'>


In [142]:
preprocessed_train_ds = train_ds.map(
    lambda x, y: (feature_space(x), y), num_parallel_calls=tf.data.AUTOTUNE
)
preprocessed_train_ds = preprocessed_train_ds.prefetch(tf.data.AUTOTUNE)

preprocessed_val_ds = val_ds.map(
    lambda x, y: (feature_space(x), y), num_parallel_calls=tf.data.AUTOTUNE
)
preprocessed_val_ds = preprocessed_val_ds.prefetch(tf.data.AUTOTUNE)

preprocessed_test_ds = test_ds.map(
    lambda x, y: (feature_space(x), y), num_parallel_calls=tf.data.AUTOTUNE
)
preprocessed_test_ds = preprocessed_test_ds.prefetch(tf.data.AUTOTUNE)

In [143]:
# dict_inputs = feature_space.get_inputs()
# encoded_features = feature_space.get_encoded_features()

# sq_model = keras.Sequential()
# sq_model.add(keras.layers.Dense(32, activation="relu", input_shape=encoded_features.shape[1:]))
# sq_model.add(keras.layers.Dropout(0.5))
# sq_model.add(keras.layers.Dense(1, activation="sigmoid"))

# sq_model.compile(optimizer="adam", loss="binary_crossentropy", metrics=["accuracy"])

# Create the training model
dict_inputs = feature_space.get_inputs()
encoded_features = feature_space.get_encoded_features()

sq_model = keras.Sequential()
sq_model.add(keras.layers.Dense(32, activation="relu", input_shape=encoded_features.shape[1:]))
sq_model.add(keras.layers.Dropout(0.5))
sq_model.add(keras.layers.Dense(1, activation="sigmoid"))

sq_model.compile(optimizer="adam", loss="binary_crossentropy", metrics=["accuracy"])

# Train your training model (sq_model) with your data here



In [144]:
sq_model.summary()

Model: "sequential_6"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_16 (Dense)            (None, 32)                1344      
                                                                 
 dropout_6 (Dropout)         (None, 32)                0         
                                                                 
 dense_17 (Dense)            (None, 1)                 33        
                                                                 
Total params: 1377 (5.38 KB)
Trainable params: 1377 (5.38 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


In [145]:
sq_model.fit(
    preprocessed_train_ds, epochs=25, validation_data=preprocessed_val_ds, verbose=2
)

Epoch 1/25
20/20 - 3s - loss: 0.6604 - accuracy: 0.6140 - val_loss: 0.6267 - val_accuracy: 0.6209 - 3s/epoch - 135ms/step
Epoch 2/25
20/20 - 1s - loss: 0.6304 - accuracy: 0.6475 - val_loss: 0.6025 - val_accuracy: 0.6398 - 566ms/epoch - 28ms/step
Epoch 3/25
20/20 - 1s - loss: 0.6075 - accuracy: 0.6587 - val_loss: 0.5823 - val_accuracy: 0.6777 - 523ms/epoch - 26ms/step
Epoch 4/25
20/20 - 1s - loss: 0.5916 - accuracy: 0.7097 - val_loss: 0.5646 - val_accuracy: 0.7204 - 555ms/epoch - 28ms/step
Epoch 5/25
20/20 - 1s - loss: 0.5608 - accuracy: 0.7289 - val_loss: 0.5478 - val_accuracy: 0.7441 - 547ms/epoch - 27ms/step
Epoch 6/25
20/20 - 1s - loss: 0.5506 - accuracy: 0.7560 - val_loss: 0.5304 - val_accuracy: 0.7441 - 522ms/epoch - 26ms/step
Epoch 7/25
20/20 - 1s - loss: 0.5340 - accuracy: 0.7544 - val_loss: 0.5150 - val_accuracy: 0.7773 - 538ms/epoch - 27ms/step
Epoch 8/25
20/20 - 1s - loss: 0.5266 - accuracy: 0.7911 - val_loss: 0.5023 - val_accuracy: 0.7773 - 535ms/epoch - 27ms/step
Epoch 9/25

<keras.src.callbacks.History at 0x25998fd8950>

In [107]:
predictions = sq_model.predict(preprocessed_test_ds)



In [108]:
print(predictions)

[[0.88312924]
 [0.76166606]
 [0.33288527]
 [0.11357001]
 [0.16023742]
 [0.07630911]
 [0.11996026]
 [0.12456548]
 [0.9754267 ]
 [0.09192388]
 [0.6679702 ]
 [0.11549904]
 [0.10991735]
 [0.79428804]
 [0.7388393 ]
 [0.4658922 ]
 [0.9128577 ]
 [0.5441925 ]
 [0.80347174]
 [0.08881468]
 [0.80342984]
 [0.17839518]
 [0.35160193]
 [0.10838598]
 [0.09820534]
 [0.1462464 ]
 [0.1164173 ]
 [0.10454416]
 [0.52648205]
 [0.10419982]
 [0.876744  ]
 [0.19481464]
 [0.2796858 ]
 [0.2935987 ]
 [0.11629638]
 [0.11545198]
 [0.09029475]
 [0.09621318]
 [0.76231176]
 [0.4748198 ]
 [0.16756034]
 [0.11438232]
 [0.76260924]
 [0.23814404]
 [0.7742723 ]
 [0.9570616 ]
 [0.14080954]
 [0.22503024]
 [0.60616136]
 [0.11236387]
 [0.13690378]
 [0.54022837]
 [0.10793312]]


In [109]:
predictions = (predictions > 0.5).astype(int).reshape(-1,)
label_values = []

# Iterate through preprocessed_val_ds to collect labels
for _, labels in preprocessed_test_ds.as_numpy_iterator():
    label_values.extend(labels)

# Convert the collected labels to a NumPy array
label_values = np.array(label_values)

In [110]:
print(classification_report(label_values, predictions))

              precision    recall  f1-score   support

           0       0.83      0.85      0.84        34
           1       0.72      0.68      0.70        19

    accuracy                           0.79        53
   macro avg       0.78      0.77      0.77        53
weighted avg       0.79      0.79      0.79        53



In [111]:
# dict_inputs = feature_space.get_inputs()
# encoded_features = feature_space.get_encoded_features()

x = keras.layers.Dense(32, activation="relu")(encoded_features)
x = keras.layers.Dropout(0.5)(x)
predictions = keras.layers.Dense(1, activation="sigmoid")(x)

training_model = keras.Model(inputs=encoded_features, outputs=predictions)
training_model.compile(
    optimizer="adam", loss="binary_crossentropy", metrics=["accuracy"]
)

inference_model = keras.Model(inputs=dict_inputs, outputs=predictions)

In [112]:
training_model.summary()

Model: "model_2"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_2 (InputLayer)        [(None, 41)]              0         
                                                                 
 dense_6 (Dense)             (None, 32)                1344      
                                                                 
 dropout_3 (Dropout)         (None, 32)                0         
                                                                 
 dense_7 (Dense)             (None, 1)                 33        
                                                                 
Total params: 1377 (5.38 KB)
Trainable params: 1377 (5.38 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


In [113]:
training_model.fit(
    preprocessed_train_ds, epochs=50, validation_data=preprocessed_val_ds, verbose=2
)

Epoch 1/50
20/20 - 2s - loss: 0.6938 - accuracy: 0.5694 - val_loss: 0.6283 - val_accuracy: 0.6445 - 2s/epoch - 123ms/step
Epoch 2/50
20/20 - 1s - loss: 0.6373 - accuracy: 0.6507 - val_loss: 0.5954 - val_accuracy: 0.6777 - 592ms/epoch - 30ms/step
Epoch 3/50
20/20 - 1s - loss: 0.6147 - accuracy: 0.6715 - val_loss: 0.5703 - val_accuracy: 0.7109 - 707ms/epoch - 35ms/step
Epoch 4/50
20/20 - 1s - loss: 0.5708 - accuracy: 0.7129 - val_loss: 0.5487 - val_accuracy: 0.7346 - 725ms/epoch - 36ms/step
Epoch 5/50
20/20 - 1s - loss: 0.5753 - accuracy: 0.7177 - val_loss: 0.5316 - val_accuracy: 0.7488 - 561ms/epoch - 28ms/step
Epoch 6/50
20/20 - 1s - loss: 0.5446 - accuracy: 0.7321 - val_loss: 0.5160 - val_accuracy: 0.7488 - 522ms/epoch - 26ms/step
Epoch 7/50
20/20 - 1s - loss: 0.5451 - accuracy: 0.7560 - val_loss: 0.5036 - val_accuracy: 0.7583 - 524ms/epoch - 26ms/step
Epoch 8/50
20/20 - 0s - loss: 0.5134 - accuracy: 0.7847 - val_loss: 0.4927 - val_accuracy: 0.7725 - 467ms/epoch - 23ms/step
Epoch 9/50

<keras.src.callbacks.History at 0x25996d8b410>

In [114]:
predictions = training_model.predict(preprocessed_test_ds)



In [115]:
predictions = (predictions > 0.5).astype(int).reshape(-1,)
label_values = []

# Iterate through preprocessed_val_ds to collect labels
for _, labels in preprocessed_test_ds.as_numpy_iterator():
    label_values.extend(labels)

# Convert the collected labels to a NumPy array
label_values = np.array(label_values)

In [116]:
print(classification_report(label_values, predictions))

              precision    recall  f1-score   support

           0       0.82      0.79      0.81        34
           1       0.65      0.68      0.67        19

    accuracy                           0.75        53
   macro avg       0.73      0.74      0.74        53
weighted avg       0.76      0.75      0.76        53



In [124]:
sample = {
    "sex": "female",
    "age": 25,
    "n_siblings_spouses": 0,
    "parch": 0,
    "fare": 5,
    "class": "Third",
    "deck": "Unknown",
    "embark_town": "Southampton",
    "alone": "y",
}

input_dict = {name: tf.convert_to_tensor([value]) for name, value in sample.items()}
print(input_dict)
predictions = inference_model.predict(input_dict)
print(predictions)

print(
    f"This particular passenger had a {100 * predictions[0][0]:.2f}% probability "
    "of surviving, as evaluated by our model."
)

{'sex': <tf.Tensor: shape=(1,), dtype=string, numpy=array([b'female'], dtype=object)>, 'age': <tf.Tensor: shape=(1,), dtype=int32, numpy=array([25])>, 'n_siblings_spouses': <tf.Tensor: shape=(1,), dtype=int32, numpy=array([0])>, 'parch': <tf.Tensor: shape=(1,), dtype=int32, numpy=array([0])>, 'fare': <tf.Tensor: shape=(1,), dtype=int32, numpy=array([5])>, 'class': <tf.Tensor: shape=(1,), dtype=string, numpy=array([b'Third'], dtype=object)>, 'deck': <tf.Tensor: shape=(1,), dtype=string, numpy=array([b'Unknown'], dtype=object)>, 'embark_town': <tf.Tensor: shape=(1,), dtype=string, numpy=array([b'Southampton'], dtype=object)>, 'alone': <tf.Tensor: shape=(1,), dtype=string, numpy=array([b'y'], dtype=object)>}
[[0.61438274]]
This particular passenger had a 61.44% probability of surviving, as evaluated by our model.


# Inference model for the SQ_Model

In [146]:

# Create the inference model with the same architecture as sq_model
inference_model = keras.Sequential()
inference_model.add(keras.layers.Dense(32, activation="relu", input_shape=encoded_features.shape[1:]))
inference_model.add(keras.layers.Dense(1, activation="sigmoid"))

# Load the trained weights from the training model (sq_model) into the inference model
inference_model.set_weights(sq_model.get_weights())

# Now you can use inference_model for making predictions with the trained weights

In [147]:
predictions = inference_model.predict(preprocessed_test_ds)




In [148]:
predictions = (predictions > 0.5).astype(int).reshape(-1,)
label_values = []

# Iterate through preprocessed_val_ds to collect labels
for _, labels in preprocessed_test_ds.as_numpy_iterator():
    label_values.extend(labels)

# Convert the collected labels to a NumPy array
label_values = np.array(label_values)

In [149]:
print(classification_report(label_values, predictions))

              precision    recall  f1-score   support

           0       0.79      0.79      0.79        34
           1       0.63      0.63      0.63        19

    accuracy                           0.74        53
   macro avg       0.71      0.71      0.71        53
weighted avg       0.74      0.74      0.74        53



In [150]:
predictions_sq = sq_model.predict(preprocessed_test_ds)
predictions_sq = (predictions_sq > 0.5).astype(int).reshape(-1,)
print(classification_report(label_values, predictions))

              precision    recall  f1-score   support

           0       0.79      0.79      0.79        34
           1       0.63      0.63      0.63        19

    accuracy                           0.74        53
   macro avg       0.71      0.71      0.71        53
weighted avg       0.74      0.74      0.74        53

