In [35]:
import tensorflow as tf
import tensorflow_probability as tfp
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split

In [2]:
# Read the training CSV into a DataFrame and preview its first two rows.
df = pd.read_csv(filepath_or_buffer="train.csv")
df.head(n=2)

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.25,,S
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C


In [64]:
# Hold out 20% of the rows for testing; the fixed random_state makes the
# split repeatable. "Survived" is the label, everything else stays in x_*.
x_train, x_test, y_train, y_test = train_test_split(
    df.drop(columns="Survived"),
    df["Survived"],
    test_size=0.2,
    random_state=123,
)

# Build Pipeline

In [65]:
# Raw column names, grouped by how they are encoded for the model.
CAT = ["Pclass", "Sex"]
NUM = ["Age", "Fare"]

# Accumulates every feature column; filled in further down in this cell.
features = []

# One indicator (one-hot) column per categorical feature. The vocabulary is
# the column's distinct values in value_counts() order, taken from the full
# frame `df`.
cat_var = [
    tf.feature_column.indicator_column(
        tf.feature_column.categorical_column_with_vocabulary_list(
            name, df[name].value_counts().index.tolist()
        )
    )
    for name in CAT
]

def MinMax(x, num, df):
    """Impute missing values with the mean, then min-max scale a numeric feature.

    Args:
        x: Tensor of raw feature values; it is cast to float32 before use.
        num: Name of the column in ``df`` that supplies the statistics.
        df: Column-indexable reference data (e.g. a DataFrame); the minimum,
            maximum and mean of ``df[num]`` are computed from it on each call.

    Returns:
        A float32 tensor in which NaNs are replaced by the reference mean and
        every value is rescaled as ``(x - min) / (max - min)``. If the
        reference column is constant the divisor is 1, so in-range values
        map to 0 instead of NaN/inf.
    """
    x = tf.cast(x, tf.float32)

    # Explicit NaN-aware reductions: the statistics no longer depend on the
    # container silently skipping missing values.
    MIN = np.float32(np.nanmin(df[num]))
    MAX = np.float32(np.nanmax(df[num]))
    MEAN = np.float32(np.nanmean(df[num]))

    # A constant column has zero range; dividing by it would yield NaN/inf.
    span = MAX - MIN
    if span == 0:
        span = np.float32(1.0)

    x = tf.where(tf.math.is_nan(x), MEAN, x)
    return (x - MIN) / span

# One min-max-normalised numeric column per entry in NUM.
#
# The lambda's default arguments freeze both the column name and the
# reference frame at definition time. `x_train` is reassigned later in the
# notebook; a lambda that looked the global up at call time would silently
# switch normalisation statistics depending on which cells have been run.
num_var = [
    tf.feature_column.numeric_column(
        col,
        normalizer_fn=lambda x, col=col, ref=x_train: MinMax(x, col, ref),
    )
    for col in NUM
]

# Gather the categorical and numeric columns into the shared list, then wrap
# them in a single DenseFeatures layer that turns a dict of raw columns into
# one dense tensor.
features.extend(cat_var)
features.extend(num_var)

dense_features = tf.keras.layers.DenseFeatures(feature_columns=features)

In [66]:
# Smoke check: push the single training row at position 5 through the
# feature layer and display the encoded tensor.
dense_features(
    x_train.iloc[5:6].to_dict(orient="list")
)

<tf.Tensor: shape=(1, 7), dtype=float32, numpy=
array([[0.6481528 , 0.02537431, 0.        , 0.        , 1.        ,
        1.        , 0.        ]], dtype=float32)>

# Probabilistic Neural Net (aleatoric uncertainty only; weights are point estimates)

In [67]:
# Short aliases for the two layer namespaces used below.
tfpl = tfp.layers
layers = tf.keras.layers

# One scalar-per-example symbolic input for each raw column the feature
# layer consumes; the dtypes match the raw column types.
inp = {
    "Pclass": layers.Input(shape=(), dtype=tf.int32),
    "Sex": layers.Input(shape=(), dtype=tf.string),
    "Age": layers.Input(shape=(), dtype=tf.float32),
    "Fare": layers.Input(shape=(), dtype=tf.float32),
}

num_class = 2

# Feature encoding -> two ReLU hidden layers -> a linear layer sized to the
# parameters of a OneHotCategorical over `num_class` classes -> the
# distribution itself as the model output.
densef = dense_features(inp)
x = layers.Dense(units=64, activation="relu")(densef)
x = layers.Dense(units=32, activation="relu")(x)
x = layers.Dense(units=tfpl.OneHotCategorical.params_size(num_class))(x)
out = tfpl.OneHotCategorical(num_class)(x)

model = tf.keras.models.Model(inputs=inp, outputs=out)

def negative_log_likelihood(y_true, y_pred):
    """Return the negated log-probability of ``y_true`` under ``y_pred``.

    Args:
        y_true: Observed targets, passed unchanged to ``y_pred.log_prob``.
        y_pred: Object exposing a ``log_prob`` method (the model's output
            distribution when used as a Keras loss).

    Returns:
        ``-y_pred.log_prob(y_true)``.
    """
    log_likelihood = y_pred.log_prob(y_true)
    return -log_likelihood

# Train with Adam on the negative log-likelihood loss and report accuracy.
model.compile(loss=negative_log_likelihood, optimizer="adam", metrics=["acc"])

# Print the layer-by-layer overview of the compiled model.
model.summary()

Model: "model_6"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_43 (InputLayer)          [(None,)]            0           []                               
                                                                                                  
 input_44 (InputLayer)          [(None,)]            0           []                               
                                                                                                  
 input_41 (InputLayer)          [(None,)]            0           []                               
                                                                                                  
 input_42 (InputLayer)          [(None,)]            0           []                               
                                                                                            

In [68]:
# Only the columns the model actually consumes.
feature_cols = CAT + NUM

# Re-attach the label so rows and targets are filtered together.
df_train = x_train.copy()
df_train["Survived"] = y_train

# Drop rows only when a *model feature* is missing. A blanket dropna() would
# also discard every row with a gap in an unused column such as "Cabin",
# throwing away training examples for no benefit.
df_train = df_train.dropna(axis=0, subset=feature_cols)

x_train = df_train.drop("Survived", axis=1)
y_train = df_train["Survived"]

# Feed just the feature columns to tf.data: unused columns would be carried
# through every batch only to be ignored by the model's input dict.
train_inputs = x_train[feature_cols].to_dict("list")
train_labels = tf.keras.utils.to_categorical(y_train, 2)

TRAIN = tf.data.Dataset.from_tensor_slices((train_inputs, train_labels))
# A buffer as large as the dataset gives a full shuffle each epoch; a buffer
# of 10 only permutes elements within a small sliding window.
TRAIN = TRAIN.shuffle(len(x_train)).batch(16).prefetch(tf.data.AUTOTUNE)

In [69]:
# Fit on the batched training dataset for 20 passes over the data.
model.fit(x=TRAIN, epochs=20)

Epoch 1/20


  inputs = self._flatten_to_reference_inputs(inputs)


Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<keras.callbacks.History at 0x2757889bcd0>

In [70]:
# Example record with every raw column of one passenger; each scalar is
# wrapped in a length-1 array so the dict looks like a batch of size one.
to_pred = {
    column: np.array([value])
    for column, value in {
        'PassengerId': 1,
        'Survived': 0,
        'Pclass': 3,
        'Name': 'Braund, Mr. Owen Harris',
        'Sex': 'male',
        'Age': 22.0,
        'SibSp': 1,
        'Parch': 0,
        'Ticket': 'A/5 21171',
        'Fare': 7.25,
        'Cabin': np.nan,
        'Embarked': 'S',
    }.items()
}

In [95]:
# Batch-of-one input holding only the four columns the model reads.
to_pred = {
    name: np.array([value])
    for name, value in (
        ("Pclass", 2),
        ("Sex", "male"),
        ("Age", 2),
        ("Fare", 0.2),
    )
}

In [96]:
# `model.predict` turns the OneHotCategorical output into a tensor by drawing
# a *sample*, so the argmax of its result is a random class draw that can
# change between runs. Calling the model directly returns the distribution;
# its mean is the vector of class probabilities, whose argmax is the
# deterministic most-probable class.
pred_dist = model(to_pred)
np.argmax(pred_dist.mean().numpy(), axis=-1)[0]



0

In [72]:
# Persist the trained weights to weights.h5 in the working directory.
model.save_weights(filepath="weights.h5")