In [1]:
# https://github.com/BeneKenobi/kaggle-titanic

from typing import Optional

import numpy as np
import pandas as pd
import plotly.graph_objects as go
import tensorflow as tf
from plotly.subplots import make_subplots
from sklearn.preprocessing import LabelEncoder, StandardScaler

# Short alias so later cells can write keras.* instead of tf.keras.*
keras = tf.keras

# Hex colours used for plot traces; consumers pick entries by position.
# NOTE(review): the name and the hex values look like the colour-blind-safe
# "Color Universal Design" (Okabe-Ito) palette — confirm the source.
CUD_COLORS = (
    "#e69f00",  # orange
    "#56b4e9",  # sky-blue
    "#009e73",  # bluish-green
    "#f0e442",  # yellow
    "#0072b2",  # blue
    "#d55e00",  # vermilion
    "#cc79a7",  # reddish-purple
)


def strip_title(name: str) -> str:
    """Extract the lower-cased title from a ``"Surname, Title. Given names"`` string.

    The title is the text between the first comma and the first period that
    follows it, with surrounding whitespace removed.

    Raises:
        IndexError: if ``name`` contains no comma.
    """
    # Only the segment right after the first comma matters, so cap the split.
    after_surname = name.split(",", 2)[1]
    title = after_surname.split(".", 1)[0]
    return title.strip().lower()


In [2]:
# Read the raw Kaggle CSVs for the training and the submission (test) passengers.
X = pd.read_csv("/kaggle/input/titanic/train.csv")
X_test = pd.read_csv("/kaggle/input/titanic/test.csv")

# Split the label off the training frame: pop() returns the column and removes it from X.
Y = X.pop("Survived")

# Keep the test ids aside; they are needed again when writing the submission file.
passenger_ids_test = X_test["PassengerId"].copy()


In [3]:
def engineer_features(df_in: pd.DataFrame) -> pd.DataFrame:
    """Return a cleaned copy of a raw Titanic passenger frame with derived features.

    The input frame is not modified. Steps, in order:

    * ``Title``: title extracted from ``Name`` via ``strip_title``.
    * ``Age``: missing values filled with the median age of the passenger's
      (``Sex``, ``Pclass``) group.
    * ``Embarked``: missing values filled with ``"S"``.
    * ``Fare``: missing values filled with the median fare of 3rd-class
      passengers travelling alone (``Parch == 0`` and ``SibSp == 0``).
    * ``Deck``: first character of ``Cabin``; ``"M"`` when the cabin is missing.
    * ``Ticket_Frequency``: number of rows in this frame sharing the same ``Ticket``.
    * ``Ticket``, ``PassengerId``, ``Name`` and ``Cabin`` are dropped.
    * Any numeric value still missing is filled with its column median.

    Args:
        df_in: Frame with at least the columns ``Name``, ``Sex``, ``Pclass``,
            ``Age``, ``Embarked``, ``Fare``, ``Parch``, ``SibSp``, ``Cabin``,
            ``Ticket`` and ``PassengerId``.

    Returns:
        A new DataFrame with the engineered columns.

    Raises:
        KeyError: if one of the required columns is absent.
    """
    df = df_in.copy()
    df["Title"] = df["Name"].apply(strip_title)

    # transform("median") yields a Series aligned with df's own index, so it can be
    # used directly as the fill value. The previous groupby(...).apply(fillna) form
    # returns a group-keyed MultiIndex on pandas >= 2.0 and cannot be assigned back.
    group_median_age = df.groupby(["Sex", "Pclass"])["Age"].transform("median")
    df["Age"] = df["Age"].fillna(group_median_age)

    df["Embarked"] = df["Embarked"].fillna(
        "S"
    )  # https://www.encyclopedia-titanica.org/titanic-survivor/martha-evelyn-stone.html

    # Fill missing fares with the median fare of 3rd-class passengers travelling alone.
    # A boolean mask avoids the chained [3][0][0] MultiIndex lookup, which raised
    # KeyError whenever the frame contained no such passenger.
    alone_in_third_class = (
        (df["Pclass"] == 3) & (df["Parch"] == 0) & (df["SibSp"] == 0)
    )
    fare_fallback = df.loc[alone_in_third_class, "Fare"].median()
    if pd.notna(fare_fallback):
        df["Fare"] = df["Fare"].fillna(fare_fallback)
    # Otherwise the column-median fill at the end takes care of missing fares.

    df["Deck"] = df["Cabin"].fillna("M").str[0]
    df["Ticket_Frequency"] = df.groupby("Ticket")["Ticket"].transform("count")
    df = df.drop(columns=["Ticket", "PassengerId", "Name", "Cabin"])

    # numeric_only=True is required: the frame still holds string columns
    # (Sex, Embarked, Title, Deck) and pandas >= 2.0 raises TypeError otherwise.
    return df.fillna(df.median(numeric_only=True))


X = engineer_features(X)
X_test = engineer_features(X_test)

# Stack both splits so each encoder is fitted on every value that occurs in either one.
encoding_help = pd.concat([X, X_test])

# Replace every column by integer label codes, using one encoder per column
# shared between the training frame, the test frame and the stacked frame.
for column in list(encoding_help):
    encoder = LabelEncoder()
    encoder.fit(encoding_help[column])
    for frame in (X, X_test, encoding_help):
        frame[column] = encoder.transform(frame[column])

# Standardise using statistics of the stacked (already encoded) frame.
# From here on X and X_test are NumPy arrays rather than DataFrames.
scaler = StandardScaler()
scaler.fit(encoding_help)
X, X_test = scaler.transform(X), scaler.transform(X_test)

  
  


In [4]:
# Seed TensorFlow's global RNG before any layer is created so the random weight
# initialisation repeats across "Restart & Run All" runs; without it every run
# trains a different model. NOTE(review): GPU kernels may still add small
# run-to-run differences.
SEED = 42
tf.random.set_seed(SEED)

# Small feed-forward classifier: one hidden ReLU layer of 9 units followed by a
# single sigmoid unit for the binary "Survived" target. The input width is read
# from the prepared feature matrix instead of being hard-coded as 10, so the
# model keeps matching the data if the engineered feature set changes.
model = keras.Sequential(
    [
        keras.layers.Dense(9, input_shape=(X.shape[1],), activation="relu"),
        keras.layers.Dense(1, activation="sigmoid"),
    ]
)
model.compile(
    loss="binary_crossentropy",
    optimizer="adam",
    metrics=["accuracy"],
)

2022-09-27 08:48:23.655490: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:937] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-09-27 08:48:23.765352: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:937] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-09-27 08:48:23.766138: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:937] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-09-27 08:48:23.767741: I tensorflow/core/platform/cpu_feature_guard.cc:142] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 AVX512F FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compil

In [5]:
# Train for 300 epochs with shuffling, holding out 10% of the training data
# for validation; the returned History object is kept for plotting.
fit = model.fit(
    x=X,
    y=Y,
    epochs=300,
    validation_split=0.1,
    shuffle=True,
    verbose="auto",
)

2022-09-27 08:48:26.512482: I tensorflow/compiler/mlir/mlir_graph_optimization_pass.cc:185] None of the MLIR Optimization Passes are enabled (registered 2)


Epoch 1/300
Epoch 2/300
Epoch 3/300
Epoch 4/300
Epoch 5/300
Epoch 6/300
Epoch 7/300
Epoch 8/300
Epoch 9/300
Epoch 10/300
Epoch 11/300
Epoch 12/300
Epoch 13/300
Epoch 14/300
Epoch 15/300
Epoch 16/300
Epoch 17/300
Epoch 18/300
Epoch 19/300
Epoch 20/300
Epoch 21/300
Epoch 22/300
Epoch 23/300
Epoch 24/300
Epoch 25/300
Epoch 26/300
Epoch 27/300
Epoch 28/300
Epoch 29/300
Epoch 30/300
Epoch 31/300
Epoch 32/300
Epoch 33/300
Epoch 34/300
Epoch 35/300
Epoch 36/300
Epoch 37/300
Epoch 38/300
Epoch 39/300
Epoch 40/300
Epoch 41/300
Epoch 42/300
Epoch 43/300
Epoch 44/300
Epoch 45/300
Epoch 46/300
Epoch 47/300
Epoch 48/300
Epoch 49/300
Epoch 50/300
Epoch 51/300
Epoch 52/300
Epoch 53/300
Epoch 54/300
Epoch 55/300
Epoch 56/300
Epoch 57/300
Epoch 58/300
Epoch 59/300
Epoch 60/300
Epoch 61/300
Epoch 62/300
Epoch 63/300
Epoch 64/300
Epoch 65/300
Epoch 66/300
Epoch 67/300
Epoch 68/300
Epoch 69/300
Epoch 70/300
Epoch 71/300
Epoch 72/300
Epoch 73/300
Epoch 74/300
Epoch 75/300
Epoch 76/300
Epoch 77/300
Epoch 78

In [6]:
# Two stacked panels: loss curves on top, accuracy curves below.
figure_history = make_subplots(rows=2, cols=1)

# Epochs are numbered 1..N inclusive. The upper bound needs the +1: without it the
# x axis holds N-1 values for N recorded epochs and the final epoch is not drawn.
x_range = list(range(1, len(fit.history["loss"]) + 1))

# (history key, legend label, subplot row); colours are taken from CUD_COLORS in order.
history_traces = (
    ("loss", "train loss", 1),
    ("val_loss", "validation loss", 1),
    ("accuracy", "train accuracy", 2),
    ("val_accuracy", "validation accuracy", 2),
)
for trace_color, (history_key, trace_name, subplot_row) in zip(
    CUD_COLORS, history_traces
):
    figure_history.add_trace(
        go.Scatter(
            x=x_range,
            y=fit.history[history_key],
            mode="lines",
            name=trace_name,
            line_color=trace_color,
        ),
        row=subplot_row,
        col=1,
    )

# Label the axes so the figure can be read on its own.
figure_history.update_yaxes(title_text="loss", row=1, col=1)
figure_history.update_yaxes(title_text="accuracy", row=2, col=1)
figure_history.update_xaxes(title_text="epoch", row=2, col=1)
figure_history.update_layout(
    height=768,
    width=1024,
    title_text="Training History",
    template="plotly_white",
)
figure_history.show()

In [7]:
# Round the sigmoid outputs to the nearest integer to obtain 0/1 class labels.
prediction = np.round(model.predict(X_test))

# Submission frame: the saved test passenger ids plus the predicted label.
results = passenger_ids_test.to_frame().assign(Survived=prediction.astype(int))
results.to_csv("results.csv", index=False)
