In [1]:
"""
A more advance Neural Network that will used the titanic dataset to predict, if a person survives or no base on categories.
"""
import seaborn as sns

df = sns.load_dataset('titanic')

# Checking for null values
display(df.isnull())

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,class,who,adult_male,deck,embark_town,alive,alone
0,False,False,False,False,False,False,False,False,False,False,False,True,False,False,False
1,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False
2,False,False,False,False,False,False,False,False,False,False,False,True,False,False,False
3,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False
4,False,False,False,False,False,False,False,False,False,False,False,True,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
886,False,False,False,False,False,False,False,False,False,False,False,True,False,False,False
887,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False
888,False,False,False,True,False,False,False,False,False,False,False,True,False,False,False
889,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False


In [2]:
# Clean data / Remove missing values
clean_df = df.dropna()
display(clean_df)

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,class,who,adult_male,deck,embark_town,alive,alone
1,1,1,female,38.0,1,0,71.2833,C,First,woman,False,C,Cherbourg,yes,False
3,1,1,female,35.0,1,0,53.1000,S,First,woman,False,C,Southampton,yes,False
6,0,1,male,54.0,0,0,51.8625,S,First,man,True,E,Southampton,no,True
10,1,3,female,4.0,1,1,16.7000,S,Third,child,False,G,Southampton,yes,False
11,1,1,female,58.0,0,0,26.5500,S,First,woman,False,C,Southampton,yes,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
871,1,1,female,47.0,1,1,52.5542,S,First,woman,False,D,Southampton,yes,False
872,0,1,male,33.0,0,0,5.0000,S,First,man,True,B,Southampton,no,True
879,1,1,female,56.0,0,1,83.1583,C,First,woman,False,C,Cherbourg,yes,False
887,1,1,female,19.0,0,0,30.0000,S,First,woman,False,B,Southampton,yes,True


In [3]:
print("(Rows, Columns):", clean_df.shape)

(Rows, Columns): (182, 15)


In [4]:
print("All current columns:", clean_df.columns.to_list())

# Hidden layer: Calculates if it will survive or not base on categories: fare, pclass, age, sex and alone
# Output layer: Says if survives base on hidden layer calculations, will say 0 or 1

X = clean_df[['pclass','sex', 'age', 'fare', 'alone']].copy()

print("\nColumns that will be used to predict (survived): ",X.columns.to_list())
display(X)



All current columns: ['survived', 'pclass', 'sex', 'age', 'sibsp', 'parch', 'fare', 'embarked', 'class', 'who', 'adult_male', 'deck', 'embark_town', 'alive', 'alone']

Columns that will be used to predict (survived):  ['pclass', 'sex', 'age', 'fare', 'alone']


Unnamed: 0,pclass,sex,age,fare,alone
1,1,female,38.0,71.2833,False
3,1,female,35.0,53.1000,False
6,1,male,54.0,51.8625,True
10,3,female,4.0,16.7000,False
11,1,female,58.0,26.5500,True
...,...,...,...,...,...
871,1,female,47.0,52.5542,False
872,1,male,33.0,5.0000,True
879,1,female,56.0,83.1583,False
887,1,female,19.0,30.0000,True


In [5]:
# Encode Sex and Alone with 0 and 1
X['sex'] = X['sex'].map({'male': 0, 'female': 1})
X['alone'] = X['alone'].map({False: 0, True: 1})
display(X)

Unnamed: 0,pclass,sex,age,fare,alone
1,1,1,38.0,71.2833,0
3,1,1,35.0,53.1000,0
6,1,0,54.0,51.8625,1
10,3,1,4.0,16.7000,0
11,1,1,58.0,26.5500,1
...,...,...,...,...,...
871,1,1,47.0,52.5542,0
872,1,0,33.0,5.0000,1
879,1,1,56.0,83.1583,0
887,1,1,19.0,30.0000,1


In [6]:
from sklearn.preprocessing import StandardScaler

scaler = StandardScaler()
X[["age", "fare"]] = scaler.fit_transform(X[["age", "fare"]])
display(X)


Unnamed: 0,pclass,sex,age,fare,alone
1,1,1,0.152082,-0.100110,0
3,1,1,-0.039875,-0.338485,0
6,1,0,1.175852,-0.354708,1
10,3,1,-2.023430,-0.815672,0
11,1,1,1.431795,-0.686543,1
...,...,...,...,...,...
871,1,1,0.727953,-0.345640,0
872,1,0,-0.167846,-0.969053,1
879,1,1,1.303824,0.055566,0
887,1,1,-1.063646,-0.641315,1


In [7]:
from sklearn.model_selection import train_test_split

y = clean_df['survived']

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

In [8]:
# NN(Neural Network Model)

import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Input, Dense, Dropout

model = Sequential([
    Input(shape = (X.shape[1],)), # Input Layer:  auto matches feature size 
    Dense(16, activation='relu'), # Hidden Layer 1: 16 Neurons 
    Dropout(0.2), # For Regularization
    Dense(8, activation='relu'), # Hidden Layer 2: 8 Neurons
    Dense(1, activation='sigmoid') # Output Layer: 1 Neuron
])

In [13]:
# Much cleaner print than watching verbose print every line.
class PrintEvery(tf.keras.callbacks.Callback):
    def __init__(self, interval=50):
        self.interval = interval

    def on_epoch_end(self, epoch, logs=None):
        if (epoch + 1) % self.interval == 0:
            print(f"Epoch {epoch+1} → Loss: {logs['loss']:.4f} - Accuracy: {logs['accuracy']:.4f}")

In [16]:
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
model.fit(X_train, y_train, epochs=100, validation_split=0.2, verbose=0, callbacks=[PrintEvery(10)])

Epoch 10 → Loss: 0.4036 - Accuracy: 0.7586
Epoch 20 → Loss: 0.4236 - Accuracy: 0.7759
Epoch 30 → Loss: 0.4189 - Accuracy: 0.7931
Epoch 40 → Loss: 0.3956 - Accuracy: 0.7672
Epoch 50 → Loss: 0.3863 - Accuracy: 0.7759
Epoch 60 → Loss: 0.3719 - Accuracy: 0.7931
Epoch 70 → Loss: 0.3975 - Accuracy: 0.7931
Epoch 80 → Loss: 0.3933 - Accuracy: 0.7759
Epoch 90 → Loss: 0.3794 - Accuracy: 0.8017
Epoch 100 → Loss: 0.3872 - Accuracy: 0.8017


<keras.src.callbacks.history.History at 0x303e80850>

In [17]:
y_pred = model.predict(X_test)
y_pred_class = (y_pred >= 0.5).astype(int)

import pandas as pd

# Create a DataFrame to compare
predictions_df = pd.DataFrame({
    'Actual': y_test.values,
    'Predicted_Prob': y_pred.flatten(),
    'Predicted_Label': y_pred_class.flatten()
})

# Show first few rows
display(predictions_df.head(10))



[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 21ms/step


Unnamed: 0,Actual,Predicted_Prob,Predicted_Label
0,0,0.457866,0
1,0,0.795937,1
2,1,0.983142,1
3,1,0.989196,1
4,1,0.375465,0
5,0,0.224844,0
6,0,0.53083,1
7,1,0.99853,1
8,1,0.383713,0
9,0,0.2694,0


In [24]:
loss, acc = model.evaluate(X_test, y_test)
print(f"\nTest Accuracy: {(acc * 100):.2f}%")

[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 12ms/step - accuracy: 0.7365 - loss: 0.5325

Test Accuracy: 72.97%


In [32]:
print("Reflection:\n")
print(f'My model achieved a {(acc * 100):.2f}% accuracy in predicting survival on the Titanic dataset test set.\nConsidering the dataset was relatively small {X.shape} after cleaning, this is a strong result for a simple feedforward network.\nSo a model with limited data (based on how data hungry Neural Networks normally are), a well-preprocessed input and simple architecture can generalize reasonably well.')


Reflection:

My model achieved a 72.97% accuracy in predicting survival on the Titanic dataset test set.
Considering the dataset was relatively small (182, 5) after cleaning, this is a strong result for a simple feedforward network.
So a model with limited data (based on how data hungry Neural Networks normally are), a well-preprocessed input and simple architecture can generalize reasonably well.
