In [4]:
import pandas as pd
import numpy as np
import tensorflow as tf
from sklearn.model_selection import train_test_split

In [5]:
dataset = pd.read_csv("titanic.csv")
dataset.head()

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.25,,S
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C
2,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.925,,S
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1,C123,S
4,5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.05,,S


In [6]:
columns = ["Name", "Ticket", "Fare", "Cabin"]
dataset.drop(columns=columns, inplace=True)
dataset.head()

Unnamed: 0,PassengerId,Survived,Pclass,Sex,Age,SibSp,Parch,Embarked
0,1,0,3,male,22.0,1,0,S
1,2,1,1,female,38.0,1,0,C
2,3,1,3,female,26.0,0,0,S
3,4,1,1,female,35.0,1,0,S
4,5,0,3,male,35.0,0,0,S


In [9]:
dataset.drop(columns=["PassengerId"], inplace=True)

In [16]:
dataset.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 891 entries, 0 to 890
Data columns (total 7 columns):
 #   Column    Non-Null Count  Dtype  
---  ------    --------------  -----  
 0   Survived  891 non-null    int64  
 1   Pclass    891 non-null    int64  
 2   Sex       891 non-null    object 
 3   Age       891 non-null    float64
 4   SibSp     891 non-null    int64  
 5   Parch     891 non-null    int64  
 6   Embarked  889 non-null    object 
dtypes: float64(1), int64(4), object(2)
memory usage: 48.9+ KB


In [13]:
# Numerical_cols = ["Pclass", "Age", "SibSp", "Parch"]
# categorical_cols = ["Sex", "Embarked"]

# feature_cols = []
# for col in categorical_cols:
#     vocab = dataset[col].unique()
#     feature_cols.append(tf.feature_column.categorical_columns_with_vocabulary_list(col, vocab))
# for col in Numerical_cols:
#     feature_cols.append(tf.numeric_columns(col, dtype=tf.float))

# Standardize Age (mean = 0, std = 1)
dataset["Age"].fillna(dataset["Age"].mean(), inplace=True)
dataset["Age"] = (dataset["Age"] - dataset["Age"].mean()) / dataset["Age"].std()
dataset.head()

Unnamed: 0,Survived,Pclass,Sex,Age,SibSp,Parch,Embarked
0,0,3,male,-0.592148,1,0,S
1,1,1,female,0.63843,1,0,C
2,1,3,female,-0.284503,0,0,S
3,1,1,female,0.407697,1,0,S
4,0,3,male,0.407697,0,0,S


In [19]:
dataset["Embarked"].fillna(dataset["Embarked"].mode()[0], inplace=True)
dataset.head()

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  dataset["Embarked"].fillna(dataset["Embarked"].mode()[0], inplace=True)


Unnamed: 0,Survived,Pclass,Sex,Age,SibSp,Parch,Embarked
0,0,3,male,-0.592148,1,0,S
1,1,1,female,0.63843,1,0,C
2,1,3,female,-0.284503,0,0,S
3,1,1,female,0.407697,1,0,S
4,0,3,male,0.407697,0,0,S


In [20]:
sex = {
    "male" : 1,
    "female" : 0
}
embarked = {
    "S" : 0,
    "C" : 1,
    "Q" : 2
}
dataset["Sex"] = dataset["Sex"].map(sex)
dataset["Embarked"] = dataset["Embarked"].map(embarked)
dataset.head()

Unnamed: 0,Survived,Pclass,Sex,Age,SibSp,Parch,Embarked
0,0,3,1,-0.592148,1,0,0
1,1,1,0,0.63843,1,0,1
2,1,3,0,-0.284503,0,0,0
3,1,1,0,0.407697,1,0,0
4,0,3,1,0.407697,0,0,0


In [21]:
target = dataset.pop("Survived")

In [24]:
model = tf.keras.Sequential([
    tf.keras.layers.Dense(16, activation="relu", input_shape=(6,)),
    tf.keras.layers.Dense(16, activation="relu"),
    tf.keras.layers.Dense(1, activation="sigmoid")
])

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [25]:
model.compile(
    loss="binary_crossentropy",
    optimizer="adam",
    metrics=["accuracy"]
)

In [26]:
x_train, x_test, y_train, y_test = train_test_split(dataset, target, random_state=42, test_size=.25)
x_train

Unnamed: 0,Pclass,Sex,Age,SibSp,Parch,Embarked
298,1,1,-1.553581e-18,0,0,0
884,3,1,-3.614146e-01,0,0,0
247,2,0,-4.383257e-01,0,2,0
478,3,1,-5.921480e-01,0,0,0
305,1,1,-2.213435e+00,1,2,0
...,...,...,...,...,...,...
106,3,0,-6.690592e-01,0,0,0
270,1,1,-1.553581e-18,0,0,0
860,3,1,8.691639e-01,2,0,0
435,1,0,-1.207437e+00,1,2,0


In [40]:
from tensorflow.keras.callbacks import EarlyStopping

early_stop = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)

model.fit(
    x_train, y_train,
    epochs=40,
    batch_size=32,
    validation_split=0.20
)

Epoch 1/40
[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 20ms/step - accuracy: 0.8316 - loss: 0.3800 - val_accuracy: 0.8209 - val_loss: 0.4200
Epoch 2/40
[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 16ms/step - accuracy: 0.8341 - loss: 0.4018 - val_accuracy: 0.8209 - val_loss: 0.4188
Epoch 3/40
[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step - accuracy: 0.8526 - loss: 0.3773 - val_accuracy: 0.8358 - val_loss: 0.4160
Epoch 4/40
[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 11ms/step - accuracy: 0.8302 - loss: 0.3990 - val_accuracy: 0.8209 - val_loss: 0.4190
Epoch 5/40
[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 15ms/step - accuracy: 0.8281 - loss: 0.3790 - val_accuracy: 0.8284 - val_loss: 0.4160
Epoch 6/40
[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step - accuracy: 0.8575 - loss: 0.3623 - val_accuracy: 0.8209 - val_loss: 0.4190
Epoch 7/40
[1m17/17[0m [32m━━━━

<keras.src.callbacks.history.History at 0x22580d9bf40>

In [41]:
train_loss, train_acc = model.evaluate(x_train, y_train)
print(f"Train Acc: {train_acc * 100:.2f}%\nTrain Loss: {train_loss * 100:.2f}%")

[1m21/21[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - accuracy: 0.8436 - loss: 0.3772
Train Acc: 83.53%
Train Loss: 39.27%


In [42]:
test_loss, test_acc = model.evaluate(x_test, y_test)
print(f"Test Acc: {test_acc * 100:.2f}%\nTest Loss: {test_loss * 100:.2f}%")

[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step - accuracy: 0.7984 - loss: 0.4332
Test Acc: 80.27%
Test Loss: 43.80%
