In [489]:
import tensorflow as tf
import pandas as pd
import matplotlib
import os

In [490]:
# Load the training CSV from the local data/ directory into a DataFrame.
train_csv_path = os.path.join("data", "train.csv")
data = pd.read_csv(train_csv_path)

In [491]:
# Preview the first rows to check that the CSV parsed into the expected columns.
data.head()

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.25,,S
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C
2,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.925,,S
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1,C123,S
4,5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.05,,S


In [492]:
# Show per-column dtypes and non-null counts to spot columns with missing values.
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 891 entries, 0 to 890
Data columns (total 12 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   PassengerId  891 non-null    int64  
 1   Survived     891 non-null    int64  
 2   Pclass       891 non-null    int64  
 3   Name         891 non-null    object 
 4   Sex          891 non-null    object 
 5   Age          714 non-null    float64
 6   SibSp        891 non-null    int64  
 7   Parch        891 non-null    int64  
 8   Ticket       891 non-null    object 
 9   Fare         891 non-null    float64
 10  Cabin        204 non-null    object 
 11  Embarked     889 non-null    object 
dtypes: float64(2), int64(5), object(5)
memory usage: 83.7+ KB


In [493]:
# Remove the columns that are not used as model features.
# Reassigning (rather than inplace=True) and ignoring columns that are already
# gone lets this cell be re-run without raising a KeyError.
data = data.drop(columns=["PassengerId", "Name", "Ticket", "Cabin"], errors="ignore")

In [494]:
# FamilySize is the element-wise sum of the Parch and SibSp columns.
data["FamilySize"] = data["Parch"].add(data["SibSp"])

In [495]:
# SibSp and Parch are now summarised by FamilySize, so drop the originals.
# Reassigning (rather than inplace=True) and ignoring columns that are already
# gone lets this cell be re-run without raising a KeyError.
data = data.drop(columns=["SibSp", "Parch"], errors="ignore")

In [496]:
# Confirm that FamilySize is present and that SibSp / Parch have been removed.
data.head()

Unnamed: 0,Survived,Pclass,Sex,Age,Fare,Embarked,FamilySize
0,0,3,male,22.0,7.25,S,1
1,1,1,female,38.0,71.2833,C,1
2,1,3,female,26.0,7.925,S,0
3,1,1,female,35.0,53.1,S,1
4,0,3,male,35.0,8.05,S,0


In [497]:
# Separate the label column from the feature columns.
target_column = "Survived"
y_data = data[target_column]
X_data = data.drop(columns=[target_column])

In [498]:
# Preview the feature frame; the Survived label should no longer be a column.
X_data.head()

Unnamed: 0,Pclass,Sex,Age,Fare,Embarked,FamilySize
0,3,male,22.0,7.25,S,1
1,1,female,38.0,71.2833,C,1
2,3,female,26.0,7.925,S,0
3,1,female,35.0,53.1,S,1
4,3,male,35.0,8.05,S,0


In [499]:
# Re-check dtypes and non-null counts on the feature frame before imputation.
X_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 891 entries, 0 to 890
Data columns (total 6 columns):
 #   Column      Non-Null Count  Dtype  
---  ------      --------------  -----  
 0   Pclass      891 non-null    int64  
 1   Sex         891 non-null    object 
 2   Age         714 non-null    float64
 3   Fare        891 non-null    float64
 4   Embarked    889 non-null    object 
 5   FamilySize  891 non-null    int64  
dtypes: float64(2), int64(2), object(2)
memory usage: 41.9+ KB


In [500]:
# Count the missing values in each feature column.
X_data.isnull().sum()

Pclass          0
Sex             0
Age           177
Fare            0
Embarked        2
FamilySize      0
dtype: int64

In [501]:
# Impute missing values so that every feature is populated before encoding.
# NOTE: both statistics are computed over all rows, i.e. before the
# train/validation/test split that is made later in the notebook.
X_data["Age"] = X_data["Age"].fillna(X_data["Age"].median())

# Fill Embarked with its most frequent value. A forward fill would copy the
# value from whichever row happens to come before the gap (row order carries
# no meaning here) and would leave a NaN behind if the first row were missing.
X_data["Embarked"] = X_data["Embarked"].fillna(X_data["Embarked"].mode().iloc[0])

In [502]:
# Verify that no missing values remain after imputation.
X_data.isnull().sum()

Pclass        0
Sex           0
Age           0
Fare          0
Embarked      0
FamilySize    0
dtype: int64

In [503]:
# List the distinct Embarked values present in the data.
X_data["Embarked"].unique()

array(['S', 'C', 'Q'], dtype=object)

In [504]:
# Encode the two string-valued columns as integers.
EMBARKED_CODES = {"S": 0, "C": 1, "Q": 2}
SEX_CODES = {"male": 0, "female": 1}
X_data["Embarked"] = X_data["Embarked"].map(EMBARKED_CODES)
X_data["Sex"] = X_data["Sex"].map(SEX_CODES)

# Series.map turns every value that is missing from the dict into NaN without
# raising. That includes re-running this cell on already-encoded columns, so
# fail loudly instead of feeding NaNs to the model.
assert X_data[["Embarked", "Sex"]].notna().all().all(), (
    "Unmapped Embarked/Sex values: re-create X_data before re-running this cell"
)

In [505]:
# Inspect the frame after encoding; Sex and Embarked should now hold integers.
X_data.head()

Unnamed: 0,Pclass,Sex,Age,Fare,Embarked,FamilySize
0,3,0,22.0,7.25,0,1
1,1,1,38.0,71.2833,1,1
2,3,1,26.0,7.925,0,0
3,1,1,35.0,53.1,0,1
4,3,0,35.0,8.05,0,0


In [506]:
# Pair each feature row with its label as one element of a tf.data pipeline.
feature_array = X_data.to_numpy()
label_array = y_data.to_numpy()
dataset = tf.data.Dataset.from_tensor_slices((feature_array, label_array))

In [507]:
batch_size = 32
SHUFFLE_SEED = 42

# Shuffle once and freeze the resulting order before batching.
# With the default reshuffle_each_iteration=True the pipeline is re-shuffled
# every time it is iterated, so the take()/skip() subsets derived from
# `dataset` would contain different rows on every epoch and training rows
# would leak into the validation and test subsets.
# Trade-off: batches are now presented in the same order on every epoch.
dataset = dataset.shuffle(
    buffer_size=len(X_data),
    seed=SHUFFLE_SEED,
    reshuffle_each_iteration=False,
).batch(batch_size)

In [508]:
# Number of batches (not rows) in the batched dataset.
len(dataset)

28

In [509]:
# Split sizes are measured in batches because `dataset` is already batched.
n_batches = len(dataset)
train_size = int(0.7 * n_batches)
val_size = int(0.1 * n_batches) + 1
# Give the test split whatever is left so the three sizes always add up to
# n_batches. Rounding each share independently and adding 1 can otherwise
# report more test batches than actually remain after train and validation.
test_size = max(n_batches - train_size - val_size, 0)
train_size, val_size, test_size

(19, 3, 6)

In [510]:
# Carve the batched dataset into consecutive train / validation / test slices.
train_data = dataset.take(train_size)
held_out = dataset.skip(train_size)
val_data = held_out.take(val_size)
test_data = held_out.skip(val_size).take(test_size)

len(train_data), len(test_data)

(19, 6)

Creating a model

In [511]:
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Dense

In [512]:
# Instantiate an empty Keras Sequential model.
model = Sequential()

In [513]:
# Build the whole network in one expression so that re-running this cell
# recreates the model instead of appending another copy of the layers to it.
# The input is declared with an explicit Input object: passing `input_shape`
# to the first Dense layer of a Sequential model makes Keras emit a UserWarning.
# The feature count is read from X_data rather than hard-coded.
n_features = X_data.shape[1]
model = Sequential(
    [
        tf.keras.Input(shape=(n_features,)),
        Dense(64, activation="relu"),
        Dense(64, activation="relu"),
        Dense(32, activation="relu"),
        Dense(32, activation="relu"),
        Dense(16, activation="relu"),
        # Single sigmoid unit: the output is one value in (0, 1) per row.
        Dense(1, activation="sigmoid"),
    ]
)

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [514]:
from tensorflow.keras.optimizers import Adam

In [515]:
# Configure training: Adam optimizer, binary cross-entropy loss, accuracy metric.
optimizer = Adam(learning_rate=0.001)
loss_fn = tf.keras.losses.BinaryCrossentropy()
model.compile(optimizer=optimizer, loss=loss_fn, metrics=["accuracy"])

In [516]:
# Print the layer-by-layer architecture of the compiled model.
model.summary()

In [517]:
# Train on the training batches, evaluating on the validation batches each epoch.
n_epochs = 100
history = model.fit(x=train_data, validation_data=val_data, epochs=n_epochs)

Epoch 1/100
[1m19/19[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 8ms/step - accuracy: 0.5652 - loss: 0.7197 - val_accuracy: 0.6875 - val_loss: 0.6172
Epoch 2/100
[1m19/19[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.6822 - loss: 0.6161 - val_accuracy: 0.6354 - val_loss: 0.6262
Epoch 3/100
[1m19/19[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.7081 - loss: 0.5906 - val_accuracy: 0.6354 - val_loss: 0.6275
Epoch 4/100
[1m19/19[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.6849 - loss: 0.6064 - val_accuracy: 0.7396 - val_loss: 0.5407
Epoch 5/100
[1m19/19[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.6737 - loss: 0.5928 - val_accuracy: 0.7500 - val_loss: 0.5789
Epoch 6/100
[1m19/19[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.7233 - loss: 0.5638 - val_accuracy: 0.6875 - val_loss: 0.5876
Epoch 7/100
[1m19/19[0m [32m━━━