In [39]:
# Importing dependencies
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import pandas as pd
import tensorflow as tf

train_df = pd.read_csv('../Resources/train_clean.csv')
train_df

Unnamed: 0,PassengerId,Survived,Pclass,last_name,first_name,Sex,Age,SibSp,Parch,Ticket,Fare,Embarked,Deck
0,1,0,3,Braund,Mr Owen Harris,male,22.0,1,0,A/5 21171,7.2500,S,"D,E,F,G"
1,2,1,1,Cumings,Mrs John Bradley (Florence Briggs Thayer),female,38.0,1,0,PC 17599,71.2833,C,"A,B,C,D,E"
2,3,1,3,Heikkinen,Miss Laina,female,26.0,0,0,STON/O2. 3101282,7.9250,S,"D,E,F,G"
3,4,1,1,Futrelle,Mrs Jacques Heath (Lily May Peel),female,35.0,1,0,113803,53.1000,S,"A,B,C,D,E"
4,5,0,3,Allen,Mr William Henry,male,35.0,0,0,373450,8.0500,S,"D,E,F,G"
...,...,...,...,...,...,...,...,...,...,...,...,...,...
884,887,0,2,Montvila,Rev Juozas,male,27.0,0,0,211536,13.0000,S,"D,E,F,G"
885,888,1,1,Graham,Miss Margaret Edith,female,19.0,0,0,112053,30.0000,S,"A,B,C,D,E"
886,889,0,3,Johnston,Miss Catherine Helen Carrie,female,7.0,1,2,W./C. 6607,23.4500,S,"D,E,F,G"
887,890,1,1,Behr,Mr Karl Howell,male,26.0,0,0,111369,30.0000,C,"A,B,C,D,E"


In [40]:
# Dropping non-beneficial ID columns
train_df_dropped = train_df.drop(['PassengerId', 'last_name', 'first_name'], axis=1)
train_df_dropped

Unnamed: 0,Survived,Pclass,Sex,Age,SibSp,Parch,Ticket,Fare,Embarked,Deck
0,0,3,male,22.0,1,0,A/5 21171,7.2500,S,"D,E,F,G"
1,1,1,female,38.0,1,0,PC 17599,71.2833,C,"A,B,C,D,E"
2,1,3,female,26.0,0,0,STON/O2. 3101282,7.9250,S,"D,E,F,G"
3,1,1,female,35.0,1,0,113803,53.1000,S,"A,B,C,D,E"
4,0,3,male,35.0,0,0,373450,8.0500,S,"D,E,F,G"
...,...,...,...,...,...,...,...,...,...,...
884,0,2,male,27.0,0,0,211536,13.0000,S,"D,E,F,G"
885,1,1,female,19.0,0,0,112053,30.0000,S,"A,B,C,D,E"
886,0,3,female,7.0,1,2,W./C. 6607,23.4500,S,"D,E,F,G"
887,1,1,male,26.0,0,0,111369,30.0000,C,"A,B,C,D,E"


In [41]:
# Looking at the number of unique values in each column for train_df
train_df_dropped.nunique()

Survived      2
Pclass        3
Sex           2
Age          89
SibSp         7
Parch         7
Ticket      679
Fare        248
Embarked      3
Deck          2
dtype: int64

In [42]:
# Converting categorical data to numeric with `pd.get_dummies`
train_dummies_df = pd.get_dummies(train_df_dropped)
train_dummies_df

Unnamed: 0,Survived,Pclass,Age,SibSp,Parch,Fare,Sex_female,Sex_male,Ticket_110152,Ticket_110413,...,Ticket_W./C. 6608,Ticket_W./C. 6609,Ticket_W.E.P. 5734,Ticket_W/C 14208,Ticket_WE/P 5735,Embarked_C,Embarked_Q,Embarked_S,"Deck_A,B,C,D,E","Deck_D,E,F,G"
0,0,3,22.0,1,0,7.2500,False,True,False,False,...,False,False,False,False,False,False,False,True,False,True
1,1,1,38.0,1,0,71.2833,True,False,False,False,...,False,False,False,False,False,True,False,False,True,False
2,1,3,26.0,0,0,7.9250,True,False,False,False,...,False,False,False,False,False,False,False,True,False,True
3,1,1,35.0,1,0,53.1000,True,False,False,False,...,False,False,False,False,False,False,False,True,True,False
4,0,3,35.0,0,0,8.0500,False,True,False,False,...,False,False,False,False,False,False,False,True,False,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
884,0,2,27.0,0,0,13.0000,False,True,False,False,...,False,False,False,False,False,False,False,True,False,True
885,1,1,19.0,0,0,30.0000,True,False,False,False,...,False,False,False,False,False,False,False,True,True,False
886,0,3,7.0,1,2,23.4500,True,False,False,False,...,False,False,False,False,False,False,False,True,False,True
887,1,1,26.0,0,0,30.0000,False,True,False,False,...,False,False,False,False,False,True,False,False,True,False


In [23]:
# Spltting preprocessed data into features and target arrays
X = train_dummies_df.copy()
X.drop('Survived', axis=1, inplace=True)
y = train_dummies_df['Survived']

# Splitting data into training and testing
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=7)

In [24]:
# Creating a StandardScaler instance
scaler = StandardScaler()

# Fiting the StandardScaler
X_scaler = scaler.fit(X_train)

# Scaling the data
X_train_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)

In [26]:
# Defining the model - deep neural net aka the number of input features and hidden nodes for each layer.
number_input_features = len(X_train_scaled[0])
hidden_nodes_layer_1 = 8
hidden_nodes_layer_2 = 5

nn_model = tf.keras.models.Sequential()

# First hidden layer
nn_model.add(tf.keras.layers.Dense(units=hidden_nodes_layer_1, input_dim=number_input_features, activation='relu'))
# Second hidden layer
nn_model.add(tf.keras.layers.Dense(units=hidden_nodes_layer_2, activation='relu'))
# Output layer
nn_model.add(tf.keras.layers.Dense(units=1, activation='sigmoid'))
# Checking the structure of the model
nn_model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense (Dense)               (None, 8)                 5536      
                                                                 
 dense_1 (Dense)             (None, 5)                 45        
                                                                 
 dense_2 (Dense)             (None, 1)                 6         
                                                                 
Total params: 5587 (21.82 KB)
Trainable params: 5587 (21.82 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


In [27]:
# Compiling the model
nn_model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

In [33]:
# Train the model
nn_model.fit(X_train_scaled, y_train, epochs=100)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

<keras.src.callbacks.History at 0x29be139b250>

In [34]:
# Evaluate the model using the test data
model_loss, model_accuracy = nn_model.evaluate(X_test_scaled,y_test,verbose=2)
print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")

7/7 - 0s - loss: 1.9618 - accuracy: 0.7848 - 22ms/epoch - 3ms/step
Loss: 1.9618265628814697, Accuracy: 0.7847533822059631


In [35]:
# Exporting model to HDF5 file
nn_model.save('../Models/titanic_predictions.h5')

  saving_api.save_model(
