### Module Importation and DataFrame Setup

In [1]:
# Reset the model for re-run
from tensorflow.keras import backend as K
K.clear_session()

# Import dependencies
import pandas as pd
from pathlib import Path
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, classification_report
from sklearn.preprocessing import LabelEncoder, StandardScaler
import tensorflow as tf
import keras_tuner as kt
from tensorflow.keras.callbacks import EarlyStopping

In [3]:
# Read the CSV file from the Dataset folder into a Pandas DataFrame
dfSurvivals = pd.read_csv(
    Path("../CSV_OUTPUT/Base_Cleaned_DS_CSV.csv")
)

In [4]:
# Review the DataFrame
dfSurvivals.head()

Unnamed: 0,Passenger Id,Title,First Name,Last Name,Sex,Age,Sibling/Spouse Aboard,Parent/Children Aboard,Passenger Class,Fare,Embarkation Port,Survival Boat,Body Number,Survived,age_group,family_size
0,1,Miss,Elisabeth Walton,Allen,female,29.0,0,0,1,211.3375,S,2,S,1,Adults,0
1,2,Master,Hudson Trevor,Allison,male,0.9167,1,2,1,151.55,S,11,S,1,Children,3
2,3,Miss,Helen Loraine,Allison,female,2.0,1,2,1,151.55,S,DNS,BNR,0,Children,3
3,4,Mr,Hudson Joshua Creighton,Allison,male,30.0,1,2,1,151.55,S,DNS,135,0,Adults,3
4,5,Mrs,Hudson J C (Bessie Waldo Daniels),Allison,female,25.0,1,2,1,151.55,S,DNS,BNR,0,Adults,3


In [5]:
# Print DataFrame information to ensure no null values and check datatypes
dfSurvivals.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1308 entries, 0 to 1307
Data columns (total 16 columns):
 #   Column                  Non-Null Count  Dtype  
---  ------                  --------------  -----  
 0   Passenger Id            1308 non-null   int64  
 1   Title                   1308 non-null   object 
 2   First Name              1308 non-null   object 
 3   Last Name               1308 non-null   object 
 4   Sex                     1308 non-null   object 
 5   Age                     1308 non-null   float64
 6   Sibling/Spouse Aboard   1308 non-null   int64  
 7   Parent/Children Aboard  1308 non-null   int64  
 8   Passenger Class         1308 non-null   int64  
 9   Fare                    1308 non-null   float64
 10  Embarkation Port        1308 non-null   object 
 11  Survival Boat           1308 non-null   object 
 12  Body Number             1308 non-null   object 
 13  Survived                1308 non-null   int64  
 14  age_group               1308 non-null   

In [6]:
# Preprocessing for features which are currently objects
labelencoder_X = LabelEncoder()
dfSurvivals["Title"] = labelencoder_X.fit_transform(dfSurvivals["Title"])
dfSurvivals["Last Name"] = labelencoder_X.fit_transform(dfSurvivals["Last Name"])
dfSurvivals["Sex"] = labelencoder_X.fit_transform(dfSurvivals["Sex"])
dfSurvivals["Embarkation Port"] = labelencoder_X.fit_transform(dfSurvivals["Embarkation Port"])
dfSurvivals["age_group"] = labelencoder_X.fit_transform(dfSurvivals["age_group"])

In [7]:
# Drop columns
dfSurvivals = dfSurvivals.drop(['Passenger Id', 'First Name', 'Survival Boat', 'Body Number'], axis=1)
dfSurvivals.head()

Unnamed: 0,Title,Last Name,Sex,Age,Sibling/Spouse Aboard,Parent/Children Aboard,Passenger Class,Fare,Embarkation Port,Survived,age_group,family_size
0,9,15,0,29.0,0,0,1,211.3375,2,1,0,0
1,8,16,1,0.9167,1,2,1,151.55,2,1,1,3
2,9,16,0,2.0,1,2,1,151.55,2,0,1,3
3,12,16,1,30.0,1,2,1,151.55,2,0,0,3
4,13,16,0,25.0,1,2,1,151.55,2,0,0,3


In [8]:
# Ensure all columns are numerical
dfSurvivals.dtypes

Title                       int32
Last Name                   int32
Sex                         int32
Age                       float64
Sibling/Spouse Aboard       int64
Parent/Children Aboard      int64
Passenger Class             int64
Fare                      float64
Embarkation Port            int32
Survived                    int64
age_group                   int32
family_size                 int64
dtype: object

### Split the data into X and y and then into testing and training sets

In [9]:
# Split the data into X (features) and y (target)

# Set the y variable, which is the target
y = dfSurvivals['Survived']

# Set the X variable, which includes all features escept the target
X = dfSurvivals.drop(columns=['Survived'])

In [10]:
# Split into testing and training sets using train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=78)

In [11]:
# Create a StandardScaler instances
scaler = StandardScaler()

# Fit the StandardScaler
X_scaler = scaler.fit(X_train)

# Scale the data
X_train_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)

### Compile, Train, and Evaluate the Model

In [12]:
# Get the number of input features
input_dim = X_train.shape[1]

# Function to create and compile the neural network
def create_model(hp):
    
    # Create a new model
    nn_model = tf.keras.models.Sequential()
    
    # Choose activation function for hidden layers
    activation = hp.Choice('activation',['relu','tanh','sigmoid'])
    
    # Add first hidden layer with hyperparameter-tuned settings
    nn_model.add(tf.keras.layers.Dense(units=hp.Int('first_units', 
        min_value=1, 
        max_value=10, 
        step=2), 
        activation=activation, 
        input_dim=input_dim))
    
    # Add second hidden layer with 10 neurons
    nn_model.add(tf.keras.layers.Dense(units=10, activation=activation))
    
    # Add output layer for binary classification
    nn_model.add(tf.keras.layers.Dense(units=1, activation="sigmoid"))
    
    # Compile the model
    nn_model.compile(loss="binary_crossentropy", optimizer='adam', metrics=["accuracy", "Precision", "Recall"]) 
    
    # Return compiled model
    return nn_model  

In [13]:
# Set up the kerastuner then allow it to search for best hyperparameters
tuner = kt.Hyperband(
    create_model,
    objective="val_accuracy",
    max_epochs=20,
    hyperband_iterations=2,
    overwrite=True)

# Perform hyperparameter search
tuner.search(X_train_scaled, y_train, epochs=20, validation_data=(X_test_scaled, y_test))

Trial 21 Complete [00h 00m 02s]
val_accuracy: 0.6269113421440125

Best val_accuracy So Far: 0.7737002968788147
Total elapsed time: 00h 00m 42s
INFO:tensorflow:Oracle triggered exit


In [14]:
# Get the hyperparameters of the best model
best_hyper = tuner.get_best_hyperparameters(1)[0]
best_hyper.values

{'activation': 'relu',
 'first_units': 9,
 'tuner/epochs': 20,
 'tuner/initial_epoch': 7,
 'tuner/bracket': 2,
 'tuner/round': 2,
 'tuner/trial_id': '0013'}

In [15]:
# Get the best model
best_model = tuner.get_best_models(1)[0]
best_model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense (Dense)               (None, 9)                 108       
                                                                 
 dense_1 (Dense)             (None, 10)                100       
                                                                 
 dense_2 (Dense)             (None, 1)                 11        
                                                                 
Total params: 219 (876.00 Byte)
Trainable params: 219 (876.00 Byte)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


In [16]:
# Train the model
fit_model = best_model.fit(X_train_scaled,y_train,epochs=100)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

In [17]:
# Evaluate the model using the test data
model_loss, model_accuracy, model_precision, model_recall = best_model.evaluate(X_test_scaled,y_test,verbose=2)
print(f"Loss: {model_loss}, Accuracy: {model_accuracy}, Precision: {model_precision}, Recall: {model_recall}")

11/11 - 0s - loss: 0.5010 - accuracy: 0.7768 - precision: 0.7426 - recall: 0.6148 - 229ms/epoch - 21ms/step
Loss: 0.5009827017784119, Accuracy: 0.7767584323883057, Precision: 0.7425742745399475, Recall: 0.6147540807723999


In [18]:
# Use the model to make predictions on test data
y_pred_prob = best_model.predict(X_test_scaled).flatten()
y_pred = [1 if prob >= 0.5 else 0 for prob in y_pred_prob]

# Generate a confusion matrix
conf_matrix = confusion_matrix(y_test, y_pred)
print("Confusion Matrix:")
print(conf_matrix)

# Generate a classification report
class_report = classification_report(y_test, y_pred)
print("Classification Report:")
print(class_report)

Confusion Matrix:
[[179  26]
 [ 47  75]]
Classification Report:
              precision    recall  f1-score   support

           0       0.79      0.87      0.83       205
           1       0.74      0.61      0.67       122

    accuracy                           0.78       327
   macro avg       0.77      0.74      0.75       327
weighted avg       0.77      0.78      0.77       327



In [19]:
# Export our model to keras file
best_model.save('OUTPUT/Titanic_survival_mod1_reprocessed.keras')