# Titanic Survival Prediction using Machine Learning

In [2]:
import tensorflow as tf
import tensorflow_datasets as tfds
from sklearn import preprocessing
from sklearn.model_selection import train_test_split
import pandas as pd

### Load the dataset

In [3]:
titanic_data, info = tfds.load('titanic', split='train', as_supervised=False, with_info=True)
titanic_df = tfds.as_dataframe(titanic_data, info)
train_data, test_data = train_test_split(titanic_df, test_size=0.2, random_state=42)



[1mDownloading and preparing dataset Unknown size (download: Unknown size, generated: Unknown size, total: Unknown size) to C:\Users\myxio\tensorflow_datasets\titanic\4.0.0...[0m


  from .autonotebook import tqdm as notebook_tqdm
Dl Size...: 0 MiB [00:00, ? MiB/s]1/1 [00:00<00:00,  1.07 url/s]
Dl Completed...: 100%|██████████| 1/1 [00:00<00:00,  1.02 url/s]
                                                                        

[1mDataset titanic downloaded and prepared to C:\Users\myxio\tensorflow_datasets\titanic\4.0.0. Subsequent calls will reuse this data.[0m


### Preprocess the dataset

In [4]:
titanic_df.head()

Unnamed: 0,age,boat,body,cabin,embarked,fare,home.dest,name,parch,pclass,sex,sibsp,survived,ticket
0,30.0,b'Unknown',-1,b'Unknown',2,13.0,"b'Sarnia, ON'","b'McCrie, Mr. James Matthew'",0,1,0,0,0,b'233478'
1,37.0,b'Unknown',98,b'Unknown',2,7.925,"b'Ruotsinphytaa, Finland New York, NY'","b'Gustafsson, Mr. Anders Vilhelm'",0,2,0,2,0,b'3101276'
2,28.0,b'9',-1,b'Unknown',2,13.0,b'Spain',"b'Reynaldo, Ms. Encarnacion'",0,1,1,0,1,b'230434'
3,18.0,b'Unknown',-1,b'Unknown',2,73.5,"b'Lyndhurst, England'","b'Davies, Mr. Charles Henry'",0,1,0,0,0,b'S.O.C. 14879'
4,-1.0,b'Unknown',-1,b'Unknown',0,7.8958,b'Unknown',"b'Gheorgheff, Mr. Stanio'",0,2,0,0,0,b'349254'


In [5]:
# 1. Encode categorical features
encoder = preprocessing.OneHotEncoder(sparse_output=False)
train_data['sex'] = encoder.fit_transform(train_data['sex'].values.reshape(-1, 1))
test_data['sex'] = encoder.transform(test_data['sex'].values.reshape(-1, 1))

In [6]:
# 2. Handle missing values
train_data['age'].fillna(train_data['age'].median(), inplace=True)
test_data['age'].fillna(train_data['age'].median(), inplace=True)

In [7]:
# 3.Scale numerical features
scaler = preprocessing.StandardScaler()
train_data[['age', 'fare']] = scaler.fit_transform(train_data[['age','fare']])
test_data[['age', 'fare']] = scaler.transform(test_data[['age', 'fare']])

### Build Neural Network Classifier

In [8]:
# Define features and target and convert to tensors
features = ['sex', 'age', 'fare']
target = 'survived'
train_features = tf.cast(train_data[features], tf.float32)
train_target = tf.cast(train_data[target], tf.int32)
test_features = tf.cast(test_data[features], tf.float32)
test_target = tf.cast(test_data[target], tf.int32)

# Build the binary classification model
model = tf.keras.Sequential([
    tf.keras.layers.Dense(16, activation='relu', input_shape=(len(features),)),
    tf.keras.layers.Dropout(0.2),
    tf.keras.layers.Dense(8, activation='relu'),
    tf.keras.layers.Dense(1, activation='sigmoid')
])

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


### Model training

In [9]:
# Define optimizer and loss then train and evaluate
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
model.fit(train_features, train_target, epochs=20, validation_split=0.2)
test_loss, test_acc = model.evaluate(test_features, test_target)
print(f"Test accuracy: {test_acc:.4f}")

Epoch 1/20
[1m27/27[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 15ms/step - accuracy: 0.5346 - loss: 0.6940 - val_accuracy: 0.6952 - val_loss: 0.6629
Epoch 2/20
[1m27/27[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 0.6234 - loss: 0.6825 - val_accuracy: 0.6762 - val_loss: 0.6519
Epoch 3/20
[1m27/27[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 0.6876 - loss: 0.6577 - val_accuracy: 0.6905 - val_loss: 0.6398
Epoch 4/20
[1m27/27[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 0.6820 - loss: 0.6368 - val_accuracy: 0.7714 - val_loss: 0.6211
Epoch 5/20
[1m27/27[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 0.7181 - loss: 0.6197 - val_accuracy: 0.7667 - val_loss: 0.5993
Epoch 6/20
[1m27/27[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.7587 - loss: 0.5917 - val_accuracy: 0.7667 - val_loss: 0.5802
Epoch 7/20
[1m27/27[0m [32m━━━━━━━━━

### Model prediction

In [11]:
predictions = model.predict(test_features)
binary_predictions = (predictions > 0.5).astype(int)
results_df = pd. DataFrame ({
  'Actual': test_target,
  'Predicted': binary_predictions.flatten(),
  'Sex': test_data['sex'].values,
  'Age': test_data['age'].values
})

[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step


In [None]:
# Analyze survival based on Sex and Age
sex_analysis = results_df.groupby('Sex').mean()
print("Survival rate based on Sex : \n")
print(sex_analysis, "\n")

age_analysis = results_df.groupby('Age').mean()
print("Survival rate based on Age : \n")
print(age_analysis)