In [76]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.impute import SimpleImputer
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.metrics import accuracy_score
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.callbacks import EarlyStopping

Loadind Data

In [77]:
train_data = pd.read_csv('train.csv')
test_data = pd.read_csv('test.csv')
passenger_id = test_data['PassengerId']

In [78]:
full_data = pd.concat([train_data.drop('Survived', axis=1), test_data], axis=0)

In [79]:
full_data

Unnamed: 0,PassengerId,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.2500,,S
1,2,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C
2,3,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.9250,,S
3,4,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1000,C123,S
4,5,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.0500,,S
...,...,...,...,...,...,...,...,...,...,...,...
413,1305,3,"Spector, Mr. Woolf",male,,0,0,A.5. 3236,8.0500,,S
414,1306,1,"Oliva y Ocana, Dona. Fermina",female,39.0,0,0,PC 17758,108.9000,C105,C
415,1307,3,"Saether, Mr. Simon Sivertsen",male,38.5,0,0,SOTON/O.Q. 3101262,7.2500,,S
416,1308,3,"Ware, Mr. Frederick",male,,0,0,359309,8.0500,,S


Feature Engineering


In [80]:
full_data['Title'] = full_data['Name'].str.extract(' ([A-Za-z]+)\.', expand=False)
full_data['Title'] = full_data['Title'].replace(['Lady', 'Countess','Capt','Col','Don','Dr','Major','Rev','Sir','Jonkheer','Dona'], 'Rare')
full_data['Title'] = full_data['Title'].replace('Mlle', 'Miss').replace('Ms', 'Miss').replace('Mme', 'Mrs')

In [81]:
full_data['FamilySize'] = full_data['SibSp'] + full_data['Parch'] + 1
full_data['IsAlone'] = (full_data['FamilySize'] == 1).astype(int)
full_data['CabinKnown'] = full_data['Cabin'].notnull().astype(int)

In [82]:
full_data

Unnamed: 0,PassengerId,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked,Title,FamilySize,IsAlone,CabinKnown
0,1,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.2500,,S,Mr,2,0,0
1,2,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C,Mrs,2,0,1
2,3,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.9250,,S,Miss,1,1,0
3,4,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1000,C123,S,Mrs,2,0,1
4,5,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.0500,,S,Mr,1,1,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
413,1305,3,"Spector, Mr. Woolf",male,,0,0,A.5. 3236,8.0500,,S,Mr,1,1,0
414,1306,1,"Oliva y Ocana, Dona. Fermina",female,39.0,0,0,PC 17758,108.9000,C105,C,Rare,1,1,1
415,1307,3,"Saether, Mr. Simon Sivertsen",male,38.5,0,0,SOTON/O.Q. 3101262,7.2500,,S,Mr,1,1,0
416,1308,3,"Ware, Mr. Frederick",male,,0,0,359309,8.0500,,S,Mr,1,1,0


Dropping Unneeded feature


In [83]:
full_data = full_data.drop(['PassengerId', 'Name', 'Ticket', 'Cabin'], axis=1)

Separate back into train/test

In [84]:
X = full_data.iloc[:len(train_data), :]
X_test_final = full_data.iloc[len(train_data):, :]
y = train_data['Survived']

Define preprocessing

In [85]:
num_features = ['Age', 'Fare', 'FamilySize']
cat_features = ['Sex', 'Embarked', 'Pclass', 'Title', 'IsAlone', 'CabinKnown']

numeric_transformer = Pipeline(steps=[
    ('imputer', SimpleImputer(strategy='median')),
    ('scaler', StandardScaler())
])

categorical_transformer = Pipeline(steps=[
    ('imputer', SimpleImputer(strategy='most_frequent')),
    ('encoder', OneHotEncoder(handle_unknown='ignore'))
])

preprocessor = ColumnTransformer(transformers=[
    ('num', numeric_transformer, num_features),
    ('cat', categorical_transformer, cat_features)
])

Applying Preprocessing


In [87]:
X_processed = preprocessor.fit_transform(X)
X_test_processed = preprocessor.transform(X_test_final)

Train Test Split


In [88]:
X_train, X_test, y_train, y_test = train_test_split(X_processed, y, test_size=0.2, random_state=42)

Deep Learning Model

In [89]:
model = Sequential([
    Dense(64, activation='relu', input_shape=(X_train.shape[1],)),
    Dropout(0.3),
    Dense(32, activation='relu'),
    Dropout(0.2),
    Dense(16, activation='relu'),
    Dense(1, activation='sigmoid')
])


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [90]:
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

Early stopping

In [91]:
early_stop = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)

Train

In [92]:
model.fit(X_train, y_train, validation_data=(X_test, y_test), epochs=100, batch_size=32, callbacks=[early_stop], verbose=1)

Epoch 1/100
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 18ms/step - accuracy: 0.5516 - loss: 0.6902 - val_accuracy: 0.6592 - val_loss: 0.6105
Epoch 2/100
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - accuracy: 0.7054 - loss: 0.5824 - val_accuracy: 0.7654 - val_loss: 0.5305
Epoch 3/100
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - accuracy: 0.7554 - loss: 0.5228 - val_accuracy: 0.7933 - val_loss: 0.4846
Epoch 4/100
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - accuracy: 0.7931 - loss: 0.5021 - val_accuracy: 0.8045 - val_loss: 0.4616
Epoch 5/100
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - accuracy: 0.8195 - loss: 0.4425 - val_accuracy: 0.8045 - val_loss: 0.4484
Epoch 6/100
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - accuracy: 0.8029 - loss: 0.4687 - val_accuracy: 0.8101 - val_loss: 0.4442
Epoch 7/100
[1m23/23[0m [32m━━

<keras.src.callbacks.history.History at 0x7cfb3b884f90>

Evaluation

In [93]:
val_preds = (model.predict(X_test) > 0.5).astype(int)
print("Validation Accuracy:", accuracy_score(y_test, val_preds))

[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 37ms/step
Validation Accuracy: 0.8100558659217877


Predict for Submission

In [94]:
final_preds = (model.predict(X_test_processed) > 0.5).astype(int)

[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step 


Create submission

In [95]:
submission = pd.DataFrame({'PassengerId': passenger_id, 'Survived': final_preds.ravel().astype(int)})
submission.to_csv('titanic_dl_submission.csv', index=False)