In [1]:
# Scenario Question: Predicting Titanic Survival
# Researchers are studying the Titanic disaster and want to build models that predict whether a
# passenger survived, based on that passenger's recorded information.
# - Features used:
#   - Passenger class (pclass)
#   - Gender (sex)
#   - Age (age)
#   - Number of siblings/spouses aboard (sibsp)
#   - Number of parents/children aboard (parch)
#   - Ticket fare (fare)
# - Label (survived):
#   - 1 = Survived
#   - 0 = Died
# The researchers train three different models:
# - Logistic Regression
# - K-Nearest Neighbors (KNN) with k=5
# - Decision Tree with max depth = 4
# They then evaluate each model using a classification report (precision, recall, F1-score, accuracy).

import seaborn as sns
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import classification_report, accuracy_score

# Load seaborn's Titanic dataset, keep only the label plus the six feature
# columns, and drop every row with a missing value in any kept column.
df = (
    sns.load_dataset('titanic')
    .loc[:, ['survived', 'pclass', 'sex', 'age', 'sibsp', 'parch', 'fare']]
    .dropna()
)

# Replace the textual sex column with a numeric code: male -> 0, female -> 1.
df = df.assign(sex=df['sex'].map({'male': 0, 'female': 1}))

# Separate the feature matrix (everything except the label) from the target.
X = df.drop(columns='survived')
y = df['survived']

# Hold out 20% of the rows for testing. Stratifying on y preserves the class
# balance across both partitions, and the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
    stratify=y,
)

# Learn the scaling statistics from the training partition only, then apply
# that same fitted transformation to both partitions.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Logistic regression and KNN are both trained on the standardized features.
log_model = LogisticRegression(max_iter=1000).fit(X_train_scaled, y_train)
log_pred = log_model.predict(X_test_scaled)

knn_model = KNeighborsClassifier(n_neighbors=5).fit(X_train_scaled, y_train)
knn_pred = knn_model.predict(X_test_scaled)

# The decision tree is trained on the original, unscaled features.
tree_model = DecisionTreeClassifier(max_depth=4, random_state=42).fit(X_train, y_train)
tree_pred = tree_model.predict(X_test)

def _print_evaluation(title, y_true, y_pred):
    """Print a model's heading, its classification report, and its accuracy.

    Args:
        title: Heading line printed above the report.
        y_true: Ground-truth labels.
        y_pred: Labels predicted by the model, aligned with ``y_true``.
    """
    print(title)
    print(classification_report(y_true, y_pred))
    print("Accuracy:", accuracy_score(y_true, y_pred))


# Report every model through a single code path so the three outputs cannot
# drift apart; the printed text is the same as three hand-written copies.
for _title, _pred in (
    ("Logistic Regression", log_pred),
    ("KNN (k=5)", knn_pred),
    ("Decision Tree (max_depth=4)", tree_pred),
):
    _print_evaluation(_title, y_test, _pred)

Logistic Regression
              precision    recall  f1-score   support

           0       0.83      0.82      0.83        85
           1       0.75      0.76      0.75        58

    accuracy                           0.80       143
   macro avg       0.79      0.79      0.79       143
weighted avg       0.80      0.80      0.80       143

Accuracy: 0.7972027972027972
KNN (k=5)
              precision    recall  f1-score   support

           0       0.79      0.81      0.80        85
           1       0.71      0.69      0.70        58

    accuracy                           0.76       143
   macro avg       0.75      0.75      0.75       143
weighted avg       0.76      0.76      0.76       143

Accuracy: 0.7622377622377622
Decision Tree (max_depth=4)
              precision    recall  f1-score   support

           0       0.83      0.85      0.84        85
           1       0.77      0.74      0.75        58

    accuracy                           0.80       143
   macro avg