# KNN – DS – Titanic: compare with a decision-tree baseline; F1 + confusion matrix.

In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, f1_score, confusion_matrix
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import StandardScaler

In [2]:
# Load the passenger table and keep only the columns this notebook works with.
cols = ['Survived', 'Pclass', 'Sex', 'Age', 'Embarked']

# .copy() makes `data` an independent frame instead of a column selection of
# the raw table, so later column assignments on `data` are unambiguous and do
# not trigger pandas' SettingWithCopyWarning.
data = pd.read_csv("titanic.csv")[cols].copy()

In [3]:
# Integer codes used for the two text columns.
SEX_CODES = {'male': 0, 'female': 1}
EMBARKED_CODES = {'S': 0, 'C': 1, 'Q': 2}


def _encode_labels(series, codes):
    """Return `series` with each label replaced by its integer code from `codes`.

    Missing values stay missing. If every non-missing value is already one of
    the codes, the series is returned unchanged, so re-running this cell is a
    no-op instead of silently turning the whole column into NaN.

    Raises:
        ValueError: if a non-missing value is neither a key nor a code in `codes`.
    """
    present = series.dropna()
    if not present.empty and present.isin(list(codes.values())).all():
        return series  # already encoded by an earlier run of this cell

    unknown = present[~present.isin(list(codes))]
    if not unknown.empty:
        raise ValueError(
            f"Unexpected values in column {series.name!r}: "
            f"{sorted(unknown.unique(), key=str)}"
        )
    return series.map(codes)


# Impute missing values: the column mean for 'Age', the most frequent value
# for 'Embarked'. `assign` returns a new frame, so nothing is written into a
# slice of another DataFrame.
data = data.assign(
    Age=data['Age'].fillna(data['Age'].mean()),
    Embarked=data['Embarked'].fillna(data['Embarked'].mode()[0]),
)

# Encode the text columns as integers.
data = data.assign(
    Sex=_encode_labels(data['Sex'], SEX_CODES),
    Embarked=_encode_labels(data['Embarked'], EMBARKED_CODES),
)

In [5]:
# NOTE(review): no visible cell defines X_train / X_test / y_train / y_test
# (execution counts jump from In [3] to In [5]), so a fresh "Restart & Run All"
# fails here with NameError. The split is recreated in this cell.
# Assumptions to confirm: 'Survived' is the target and the remaining columns of
# `data` are the features; test_size=0.2 is consistent with the 179 test rows in
# the recorded confusion matrix if the file has 891 rows; random_state=42
# mirrors the seed used for the tree below.
X = data.drop(columns='Survived')
y = data['Survived']
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Baseline model: a depth-limited decision tree with a fixed seed.
dt = DecisionTreeClassifier(
    criterion='entropy',
    max_depth=4,
    random_state=42
)

dt.fit(X_train, y_train)
y_pred_dt = dt.predict(X_test)

# Score the baseline on the held-out split.
dt_acc = accuracy_score(y_test, y_pred_dt)
dt_f1 = f1_score(y_test, y_pred_dt)
dt_cm = confusion_matrix(y_test, y_pred_dt)

print("Decision Tree Accuracy:", dt_acc)
print("Decision Tree F1-score:", dt_f1)
print("Decision Tree Confusion Matrix:\n", dt_cm)

Decision Tree Accuracy: 0.7988826815642458
Decision Tree F1-score: 0.7313432835820896
Decision Tree Confusion Matrix:
 [[94 11]
 [25 49]]


In [6]:
# Standardise the features before KNN. The scaler is fitted on the training
# split only and the same fitted scaler is then applied to both splits.
scaler = StandardScaler().fit(X_train)
X_train_scaled, X_test_scaled = (
    scaler.transform(split) for split in (X_train, X_test)
)

# KNN model
knn = KNeighborsClassifier(n_neighbors=5).fit(X_train_scaled, y_train)
y_pred_knn = knn.predict(X_test_scaled)

# Same three metrics as the decision-tree cell, computed on the test split.
knn_acc, knn_f1, knn_cm = (
    metric(y_test, y_pred_knn)
    for metric in (accuracy_score, f1_score, confusion_matrix)
)

for label, value in (
    ("KNN Accuracy:", knn_acc),
    ("KNN F1-score:", knn_f1),
    ("KNN Confusion Matrix:\n", knn_cm),
):
    print(label, value)

KNN Accuracy: 0.8100558659217877
KNN F1-score: 0.7638888888888888
KNN Confusion Matrix:
 [[90 15]
 [19 55]]
