In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import classification_report
import joblib

# Train a KNN survival classifier on the seaborn Titanic dataset and persist
# the fitted model, scaler and label encoder with joblib.
#
# All statistics used for preprocessing (age median, scaler mean/std) are
# learned from the training rows only and then applied to the test rows, so
# the held-out evaluation is not influenced by test-set information.

# 1. Load Titanic dataset
df = sns.load_dataset('titanic')

# 2. Feature Selection (Selecting 5 input features + target)
# Features: pclass, sex, age, sibsp, fare
selected_df = df[['pclass', 'sex', 'age', 'sibsp', 'fare', 'survived']].copy()

# 3. Encoding Categorical Variables (Sex)
# LabelEncoder sorts its classes, so female -> 0 and male -> 1.
# This is a fixed category mapping, so fitting it before the split leaks nothing.
le = LabelEncoder()
selected_df['sex'] = le.fit_transform(selected_df['sex'])

# 4. Define X and y
X = selected_df.drop('survived', axis=1)
y = selected_df['survived']

# 5. Train/Test Split -- done BEFORE imputation and scaling so that no
# statistic computed below can see the test rows.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# 6. Handling Missing Values
# The median is learned from the training rows and reused for the test rows.
age_median = X_train_raw['age'].median()
X_train_raw = X_train_raw.assign(age=X_train_raw['age'].fillna(age_median))
X_test_raw = X_test_raw.assign(age=X_test_raw['age'].fillna(age_median))

# 7. Feature Scaling (Mandatory for KNN: distances are scale-sensitive)
# Fit on the training rows only; the test rows are transformed with the
# training mean/std.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_raw)
X_test = scaler.transform(X_test_raw)

# Full scaled matrix, kept in case a later cell still references `X_scaled`.
# It is produced with the train-fitted median and scaler, not refit on all rows.
X_scaled = scaler.transform(X.assign(age=X['age'].fillna(age_median)))

# 8. Implement KNN Algorithm
knn = KNeighborsClassifier(n_neighbors=5)
knn.fit(X_train, y_train)

# 9. Evaluate Model
y_pred = knn.predict(X_test)
print("Classification Report:\n", classification_report(y_test, y_pred))

# 10. Save Model, Scaler, and Encoder
joblib.dump(knn, 'titanic_survival_model.pkl')
joblib.dump(scaler, 'scaler.pkl')
joblib.dump(le, 'label_encoder.pkl')

print("All files saved successfully!")

Classification Report:
               precision    recall  f1-score   support

           0       0.79      0.88      0.83       105
           1       0.79      0.68      0.73        74

    accuracy                           0.79       179
   macro avg       0.79      0.78      0.78       179
weighted avg       0.79      0.79      0.79       179

All files saved successfully!
