# 🚢 Titanic Survival Prediction with Multiple Models

In [None]:
# 1. Import Libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier

In [None]:
# 2. Load Dataset
# Fetch seaborn's bundled "titanic" example dataset as a DataFrame and
# preview its first five rows as a quick sanity check.
titanic = sns.load_dataset("titanic")
print(titanic.head(n=5))

In [None]:
# 3. Data Preprocessing
# ----------------------
# Select relevant features.
# Take an explicit copy so the edits below operate on an independent
# DataFrame instead of a selection of `titanic`; this avoids
# SettingWithCopyWarning and behaves the same with pandas Copy-on-Write.
data = titanic[
    ["survived", "pclass", "sex", "age", "sibsp", "parch", "fare", "embarked"]
].copy()

# Handle missing values.
# Assign the filled column back rather than calling
# `data[col].fillna(..., inplace=True)`: the in-place form mutates a
# temporary Series and, under Copy-on-Write, leaves `data` unchanged.
# NOTE(review): the median/mode are computed over all rows, i.e. before the
# train/test split below, so the test rows influence the imputed values.
data["age"] = data["age"].fillna(data["age"].median())
data["embarked"] = data["embarked"].fillna(data["embarked"].mode()[0])

# Encode categorical variables as integer codes.
# LabelEncoder numbers the classes in sorted order, hence female=0, male=1.
le_sex = LabelEncoder()
data["sex"] = le_sex.fit_transform(data["sex"])  # male=1, female=0

le_embarked = LabelEncoder()
data["embarked"] = le_embarked.fit_transform(data["embarked"])

# Features & Target
X = data.drop(columns="survived")
y = data["survived"]

# Train-Test Split: hold out 20% of the rows; fixed seed for reproducibility.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Scaling: fit the scaler on the training rows only, then apply the same
# transformation to the test rows.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [None]:
# 4. Train Multiple Models
# Each classifier is fitted on the scaled training features and evaluated on
# the scaled test features; test accuracy is stored in `results` by model name.
models = {
    "Logistic Regression": LogisticRegression(max_iter=200),
    "Decision Tree": DecisionTreeClassifier(random_state=42),
    "KNN (k=5)": KNeighborsClassifier(n_neighbors=5),
}

results = {}
for name, model in models.items():
    model.fit(X_train_scaled, y_train)
    y_pred = model.predict(X_test_scaled)

    acc = accuracy_score(y_test, y_pred)
    results[name] = acc

    print(f"\n🔹 {name} Results:")
    print("Accuracy:", acc)
    print("Classification Report:\n", classification_report(y_test, y_pred))

    # Confusion Matrix
    # Draw on a dedicated figure/axes per model and close it afterwards.
    # Calling plt.clf() after plt.show() acts on the implicit "current
    # figure" and creates a stray empty one once show() has closed the plot.
    cm = confusion_matrix(y_test, y_pred)
    fig, ax = plt.subplots()
    sns.heatmap(
        cm,
        annot=True,
        fmt="d",
        cmap="Blues",
        xticklabels=["Died", "Survived"],
        yticklabels=["Died", "Survived"],
        ax=ax,
    )
    ax.set_title(f"{name} - Confusion Matrix")
    ax.set_xlabel("Predicted")
    ax.set_ylabel("Actual")
    plt.show()
    plt.close(fig)

In [None]:
# 5. Compare Models
# Bar chart of the test accuracy recorded in `results` for each model.
model_names = list(results.keys())
accuracies = list(results.values())

# Use an explicit figure/axes and close it after showing; a trailing
# plt.clf() after plt.show() would leave an empty figure behind.
fig, ax = plt.subplots(figsize=(6, 4))
ax.bar(model_names, accuracies, color=['skyblue', 'lightgreen', 'salmon'])
ax.set_ylabel("Accuracy")
ax.set_title("Model Comparison - Accuracy")
ax.set_ylim(0, 1)  # accuracy is a fraction, so show the full 0-1 range
plt.show()
plt.close(fig)
