# Titanic Survival Prediction

Machine Learning Internship Task

## Objective
Predict passenger survival using machine learning models.

In [None]:

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
import joblib

# Apply seaborn's default theme so the matplotlib figures below share one style.
sns.set()


## Load Dataset

In [None]:

# Source data: the Kaggle Titanic training file, read from the working directory.
csv_path = "train.csv"
data = pd.read_csv(csv_path)

# Preview the first five rows as the cell output.
data.head(5)


## Exploratory Data Analysis

In [None]:

# Column dtypes and non-null counts; info() prints its report directly.
data.info()

# A notebook cell only renders its *last* bare expression, so each summary table
# is passed to display() explicitly; otherwise the describe() table is computed
# and silently thrown away.
display(data.describe())

# Missing values per column (drives the imputation choices made later).
display(data.isnull().sum())


## Visualizations

In [None]:

# Each chart gets its own figure/axes pair, and the axes object is handed to
# seaborn explicitly instead of relying on matplotlib's "current figure" state.

# Class balance of the target.
fig, ax = plt.subplots()
sns.countplot(x="Survived", data=data, ax=ax)
ax.set_title("Survival Count")
plt.show()

# Mean survival rate for each sex.
fig, ax = plt.subplots()
sns.barplot(x="Sex", y="Survived", data=data, ax=ax)
ax.set_title("Survival by Gender")
plt.show()

# Age distribution split by outcome.
fig, ax = plt.subplots()
sns.boxplot(x="Survived", y="Age", data=data, ax=ax)
ax.set_title("Age vs Survival")
plt.show()

# Histogram of ticket fares.
fig, ax = plt.subplots()
sns.histplot(data["Fare"], bins=30, ax=ax)
ax.set_title("Fare Distribution")
plt.show()


## Data Preprocessing

In [None]:

# Impute, encode and one-hot the raw frame in a single non-inplace chain.
#
# The previous `data["Age"].fillna(..., inplace=True)` form is chained assignment
# through an inplace method: it acts on a temporary Series, raises a FutureWarning
# on pandas 2.x and is a silent no-op under Copy-on-Write (pandas 3), which would
# leave the NaNs in place. Assigning the filled columns back avoids that.
#
# NOTE(review): the median/mode are computed on the full dataset, i.e. before the
# train/test split, so a little test-set information leaks into the imputation.
data = (
    data
    .assign(
        # Missing ages -> median age.
        Age=lambda df: df["Age"].fillna(df["Age"].median()),
        # Missing embarkation port -> most frequent port.
        Embarked=lambda df: df["Embarked"].fillna(df["Embarked"].mode()[0]),
        # Binary encoding: male -> 0, female -> 1 (any other value becomes NaN).
        Sex=lambda df: df["Sex"].map({"male": 0, "female": 1}),
    )
    # One-hot encode Embarked; drop_first removes the first category's column.
    .pipe(pd.get_dummies, columns=["Embarked"], drop_first=True)
)

features = ["Pclass", "Sex", "Age", "Fare", "Embarked_Q", "Embarked_S"]
X = data[features]
y = data["Survived"]

# Fail here with a clear message rather than deep inside model fitting.
assert not X.isnull().any().any(), "feature matrix still contains missing values"


## Train-Test Split

In [None]:

# Hold out 20% of the rows for evaluation. The split is seeded for
# reproducibility and stratified on the target so both parts keep the same
# survived / not-survived ratio.
split = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)
X_train, X_test, y_train, y_test = split


## Model Training & Comparison

In [None]:

def _with_scaling(estimator):
    """Return a pipeline that standardizes the features, then fits `estimator`."""
    return Pipeline([("scaler", StandardScaler()), ("model", estimator)])


# Candidate classifiers keyed by display name. The two scale-sensitive models
# are wrapped in a scaling pipeline; the decision tree is used as-is.
models = {
    "Logistic Regression": _with_scaling(LogisticRegression(max_iter=200)),
    "k-NN": _with_scaling(KNeighborsClassifier(n_neighbors=5)),
    "Decision Tree": DecisionTreeClassifier(random_state=42),
}

# Held-out accuracy per model name, filled in by the loop below.
results = {}

for name in models:
    model = models[name]
    model.fit(X_train, y_train)

    # Score on the held-out split and record the accuracy.
    y_pred = model.predict(X_test)
    acc = accuracy_score(y_test, y_pred)
    results[name] = acc

    print(name, "Accuracy:", acc)
    print(classification_report(y_test, y_pred))


## Save Best Model

In [None]:

# Pick the entry with the highest recorded accuracy; on a tie the model that
# was inserted first wins, because max() keeps the first maximum it sees.
best_model_name, _best_acc = max(results.items(), key=lambda item: item[1])
best_model = models[best_model_name]

# Persist the fitted estimator so it can be reloaded without retraining.
joblib.dump(best_model, "best_titanic_model.pkl")
print("Saved model:", best_model_name)


## Inference Example

In [None]:

# SECURITY NOTE: joblib files are pickles, and unpickling can execute arbitrary
# code. Only load model files that you produced yourself or otherwise trust.
model = joblib.load("best_titanic_model.pkl")

# Example passenger: 3rd class, male (Sex = 0), age 22, fare 7.25, Embarked_S = 1.
# The sample is a one-row DataFrame with the training column names rather than a
# bare nested list: the models were fitted on a DataFrame, and scikit-learn warns
# ("X does not have valid feature names") when predict() receives unnamed input.
# Naming the columns also keeps the value-to-feature pairing explicit.
sample = pd.DataFrame(
    [[3, 0, 22, 7.25, 0, 1]],
    columns=["Pclass", "Sex", "Age", "Fare", "Embarked_Q", "Embarked_S"],
)
prediction = model.predict(sample)

print("Survival Prediction (0 = No, 1 = Yes):", prediction[0])
