# Titanic Survival Prediction - Model Development

## 1. Import Libraries

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import fetch_openml
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
import joblib

## 2. Load Dataset
Loading the Titanic dataset from OpenML.

In [None]:
# Pull the Titanic passenger data from OpenML (version 1 is the standard
# Titanic dataset) and ask for pandas objects rather than raw arrays.
titanic = fetch_openml(name='titanic', version=1, as_frame=True)

# Work with the combined DataFrame exposed by the returned bunch.
df = titanic.frame

# Preview the first five rows.
df.head(n=5)

## 3. Data Preprocessing
Selected features: 
1. `pclass`
2. `sex`
3. `age`
4. `sibsp`
5. `fare`

Target: `survived`

In [None]:
# Columns used as model inputs, and the column to predict.
features = ['pclass', 'sex', 'age', 'sibsp', 'fare']
target = 'survived'

# Take an independent copy of the feature columns so later in-place edits
# (imputation, encoding) do not touch `df`.
X = df.loc[:, features].copy()
y = df[target]

# Report how many values are missing in each feature column.
missing_per_column = X.isna().sum()
print(missing_per_column)

### Handling Missing Values & Encoding
- Fill missing `age` and `fare` values with the column median
- Encode `sex` as integers (male=0, female=1)

In [None]:
# Replace missing numeric values with the median of their own column.
for col in ('age', 'fare'):
    X[col] = X[col].fillna(X[col].median())

# Turn the sex labels into integers: male -> 0, female -> 1.
# (fetch_openml may deliver this column as strings or as a categorical.)
sex_codes = {'male': 0, 'female': 1}
X['sex'] = X['sex'].map(sex_codes)

# Confirm that no missing values remain after imputation/encoding.
print("Missing values after processing:")
print(X.isnull().sum())

### Feature Scaling & Train/Test Split

In [None]:
# Split BEFORE scaling so the held-out rows never influence the scaler's
# mean/variance. Fitting the scaler on the full dataset leaks test-set
# statistics into training and makes the evaluation optimistic.
# The split is driven only by the number of rows and random_state, so the
# same rows land in train/test as when splitting the pre-scaled matrix.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Learn the scaling parameters from the training rows only, then apply the
# same (already fitted) transformation to the test rows.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_raw)
X_test = scaler.transform(X_test_raw)

# Scaled version of the full feature matrix, computed with the train-only
# statistics. Kept under its original name in case other cells reference it.
X_scaled = scaler.transform(X)

## 4. Model Training
Using Random Forest Classifier.

In [None]:
# Hyperparameters for the forest; the fixed seed makes training repeatable.
rf_params = {'n_estimators': 100, 'random_state': 42}

# Build the classifier and fit it on the training split. The fit call is the
# cell's last expression, so the fitted estimator is echoed by the notebook.
model = RandomForestClassifier(**rf_params)
model.fit(X_train, y_train)

## 5. Evaluation

In [None]:
# Predict labels for the held-out rows.
y_pred = model.predict(X_test)

# Build the per-class precision/recall/F1 summary, then print it under a title.
report = classification_report(y_test, y_pred)
print("Classification Report:")
print(report)

## 6. Save Model

In [None]:
# Persist the fitted model and the fitted scaler; both are needed together
# to preprocess and score new data the same way as in this notebook.
artifacts = {
    'titanic_survival_model.pkl': model,
    'scaler.pkl': scaler,
}
for path, obj in artifacts.items():
    joblib.dump(obj, path)

print("Model and scaler saved successfully.")