# Titanic Survival Prediction - ML Pipeline

## 1. Load Dataset

In [None]:

import pandas as pd

# Read the Titanic CSV (path is relative to the working directory) into a
# DataFrame, then preview the first rows as the cell's output.
csv_path = "Titanic-Dataset.csv"
df = pd.read_csv(csv_path)
df.head()


## 2. Separate Features and Target

In [None]:

# The label to predict is the "Survived" column; every other column of the
# loaded frame is kept as a candidate feature.
y = df["Survived"]
X = df.drop(columns=["Survived"])


## 3. Identify Numerical and Categorical Features

In [None]:

# Columns that are one-hot encoded downstream.
categorical_features = ["Sex", "Embarked"]

# Row identifiers carry no predictive signal and should not be scaled/fed to
# the model as if they were measurements.
identifier_columns = ["PassengerId"]

# Only columns with a numeric dtype can go through StandardScaler.  Taking
# "everything that is not categorical" would also pick up free-text columns
# (in the usual Titanic CSV: Name, Ticket, Cabin), which makes the scaler fail
# with "could not convert string to float" at fit time.
numerical_features = [
    col
    for col in X.select_dtypes(include="number").columns
    if col not in categorical_features and col not in identifier_columns
]


## 4. Create ColumnTransformer and Pipeline

In [None]:

from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.impute import SimpleImputer
from sklearn.pipeline import Pipeline
from sklearn.linear_model import LogisticRegression

# Numeric branch: fill gaps with the column median, then standardise.
# StandardScaler leaves NaN values in place and LogisticRegression refuses to
# fit on NaN, so missing values (e.g. Age in the usual Titanic data) must be
# imputed before the model sees them.
numeric_transformer = Pipeline([
    ("imputer", SimpleImputer(strategy="median")),
    ("scaler", StandardScaler()),
])

# Categorical branch: fill gaps with the most frequent category, then one-hot
# encode.  handle_unknown="ignore" keeps prediction from failing on categories
# that were not present in the training split.
categorical_transformer = Pipeline([
    ("imputer", SimpleImputer(strategy="most_frequent")),
    ("encoder", OneHotEncoder(handle_unknown="ignore")),
])

# Route each feature group through its own branch.  Columns listed in neither
# group are dropped (ColumnTransformer's default remainder behaviour).
preprocessor = ColumnTransformer(
    transformers=[
        ("num", numeric_transformer, numerical_features),
        ("cat", categorical_transformer, categorical_features),
    ]
)

# Full estimator: preprocessing followed by logistic regression.  Because the
# imputers and scaler live inside the pipeline, they are fitted on training
# data only when pipeline.fit is called.
pipeline = Pipeline([
    ("preprocessing", preprocessor),
    ("model", LogisticRegression(max_iter=1000)),
])


## 5. Train-Test Split

In [None]:

from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for evaluation.  The split is seeded for
# reproducibility and stratified on the target so both parts keep the same
# class proportions.
split_options = {"test_size": 0.2, "random_state": 42, "stratify": y}
X_train, X_test, y_train, y_test = train_test_split(X, y, **split_options)


## 6. Train and Predict

In [None]:

# Fit the whole pipeline on the training split, then predict labels for the
# held-out split (fit returns the fitted pipeline, so the calls can be chained).
y_pred = pipeline.fit(X_train, y_train).predict(X_test)


## 7. Evaluation Metrics

In [None]:

from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

# Report each metric for the held-out predictions, one per line, in a fixed
# order: accuracy, precision, recall, F1.
metric_table = (
    ("Accuracy:", accuracy_score),
    ("Precision:", precision_score),
    ("Recall:", recall_score),
    ("F1-score:", f1_score),
)
for label, metric_fn in metric_table:
    print(label, metric_fn(y_test, y_pred))
