# Titanic Survival Prediction
## Machine Learning Classification Project

In [None]:
# Import Libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from xgboost import XGBClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix, classification_report

In [None]:
# Load Dataset
# Read the CSV into a DataFrame and preview the first rows.
csv_path = '/path/to/tested.csv'
data = pd.read_csv(csv_path)
data.head()

In [None]:
# Data Preprocessing
# Fill missing values by assigning the result back to the column.
# `data[col].fillna(..., inplace=True)` acts on an intermediate Series:
# pandas 2.x emits a FutureWarning for it, and with Copy-on-Write enabled
# (the default from pandas 3.0) `data` itself would be left unchanged.
data['Age'] = data['Age'].fillna(data['Age'].median())
data['Fare'] = data['Fare'].fillna(data['Fare'].median())
data['Embarked'] = data['Embarked'].fillna(data['Embarked'].mode()[0])

# Remove the columns that are not used as model features.
data.drop(['Cabin', 'Name', 'Ticket', 'PassengerId'], axis=1, inplace=True)

# Replace the categorical columns with integer codes.
# NOTE: `le` is refitted for each column, so after this cell it only holds
# the classes of 'Embarked' (the last column it was fitted on).
le = LabelEncoder()
data['Sex'] = le.fit_transform(data['Sex'])
data['Embarked'] = le.fit_transform(data['Embarked'])

In [None]:
# Feature Scaling
X = data.drop('Survived', axis=1)
y = data['Survived']
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

In [None]:
# Train-Test Split
X_train, X_val, y_train, y_val = train_test_split(X_scaled, y, test_size=0.2, random_state=42)

In [None]:
# Model Training
# Fit the three classifiers on the training split. fit() returns the
# estimator itself, so construction and fitting are chained.
lr = LogisticRegression().fit(X_train, y_train)
rf = RandomForestClassifier(n_estimators=100, random_state=42).fit(X_train, y_train)
xgb = XGBClassifier(use_label_encoder=False, eval_metric='logloss').fit(X_train, y_train)

# Predict the validation split with each fitted model.
y_pred_lr = lr.predict(X_val)
y_pred_rf = rf.predict(X_val)
y_pred_xgb = xgb.predict(X_val)

In [None]:
# Evaluation Function
def evaluate_model(y_val, y_pred, model_name):
    """Print classification metrics for one model's validation predictions.

    Accuracy, precision, recall and F1 are computed with the scikit-learn
    metric functions using their default arguments, followed by the
    confusion matrix and the full classification report.

    Args:
        y_val: True labels of the validation samples.
        y_pred: Labels predicted for the same samples.
        model_name: Name printed in the report header.

    Returns:
        None. All results are written to standard output.
    """
    print(f'----{model_name}----')
    print('Accuracy:', accuracy_score(y_val, y_pred))
    print('Precision:', precision_score(y_val, y_pred))
    print('Recall:', recall_score(y_val, y_pred))
    print('F1-Score:', f1_score(y_val, y_pred))
    # The line breaks are written as '\n' escapes: a physical newline inside
    # a single-quoted string literal is a SyntaxError.
    print('Confusion Matrix:\n', confusion_matrix(y_val, y_pred))
    print('Classification Report:\n', classification_report(y_val, y_pred))
    # Blank separator between consecutive model reports.
    print('\n')

# Report the validation metrics of every trained model, in training order.
for model_name, y_pred in (
    ('Logistic Regression', y_pred_lr),
    ('Random Forest', y_pred_rf),
    ('XGBoost', y_pred_xgb),
):
    evaluate_model(y_val, y_pred, model_name)