# Titanic Survival Prediction Project
This notebook uses the Titanic dataset to predict whether each passenger survived.

In [None]:
# 📦 Step 1: Import Libraries
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report

In [None]:
# 📤 Step 2: Upload CSV files (if using Colab)
# The upload widget is only available inside Google Colab. In any other
# environment the `google.colab` package is missing, so the import is guarded
# and the later steps read train.csv / test.csv from the working directory.
try:
    from google.colab import files
except ImportError:
    uploaded = {}  # nothing was uploaded through the widget
else:
    uploaded = files.upload()

In [None]:
# 📥 Step 3: Load Data
# Read both CSV files from the working directory: train.csv first, then test.csv.
train, test = (pd.read_csv(csv_name) for csv_name in ('train.csv', 'test.csv'))
# Bare expression at the end of the cell so a notebook displays the first rows.
train.head()

In [None]:
# 🔍 Step 4: Explore Data
# DataFrame.info() writes its summary to stdout itself and returns None, so it
# is called directly; wrapping it in print() would append a stray "None" line.
train.info()
# Number of missing values in each column.
print(train.isnull().sum())

In [None]:
# 🧹 Step 5: Clean Data
# Fill missing values by assigning the result back to the column.
# `df[col].fillna(..., inplace=True)` works on a temporary Series: pandas 2.x
# warns about it, and with Copy-on-Write enabled the frame stays unchanged.
# Each frame is filled from its own statistics (median, or mode for Embarked).
train['Age'] = train['Age'].fillna(train['Age'].median())
test['Age'] = test['Age'].fillna(test['Age'].median())
train['Embarked'] = train['Embarked'].fillna(train['Embarked'].mode()[0])
test['Fare'] = test['Fare'].fillna(test['Fare'].median())

# Cabin is removed from both frames instead of being imputed.
train.drop(columns='Cabin', inplace=True)
test.drop(columns='Cabin', inplace=True)

In [None]:
# 🏗️ Step 6: Feature Engineering
# Titles that are collapsed into a single 'Rare' category.
rare_titles = ['Lady', 'Countess', 'Capt', 'Col', 'Don', 'Dr', 'Major',
               'Rev', 'Sir', 'Jonkheer', 'Dona']
# One mapping for every substitution: rare titles -> 'Rare', plus the
# spelling variants Mlle/Ms -> Miss and Mme -> Mrs. No replacement value is
# itself a key, so a single pass gives the same result as chained replaces.
title_map = {title: 'Rare' for title in rare_titles}
title_map.update({'Mlle': 'Miss', 'Ms': 'Miss', 'Mme': 'Mrs'})

for dataset in [train, test]:
    # The pattern captures the run of letters that follows a space and is
    # terminated by a period. It is a raw string so that `\.` is passed to the
    # regex engine as written instead of being an invalid string escape.
    extracted = dataset['Name'].str.extract(r' ([A-Za-z]+)\.', expand=False)
    dataset['Title'] = extracted.replace(title_map)

# Replace the string categories with integer codes. The encoder is refit for
# every column, so after the loop `label` holds the classes of the last one.
label = LabelEncoder()
for col in ['Sex', 'Embarked', 'Title']:
    # Fit on the values of both frames: an encoder fitted on train alone
    # raises ValueError in transform() when test contains a category that
    # never occurs in train. When test has no extra category the learned
    # classes, and therefore the codes, are the same as with a train-only fit.
    label.fit(pd.concat([train[col], test[col]], ignore_index=True))
    train[col] = label.transform(train[col])
    test[col] = label.transform(test[col])

# Set the test-set ids aside before the column is removed below.
test_passenger_ids = test['PassengerId']
# PassengerId, Name and Ticket are removed in place, train first, then test.
for frame in (train, test):
    frame.drop(columns=['PassengerId', 'Name', 'Ticket'], inplace=True)

In [None]:
# 📊 Step 7: Split Data
# Target: the Survived column. Features: every remaining column of `train`.
y = train['Survived']
X = train.drop(columns=['Survived'])
# Hold out 20% of the rows for validation; the fixed random_state makes the
# split repeatable between runs.
X_train, X_val, y_train, y_val = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [None]:
# 🤖 Step 8: Train Models
# Logistic regression fitted on the training split and scored on the
# validation split (fit() returns the estimator, so the calls are chained).
log_model = LogisticRegression(max_iter=1000).fit(X_train, y_train)
log_preds = log_model.predict(X_val)
print(
    'Logistic Regression Accuracy:',
    accuracy_score(y_val, log_preds),
)

# Random forest with 100 trees and a fixed seed, fitted on the training split
# and scored on the validation split.
rf_model = RandomForestClassifier(n_estimators=100, random_state=42)
rf_model.fit(X_train, y_train)
rf_preds = rf_model.predict(X_val)
print('Random Forest Accuracy:', accuracy_score(y_val, rf_preds))
# The line break after the heading is written as the escape "\n"; a raw
# newline inside a single-quoted literal is a syntax error.
print('Random Forest Report:\n', classification_report(y_val, rf_preds))

In [None]:
# 📈 Step 9: Predict on Test Set
# Predict with the random forest and pair each prediction with the passenger
# id that was set aside before the id column was dropped.
test_predictions = rf_model.predict(test)
submission = pd.DataFrame({
    'PassengerId': test_passenger_ids,
    'Survived': test_predictions,
})
submission.to_csv('submission.csv', index=False)

# The browser download helper only exists inside Google Colab. Elsewhere the
# file has already been written to the working directory, so a missing
# `google.colab` package is reported instead of failing the cell.
try:
    from google.colab import files
except ImportError:
    print('Saved submission.csv (not running in Colab, skipping browser download).')
else:
    files.download('submission.csv')