# 🧪 Titanic Survival Prediction

**Author**: Shariq Shaikh  
**Domain**: Data Science  
**Objective**: Build a machine learning model to predict whether a passenger on the Titanic survived or not.

## 🧾 1. Import Libraries

In [None]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
import warnings
warnings.filterwarnings('ignore')

## 📂 2. Load Dataset

In [None]:
# Read the passenger records from the CSV file in the working directory.
data = pd.read_csv(filepath_or_buffer='Titanic-Dataset.csv')
# Preview the first five rows as the cell output.
data.head(n=5)

## 📊 3. Data Exploration

In [None]:
# Summarise the frame: its dimensions, the null count of every column,
# and the describe() statistics. Each entry is (heading, value).
overview = (
    ("Dataset Shape:", data.shape),
    ("\nMissing Values:\n", data.isna().sum()),
    ("\nData Description:\n", data.describe()),
)
for heading, value in overview:
    print(heading, value)

### 🔍 Visualizations

In [None]:
# One count plot per entry: (figure size, x-axis column, hue column or None, title).
count_plots = [
    ((6, 4), 'Survived', None, "Survival Count"),
    ((6, 4), 'Pclass', None, "Passenger Class Distribution"),
    ((6, 4), 'Sex', None, "Gender Distribution"),
    ((8, 6), 'Survived', 'Sex', "Survival by Gender"),
    ((8, 6), 'Survived', 'Pclass', "Survival by Passenger Class"),
]

for fig_size, x_col, hue_col, title in count_plots:
    # Each plot gets its own figure and is shown before the next one starts.
    plt.figure(figsize=fig_size)
    sns.countplot(x=x_col, hue=hue_col, data=data)
    plt.title(title)
    plt.show()

## 🛠️ 4. Data Preprocessing

In [None]:
# Fill missing values.
# The filled Series is assigned back to its column rather than calling
# fillna(inplace=True) on data[col]: that form is chained assignment, which
# pandas 2.x warns about and which does not update `data` once Copy-on-Write
# is in effect, leaving the missing values in place.
data['Age'] = data['Age'].fillna(data['Age'].median())
data['Fare'] = data['Fare'].fillna(data['Fare'].median())
data['Embarked'] = data['Embarked'].fillna(data['Embarked'].mode()[0])

# Encode categorical variables as integers.
# LabelEncoder numbers the sorted unique labels, so for Sex: female -> 0, male -> 1.
# The same encoder object is refit for Embarked, so after this cell `le`
# only holds the Embarked classes.
le = LabelEncoder()
data['Sex'] = le.fit_transform(data['Sex'])        # Male:1, Female:0
data['Embarked'] = le.fit_transform(data['Embarked'])

# Drop irrelevant columns. errors='ignore' keeps the cell re-runnable:
# on a second run these columns are already gone and drop() would otherwise
# raise a KeyError.
data = data.drop(columns=['Name', 'Ticket', 'Cabin', 'PassengerId'], errors='ignore')

data.head()

## 🤖 5. Train the Model

In [None]:
# Columns used as model inputs (extend this list to add more features).
feature_cols = ['Pclass', 'Sex']
X = data[feature_cols]
y = data['Survived']

# Hold out 20% of the rows for evaluation; the fixed random_state keeps the
# split the same on every run.
splits = train_test_split(X, y, test_size=0.2, random_state=42)
X_train, X_test, y_train, y_test = splits

# Fit a logistic regression with default settings on the training portion.
model = LogisticRegression()
model.fit(X_train, y_train)

## 📈 6. Model Evaluation

In [None]:
# Predict on the held-out rows, then report each metric under its heading.
y_pred = model.predict(X_test)

metric_report = (
    ("Accuracy:", accuracy_score),
    ("\nConfusion Matrix:\n", confusion_matrix),
    ("\nClassification Report:\n", classification_report),
)
for heading, metric in metric_report:
    # Every metric function takes (true labels, predicted labels).
    print(heading, metric(y_test, y_pred))

## 🔮 7. Make a Prediction

In [None]:
# Build a one-row frame for a sample passenger: Pclass=2, Sex=1 (male).
sample = pd.DataFrame([{'Pclass': 2, 'Sex': 1}])
result = model.predict(sample)

# The single prediction decides which message is printed (1 means survived).
survived = result[0] == 1
print("🎉 Passenger Survived" if survived else "😞 Passenger Did Not Survive")

## 📌 8. Bonus: Prediction Function

In [None]:
def predict_survival(pclass, sex):
    """Predict survival for a single passenger using the trained `model`.

    Args:
        pclass: Passenger class, passed to the model as the 'Pclass' feature.
        sex: Value for the 'Sex' feature. Either the integer code used in
            this notebook (0 = female, 1 = male) or the label 'female' /
            'male' (case-insensitive, surrounding whitespace ignored),
            which is converted to that code.

    Returns:
        "🎉 Survived" if the model predicts class 1, otherwise
        "😞 Not Survived".

    Raises:
        ValueError: If `sex` is a string other than 'female' or 'male'.
    """
    if isinstance(sex, str):
        # Same numbering the preprocessing step produces for the Sex column.
        sex_codes = {'female': 0, 'male': 1}
        label = sex.strip().lower()
        if label not in sex_codes:
            raise ValueError(f"sex must be 0, 1, 'female' or 'male', got {sex!r}")
        sex = sex_codes[label]

    # The model was fitted on a frame with these column names, so the
    # single-row input uses the same ones.
    sample = pd.DataFrame({'Pclass': [pclass], 'Sex': [sex]})
    prediction = model.predict(sample)[0]
    return "🎉 Survived" if prediction == 1 else "😞 Not Survived"

# Try the helper on two passengers, given as (Pclass, Sex) pairs:
# a third-class female (Sex=0) and a first-class male (Sex=1).
for pclass, sex in [(3, 0), (1, 1)]:
    print(predict_survival(pclass, sex))