# 🛳️ Titanic Dataset - EDA

In this notebook, we will perform exploratory data analysis (EDA) and data cleaning on the Titanic dataset.

In [None]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline

In [None]:
# Read the raw training data from the working directory into `df`.
df = pd.read_csv(filepath_or_buffer='train.csv')
# Preview the first five rows.
df.head(n=5)

## 📊 Basic Info and Missing Values

In [None]:
# Print the column dtypes and non-null counts.
df.info()
# Count the missing values in each column (displayed as the cell output).
df.isna().sum()

## 👶 Handling Missing Values
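We fill the missing 'Age' values with the median age, drop the 'Cabin' column because it has too many missing values, and then drop the remaining rows that still contain missing values (the rows with a missing 'Embarked').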

In [None]:
# Impute missing ages with the median age. The result is assigned back to the
# column instead of calling fillna(inplace=True) on `df['Age']`: the in-place
# form operates on an intermediate object, emits a chained-assignment warning
# in recent pandas 2.x releases, and leaves `df` unchanged under Copy-on-Write.
df['Age'] = df['Age'].fillna(df['Age'].median())

# Remove the 'Cabin' column. errors='ignore' keeps the cell re-runnable after
# the column has already been dropped (otherwise a second run raises KeyError).
df = df.drop(columns=['Cabin'], errors='ignore')

# Drop every row that still has a missing value in ANY column. The intent is to
# remove the rows with a missing 'Embarked'.
df = df.dropna()

## 🔍 Univariate Analysis

In [None]:
# Bar chart of the number of passengers per 'Survived' value.
sns.countplot(data=df, x='Survived').set_title('Survival Count')
plt.show()

In [None]:
# Bar chart of the number of passengers per 'Pclass' value.
sns.countplot(data=df, x='Pclass').set_title('Passenger Class Distribution')
plt.show()

In [None]:
# Histogram of 'Age' (30 bins) with a kernel density estimate overlaid.
sns.histplot(data=df, x='Age', bins=30, kde=True).set_title('Age Distribution')
plt.show()

## 🧑‍🤝‍🧑 Bivariate Analysis

In [None]:
# Passenger counts per 'Sex', split into one bar per 'Survived' value.
sns.countplot(data=df, x='Sex', hue='Survived').set_title('Survival by Gender')
plt.show()

In [None]:
# Passenger counts per 'Pclass', split into one bar per 'Survived' value.
sns.countplot(data=df, x='Pclass', hue='Survived').set_title('Survival by Class')
plt.show()

In [None]:
# Box plot of 'Age' for each 'Survived' value.
sns.boxplot(data=df, x='Survived', y='Age').set_title('Age vs Survival')
plt.show()

In [None]:
# Box plot of 'Fare' for each 'Survived' value.
sns.boxplot(data=df, x='Survived', y='Fare').set_title('Fare vs Survival')
plt.show()

## ✅ Save Cleaned Dataset (Optional)

In [None]:
# Write the current `df` to a CSV file in the working directory, without the index column.
df.to_csv(path_or_buf='titanic_cleaned.csv', index=False)