# Titanic EDA Notebook

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
# Fetch the Titanic example dataset bundled with seaborn
# (downloaded on first use, then served from seaborn's local cache).
titanic = sns.load_dataset(name='titanic')

In [None]:
# Preview the first five rows to get a feel for the columns
titanic.head(n=5)

In [None]:
# Basic Information
# Prints a concise summary of the frame: each column's name, non-null count
# and dtype, plus overall memory usage. Useful for spotting columns with
# missing values and unexpected dtypes before plotting.
titanic.info()

In [None]:
# Statistical Summary
# Descriptive statistics (count, mean, std, min, quartiles, max). With default
# arguments on a mixed-dtype frame, pandas summarises only the numeric columns.
titanic.describe()

In [None]:
# Frequency table for each categorical feature of interest.
# The heading is derived from the column name (e.g. 'sex' -> 'Sex').
for column in ('survived', 'sex', 'pclass'):
    print(f"\n{column.capitalize()} value counts:\n", titanic[column].value_counts())

In [None]:
# Count of missing entries per column
titanic.isna().sum()

# Visual Explorations

In [None]:
# Pairplot of the numeric features, coloured by survival.
# Rows are dropped only when one of the numeric columns is missing. A bare
# dropna() would also throw away every row with a gap in any other column
# (e.g. the sparsely populated `deck`), leaving a small, biased subset.
numeric_cols = titanic.select_dtypes(include='number').columns.tolist()
sns.pairplot(titanic.dropna(subset=numeric_cols), hue='survived')
plt.suptitle('Pairplot of Titanic Features', y=1.02)  # y > 1 keeps the title clear of the top row
plt.show()

In [None]:
# Heatmap of correlations
# Correlation is only defined for numeric data. Since pandas 2.0,
# DataFrame.corr() no longer silently skips string/categorical columns and
# raises instead, so restrict the frame to numeric and boolean columns first.
# This selection works on both old and new pandas versions.
numeric_features = titanic.select_dtypes(include=['number', 'bool'])
plt.figure(figsize=(12, 8))
sns.heatmap(numeric_features.corr(), annot=True, cmap='coolwarm', linewidths=0.5)
plt.title('Feature Correlation Heatmap')
plt.show()

# Additional visualizations to identify trends

In [None]:
# Survival rate by Sex
sns.barplot(x='sex', y='survived', data=titanic)
plt.title('Survival Rate by Sex')
plt.show()

In [None]:
# Survival rate by Class
sns.barplot(x='pclass', y='survived', data=titanic)
plt.title('Survival Rate by Passenger Class')
plt.show()

In [None]:
# Age distribution by survival
sns.histplot(data=titanic, x='age', hue='survived', kde=True, multiple="stack")
plt.title('Age Distribution by Survival')
plt.show()

# Insights/Relationships:
1. Females had a significantly higher survival rate than males.
2. First-class passengers survived at a much higher rate compared to third-class passengers.
3. Younger passengers (children) seemed to have higher survival rates.
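4. "fare" and "pclass" are negatively correlated with each other (first class is the lowest class number and the most expensive), and both are correlated with "survived": higher fares and better classes go with higher survival.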
5. Missing values exist in "age", "embarked", "deck", and "embark_town".