# Titanic Dataset: Extensive Exploratory Data Analysis

This notebook performs a deep dive into the Titanic dataset to uncover patterns related to passenger survival.

In [None]:

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# Apply the notebook-wide plot theme. `sns.set` is only an alias of
# `sns.set_theme`; seaborn documents `set_theme` as the preferred interface
# and notes that the alias may be removed in a future release.
sns.set_theme(style="whitegrid", palette="pastel", font_scale=1.2)


## Data Loading

In [None]:

# Location of the Titanic CSV inside the Kaggle input directory.
TITANIC_CSV = "/kaggle/input/titanic-dataset/Titanic-Dataset.csv"

# Load the raw data; `df` is the frame every later cell works on.
df = pd.read_csv(TITANIC_CSV)

# Preview the first rows as the cell output.
df.head()


## Dataset Shape

In [None]:

# Dimensions of the loaded frame, displayed as a (rows, columns) tuple.
n_rows, n_cols = df.shape
(n_rows, n_cols)


## Missing Values

In [None]:

# Count missing entries per column, then list the most affected columns first.
missing_counts = df.isna().sum()
missing_counts.sort_values(ascending=False)


## Missing Percentage

In [None]:

# Fraction of missing entries per column, expressed as a percentage
# rounded to two decimal places.
missing_fraction = df.isna().mean()
missing_fraction.mul(100).round(2)


## Numerical Summary

In [None]:

# Summary statistics from `describe()` with its default arguments,
# transposed so that each described column becomes one row.
numeric_summary = df.describe()
numeric_summary.transpose()


## Categorical Summary

In [None]:

# Restrict the frame to object-dtype columns, then summarise them with
# `describe()`; transposed so that each column becomes one row.
object_columns = df.select_dtypes(include=['object'])
object_columns.describe().transpose()


## Age Distribution

In [None]:

# Histogram of the Age column (30 bins) with a kernel-density overlay.
# NOTE(review): the missing-value cells earlier in the notebook suggest Age
# may contain gaps; confirm how histplot treats those rows.
ax = sns.histplot(df['Age'], bins=30, kde=True)

# Title and explicit axis labels so the figure can be read on its own.
ax.set(title="Age Distribution", xlabel="Age", ylabel="Count")
plt.show()


## Fare Distribution

In [None]:

# Histogram of the Fare column (30 bins) with a kernel-density overlay.
ax = sns.histplot(df['Fare'], bins=30, kde=True)

# Title and explicit axis labels so the figure can be read on its own.
ax.set(title="Fare Distribution", xlabel="Fare", ylabel="Count")
plt.show()


## Survival Count

In [None]:

# Number of rows for each value of the Survived column.
ax = sns.countplot(x='Survived', data=df)

# Title and descriptive labels so the figure can be read on its own.
# NOTE(review): the y label assumes one row per passenger — confirm.
ax.set(title="Survival Count", xlabel="Survived", ylabel="Number of passengers")
plt.show()


## Survival by Gender

In [None]:

# Mean of Survived for each value of Sex (barplot's default estimator is the
# mean); error bars are switched off.
# NOTE(review): labelling the y axis "Survival rate" assumes Survived is
# coded 0/1 — confirm against the data.
ax = sns.barplot(x='Sex', y='Survived', data=df, errorbar=None)

# Title and descriptive labels so the figure can be read on its own.
ax.set(title="Survival Rate by Sex", xlabel="Sex", ylabel="Survival rate")
plt.show()


## Survival by Class

In [None]:

# Mean of Survived for each value of Pclass (barplot's default estimator is
# the mean); error bars are switched off.
# NOTE(review): labelling the y axis "Survival rate" assumes Survived is
# coded 0/1 — confirm against the data.
ax = sns.barplot(x='Pclass', y='Survived', data=df, errorbar=None)

# Title and descriptive labels so the figure can be read on its own.
ax.set(title="Survival Rate by Passenger Class", xlabel="Pclass", ylabel="Survival rate")
plt.show()


## Feature Engineering: Family Size

In [None]:

# Derived feature: SibSp + Parch + 1.
# NOTE(review): the + 1 presumably counts the passenger themself — confirm.
# The column is written onto `df`, so any later cell that selects numeric
# columns from `df` will include it.
df['FamilySize'] = df['SibSp'] + df['Parch'] + 1

# Mean of Survived for each family size (barplot's default estimator is the
# mean); error bars are switched off.
# NOTE(review): labelling the y axis "Survival rate" assumes Survived is
# coded 0/1 — confirm against the data.
ax = sns.barplot(x='FamilySize', y='Survived', data=df, errorbar=None)

# Title and descriptive labels so the figure can be read on its own.
ax.set(title="Survival Rate by Family Size", xlabel="Family size", ylabel="Survival rate")
plt.show()


## Correlation Heatmap

In [None]:

# Pairwise correlations between all numeric columns currently on `df`
# (`DataFrame.corr` with its default method).
corr = df.select_dtypes(include=np.number).corr()

# Explicit figure so the annotated cells have room at the enlarged font scale.
fig, ax = plt.subplots(figsize=(10, 8))

# Correlations lie in [-1, 1]. Pinning the colour limits to that range and
# centring the diverging 'coolwarm' map on 0 makes colour encode sign and
# strength consistently, instead of being stretched to whatever min/max the
# data happens to have. `fmt` keeps the annotations to two decimals.
sns.heatmap(
    corr,
    annot=True,
    fmt=".2f",
    cmap='coolwarm',
    vmin=-1,
    vmax=1,
    center=0,
    ax=ax,
)
ax.set_title("Correlation Heatmap")
plt.show()


## Conclusion
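Based on the survival-rate plots above, Sex and Pclass show the clearest association with survival. Age is also considered one of the strongest predictors, although this notebook examines only its distribution; comparing survival across age groups would be needed to confirm that part of the conclusion.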