# Titanic Dataset - Exploratory Data Analysis (EDA)
This notebook turns a set of manually written analysis steps into clean, runnable code with brief explanations, ready to share on GitHub.


## 1. Import Libraries and Load Dataset

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline

# Read the processed Titanic CSV (path is relative to the notebook's working
# directory) and preview the first five rows.
df = pd.read_csv(filepath_or_buffer='Titanic-Final-Processed.csv')
df.head(n=5)

## 2. General Information and Description

In [None]:
# Print a concise summary of the frame: columns, dtypes and non-null counts.
df.info()

In [None]:
# Descriptive statistics (count, mean, std, min, quartiles, max); by default
# pandas reports these for the numeric columns.
df.describe()

## 3. Check for Missing Values

In [None]:
# Number of missing (NaN/None) entries in each column.
df.isna().sum()

## 4. Univariate Analysis

In [None]:
# Bar chart of the number of rows for each value of 'Survived'.
ax = sns.countplot(data=df, x='Survived')
ax.set_title('Survival Count')
plt.show()

In [None]:
# Bar chart of the number of rows for each value of 'Pclass'.
ax = sns.countplot(data=df, x='Pclass')
ax.set_title('Passenger Class Count')
plt.show()

In [None]:
# Histogram of the 'Age' column with a kernel-density estimate overlaid.
ages = df['Age']
ax = sns.histplot(data=ages, kde=True)
ax.set_title('Age Distribution')
plt.show()

## 5. Bivariate Analysis

In [None]:
# One box of 'Age' per 'Survived' value, to compare the two age distributions.
ax = sns.boxplot(data=df, x='Survived', y='Age')
ax.set_title('Age vs Survival')
plt.show()

In [None]:
# Row counts per 'Sex' value, with bars split (hue) by 'Survived'.
ax = sns.countplot(data=df, x='Sex', hue='Survived')
ax.set_title('Gender vs Survival')
plt.show()

## 6. Correlation Heatmap

In [None]:
# Correlate only the numeric/boolean columns. DataFrame.corr() raises on
# string columns in pandas >= 2.0 (older versions silently dropped them), so
# select the usable columns explicitly to behave the same on every version.
corr_matrix = df.select_dtypes(include=['number', 'bool']).corr()

# Annotated heatmap of the pairwise correlation coefficients.
plt.figure(figsize=(10,6))
sns.heatmap(corr_matrix, annot=True, cmap='coolwarm')
plt.title('Correlation Matrix')
plt.show()

## 7. Outlier Detection and Removal

In [None]:
# Name the axis explicitly: a bare positional vector is interpreted as `x` by
# seaborn 0.11 but as `data` from 0.12 on, so the plot's orientation and axis
# label would otherwise depend on the installed version.
sns.boxplot(x='Fare', data=df)
plt.title('Boxplot of Fare')
plt.show()

In [None]:
# Keep only the rows whose Fare lies within 1.5 * IQR of the quartiles.
# NOTE: `df` is reassigned, so re-running this cell recomputes the quartiles
# on the already-filtered frame.
fare = df['Fare']
Q1 = fare.quantile(0.25)
Q3 = fare.quantile(0.75)
IQR = Q3 - Q1

lower_fence = Q1 - 1.5 * IQR
upper_fence = Q3 + 1.5 * IQR

# between() is inclusive on both ends, matching the >= / <= comparison.
df = df[fare.between(lower_fence, upper_fence)]
df.shape

In [None]:
# Name the axis explicitly: a bare positional vector is interpreted as `x` by
# seaborn 0.11 but as `data` from 0.12 on, so the plot's orientation and axis
# label would otherwise depend on the installed version.
sns.boxplot(x='Fare', data=df)
plt.title('Boxplot of Fare (Cleaned)')
plt.show()

## 8. Final Cleaned Data Overview

In [None]:
# Descriptive statistics of `df` as it stands after the Fare outlier filter
# above (by default pandas reports these for the numeric columns).
df.describe()