# **Importing Essential Libraries**

In [None]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

# **Exploratory Data Analysis (EDA)**
#### **EDA helps uncover patterns, anomalies, and relationships in the data.**

In [None]:
# Load the 'titanic' example dataset through seaborn, then show the frame
data = sns.load_dataset(name='titanic')
data

In [None]:
# Checking dataset info: print the summary produced by DataFrame.info()
data.info()

In [None]:
# Summary statistics from DataFrame.describe() with its default arguments
data.describe()

In [None]:
# Count how many rows carry each distinct value of the 'survived' column
data['survived'].value_counts()

In [None]:
# Missing-value count per column, ordered by column label
(
    data
    .isnull()
    .sum()
    .sort_index()
)

In [None]:
# Missing-value count per column, largest count first (descending order)
(
    data
    .isnull()
    .sum()
    .sort_values(ascending=False)
)

In [None]:
# Percentage of missing values per column, largest first.
# Two decimals are kept on purpose: rounding to whole percents reports any
# column with under 0.5% missing as 0.0, which hides that it has gaps at all.
missing_pct = round(data.isnull().sum() / len(data) * 100, 2).sort_values(ascending=False)
print(f"Missing values in %age: \n{missing_pct}")

# **Data Visualizations**

In [None]:
# Scatter plot of fare (y) against age (x) via the pandas plotting interface
data.plot(
    kind='scatter',
    x='age',
    y='fare',
    color='orange',
)

In [None]:
# Bar plot of the whole frame in wide form (no x/y given), vertical bars
sns.barplot(
    data=data,
    orient='v',
)

In [None]:
# Line plot of fare (y) against age (x)
sns.lineplot(
    data=data,
    x='age',
    y='fare',
)

In [None]:
# Fare by sex, with the bars inside each sex group split by passenger class
sns.barplot(
    data=data,
    x='sex',
    y='fare',
    hue='class',
)

In [None]:
# Fare by sex and class with the error bars switched off.
# `errorbar=None` is the current spelling; the older `ci=None` argument is
# deprecated in seaborn and triggers a warning.
sns.barplot(x='sex', y='fare', data=data, hue='class', errorbar=None)

In [None]:
# Fare by sex and class, error bars disabled
sns.barplot(
    data=data,
    x='sex',
    y='fare',
    hue='class',
    errorbar=None,
)

In [None]:
# Fare by sex and class; bar height is the median (np.median) instead of
# the default estimator, and error bars are disabled
sns.barplot(
    data=data,
    x='sex',
    y='fare',
    hue='class',
    estimator=np.median,
    errorbar=None,
)

In [None]:
# Median fare by sex and class, no error bars, colors drawn at full
# saturation (saturation=1)
sns.barplot(
    data=data,
    x='sex',
    y='fare',
    hue='class',
    estimator=np.median,
    errorbar=None,
    saturation=1,
)

In [None]:
# Median fare by sex and class with thicker bar outlines.
# The error-bar color goes through `err_kws` because the `errcolor` argument
# is deprecated. The gray level is written as the string '0.2': matplotlib
# does not accept a bare float as a color.
sns.barplot(
    x='sex', y='fare', data=data, hue='class',
    errorbar=None, estimator=np.median, saturation=1,
    linewidth=2.5, err_kws={'color': '0.2'},
)

In [None]:
sns.set(style='white')  # style must be one of white, dark, whitegrid, darkgrid, ticks
# Same median-fare bar plot, now under the 'white' style.
# The error-bar gray level is the string '0.2': matplotlib gray shades are
# given as strings, and a bare float is not a valid color.
sns.barplot(
    x='sex', y='fare', data=data, hue='class',
    errorbar=None, estimator=np.median, saturation=1,
    linewidth=2.5, err_kws={'color': '0.2'},
)

In [None]:
# Box plot of fare by sex, one box per passenger class within each sex
sns.boxplot(
    data=data,
    x='sex',
    y='fare',
    hue='class',
)

In [None]:
# Fare by sex and class, female boxes first, in a blue-seeded gradient.
# With `hue` set, seaborn 0.13 deprecates seeding the gradient through
# `color=` and asks for the equivalent `palette='dark:blue'` instead.
sns.boxplot(
    x='sex', y='fare', data=data, hue='class',
    palette='dark:blue', order=['female', 'male'],
)

In [None]:
# Fare by sex and class with an explicit x-axis order (female, then male).
# The blue gradient is requested via `palette='dark:blue'`; passing
# `color='blue'` together with `hue` is deprecated in seaborn 0.13.
sns.boxplot(
    x='sex', y='fare', data=data, hue='class',
    palette='dark:blue', order=['female', 'male'],
)

In [None]:
# Fare by sex and class, with a marker for each box's mean (showmeans=True)
sns.boxplot(
    data=data,
    x='sex',
    y='fare',
    hue='class',
    showmeans=True,
)

In [None]:
# Age by survival flag and class, with a marker for each box's mean
sns.boxplot(
    data=data,
    x='survived',
    y='age',
    hue='class',
    showmeans=True,
)

In [None]:
# Age by survival flag and class. Each group's mean is drawn as a large
# star with a red edge (see meanprops).
sns.boxplot(
    x='survived', y='age', data=data, hue='class', showmeans=True,
    meanprops={"marker": "*", "markersize": 12, "markeredgecolor": "red"},
)

plt.xlabel("Survived", size=10)
plt.ylabel("Age (Years)", size=10)
# Title corrected: the previous text misspelled "drowned" and described a
# head count, whereas this figure shows age distributions.
plt.title('Age of Passengers Who Survived vs. Drowned, by Class', size=10, weight="bold")
plt.show()

In [None]:
# Distribution Plot – Age & Fare (to check skewness/outliers)
plt.figure(figsize=(12, 5))

# (column, bar color, panel title) for each histogram, left to right
hist_panels = [
    ('age', 'skyblue', 'Age Distribution'),
    ('fare', 'salmon', 'Fare Distribution'),
]
for slot, (column, shade, heading) in enumerate(hist_panels, start=1):
    plt.subplot(1, 2, slot)
    # 30-bin histogram with a KDE curve overlaid
    sns.histplot(data[column], kde=True, bins=30, color=shade)
    plt.title(heading)

plt.tight_layout()
plt.show()


In [None]:
# Box Plots – Spot Outliers
plt.figure(figsize=(10, 5))

# (x column, y column, palette, panel title) for each panel, left to right
box_panels = [
    ('survived', 'age', 'Set2', 'Age vs Survived'),
    ('pclass', 'fare', 'Set3', 'Fare vs Pclass'),
]
for slot, (group_col, value_col, colors, heading) in enumerate(box_panels, start=1):
    plt.subplot(1, 2, slot)
    # hue repeats the x column so the palette is applied per box; the
    # resulting legend is suppressed
    sns.boxplot(
        x=group_col, y=value_col, data=data,
        hue=group_col, palette=colors, legend=False,
    )
    plt.title(heading)

plt.tight_layout()
plt.show()

In [None]:
# Count Plot – Categorical Feature Distribution
plt.figure(figsize=(12, 6))

# Gender distribution
plt.subplot(1, 2, 1)
sns.countplot(x='sex', data=data, hue='sex', palette='Set2', legend=False)
plt.title('Count of Passengers by Sex')

# Class distribution
# hue mirrors the x column (as in the left panel) so each class gets one
# colored bar. With hue='sex' and the legend hidden, the bars were split by
# sex with nothing to say which was which, and did not match the title.
plt.subplot(1, 2, 2)
sns.countplot(x='pclass', data=data, hue='pclass', palette='Set2', legend=False)
plt.title('Passenger Count by Pclass')

plt.tight_layout()
plt.show()


In [None]:
# Survival Rate by Group – Bar Chart
plt.figure(figsize=(12, 4))

# One bar per sex showing the aggregated 'survived' value; hue repeats the
# x column so the palette colors each bar, and the legend is hidden
sns.barplot(
    data=data,
    x='sex',
    y='survived',
    hue='sex',
    palette='coolwarm',
    legend=False,
)
plt.ylabel('Survival Rate')
plt.title('Survival Rate by Sex')
plt.show()


In [None]:
# Heatmap of the boolean missing-value mask (one cell per row/column entry),
# drawn without a color bar
sns.heatmap(
    data.isnull(),
    cmap='viridis',
    cbar=False,
)
plt.title('Missing Data Heatmap')
plt.show()

In [None]:
# Build a numeric copy of the data for correlation analysis
data_numeric = data.copy()

# Label -> integer code for each text-valued column; labels missing from a
# table are not given a code by .map()
label_codes = {
    'sex': {'male': 0, 'female': 1},
    'embarked': {'S': 0, 'C': 1, 'Q': 2},
    'who': {'man': 0, 'woman': 1, 'child': 2},
    'class': {'First': 1, 'Second': 2, 'Third': 3},
    'embark_town': {'Southampton': 0, 'Cherbourg': 1, 'Queenstown': 2},
    'alive': {'no': 0, 'yes': 1},
}
for column, codes in label_codes.items():
    data_numeric[column] = data_numeric[column].map(codes)

# Flag columns are cast straight to integers (presumably booleans -> 0/1)
for flag in ('alone', 'adult_male'):
    data_numeric[flag] = data_numeric[flag].astype(int)

# 'deck' is left out of the correlation (not numeric / too many missing values)
data_numeric = data_numeric.drop(columns=['deck'])

# Keep only fully populated rows for the correlation matrix
corr_data = data_numeric.dropna()

# Annotated correlation heatmap, values shown with two decimals
plt.figure(figsize=(12, 8))
sns.heatmap(
    corr_data.corr(),
    annot=True,
    fmt=".2f",
    cmap='coolwarm',
)
plt.title('Correlation Heatmap - Titanic Dataset')
plt.show()
