In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import warnings
warnings.filterwarnings('ignore')

# Notebook-wide plotting defaults: seaborn dark-grid style, "husl" palette,
# and the default figure/font sizes applied to every matplotlib figure.
plt.style.use('seaborn-v0_8-darkgrid')
sns.set_palette("husl")
plt.rcParams.update({
    'figure.figsize': (12, 8),
    'font.size': 12,
    'axes.titlesize': 16,
    'axes.labelsize': 14,
})

import os

# Create the output folders up front; exist_ok makes re-running the cell safe.
for output_dir in ('visuals', 'data'):
    os.makedirs(output_dir, exist_ok=True)

print("TITANIC VISUALIZATION")
print("=" * 60)


In [None]:
# Load seaborn's example 'titanic' dataset and report its size.
df = sns.load_dataset('titanic')
print(f" Dataset Loaded: {df.shape[0]} passengers, {df.shape[1]} features")
print("\n Initial Dataset Info:")
# DataFrame.info() writes its report to stdout itself and returns None, so
# wrapping it in print() only appended a stray "None" line to the output.
df.info()

# Per-column missing-value count and percentage, worst columns first.
print("\n Missing Data Analysis:")
missing_data = df.isnull().sum()
missing_percent = (missing_data / len(df)) * 100
missing_df = pd.DataFrame({
    'Missing Count': missing_data,
    'Missing Percentage': missing_percent
}).sort_values('Missing Percentage', ascending=False)
# Only show columns that actually have gaps.
print(missing_df[missing_df['Missing Count'] > 0])

print("\n Executing Elite Data Cleaning...")

# Remove columns that the visible cells of this notebook never use;
# errors='ignore' tolerates any that are already absent.
columns_to_drop = ['deck', 'embark_town', 'alive', 'who', 'adult_male', 'alone']
df_clean = df.drop(columns=columns_to_drop, errors='ignore')

# Median age for every (survived, sex) combination; used as the lookup
# table when missing ages are imputed.
age_median_by_survival_sex = (
    df_clean
    .groupby(['survived', 'sex'])['age']
    .median()
)
print(" Age Median by Survival & Gender:", age_median_by_survival_sex, sep="\n")

def fill_age(row):
    """Return the row's age, imputing a group median when it is missing.

    Parameters
    ----------
    row : mapping-like
        Must expose 'age', 'survived' and 'sex' by key (e.g. a DataFrame row).

    Returns
    -------
    The existing age when present; otherwise the value stored in the
    module-level ``age_median_by_survival_sex`` lookup under the row's
    (survived, sex) key.
    """
    current_age = row['age']
    if pd.notna(current_age):
        return current_age
    group_key = (row['survived'], row['sex'])
    return age_median_by_survival_sex[group_key]

# Fill missing ages row by row from the (survived, sex) median lookup.
# NOTE(review): the imputed ages depend on the survival outcome, which will
# sharpen any age-vs-survival pattern shown later - confirm this is intended.
df_clean['age'] = df_clean.apply(fill_age, axis=1)

# Assign the filled column back instead of calling fillna(inplace=True) on a
# column selection: the chained in-place form operates on an intermediate
# object, raises a FutureWarning on pandas 2.x and is a silent no-op under
# Copy-on-Write.
df_clean['embarked'] = df_clean['embarked'].fillna(df_clean['embarked'].mode()[0])

# Bucket ages into labelled life stages (intervals are right-closed).
df_clean['age_group'] = pd.cut(df_clean['age'],
                               bins=[0, 12, 18, 35, 60, 100],
                               labels=['Child', 'Teen', 'Young Adult', 'Adult', 'Senior'])

# include_lowest=True closes the first interval on the left so a fare of
# exactly 0 lands in 'Low'; without it those rows fall outside every bin and
# come back as NaN, re-introducing missing values after "cleaning".
df_clean['fare_category'] = pd.cut(df_clean['fare'],
                                   bins=[0, 10, 30, 100, 1000],
                                   labels=['Low', 'Medium', 'High', 'Luxury'],
                                   include_lowest=True)

print(f" Cleaning Complete! Final dataset: {df_clean.shape}")
print(f" Missing values remaining: {df_clean.isnull().sum().sum()}")


In [None]:
# Two-panel figure: overall age histogram (left) and the same histogram
# split by survival (right), each with a KDE overlay.
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(20, 8))

sns.histplot(data=df_clean, x='age', kde=True, alpha=0.7,
             color='steelblue', ax=ax1, bins=30)
ax1.set_title(' Age Distribution of Titanic Passengers',
              fontsize=18, fontweight='bold', pad=20)
ax1.set_xlabel('Age (Years)', fontsize=14)
ax1.set_ylabel('Frequency', fontsize=14)
ax1.grid(True, alpha=0.3)

sns.histplot(data=df_clean, x='age', hue='survived', kde=True,
             alpha=0.7, ax=ax2, bins=30)
ax2.set_title(' Age Distribution by Survival Status',
              fontsize=18, fontweight='bold', pad=20)
ax2.set_xlabel('Age (Years)', fontsize=14)
ax2.set_ylabel('Frequency', fontsize=14)

# Relabel the legend seaborn already built instead of calling
# ax2.legend([...]) with labels only: that form pairs the labels with the
# first artists found on the axes (in draw order), which for a hue-split
# histogram with KDE lines are not seaborn's legend handles, so the entries
# can end up swapped or mis-coloured. Seaborn lists the hue levels in sorted
# order (0, 1), hence Died then Survived.
survival_legend = ax2.get_legend()
if survival_legend is not None:
    survival_legend.set_title('Survival Status')
    for legend_text, new_label in zip(survival_legend.get_texts(), ['Died', 'Survived']):
        legend_text.set_text(new_label)
ax2.grid(True, alpha=0.3)

plt.tight_layout()
plt.savefig('visuals/age_distribution_kde.png', dpi=300, bbox_inches='tight')
plt.show()

print(" Age Distribution Visualization Saved!")


In [None]:
# Side-by-side passenger counts split by survival: by gender (left) and by
# passenger class (right). Both panels share the same styling, so they are
# driven from one table of (axes, column, title, x-label).
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(20, 8))

survival_palette = ['#FF6B6B', '#4ECDC4']
panel_specs = [
    (ax1, 'sex', ' Survival Rate by Gender\n"Ladies First Policy"', 'Gender'),
    (ax2, 'pclass', ' Survival Rate by Passenger Class\n"Money Talks"', 'Passenger Class'),
]

for panel_ax, column, panel_title, x_label in panel_specs:
    sns.countplot(data=df_clean, x=column, hue='survived',
                  palette=survival_palette, ax=panel_ax)
    panel_ax.set_title(panel_title, fontsize=16, fontweight='bold', pad=20)
    panel_ax.set_xlabel(x_label, fontsize=14)
    panel_ax.set_ylabel('Number of Passengers', fontsize=14)
    panel_ax.legend(['Died', 'Survived'], title='Survival Status')

    # Annotate every bar with its passenger count.
    for bar_group in panel_ax.containers:
        panel_ax.bar_label(bar_group, fmt='%d')

plt.tight_layout()
plt.savefig('visuals/survival_by_gender_class.png', dpi=300, bbox_inches='tight')
plt.show()

print(" Gender & Class Survival Analysis Saved!")


In [None]:
# Fare vs. age scatter: colour encodes survival, marker shape encodes sex,
# with one linear trend line per survival outcome.
fig, ax = plt.subplots(figsize=(16, 10))

scatter = sns.scatterplot(data=df_clean, x='age', y='fare',
                          hue='survived', style='sex', s=100,
                          alpha=0.8, ax=ax)

ax.set_title(' Fare vs Age: The Economics of Survival\n"Age, Wealth, and Destiny"',
             fontsize=18, fontweight='bold', pad=20)
ax.set_xlabel('Age (Years)', fontsize=14)
ax.set_ylabel('Fare Paid (£)', fontsize=14)

# With both `hue` and `style` set, seaborn's legend entries include invisible
# subtitle handles (labelled 'survived' and 'sex') ahead of each group, and the
# style levels are not guaranteed to be in Female/Male order. Positional
# slicing therefore attached the display names to the wrong handles; look each
# handle up by its seaborn label instead.
handles, labels = ax.get_legend_handles_labels()
handle_for = dict(zip(labels, handles))

survival_legend = ax.legend([handle_for['0'], handle_for['1']],
                            ['Died', 'Survived'],
                            title='Survival Status', loc='upper left')
# A second ax.legend() call replaces the first, so pin the first one as a
# standalone artist before creating the gender legend.
ax.add_artist(survival_legend)
ax.legend([handle_for['female'], handle_for['male']],
          ['Female', 'Male'], title='Gender', loc='upper right')

# Trend lines only (scatter=False): red for those who died, green for survivors.
sns.regplot(data=df_clean[df_clean['survived'] == 0], x='age', y='fare',
            scatter=False, color='red', alpha=0.5, ax=ax)
sns.regplot(data=df_clean[df_clean['survived'] == 1], x='age', y='fare',
            scatter=False, color='green', alpha=0.5, ax=ax)

ax.grid(True, alpha=0.3)
plt.savefig('visuals/fare_age_scatter.png', dpi=300, bbox_inches='tight')
plt.show()

print(" Fare vs Age Scatter Plot Saved!")


In [None]:
# Build a purely numeric frame: the native numeric columns plus integer
# encodings of sex and port of embarkation.
numeric_cols = ['survived', 'pclass', 'age', 'sibsp', 'parch', 'fare']
df_numeric = df_clean[numeric_cols].assign(
    sex_encoded=df_clean['sex'].map({'male': 0, 'female': 1}),
    embarked_encoded=df_clean['embarked'].map({'S': 0, 'C': 1, 'Q': 2}),
)

correlation_matrix = df_numeric.corr()

fig, ax = plt.subplots(figsize=(14, 12))

# Hide the upper triangle (diagonal included) so each pair appears once.
mask = np.triu(np.ones_like(correlation_matrix, dtype=bool))

heatmap_options = {
    'mask': mask,
    'annot': True,
    'cmap': 'RdYlBu_r',
    'center': 0,
    'square': True,
    'linewidths': 0.5,
    'cbar_kws': {"shrink": .8},
    'fmt': '.2f',
    'annot_kws': {'size': 12, 'weight': 'bold'},
}
sns.heatmap(correlation_matrix, ax=ax, **heatmap_options)

ax.set_title(' Titanic Features Correlation Matrix\n"Uncovering Hidden Relationships"',
             fontsize=18, fontweight='bold', pad=30)

# Slant the x tick labels and keep the y tick labels horizontal for legibility.
plt.xticks(rotation=45, ha='right')
plt.yticks(rotation=0)

plt.tight_layout()
plt.savefig('visuals/correlation_heatmap.png', dpi=300, bbox_inches='tight')
plt.show()

print(" Correlation Heatmap Saved!")


In [None]:
# Left: violin plot of age by survival. Right: age by passenger class, split
# by survival, drawn as box plots with the individual passengers overlaid.
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(20, 8))

sns.violinplot(data=df_clean, x='survived', y='age',
               palette=['#FF6B6B', '#4ECDC4'], ax=ax1)
ax1.set_title(' Age Distribution by Survival Status\n"The Shape of Life and Death"',
              fontsize=16, fontweight='bold', pad=20)
ax1.set_xlabel('Survival Status', fontsize=14)
ax1.set_ylabel('Age (Years)', fontsize=14)
ax1.set_xticklabels(['Died', 'Survived'])

sns.boxplot(data=df_clean, x='pclass', y='age', hue='survived',
            palette=['#FF6B6B', '#4ECDC4'], ax=ax2)
# dodge=True splits the points by hue the same way the boxes are split, so
# each swarm sits on top of its own box instead of straddling both.
sns.swarmplot(data=df_clean, x='pclass', y='age', hue='survived',
              palette=['darkred', 'darkgreen'], size=3, alpha=0.6,
              dodge=True, ax=ax2)

ax2.set_title(' Age Distribution by Class & Survival\n"Class Warfare Visualization"',
              fontsize=16, fontweight='bold', pad=20)
ax2.set_xlabel('Passenger Class', fontsize=14)
ax2.set_ylabel('Age (Years)', fontsize=14)

# Both plot calls register legend entries for hue levels 0 and 1. Pass the
# first pair of labelled handles (the box-plot ones) explicitly: a labels-only
# ax2.legend([...]) call would instead pair the names with whatever artists
# were drawn first, which are not one-per-hue-level.
legend_handles, _ = ax2.get_legend_handles_labels()
ax2.legend(legend_handles[:2], ['Died', 'Survived'], title='Survival Status')

plt.tight_layout()
plt.savefig('visuals/age_violin_plot.png', dpi=300, bbox_inches='tight')
plt.show()

print(" Age Violin Plot Saved!")


In [None]:
# Add human-readable labels for survival outcome and passenger class.
df_sunburst = df_clean.assign(
    survived_label=df_clean['survived'].map({0: 'Died', 1: 'Survived'}),
    class_label='Class ' + df_clean['pclass'].astype(str),
)

# One row per (class, sex, outcome) combination with its passenger count.
hierarchy = ['class_label', 'sex', 'survived_label']
sunburst_data = (
    df_sunburst
    .groupby(hierarchy)
    .size()
    .reset_index(name='count')
)

# Rings from the centre outwards: class -> sex -> outcome; both segment size
# and colour are driven by the passenger count.
fig = px.sunburst(
    sunburst_data,
    path=hierarchy,
    values='count',
    color='count',
    color_continuous_scale='RdYlBu',
    title=' Interactive Titanic Survival Sunburst<br><sub>Hover and Click to Explore!</sub>',
)

layout_options = dict(
    title_font_size=24,
    title_x=0.5,
    width=800,
    height=800,
    font=dict(size=14),
)
fig.update_layout(**layout_options)

fig.write_html('visuals/survival_sunburst.html')
fig.show()

print(" Interactive Sunburst Chart Saved as HTML!")


In [None]:
!pip install ydata-profiling

try:
    from ydata_profiling import ProfileReport
    
    print(" Generating Comprehensive EDA Report...")
    
    profile = ProfileReport(df_clean, 
                          title="Titanic Dataset - Elite EDA Report",
                          explorative=True,
                          dark_mode=True)
    
    profile.to_file("visuals/titanic_eda_report.html")
    
    print(" Comprehensive EDA Report Generated!")
    print(" Saved as: visuals/titanic_eda_report.html")
    
except ImportError:
    print(" ydata-profiling not available. Install with: !pip install ydata-profiling")


In [None]:
print(" TITANIC SURVIVAL ANALYSIS - KEY INSIGHTS")
print("=" * 60)

# Headline figures computed from the cleaned data (rates are percentages).
total_passengers = len(df_clean)
survival_rate = df_clean['survived'].mean() * 100
is_female = df_clean['sex'] == 'female'
is_male = df_clean['sex'] == 'male'
female_survival = df_clean[is_female]['survived'].mean() * 100
male_survival = df_clean[is_male]['survived'].mean() * 100

class_survival = df_clean.groupby('pclass')['survived'].mean() * 100

overall_lines = [
    " OVERALL STATISTICS:",
    f"   • Total Passengers: {total_passengers:,}",
    f"   • Overall Survival Rate: {survival_rate:.1f}%",
    f"   • Female Survival Rate: {female_survival:.1f}%",
    f"   • Male Survival Rate: {male_survival:.1f}%",
]
for line in overall_lines:
    print(line)
print()

print(" CLASS-BASED SURVIVAL:")
for class_num, rate in class_survival.items():
    print(f"   • Class {class_num}: {rate:.1f}%")
print()

# The remaining sections are fixed narrative text; nothing below is computed
# from the data in this cell.
insight_lines = [
    " KEY BUSINESS INSIGHTS:",
    "   •  Gender was the strongest predictor of survival",
    "   •  Higher class passengers had significantly better survival rates",
    "   •  Children had higher survival rates than adults",
    "   •  Port of embarkation showed correlation with survival",
    "   •  Fare paid was strongly correlated with passenger class",
]
for line in insight_lines:
    print(line)
print()

recommendation_lines = [
    " ACTIONABLE RECOMMENDATIONS:",
    "   • Emergency protocols should prioritize women and children",
    "   • Higher-paying passengers require premium safety measures",
    "   • Age-based evacuation strategies could improve outcomes",
    "   • Port-specific safety briefings may be beneficial",
]
for line in recommendation_lines:
    print(line)

portfolio_lines = [
    "\n PORTFOLIO IMPACT:",
    "   • Demonstrates advanced data visualization skills",
    "   • Shows business acumen through actionable insights",
    "   • Exhibits technical proficiency across multiple libraries",
    "   • Proves ability to tell compelling data stories",
]
for line in portfolio_lines:
    print(line)
