# data_visualization_project.py

This notebook was automatically converted from a Python script.

In [None]:
"""
Seaborn 101: Data Visualization Project
=======================================

This script demonstrates a complete data visualization project using Seaborn.
"""

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from matplotlib.colors import LinearSegmentedColormap
import os



Create output directory


In [None]:
# Directory that receives every figure saved by this project.
output_dir = "project_outputs"
# exist_ok=True creates the directory when missing and is a no-op when it is
# already there, avoiding the check-then-create race of a separate exists() test.
os.makedirs(output_dir, exist_ok=True)



Set the aesthetic style and context


In [None]:
# Select the Seaborn theme ("whitegrid" style at the "talk" context), then set
# the Matplotlib defaults for figure size and for how savefig writes files.
sns.set_theme(context="talk", style="whitegrid")
plt.rcParams.update({
    'figure.figsize': (12, 8),
    'savefig.dpi': 100,
    'savefig.bbox': 'tight',
})



Load and prepare data


In [None]:
# Fetch both example datasets through Seaborn's dataset loader.
titanic, diamonds = (
    sns.load_dataset(dataset_name) for dataset_name in ("titanic", "diamonds")
)

# Print a banner, then the first rows and the shape of each dataset in turn.
print("Project: Analyzing Titanic and Diamond Datasets")
print("-" * 50)
for heading, frame in (
    ("\nTitanic Dataset Overview:", titanic),
    ("\nDiamonds Dataset Overview:", diamonds),
):
    print(heading)
    print(frame.head())
    print(f"\nShape: {frame.shape}")



Data Preparation


Handle missing values in Titanic dataset


In [None]:
# Missing ages are replaced by the column median, and missing embarkation
# ports by the most frequent port (first entry of mode()).
titanic['age'] = titanic['age'].fillna(titanic['age'].median())
titanic['embarked'] = titanic['embarked'].fillna(titanic['embarked'].mode()[0])

# When 'deck' is a categorical column (which is how Seaborn's titanic loader
# delivers it), fillna() raises for a value that is not one of the declared
# categories, so 'Unknown' has to be registered as a category first. A plain
# object column skips that step and is filled directly.
deck = titanic['deck']
if isinstance(deck.dtype, pd.CategoricalDtype) and 'Unknown' not in deck.cat.categories:
    deck = deck.cat.add_categories('Unknown')
titanic['deck'] = deck.fillna('Unknown')



Create age categories


In [None]:
# Bin edges and the label for each interval between consecutive edges
# (five labels for six edges).
age_bin_edges = [0, 12, 18, 35, 60, 100]
age_bin_labels = ['Child', 'Teenager', 'Young Adult', 'Adult', 'Senior']

# Bucket every passenger's age into one of the labelled intervals.
titanic['age_group'] = pd.cut(
    titanic['age'], bins=age_bin_edges, labels=age_bin_labels
)



Create fare categories


In [None]:
# One label per fare quartile, cheapest first.
fare_labels = ['Budget', 'Economy', 'Standard', 'Premium']

# Quantile-based binning: the number of quantiles equals the number of labels (4).
titanic['fare_category'] = pd.qcut(
    titanic['fare'], q=len(fare_labels), labels=fare_labels
)



Part 1: Titanic Survival Analysis
---------------------------------


1. Survival by passenger class and gender


In [None]:
# Open the 2x2 figure that the four Titanic panels are drawn on.
plt.figure(figsize=(14, 10))

# Panel 1 (top-left): mean of 'survived' per class, split by sex.
class_gender_ax = plt.subplot(2, 2, 1)
sns.barplot(
    data=titanic,
    x='class',
    y='survived',
    hue='sex',
    palette='viridis',
    ax=class_gender_ax,
)
class_gender_ax.set_title('Survival Rate by Class and Gender')
class_gender_ax.set_xlabel('Passenger Class')
class_gender_ax.set_ylabel('Survival Rate')



2. Age distribution of passengers


In [None]:
# Panel 2 (top-right): stacked age histogram coloured by survival, with a
# dashed vertical line at the median age.
age_ax = plt.subplot(2, 2, 2)
sns.histplot(
    data=titanic,
    x='age',
    hue='survived',
    multiple='stack',
    bins=20,
    palette='coolwarm',
    ax=age_ax,
)
median_line = age_ax.axvline(
    x=titanic['age'].median(), color='black', linestyle='--', label='Median Age'
)
age_ax.set_title('Age Distribution by Survival')

# Calling legend() with labels only pairs them with the first artists found on
# the axes (histogram bars), so the median-line label would end up on a bar.
# Instead, reuse the handles of the hue legend Seaborn already drew and put the
# median line next to them explicitly.
hue_legend = age_ax.get_legend()
if hue_legend is None:
    hue_handles, hue_labels = [], []
else:
    # Matplotlib renamed `legendHandles` to `legend_handles`; support both.
    hue_handles = getattr(hue_legend, 'legend_handles', None)
    if hue_handles is None:
        hue_handles = hue_legend.legendHandles
    # Translate the raw 0/1 hue levels into readable names; anything
    # unexpected keeps its original text.
    survival_names = {'0': 'Died', '1': 'Survived'}
    hue_labels = [
        survival_names.get(entry.get_text(), entry.get_text())
        for entry in hue_legend.get_texts()
    ]
age_ax.legend(
    handles=[median_line, *hue_handles],
    labels=['Median Age', *hue_labels],
)



3. Survival by age group and class


In [None]:
# Panel 3 (bottom-left): mean of 'survived' for every age-group/class pair,
# drawn as an annotated heatmap.
heatmap_ax = plt.subplot(2, 2, 3)
survival_by_age_and_class = titanic.pivot_table(
    values='survived',
    index='age_group',
    columns='class',
    aggfunc='mean',
)
sns.heatmap(
    survival_by_age_and_class,
    annot=True,
    fmt='.2f',
    cmap='YlGnBu',
    ax=heatmap_ax,
)
heatmap_ax.set_title('Survival Rate by Age Group and Class')



4. Fare vs. Survival


In [None]:
# Panel 4 (bottom-right): fare distribution per survival outcome on a log axis.
fare_ax = plt.subplot(2, 2, 4)
sns.boxplot(data=titanic, x='survived', y='fare', palette='Set2', ax=fare_ax)
fare_ax.set_title('Fare Distribution by Survival')
fare_ax.set_yscale('log')
fare_ax.set_ylabel('Fare (log scale)')
fare_ax.set_xlabel('Survived')

# Lay out the figure this panel belongs to, write it to disk and close it.
survival_figure = fare_ax.figure
survival_figure.tight_layout()
survival_figure.savefig(os.path.join(output_dir, 'titanic_survival_analysis.png'))
plt.close(survival_figure)



5. Complex visualization: Survival patterns by multiple factors


In [None]:
# One bar-chart facet per embarkation port: survival rate by class, split by sex.
# NOTE(review): `ci` is reported as deprecated by newer Seaborn releases in
# favour of `errorbar`; it is kept as-is here -- confirm the targeted version.
multivariate_path = os.path.join(output_dir, 'titanic_multivariate.png')
g = sns.catplot(
    data=titanic,
    x='class',
    y='survived',
    hue='sex',
    col='embarked',
    kind='bar',
    ci=None,
    palette='dark',
    alpha=0.8,
    height=6,
    aspect=0.7,
)
g.set_axis_labels('Passenger Class', 'Survival Rate')
# y > 1 places the overall title above the facet titles.
g.fig.suptitle('Survival Rate by Class, Gender, and Embarkation Port', y=1.05)
g.savefig(multivariate_path)
plt.close()



Part 2: Diamond Price Analysis
------------------------------


1. Price distribution by cut, color, and clarity


In [None]:
# Three stacked box plots of price, one per grading attribute.
plt.figure(figsize=(16, 12))

# (grouping column, palette, panel title) for each row, top to bottom.
price_panels = (
    ('cut', 'Spectral', 'Diamond Price by Cut Quality'),
    ('color', 'RdYlBu', 'Diamond Price by Color Grade'),
    ('clarity', 'viridis', 'Diamond Price by Clarity Grade'),
)
for row, (column, palette_name, panel_title) in enumerate(price_panels, start=1):
    plt.subplot(len(price_panels), 1, row)
    sns.boxplot(x=column, y='price', data=diamonds, palette=palette_name)
    plt.title(panel_title)
    plt.ylabel('Price (USD)')

plt.tight_layout()
plt.savefig(os.path.join(output_dir, 'diamond_price_factors.png'))
plt.close()



2. Relationship between carat and price with facets


In [None]:
# Regression plots of price against carat: one facet per color grade (wrapped
# after four columns), one fitted line per cut. Only a random sample of 1000
# rows is plotted to keep rendering fast.
carat_price_sample = diamonds.sample(1000)
carat_price_grid = sns.lmplot(
    data=carat_price_sample,
    x='carat',
    y='price',
    hue='cut',
    col='color',
    col_wrap=4,
    palette='coolwarm',
    height=3,
    aspect=1.2,
    scatter_kws={'alpha': 0.5, 's': 30},
    line_kws={'linewidth': 2},
)
carat_price_figure = carat_price_grid.fig
carat_price_figure.suptitle('Carat vs. Price by Cut and Color', y=1.02)
carat_price_figure.savefig(
    os.path.join(output_dir, 'diamond_carat_price_relationship.png')
)
plt.close()



3. Create a custom paired plot


In [None]:
# Keyword arguments for the off-diagonal panels. `scatter_kws` and `line_kws`
# are options of Seaborn's regression plot, so they are only valid together
# with kind='reg' below.
custom_params = {
    'scatter_kws': {'alpha': 0.3, 's': 10},
    'line_kws': {'linewidth': 1},
}

# Pairwise relationships between four numeric attributes on a 1000-row random
# sample, coloured by cut, with KDE curves on the diagonal.
# kind='reg' is required: with the default scatter kind, `plot_kws` would be
# forwarded to the scatter plot, which does not accept `scatter_kws` or
# `line_kws` and fails on them.
g = sns.pairplot(
    diamonds.sample(1000),
    vars=['carat', 'depth', 'table', 'price'],
    hue='cut',
    palette='Set1',
    kind='reg',
    diag_kind='kde',
    height=2.5,
    plot_kws=custom_params
)
g.fig.suptitle('Relationships Between Diamond Attributes', y=1.02)
g.savefig(os.path.join(output_dir, 'diamond_attributes_pairplot.png'))
plt.close()



4. Custom heatmap of correlations


In [None]:
# Pairwise correlations between the numeric diamond columns only.
numeric_diamonds = diamonds.select_dtypes(include=[np.number])
corr = numeric_diamonds.corr()

# Boolean mask that is True on and above the diagonal, so the heatmap shows
# only the lower triangle of the symmetric matrix.
mask = np.triu(np.ones(corr.shape, dtype=bool))



Custom colormap


In [None]:
# Diverging colormap running blue -> light grey -> red, interpolated over 256 steps.
diverging_colors = ['#3498db', '#f1f1f1', '#e74c3c']
cmap = LinearSegmentedColormap.from_list('custom_diverging', diverging_colors, N=256)

# Heatmap settings: a fixed [-1, 1] colour range centred on 0, square cells
# with thin separators, a slightly shrunk colour bar, and two-decimal labels.
heatmap_options = {
    'mask': mask,
    'cmap': cmap,
    'vmin': -1.0,
    'vmax': 1.0,
    'center': 0,
    'square': True,
    'linewidths': .5,
    'cbar_kws': {'shrink': .8},
    'annot': True,
    'fmt': '.2f',
}

correlation_figure = plt.figure(figsize=(10, 8))
sns.heatmap(corr, **heatmap_options)
plt.title('Correlation Matrix of Diamond Attributes')
correlation_figure.tight_layout()
correlation_figure.savefig(os.path.join(output_dir, 'diamond_correlation_matrix.png'))
plt.close(correlation_figure)

# Final status messages.
print(f"\nProject completed successfully! All visualizations saved to '{output_dir}' directory.")
print("This project demonstrates a comprehensive data visualization workflow using Seaborn.")
