# Area Chart - Stacked Proportion Visualization

**Use Case**: Shows how quantities change over time or across categories (composition over time, cumulative values)

This notebook demonstrates how to create effective area charts for visualizing stacked proportions and changes over time.


In [None]:
# Import necessary libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime, timedelta
import warnings
warnings.filterwarnings('ignore')

# Set style for better-looking plots.
# Matplotlib 3.6 renamed its bundled seaborn styles to 'seaborn-v0_8-*'; older
# releases only ship the un-prefixed name. Use whichever one is installed so the
# notebook does not fail on an older environment (if neither exists the default
# Matplotlib style is kept).
for _style_name in ('seaborn-v0_8-whitegrid', 'seaborn-whitegrid'):
    if _style_name in plt.style.available:
        plt.style.use(_style_name)
        break
sns.set_palette("Set2")

# Set random seed for reproducibility (seeds NumPy's legacy global generator,
# which every np.random.normal call below draws from)
np.random.seed(42)

print("Area chart visualization libraries loaded!")


In [None]:
# Create sample datasets for area charts
# 1. Revenue composition over time
# Month-start stamps ('MS'): with the end bound 2023-12-01 this covers all 48
# months from Jan 2020 through Dec 2023. The month-end alias 'M' stopped at
# 2023-11-30 (47 rows, December 2023 missing) and is deprecated in pandas >= 2.2.
dates = pd.date_range('2020-01-01', '2023-12-01', freq='MS')
n_months = len(dates)

# Generate realistic revenue data with trends
base_growth = np.linspace(1, 1.5, n_months)  # Overall growth trend: 1.0x -> 1.5x
seasonal = 1 + 0.2 * np.sin(2 * np.pi * np.arange(n_months) / 12)  # 12-month cycle, +/-20%

# Different product lines with different growth patterns:
# the exponent on base_growth makes a line grow faster (>1) or slower (<1)
# than the overall trend.
product_a = (50000 + np.random.normal(0, 5000, n_months)) * base_growth * seasonal
product_b = (30000 + np.random.normal(0, 3000, n_months)) * (base_growth ** 1.2) * seasonal
product_c = (20000 + np.random.normal(0, 2000, n_months)) * (base_growth ** 0.8) * seasonal
services = (15000 + np.random.normal(0, 1500, n_months)) * (base_growth ** 1.5) * seasonal

# Floor each series so random noise can never produce a tiny or negative value
product_a = np.maximum(product_a, 10000)
product_b = np.maximum(product_b, 5000)
product_c = np.maximum(product_c, 5000)
services = np.maximum(services, 2000)

# One row per month, one column per revenue stream
revenue_data = pd.DataFrame({
    'date': dates,
    'Product A': product_a,
    'Product B': product_b,
    'Product C': product_c,
    'Services': services
})

# 2. Website traffic sources over time
traffic_dates = pd.date_range(start='2023-01-01', end='2023-12-31', freq='D')
n_days = traffic_dates.size

# Multipliers shared by every channel: a 7-day sine cycle (+/-30%) and a
# linear ramp from 1.0x to 1.8x across the date range
day_index = np.arange(n_days)
weekly_pattern = 1 + 0.3 * np.sin(2 * np.pi * day_index / 7)
growth_trend = np.linspace(1, 1.8, n_days)


def _daily_visits(base, noise_sd, trend, floor):
    """Build one daily series: (base + N(0, noise_sd)) * weekly cycle * trend, floored at `floor`."""
    noisy_level = base + np.random.normal(0, noise_sd, n_days)
    return np.maximum(noisy_level * weekly_pattern * trend, floor)


# The channels are generated in this fixed order so the sequence of random
# draws stays the same from run to run under a given seed.
organic = _daily_visits(1000, 100, growth_trend, 100)
social = _daily_visits(800, 150, growth_trend ** 1.3, 50)
paid = _daily_visits(600, 80, growth_trend ** 0.9, 50)
direct = _daily_visits(400, 50, growth_trend ** 0.7, 50)
referral = _daily_visits(200, 30, growth_trend, 10)

# One row per day, one column per traffic source
traffic_data = pd.DataFrame(
    {
        'date': traffic_dates,
        'Organic Search': organic,
        'Social Media': social,
        'Paid Ads': paid,
        'Direct': direct,
        'Referral': referral,
    }
)

# 3. Energy consumption by source
# Month-start stamps ('MS') give the same 24 months (Jan 2022 - Dec 2023) as the
# month-end alias 'M', which is deprecated in pandas >= 2.2.
energy_dates = pd.date_range('2022-01-01', '2023-12-31', freq='MS')
n_energy_months = len(energy_dates)

# Seasonal energy pattern: a single 12-month cosine cycle, so the factor is
# highest at index 0 (January, 1.4x) and lowest six months later (July, 0.6x).
seasonal_factor = 1 + 0.4 * np.cos(2 * np.pi * np.arange(n_energy_months) / 12)
efficiency_trend = np.linspace(1, 0.9, n_energy_months)  # Improving efficiency: 1.0 -> 0.9

# Coal and gas follow the seasonal cycle and shrink with efficiency_trend;
# coal is additionally scaled by a constant 0.8.
coal = (40 + np.random.normal(0, 5, n_energy_months)) * seasonal_factor * efficiency_trend * 0.8
natural_gas = (35 + np.random.normal(0, 4, n_energy_months)) * seasonal_factor * efficiency_trend
# Solar and wind are not seasonal here; dividing by efficiency_trend makes them rise over time.
solar = (10 + np.random.normal(0, 2, n_energy_months)) * (1/efficiency_trend) * 1.5  # Increasing
wind = (8 + np.random.normal(0, 1.5, n_energy_months)) * (1/efficiency_trend) * 1.3  # Increasing
hydro = (5 + np.random.normal(0, 1, n_energy_months)) * seasonal_factor * 0.9  # Seasonal, no trend
nuclear = (2 + np.random.normal(0, 0.5, n_energy_months)) * 0.95  # Flat: noise around a constant level

# Floor each series so random noise can never produce a tiny or negative value
coal = np.maximum(coal, 5)
natural_gas = np.maximum(natural_gas, 5)
solar = np.maximum(solar, 1)
wind = np.maximum(wind, 1)
hydro = np.maximum(hydro, 1)
nuclear = np.maximum(nuclear, 0.5)

# One row per month, one column per energy source
energy_data = pd.DataFrame({
    'date': energy_dates,
    'Coal': coal,
    'Natural Gas': natural_gas,
    'Solar': solar,
    'Wind': wind,
    'Hydro': hydro,
    'Nuclear': nuclear
})

# Summarise the generated frames. Category counts are derived from the frames
# (every column except 'date' is one category) instead of being hard-coded, so
# the message cannot drift if a series is added or removed above.
print("Sample datasets created:")
print(f"Revenue data: {len(revenue_data)} months, {revenue_data.shape[1] - 1} product lines")
print(f"Traffic data: {len(traffic_data)} days, {traffic_data.shape[1] - 1} traffic sources")
print(f"Energy data: {len(energy_data)} months, {energy_data.shape[1] - 1} energy sources")


In [None]:
# Basic area charts: one 2x2 figure whose four panels are filled in below
fig, axes = plt.subplots(nrows=2, ncols=2, figsize=(20, 16))
fig.suptitle('Area Chart Visualizations', fontsize=16, fontweight='bold')

# 1. Stacked area chart - Revenue composition
revenue_columns = ['Product A', 'Product B', 'Product C', 'Services']
ax1 = axes[0, 0]

# Layers are stacked bottom-to-top in the order of revenue_columns
ax1.stackplot(
    revenue_data['date'],
    *[revenue_data[name] for name in revenue_columns],
    labels=revenue_columns,
    alpha=0.8,
)

ax1.set(
    title='Revenue Composition Over Time\n(Stacked Area Chart)',
    xlabel='Date',
    ylabel='Revenue ($)',
)
ax1.legend(loc='upper left')
ax1.tick_params(axis='x', rotation=45)


def _dollars_in_thousands(value, _tick_position):
    """Format a y tick given in dollars as whole thousands, e.g. 125000 -> '$125K'."""
    return f'${value/1000:.0f}K'


# Format y-axis to show values in thousands
ax1.yaxis.set_major_formatter(plt.FuncFormatter(_dollars_in_thousands))

# 2. Percentage area chart - Traffic sources
traffic_columns = ['Organic Search', 'Social Media', 'Paid Ads', 'Direct', 'Referral']
ax2 = axes[0, 1]

# Convert each day's visits to a share of that day's total (rows sum to 100)
traffic_by_source = traffic_data[traffic_columns]
traffic_totals = traffic_by_source.sum(axis='columns')
traffic_percentages = traffic_by_source.div(traffic_totals, axis='index') * 100

ax2.stackplot(
    traffic_data['date'],
    *[traffic_percentages[source] for source in traffic_columns],
    labels=traffic_columns,
    alpha=0.8,
)

ax2.set(
    title='Traffic Source Composition\n(100% Stacked Area Chart)',
    xlabel='Date',
    ylabel='Percentage (%)',
)
ax2.set_ylim(0, 100)
# Anchor the legend just outside the right edge of the axes so it does not cover the data
ax2.legend(loc='upper left', bbox_to_anchor=(1.05, 1))
ax2.tick_params(axis='x', rotation=45)

# 3. Individual area charts (unstacked)
ax3 = axes[1, 0]
revenue_dates = revenue_data['date']

# Every stream is drawn from a zero baseline, so the translucent fills overlap
# rather than stack; the solid line on top marks each stream's actual value.
for column in revenue_columns:
    stream = revenue_data[column]
    ax3.fill_between(revenue_dates, 0, stream, alpha=0.3, label=column)
    ax3.plot(revenue_dates, stream, linewidth=2)

ax3.set(
    title='Individual Product Revenue Trends\n(Overlapping Area Charts)',
    xlabel='Date',
    ylabel='Revenue ($)',
)
ax3.legend()
ax3.tick_params(axis='x', rotation=45)
# Show y ticks as whole thousands of dollars
ax3.yaxis.set_major_formatter(
    plt.FuncFormatter(lambda value, _tick_position: f'${value/1000:.0f}K')
)

# 4. Energy mix with trend annotations
ax4 = axes[1, 1]
# Stacking order, bottom to top. The fossil fuels come first and the three
# renewables are contiguous, which the annotation placement below relies on.
energy_columns = ['Coal', 'Natural Gas', 'Solar', 'Wind', 'Hydro', 'Nuclear']
colors = ['#8B4513', '#FF4500', '#FFD700', '#87CEEB', '#4169E1', '#800080']

ax4.stackplot(energy_data['date'],
              *[energy_data[source] for source in energy_columns],
              labels=energy_columns,
              colors=colors,
              alpha=0.8)

ax4.set_title('Energy Mix Evolution\n(Renewable vs Non-Renewable)')
ax4.set_xlabel('Date')
ax4.set_ylabel('Energy Units')
ax4.legend(loc='upper left', bbox_to_anchor=(1.05, 1))
ax4.tick_params(axis='x', rotation=45)

# Add trend annotations.
# The y positions are derived from the stacked data instead of fixed numbers:
# the running sum across energy_columns gives the top edge of every layer, so
# the fossil band spans 0 .. top of 'Natural Gas' and the renewable band spans
# top of 'Natural Gas' .. top of 'Hydro'. Each arrow runs along the middle of
# the band it describes, from the label position to the last data point.
layer_tops = energy_data[energy_columns].cumsum(axis=1)
fossil_top = layer_tops['Natural Gas']
renewable_top = layer_tops['Hydro']
fossil_mid = fossil_top / 2
renewable_mid = (fossil_top + renewable_top) / 2

tip_date = energy_data['date'].iloc[-1]     # arrow head: last observation
label_date = energy_data['date'].iloc[-10]  # label: ten observations from the end

ax4.annotate('Renewables\nIncreasing',
            xy=(tip_date, renewable_mid.iloc[-1]),
            xytext=(label_date, renewable_mid.iloc[-10]),
            arrowprops=dict(arrowstyle='->', color='green', lw=2),
            fontsize=10, color='green', fontweight='bold', va='center')

ax4.annotate('Fossil Fuels\nDecreasing',
            xy=(tip_date, fossil_mid.iloc[-1]),
            xytext=(label_date, fossil_mid.iloc[-10]),
            arrowprops=dict(arrowstyle='->', color='red', lw=2),
            fontsize=10, color='red', fontweight='bold', va='center')

plt.tight_layout()
plt.show()


In [None]:
# Advanced area chart techniques: a second 2x2 figure
fig, axes = plt.subplots(nrows=2, ncols=2, figsize=(20, 16))
fig.suptitle('Advanced Area Chart Techniques', fontsize=16, fontweight='bold')

# 1. Area chart with confidence bands
ax1 = axes[0, 0]

# Centered 6-month rolling statistics of total revenue; the first and last few
# points have no full window and come out as NaN.
window = 6
revenue_total = revenue_data[revenue_columns].sum(axis=1)
centered_window = revenue_total.rolling(window=window, center=True)
rolling_mean = centered_window.mean()
rolling_std = centered_window.std()

# Band edges: rolling mean +/- 1.96 rolling standard deviations
upper_band = rolling_mean + 1.96 * rolling_std
lower_band = rolling_mean - 1.96 * rolling_std

revenue_dates = revenue_data['date']

# Raw series, its smoothed version, then the shaded band between the two edges
ax1.plot(revenue_dates, revenue_total, 'b-', linewidth=2, label='Actual Revenue', alpha=0.8)
ax1.plot(revenue_dates, rolling_mean, 'r-', linewidth=2, label='6-Month Average')
ax1.fill_between(
    revenue_dates,
    lower_band,
    upper_band,
    alpha=0.3,
    color='red',
    label='95% Confidence Band',
)

ax1.set(
    title='Total Revenue with Confidence Bands',
    xlabel='Date',
    ylabel='Total Revenue ($)',
)
ax1.legend()
ax1.tick_params(axis='x', rotation=45)
# Show y ticks as whole thousands of dollars
ax1.yaxis.set_major_formatter(
    plt.FuncFormatter(lambda value, _tick_position: f'${value/1000:.0f}K')
)

# 2. Streamgraph (symmetric area chart)
ax2 = axes[0, 1]

# Create symmetric areas around center line
traffic_columns_subset = ['Organic Search', 'Social Media', 'Paid Ads']
traffic_subset = traffic_data[traffic_columns_subset]

# Normalize each day to proportions of the three-source total, then take the
# running sum across sources: cumulative[col] is the top edge of that layer
# when stacked from zero.
traffic_normalized = traffic_subset.div(traffic_subset.sum(axis=1), axis=0)
cumulative = traffic_normalized.cumsum(axis=1)

# Center the streamgraph: shift the whole stack down by half of its total
# height so it extends equally above and below y=0. (Centering only the first
# layer, as before, left the stack lopsided: -n0/2 .. 1 - n0/2.)
baseline = -cumulative.iloc[:, -1] / 2

for col in traffic_columns_subset:
    top = baseline + cumulative[col]
    bottom = top - traffic_normalized[col]

    ax2.fill_between(traffic_data['date'], bottom, top,
                    alpha=0.7, label=col)

ax2.set_title('Traffic Sources Streamgraph\n(Symmetric Area Chart)')
ax2.set_xlabel('Date')
ax2.set_ylabel('Relative Proportion')
ax2.legend()
ax2.tick_params(axis='x', rotation=45)
# Faint center line marking the axis of symmetry
ax2.axhline(y=0, color='black', linestyle='-', alpha=0.3)

# 3. Ridgeline plot (stacked areas with offset)
ax3 = axes[1, 0]

# Average each energy source per calendar month (index = month number 1..12)
energy_monthly_avg = energy_data.groupby(energy_data['date'].dt.month)[energy_columns].mean()
months = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
          'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']

# One viridis colour per calendar month; x runs over the energy sources
colors = plt.cm.viridis(np.linspace(0, 1, len(months)))
x_positions = range(len(energy_columns))

offset = 0
for i, month_label in enumerate(months):
    month_number = i + 1
    if month_number not in energy_monthly_avg.index:
        continue  # no data for this calendar month: draw nothing, keep the offset

    # Ridge for this month: its profile lifted onto the current baseline
    ridge_top = offset + energy_monthly_avg.loc[month_number].values
    ax3.fill_between(x_positions, offset, ridge_top,
                     alpha=0.7, color=colors[i], label=month_label)
    ax3.plot(x_positions, ridge_top, color='black', alpha=0.5)

    offset += 20  # raise the baseline for the next ridge

ax3.set(
    title='Energy Consumption by Month\n(Ridgeline Plot)',
    xlabel='Energy Source',
    ylabel='Consumption (stacked by month)',
)
ax3.set_xticks(range(len(energy_columns)))
ax3.set_xticklabels(energy_columns, rotation=45)
ax3.legend(bbox_to_anchor=(1.05, 1), loc='upper left')

# 4. Area chart with annotations and milestones
ax4 = axes[1, 1]

# Plot revenue composition with milestones
ax4.stackplot(revenue_data['date'],
              revenue_data['Product A'],
              revenue_data['Product B'],
              revenue_data['Product C'],
              revenue_data['Services'],
              labels=revenue_columns,
              alpha=0.7)

# Add milestone annotations: (date, short label drawn on the chart, longer description)
milestones = [
    (datetime(2020, 6, 1), 'Product B Launch', 'Product B launch drives growth'),
    (datetime(2021, 3, 1), 'Expansion', 'International expansion begins'),
    (datetime(2022, 9, 1), 'New Services', 'Services division launched'),
    (datetime(2023, 6, 1), 'Peak Season', 'Record quarterly performance')
]

# Height of the full stack at every date; arrows point at the top of the stack
total_revenue = revenue_data[revenue_columns].sum(axis=1)
for date, title, _description in milestones:  # the description is not drawn
    # Find closest date in data. idxmin() returns an index *label*, so look the
    # value up with .loc; .iloc would only agree on a default 0..n-1 index.
    closest_label = (revenue_data['date'] - date).abs().idxmin()
    revenue_at_date = total_revenue.loc[closest_label]

    # Label sits 50,000 above the stack with an arrow down to its top edge
    ax4.annotate(title,
                xy=(date, revenue_at_date),
                xytext=(date, revenue_at_date + 50000),
                arrowprops=dict(arrowstyle='->', color='red', lw=1.5),
                fontsize=9, fontweight='bold', color='red',
                ha='center')

    # Add vertical line
    ax4.axvline(x=date, color='red', linestyle='--', alpha=0.5)

ax4.set_title('Revenue Growth with Key Milestones')
ax4.set_xlabel('Date')
ax4.set_ylabel('Revenue ($)')
ax4.legend(loc='upper left')
ax4.tick_params(axis='x', rotation=45)
ax4.yaxis.set_major_formatter(plt.FuncFormatter(lambda x, p: f'${x/1000:.0f}K'))

plt.tight_layout()
plt.show()


In [None]:
# Statistical analysis of area chart data
print("Area Chart Data Statistical Analysis:")
print("=" * 50)

# 1. Revenue Analysis
print("1. REVENUE COMPOSITION ANALYSIS:")

# Lifetime total per product line, and the grand total across all lines
revenue_totals = revenue_data[revenue_columns].sum()
total_revenue_all = revenue_totals.sum()

print(f"   Total Revenue (All Time): ${total_revenue_all/1000000:.2f}M")
print("   Product Line Contribution:")
ranked_totals = revenue_totals.sort_values(ascending=False)
for line_name, line_total in ranked_totals.items():
    share_pct = (line_total / total_revenue_all) * 100
    print(f"     {line_name}: ${line_total/1000000:.2f}M ({share_pct:.1f}%)")

# Growth analysis: helper columns are stored on revenue_data itself
monthly_total = revenue_data[revenue_columns].sum(axis=1)
revenue_data['total'] = monthly_total
revenue_data['month'] = revenue_data['date'].dt.to_period('M')

# Month-over-month change in percent (the first row has no predecessor -> NaN,
# which mean() skips)
revenue_data['growth_rate'] = revenue_data['total'].pct_change() * 100
avg_growth_rate = revenue_data['growth_rate'].mean()
print(f"   Average Monthly Growth Rate: {avg_growth_rate:.2f}%")

# Seasonal analysis: mean total revenue per calendar month (1..12)
calendar_month = revenue_data['date'].dt.month
revenue_by_month = revenue_data.groupby(calendar_month)['total'].mean()
peak_month = revenue_by_month.idxmax()
low_month = revenue_by_month.idxmin()
# Peak-to-trough spread relative to the average month, in percent
seasonal_variation = ((revenue_by_month.max() - revenue_by_month.min()) / revenue_by_month.mean()) * 100

print("   Seasonal Pattern:")
print(f"     Peak month: {peak_month} (${revenue_by_month[peak_month]/1000:.0f}K avg)")
print(f"     Low month: {low_month} (${revenue_by_month[low_month]/1000:.0f}K avg)")
print(f"     Seasonal variation: {seasonal_variation:.1f}%")

# 2. Traffic Source Analysis
print(f"\n2. WEBSITE TRAFFIC ANALYSIS:")
# Total visits per source over the whole period, and the grand total
traffic_totals = traffic_data[traffic_columns].sum()
total_traffic_all = traffic_totals.sum()

print(f"   Total Traffic (All Time): {total_traffic_all:,.0f} visitors")
print(f"   Traffic Source Breakdown:")
for source, total in traffic_totals.sort_values(ascending=False).items():
    percentage = (total / total_traffic_all) * 100
    print(f"     {source}: {total:,.0f} ({percentage:.1f}%)")

# Traffic growth analysis: compare 7-day averages so the weekly cycle cancels out
traffic_data['total_traffic'] = traffic_data[traffic_columns].sum(axis=1)
traffic_data['weekly_avg'] = traffic_data['total_traffic'].rolling(window=7).mean()

# A 7-day window first has a full set of values on the 7th row (position 6);
# take the first/last non-NaN entries rather than hard-coding a position
# (position 7 was already the second valid average).
valid_weekly_avg = traffic_data['weekly_avg'].dropna()
start_traffic = valid_weekly_avg.iloc[0]  # First valid rolling average
end_traffic = valid_weekly_avg.iloc[-1]
traffic_growth = ((end_traffic - start_traffic) / start_traffic) * 100

print(f"   Annual Traffic Growth: {traffic_growth:.1f}%")

# Day of week analysis (dt.dayofweek: Monday=0 ... Sunday=6, matching `days`)
traffic_data['day_of_week'] = traffic_data['date'].dt.dayofweek
traffic_by_dow = traffic_data.groupby('day_of_week')['total_traffic'].mean()
days = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday']

print(f"   Traffic by Day of Week:")
for i, day in enumerate(days):
    avg_traffic = traffic_by_dow.loc[i]  # label lookup: i is the dayofweek value
    print(f"     {day}: {avg_traffic:,.0f} avg visitors")

# 3. Energy Mix Analysis
print(f"\n3. ENERGY SOURCE ANALYSIS:")
# Total per source over the whole period, and the grand total
energy_totals = energy_data[energy_columns].sum()
total_energy_all = energy_totals.sum()

# Categorize energy sources (together the two lists cover every energy column)
renewable_sources = ['Solar', 'Wind', 'Hydro']
non_renewable_sources = ['Coal', 'Natural Gas', 'Nuclear']

renewable_total = energy_data[renewable_sources].sum().sum()
non_renewable_total = energy_data[non_renewable_sources].sum().sum()

print(f"   Total Energy Production: {total_energy_all:.0f} units")
print(f"   Energy Mix:")
print(f"     Renewable: {renewable_total:.0f} units ({(renewable_total/total_energy_all)*100:.1f}%)")
print(f"     Non-Renewable: {non_renewable_total:.0f} units ({(non_renewable_total/total_energy_all)*100:.1f}%)")

# Trend analysis for renewables vs non-renewables: per-month group totals and
# the renewable share of each month, stored as helper columns on energy_data
energy_data['renewable_total'] = energy_data[renewable_sources].sum(axis=1)
energy_data['non_renewable_total'] = energy_data[non_renewable_sources].sum(axis=1)
energy_data['renewable_percentage'] = (energy_data['renewable_total'] / 
                                     (energy_data['renewable_total'] + energy_data['non_renewable_total'])) * 100

# Change in renewable share between the first and the last month
start_renewable_pct = energy_data['renewable_percentage'].iloc[0]
end_renewable_pct = energy_data['renewable_percentage'].iloc[-1]
renewable_growth = end_renewable_pct - start_renewable_pct

print(f"   Renewable Energy Transition:")
print(f"     Starting renewable %: {start_renewable_pct:.1f}%")
print(f"     Ending renewable %: {end_renewable_pct:.1f}%")
# '+' format flag: the sign comes from the value, so a falling share prints
# '-1.2' instead of the malformed '+-1.2' a literal '+' would produce.
print(f"     Change: {renewable_growth:+.1f} percentage points")

# Individual source trends: relative change of each source, first vs last month
print(f"   Individual Source Trends (first vs last month):")
for source in energy_columns:
    start_value = energy_data[source].iloc[0]
    end_value = energy_data[source].iloc[-1]
    change = ((end_value - start_value) / start_value) * 100
    trend = "↑" if change > 0 else "↓" if change < 0 else "→"
    print(f"     {source}: {change:+.1f}% {trend}")

# 4. Area Chart Effectiveness Analysis
print("\n4. AREA CHART EFFECTIVENESS METRICS:")

# Calculate data density and overlap
print("   Data Visualization Metrics:")

# For revenue data - check for overlapping trends.
# Walk the upper triangle of the correlation matrix so that every unordered
# pair of revenue streams is inspected exactly once, then keep the pairs whose
# absolute correlation exceeds 0.7.
revenue_correlations = revenue_data[revenue_columns].corr()
n_streams = len(revenue_columns)
stream_pairs = [
    (revenue_columns[i], revenue_columns[j], revenue_correlations.iloc[i, j])
    for i in range(n_streams)
    for j in range(i + 1, n_streams)
]
high_correlations = [pair for pair in stream_pairs if abs(pair[2]) > 0.7]

if not high_correlations:
    print("   Revenue streams show good visual separation (low correlation)")
else:
    print("   High correlations in revenue data (may cause visual overlap):")
    for col1, col2, corr in high_correlations:
        print(f"     {col1} ↔ {col2}: {corr:.3f}")

# Volatility analysis: average coefficient of variation (std / mean) over the
# category columns of each dataset
print("\n   Data Volatility (affects area chart readability):")
datasets = [
    ('Revenue', revenue_data, revenue_columns),
    ('Traffic', traffic_data, traffic_columns),
    ('Energy', energy_data, energy_columns),
]
for dataset_name, data, columns in datasets:
    volatilities = [data[col].std() / data[col].mean() for col in columns]
    avg_volatility = np.mean(volatilities)
    print(f"     {dataset_name}: {avg_volatility:.3f} (lower = smoother areas)")

# Closing reference notes: each section is a heading followed by its bullet
# lines, printed one per line in the order listed.
summary_sections = (
    ("\nArea Chart Design Guidelines:", (
        "✓ Use stacked areas for part-to-whole relationships",
        "✓ Use percentage stacking when proportions matter more than totals",
        "✓ Limit to 5-7 categories to avoid visual clutter",
        "✓ Order categories by size or importance",
        "✓ Use consistent colors across related charts",
        "✓ Add transparency (alpha) to reduce visual weight",
        "✓ Consider streamgraphs for symmetric visualization",
        "✓ Include trend lines for key insights",
    )),
    ("\nWhen to Use Area Charts:", (
        "• Time series data with multiple categories",
        "• Showing composition changes over time",
        "• Cumulative values or running totals",
        "• Part-to-whole relationships with temporal aspect",
        "• Budget breakdowns over time",
        "• Resource allocation visualization",
    )),
    ("\nArea Chart Limitations:", (
        "• Difficult to read exact values",
        "• Bottom categories easier to interpret than middle ones",
        "• Can be misleading if categories have very different scales",
        "• Not suitable for negative values (without special handling)",
    )),
)

for heading, bullet_lines in summary_sections:
    print(heading)
    for bullet_line in bullet_lines:
        print(bullet_line)
