# Grouped/Stacked Bar Chart - Categories × Categories

**Use Case**: Compare a numeric measure across two categorical variables at once (e.g., quarterly sales by product, or ratings by age group and gender)

This notebook demonstrates how to create effective grouped and stacked bar charts for comparing multiple categorical variables.


In [None]:
# Import necessary libraries
import warnings

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from scipy.stats import ttest_ind
# Suppress every warning category so the cell outputs show only the charts
warnings.filterwarnings(action='ignore')

# Plot appearance: dark-grid matplotlib style plus the "husl" seaborn palette
plt.style.use('seaborn-v0_8-darkgrid')
sns.set_palette(palette="husl")

# Seed NumPy's global generator so the random draws are reproducible
RANDOM_SEED = 42
np.random.seed(RANDOM_SEED)


In [None]:
# Sample data for grouped/stacked bars

# Quarterly sales by product: one list per product, one entry per quarter
# (plotted below on an axis labelled "thousands $").
quarters = [f'Q{number}' for number in range(1, 5)]
sales_data = {
    'Product A': [120, 135, 155, 145],
    'Product B': [90, 95, 120, 110],
    'Product C': [75, 85, 90, 95],
}
# Product names are taken from the sales table so the two cannot drift apart.
products = list(sales_data)

# Average ratings per age group, split by gender (lists align with age_groups)
age_groups = ['18-24', '25-34', '35-44', '45+']
male_ratings, female_ratings = (
    [4.2, 4.5, 4.3, 4.1],
    [4.6, 4.4, 4.5, 4.3],
)

# Create figure with subplots: a 2x2 grid, one panel per chart variant
fig, ((ax1, ax2), (ax3, ax4)) = plt.subplots(2, 2, figsize=(16, 12))

# Grouped bar chart: three bars per quarter, shifted left / centre / right
# of the quarter's tick position by one bar width.
x = np.arange(len(quarters))
width = 0.25

grouped_series = (
    (-width, 'Product A', '#1f77b4'),
    (0, 'Product B', '#ff7f0e'),
    (width, 'Product C', '#2ca02c'),
)
for shift, series_name, series_color in grouped_series:
    ax1.bar(x + shift, sales_data[series_name], width,
            label=series_name, color=series_color)

# Titles, axis labels and quarter names on the ticks
ax1.set_title('Quarterly Sales by Product', fontsize=14, fontweight='bold')
ax1.set_xlabel('Quarter')
ax1.set_ylabel('Sales (thousands $)')
ax1.set_xticks(x)
ax1.set_xticklabels(quarters)
ax1.legend()
ax1.grid(True, axis='y', alpha=0.3)

# Stacked bar chart
# Each layer starts where the running total of the layers beneath it ends:
# Product B sits on Product A, Product C sits on A + B.
bottom_b = np.array(sales_data['Product A'])
bottom_c = bottom_b + np.array(sales_data['Product B'])

stacked_layers = (
    ('Product A', None, '#9467bd'),      # bottom=None is bar()'s default baseline
    ('Product B', bottom_b, '#8c564b'),
    ('Product C', bottom_c, '#e377c2'),
)
for layer_name, layer_bottom, layer_color in stacked_layers:
    ax2.bar(quarters, sales_data[layer_name], bottom=layer_bottom,
            label=layer_name, color=layer_color)

ax2.set_title('Quarterly Sales - Stacked View', fontsize=14, fontweight='bold')
ax2.set_xlabel('Quarter')
ax2.set_ylabel('Sales (thousands $)')
ax2.legend()
ax2.grid(True, axis='y', alpha=0.3)

# Grouped bar with different categories
# Two bars per age group, placed half a bar width either side of the tick.
x2 = np.arange(len(age_groups))
width2 = 0.35
half_width = width2 / 2

rating_series = (
    (x2 - half_width, male_ratings, 'Male', 'steelblue'),
    (x2 + half_width, female_ratings, 'Female', 'coral'),
)
for bar_positions, ratings, gender, gender_color in rating_series:
    ax3.bar(bar_positions, ratings, width2, label=gender, color=gender_color)

ax3.set_title('Ratings by Age Group and Gender', fontsize=14, fontweight='bold')
ax3.set_xlabel('Age Group')
ax3.set_ylabel('Average Rating')
ax3.set_xticks(x2)
ax3.set_xticklabels(age_groups)
ax3.legend()
ax3.grid(True, axis='y', alpha=0.3)

# 100% stacked bar (normalized)
# Per-quarter total across the three products; every share below is a
# percentage of the matching quarter's total.
total_sales = [
    a + b + c
    for a, b, c in zip(sales_data['Product A'],
                       sales_data['Product B'],
                       sales_data['Product C'])
]


def _percent_of_quarter(values):
    """Return each value as a percentage of that quarter's total sales."""
    return [value / quarter_total * 100
            for value, quarter_total in zip(values, total_sales)]


prod_a_norm = _percent_of_quarter(sales_data['Product A'])
prod_b_norm = _percent_of_quarter(sales_data['Product B'])
prod_c_norm = _percent_of_quarter(sales_data['Product C'])

# Product C is stacked on top of the combined A + B percentages
bottom_c_norm = [a + b for a, b in zip(prod_a_norm, prod_b_norm)]

ax4.bar(quarters, prod_a_norm, label='Product A', color='#1f77b4')
ax4.bar(quarters, prod_b_norm, bottom=prod_a_norm, label='Product B', color='#ff7f0e')
ax4.bar(quarters, prod_c_norm, bottom=bottom_c_norm, label='Product C', color='#2ca02c')

ax4.set_title('Quarterly Sales - 100% Stacked', fontsize=14, fontweight='bold')
ax4.set_xlabel('Quarter')
ax4.set_ylabel('Percentage (%)')
ax4.legend()
ax4.grid(True, axis='y', alpha=0.3)

# Lay out all four panels and render the figure
plt.tight_layout()
plt.show()


In [None]:
# Advanced grouped/stacked chart examples using Seaborn
# Create more complex dataset: one row per (department, quarter, metric).
# Re-seed so this cell yields the same values whenever it is run.
np.random.seed(42)

# Generate department performance data
departments = ['Sales', 'Marketing', 'Engineering', 'HR']
quarters_long = ['Q1', 'Q2', 'Q3', 'Q4']
metrics = ['Revenue', 'Customer_Satisfaction', 'Employee_Satisfaction']

# Lookup tables are constant, so they are built once here rather than on
# every iteration of the innermost loop.
base_values = {
    'Revenue': {'Sales': 150, 'Marketing': 80, 'Engineering': 200, 'HR': 50},
    'Customer_Satisfaction': {'Sales': 4.2, 'Marketing': 4.0, 'Engineering': 4.5, 'HR': 4.1},
    'Employee_Satisfaction': {'Sales': 3.8, 'Marketing': 4.2, 'Engineering': 4.3, 'HR': 4.4}
}
quarter_multipliers = {'Q1': 0.9, 'Q2': 1.0, 'Q3': 1.1, 'Q4': 1.2}

# NOTE: the loop nesting order (department -> quarter -> metric) fixes the
# order in which random numbers are drawn; changing it changes the data.
data = []
for dept in departments:
    for quarter in quarters_long:
        for metric in metrics:
            base = base_values[metric][dept]
            if metric == 'Revenue':
                # Add quarterly variation plus noise scaled to 10% of the base
                q_multiplier = quarter_multipliers[quarter]
                value = base * q_multiplier + np.random.normal(0, base * 0.1)
            else:
                # Ratings with less variation
                value = base + np.random.normal(0, 0.2)
                value = np.clip(value, 1, 5)  # Keep ratings in 1-5 range

            data.append({
                'Department': dept,
                'Quarter': quarter,
                'Metric': metric,
                'Value': value
            })

df_complex = pd.DataFrame(data)

# Sanity check: exactly one row per department/quarter/metric combination
assert len(df_complex) == len(departments) * len(quarters_long) * len(metrics)

# Two side-by-side panels: revenue (grouped) and satisfaction (stacked)
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(16, 6))

# Grouped bar chart with Seaborn: one bar per department within each quarter
is_revenue = df_complex['Metric'] == 'Revenue'
revenue_data = df_complex[is_revenue]
sns.barplot(x='Quarter', y='Value', hue='Department', data=revenue_data, ax=ax1)
ax1.set_title('Revenue by Department and Quarter', fontsize=14, fontweight='bold')
ax1.set_ylabel('Revenue (thousands $)')
# Anchor the legend just outside the right edge of the axes
ax1.legend(title='Department', bbox_to_anchor=(1.05, 1), loc='upper left')

# Stacked bar chart for employee vs customer satisfaction
# Reshape to one row per (Department, Quarter) with one column per metric.
satisfaction_metrics = ['Customer_Satisfaction', 'Employee_Satisfaction']
satisfaction_data = df_complex[df_complex['Metric'].isin(satisfaction_metrics)]
satisfaction_pivot = satisfaction_data.pivot_table(
    values='Value',
    index=['Department', 'Quarter'],
    columns='Metric',
).reset_index()

# Manual stacked bar for satisfaction: one bar position per pivot row
dept_quarters = [
    f"{department}\n{period}"
    for department, period in zip(satisfaction_pivot['Department'],
                                  satisfaction_pivot['Quarter'])
]
x_pos = np.arange(len(dept_quarters))

customer_scores = satisfaction_pivot['Customer_Satisfaction']
employee_scores = satisfaction_pivot['Employee_Satisfaction']
ax2.bar(x_pos, customer_scores,
        label='Customer Satisfaction', color='lightblue', alpha=0.8)
# Employee scores are drawn on top of the customer scores
ax2.bar(x_pos, employee_scores, bottom=customer_scores,
        label='Employee Satisfaction', color='lightcoral', alpha=0.8)

ax2.set_title('Satisfaction Scores by Department', fontsize=14, fontweight='bold')
ax2.set_xlabel('Department & Quarter')
ax2.set_ylabel('Satisfaction Score')
ax2.set_xticks(x_pos[::4])  # Show every 4th label to avoid crowding
ax2.set_xticklabels(dept_quarters[::4], rotation=45)
ax2.legend()
ax2.grid(True, axis='y', alpha=0.3)

plt.tight_layout()
plt.show()


In [None]:
# Statistical analysis and insights
print("Grouped/Stacked Bar Chart Analysis:")
print("=" * 50)

# Product sales analysis: yearly total and per-quarter average for each product
print("Product Performance Analysis:")
total_by_product = {name: sum(series) for name, series in sales_data.items()}
quarter_count = len(quarters)
for name, yearly_total in total_by_product.items():
    avg_quarterly = yearly_total / quarter_count
    print(f"  {name}: Total = {yearly_total}k, Avg Quarterly = {avg_quarterly:.1f}k")

# The product with the largest yearly total (first one wins on a tie)
best_product, _ = max(total_by_product.items(), key=lambda entry: entry[1])
print(f"\nBest Performing Product: {best_product}")

# Quarter analysis: sum every product's sales for each quarter position
print("\nQuarterly Performance:")
quarter_totals = [
    sum(sales_data[product][position] for product in products)
    for position in range(quarter_count)
]
for quarter, quarter_total in zip(quarters, quarter_totals):
    print(f"  {quarter}: {quarter_total}k total sales")

# Gender rating analysis: compare the mean rating of each gender across
# the age groups.
print("\nRating Analysis by Gender:")
male_avg = np.mean(male_ratings)
female_avg = np.mean(female_ratings)
print(f"  Male Average Rating: {male_avg:.2f}")
print(f"  Female Average Rating: {female_avg:.2f}")
print(f"  Gender Difference: {abs(female_avg - male_avg):.2f} points")

# Statistical significance test
# `ttest_ind` comes from the notebook's import cell, so the scipy dependency
# is declared at the top with the other imports.
# NOTE(review): both samples hold one average per age group, so they look
# paired by age group; an independent two-sample test ignores that pairing.
# Confirm whether scipy.stats.ttest_rel would be the better choice here.
t_stat, p_value = ttest_ind(male_ratings, female_ratings)
print(f"  T-test p-value: {p_value:.4f}")
# Conventional 5% threshold for calling the difference significant
significance = "significant" if p_value < 0.05 else "not significant"
print(f"  Gender difference is {significance}")

# Market share analysis (from stacked chart)
# For each quarter, express every product's sales as a percentage of that
# quarter's combined sales.
print("\nMarket Share Analysis:")
for position, quarter in enumerate(quarters):
    sales_this_quarter = {product: sales_data[product][position] for product in products}
    total = sum(sales_this_quarter.values())
    print(f"  {quarter} Market Share:")
    for product, sold in sales_this_quarter.items():
        share = sold / total * 100
        print(f"    {product}: {share:.1f}%")

# Closing rules of thumb for picking between the chart variants shown above
print("\nChart Selection Guidelines:")
for guideline in (
    "✓ Use GROUPED bars when comparing categories across groups",
    "✓ Use STACKED bars when showing part-to-whole relationships",
    "✓ Use 100% STACKED when proportions matter more than absolute values",
    "✓ Limit to 3-4 categories to maintain readability",
):
    print(guideline)
