# 📊 Project 1: Sales Data Analysis

Welcome to your first comprehensive data analysis project! In this notebook, you'll analyze sales data to generate business insights.

## 🎯 Project Objectives
- Load and explore sales data
- Perform statistical analysis
- Analyze performance by product, region, and salesperson
- Create meaningful visualizations
- Generate actionable business insights

## 📋 Skills You'll Practice
- Data loading and cleaning
- Grouping and aggregation
- Statistical analysis
- Data visualization
- Business intelligence

Let's get started! 🚀

In [None]:
# Import required libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from datetime import datetime
import warnings
warnings.filterwarnings('ignore')

# Confirm the environment is ready and report the library versions in use
for status_line in (
    "📚 Libraries imported successfully!",
    f"Pandas version: {pd.__version__}",
    f"NumPy version: {np.__version__}",
):
    print(status_line)

## 📥 Step 1: Load and Explore the Data

First, let's load our sales dataset and get familiar with its structure.

In [None]:
# Load the sales data
# Fixed seed for the fallback dataset so that every statistic, ranking and
# chart produced later in the notebook is identical from run to run.
FALLBACK_SEED = 42

try:
    df = pd.read_csv('../datasets/sample_sales.csv')
    df['Date'] = pd.to_datetime(df['Date'])
    print(f"✅ Successfully loaded {len(df)} sales records")
except FileNotFoundError:
    print("❌ Dataset not found. Please ensure sample_sales.csv is in the datasets folder.")
    # Create sample data for demonstration: 30 days x 3 products = 90 rows.
    # A local Generator is used so the global NumPy random state is untouched.
    rng = np.random.default_rng(FALLBACK_SEED)
    dates = pd.date_range('2024-01-01', periods=30)
    products = ['Laptop', 'Phone', 'Tablet']
    n_rows = len(dates) * len(products)
    df = pd.DataFrame({
        # Each date appears once per product, in the same order as 'Product'
        'Date': np.repeat(dates, len(products)),
        'Product': products * len(dates),
        'Sales': rng.integers(1000, 5000, n_rows),  # upper bound is exclusive
        'Region': rng.choice(['North', 'South', 'East', 'West'], n_rows),
        'Salesperson': rng.choice(['Alice', 'Bob', 'Charlie', 'Diana'], n_rows),
    })
    print(f"📊 Created sample dataset with {len(df)} records for demonstration")

# Display basic information about the dataset
print("\n📋 Dataset Overview:")
print(f"Shape: {df.shape}")
print(f"Columns: {list(df.columns)}")
print("\n🔍 First 5 rows:")
print(df.head())

In [None]:
# Data quality check: missing values, column types, date span and cardinality
print("🔍 Data Quality Check:")

missing_counts = df.isnull().sum()
print(f"Missing values:\n{missing_counts}")

column_types = df.dtypes
print(f"\nData types:\n{column_types}")

first_date, last_date = df['Date'].min(), df['Date'].max()
print(f"\nDate range: {first_date} to {last_date}")

# Report how many distinct values each categorical column holds
for column, plural in (
    ('Product', 'products'),
    ('Region', 'regions'),
    ('Salesperson', 'salespeople'),
):
    print(f"Number of unique {plural}: {df[column].nunique()}")

## 📊 Step 2: Basic Statistical Analysis

Let's calculate key statistics to understand our sales performance.

In [None]:
# Basic statistics
sales_amounts = df['Sales']

print("📈 BASIC SALES STATISTICS")
print("=" * 50)

# Each entry pairs a label with its already-formatted value
headline_stats = [
    ("Total sales", f"${sales_amounts.sum():,.2f}"),
    ("Average transaction", f"${sales_amounts.mean():.2f}"),
    ("Median transaction", f"${sales_amounts.median():.2f}"),
    ("Highest transaction", f"${sales_amounts.max():,.2f}"),
    ("Lowest transaction", f"${sales_amounts.min():,.2f}"),
    ("Standard deviation", f"${sales_amounts.std():.2f}"),
    ("Total transactions", f"{len(df):,}"),
]
for label, formatted_value in headline_stats:
    print(f"{label}: {formatted_value}")

# Calculate additional metrics
# The period is inclusive of both the first and the last date, hence the +1
first_sale_date = df['Date'].min()
last_sale_date = df['Date'].max()
total_days = (last_sale_date - first_sale_date).days + 1
daily_average = sales_amounts.sum() / total_days

print(f"\n📅 Time Period Analysis:")
print(f"Analysis period: {total_days} days")
print(f"Average daily sales: ${daily_average:,.2f}")

In [None]:
# Descriptive statistics
sales_amounts = df['Sales']
print("📊 Detailed Sales Statistics:")
print(sales_amounts.describe())

# Sales distribution: histogram with mean/median markers next to a box plot
mean_sale = sales_amounts.mean()
median_sale = sales_amounts.median()

fig, (hist_ax, box_ax) = plt.subplots(1, 2, figsize=(12, 4))

hist_ax.hist(sales_amounts, bins=20, alpha=0.7, color='skyblue', edgecolor='black')
hist_ax.axvline(mean_sale, color='red', linestyle='--', label=f'Mean: ${mean_sale:.0f}')
hist_ax.axvline(median_sale, color='green', linestyle='--', label=f'Median: ${median_sale:.0f}')
hist_ax.set_title('Sales Distribution')
hist_ax.set_xlabel('Sales Amount ($)')
hist_ax.set_ylabel('Frequency')
hist_ax.legend()

box_ax.boxplot(sales_amounts)
box_ax.set_title('Sales Box Plot')
box_ax.set_ylabel('Sales Amount ($)')

plt.tight_layout()
plt.show()

print("✅ Statistical analysis completed!")

## 🛍️ Step 3: Product Analysis

Let's analyze sales performance by product to identify top performers.

In [None]:
# Product performance analysis
print("🛍️ PRODUCT ANALYSIS")
print("=" * 50)

# One row per product; the raw aggregate names are replaced with readable ones
product_stats = (
    df.groupby('Product')['Sales']
    .agg(['sum', 'mean', 'count', 'std', 'min', 'max'])
    .round(2)
)
product_stats.columns = ['Total_Sales', 'Avg_Sales', 'Count', 'Std_Dev', 'Min_Sales', 'Max_Sales']
product_stats = product_stats.sort_values('Total_Sales', ascending=False)

print("Sales Performance by Product:")
print(product_stats)

# Identify best and worst performers: the table is sorted by total sales,
# so they are simply the first and last index entries
best_product, worst_product = product_stats.index[0], product_stats.index[-1]

for heading, product_name in (
    ("🏆 Best performing product", best_product),
    ("📉 Lowest performing product", worst_product),
):
    summary = product_stats.loc[product_name]
    print(f"\n{heading}: {product_name}")
    print(f"   Total sales: ${summary['Total_Sales']:,.2f}")
    print(f"   Average transaction: ${summary['Avg_Sales']:,.2f}")

In [None]:
# Product visualization: a 2x2 grid summarising product_stats and the raw
# per-product sales distribution
fig, axes = plt.subplots(2, 2, figsize=(15, 10))
fig.suptitle('Product Performance Analysis', fontsize=16, fontweight='bold')

# 1. Total sales by product
product_totals = product_stats['Total_Sales'].sort_values(ascending=True)
axes[0, 0].barh(product_totals.index, product_totals.values, color='skyblue')
axes[0, 0].set_title('Total Sales by Product')
axes[0, 0].set_xlabel('Total Sales ($)')

# Add value labels just past the end of each bar. The offset (1% of the
# longest bar) does not change between bars, so it is computed once.
label_offset = product_totals.max() * 0.01
for i, v in enumerate(product_totals.values):
    axes[0, 0].text(v + label_offset, i, f'${v:,.0f}', va='center')

# 2. Average sales by product
product_avg = product_stats['Avg_Sales'].sort_values(ascending=False)
axes[0, 1].bar(product_avg.index, product_avg.values, color='lightgreen')
axes[0, 1].set_title('Average Sales by Product')
axes[0, 1].set_ylabel('Average Sales ($)')
axes[0, 1].tick_params(axis='x', rotation=45)

# 3. Sales count by product
product_count = product_stats['Count'].sort_values(ascending=False)
axes[1, 0].bar(product_count.index, product_count.values, color='orange')
axes[1, 0].set_title('Number of Transactions by Product')
axes[1, 0].set_ylabel('Transaction Count')
axes[1, 0].tick_params(axis='x', rotation=45)

# 4. Sales distribution by product (box plot)
product_names = df['Product'].unique()
product_data = [df.loc[df['Product'] == name, 'Sales'].values for name in product_names]
# Tick labels are applied after drawing rather than through
# boxplot(labels=...): that keyword was deprecated in Matplotlib 3.9 in
# favour of tick_labels, so this form works on old and new releases alike.
axes[1, 1].boxplot(product_data)
axes[1, 1].set_xticklabels(product_names)
axes[1, 1].set_title('Sales Distribution by Product')
axes[1, 1].set_ylabel('Sales ($)')
axes[1, 1].tick_params(axis='x', rotation=45)

plt.tight_layout()
plt.show()

print("📊 Product analysis visualizations completed!")

## 🌍 Step 4: Regional Analysis

Now let's analyze sales performance across different regions.

In [None]:
# Regional analysis
print("🌍 REGIONAL ANALYSIS")
print("=" * 50)

# One row per region, ordered from highest to lowest total sales
regional_stats = (
    df.groupby('Region')['Sales']
    .agg(['sum', 'mean', 'count', 'std'])
    .round(2)
)
regional_stats.columns = ['Total_Sales', 'Avg_Sales', 'Count', 'Std_Dev']
regional_stats = regional_stats.sort_values('Total_Sales', ascending=False)

# Calculate market share: each region's total as a percentage of all sales
total_sales = df['Sales'].sum()
regional_stats['Market_Share_%'] = (
    regional_stats['Total_Sales'].div(total_sales).mul(100).round(2)
)

print("Sales Performance by Region:")
print(regional_stats)

print("\n🏆 Regional Rankings:")
for rank, region in enumerate(regional_stats.index, start=1):
    row = regional_stats.loc[region]
    print(f"{rank}. {region}: ${row['Total_Sales']:,.2f} ({row['Market_Share_%']}% market share)")

In [None]:
# Regional visualization: 2x2 grid built from regional_stats
fig, axes = plt.subplots(2, 2, figsize=(15, 10))
fig.suptitle('Regional Performance Analysis', fontsize=16, fontweight='bold')
(share_ax, totals_ax), (avg_ax, count_ax) = axes

# Panel 1: each region's slice of total revenue
share_ax.pie(regional_stats['Market_Share_%'], labels=regional_stats.index,
             autopct='%1.1f%%', startangle=90)
share_ax.set_title('Market Share by Region')

# Panel 2: total sales, sorted ascending so the largest bar is drawn on top
regional_totals = regional_stats['Total_Sales'].sort_values(ascending=True)
totals_ax.barh(regional_totals.index, regional_totals.values, color='lightcoral')
totals_ax.set_title('Total Sales by Region')
totals_ax.set_xlabel('Total Sales ($)')

# Panels 3 and 4 share the same vertical-bar layout
regional_avg = regional_stats['Avg_Sales'].sort_values(ascending=False)
regional_count = regional_stats['Count'].sort_values(ascending=False)
for ax, series, bar_color, title, y_label in (
    (avg_ax, regional_avg, 'lightblue', 'Average Transaction by Region', 'Average Sales ($)'),
    (count_ax, regional_count, 'gold', 'Transaction Count by Region', 'Number of Transactions'),
):
    ax.bar(series.index, series.values, color=bar_color)
    ax.set_title(title)
    ax.set_ylabel(y_label)
    ax.tick_params(axis='x', rotation=45)

plt.tight_layout()
plt.show()

print("🌍 Regional analysis completed!")

## 👥 Step 5: Salesperson Performance Analysis

Let's evaluate individual salesperson performance.

In [None]:
# Salesperson analysis
print("👥 SALESPERSON PERFORMANCE ANALYSIS")
print("=" * 50)

# One row per salesperson, ordered from highest to lowest total sales
salesperson_stats = (
    df.groupby('Salesperson')['Sales']
    .agg(['sum', 'mean', 'count', 'std'])
    .round(2)
)
salesperson_stats.columns = ['Total_Sales', 'Avg_Sales', 'Count', 'Std_Dev']
salesperson_stats = salesperson_stats.sort_values('Total_Sales', ascending=False)

# Calculate performance metrics
salesperson_stats['Sales_per_Transaction'] = salesperson_stats['Avg_Sales']

# The score gives up to 50 points for total sales and up to 50 for average
# transaction size, each measured relative to the best value in the table
total_component = salesperson_stats['Total_Sales'] / salesperson_stats['Total_Sales'].max() * 50
avg_component = salesperson_stats['Avg_Sales'] / salesperson_stats['Avg_Sales'].max() * 50
salesperson_stats['Performance_Score'] = (total_component + avg_component).round(2)

print("Salesperson Performance:")
print(salesperson_stats)

print("\n🏆 Top Performers:")
podium = salesperson_stats.head(3)
for i, person in enumerate(podium.index, 1):
    stats = podium.loc[person]
    print(f"{i}. {person}:")
    print(f"   Total sales: ${stats['Total_Sales']:,.2f}")
    print(f"   Transactions: {stats['Count']:.0f}")
    print(f"   Avg per transaction: ${stats['Avg_Sales']:,.2f}")
    print(f"   Performance score: {stats['Performance_Score']:.1f}/100")
    print()

In [None]:
# Salesperson visualization: 2x2 grid built from salesperson_stats
fig, axes = plt.subplots(2, 2, figsize=(15, 10))
fig.suptitle('Salesperson Performance Analysis', fontsize=16, fontweight='bold')
(totals_ax, avg_ax), (score_ax, scatter_ax) = axes

# Panel 1: total sales, sorted ascending so the top seller is drawn on top
sales_totals = salesperson_stats['Total_Sales'].sort_values(ascending=True)
totals_ax.barh(sales_totals.index, sales_totals.values, color='mediumpurple')
totals_ax.set_title('Total Sales by Salesperson')
totals_ax.set_xlabel('Total Sales ($)')

# Panels 2 and 3: vertical bars with the same layout
sales_avg = salesperson_stats['Avg_Sales'].sort_values(ascending=False)
performance_scores = salesperson_stats['Performance_Score'].sort_values(ascending=False)
for ax, series, bar_color, title, y_label in (
    (avg_ax, sales_avg, 'lightseagreen', 'Average Transaction by Salesperson', 'Average Sales ($)'),
    (score_ax, performance_scores, 'orange', 'Overall Performance Score', 'Performance Score (0-100)'),
):
    ax.bar(series.index, series.values, color=bar_color)
    ax.set_title(title)
    ax.set_ylabel(y_label)
    ax.tick_params(axis='x', rotation=45)

# Panel 4: transaction count against total sales, one labelled point each
transaction_counts = salesperson_stats['Count']
sales_by_person = salesperson_stats['Total_Sales']
scatter_ax.scatter(transaction_counts, sales_by_person, s=100, alpha=0.7, color='red')
for person, count, total in zip(salesperson_stats.index, transaction_counts, sales_by_person):
    # Nudge the name 5 points up and right so it does not cover the marker
    scatter_ax.annotate(person, (count, total), xytext=(5, 5), textcoords='offset points')
scatter_ax.set_title('Sales vs Transaction Count')
scatter_ax.set_xlabel('Number of Transactions')
scatter_ax.set_ylabel('Total Sales ($)')
scatter_ax.grid(True, alpha=0.3)

plt.tight_layout()
plt.show()

print("👥 Salesperson analysis completed!")

## 📅 Step 6: Time Series Analysis

Let's analyze sales trends over time to identify patterns.

In [None]:
# Time series analysis
print("📅 TIME SERIES ANALYSIS")
print("=" * 50)

# Daily sales analysis: one row per date with total, count and mean
daily_sales = df.groupby('Date')['Sales'].agg(['sum', 'count', 'mean']).round(2)
daily_sales.columns = ['Total_Sales', 'Transactions', 'Avg_Sales']

print("Daily Sales Summary:")
print(f"Best sales day: {daily_sales['Total_Sales'].idxmax().strftime('%Y-%m-%d')}")
print(f"Best sales amount: ${daily_sales['Total_Sales'].max():,.2f}")
print(f"Worst sales day: {daily_sales['Total_Sales'].idxmin().strftime('%Y-%m-%d')}")
print(f"Worst sales amount: ${daily_sales['Total_Sales'].min():,.2f}")

# Calculate growth metrics (first recorded day versus last recorded day)
if len(daily_sales) > 1:
    first_day_sales = daily_sales['Total_Sales'].iloc[0]
    last_day_sales = daily_sales['Total_Sales'].iloc[-1]
    print(f"\n📈 Growth Analysis:")
    print(f"First day sales: ${first_day_sales:,.2f}")
    print(f"Last day sales: ${last_day_sales:,.2f}")
    if first_day_sales != 0:
        growth_rate = ((last_day_sales - first_day_sales) / first_day_sales * 100)
        print(f"Overall growth rate: {growth_rate:.2f}%")
    else:
        # A percentage change from a zero baseline is undefined; say so
        # instead of printing "inf%" or "nan%".
        growth_rate = float('nan')
        print("Overall growth rate: n/a (first day had zero sales)")

print(f"\n📊 Daily Averages:")
print(f"Average daily sales: ${daily_sales['Total_Sales'].mean():,.2f}")
print(f"Average daily transactions: {daily_sales['Transactions'].mean():.1f}")

In [None]:
# Time series visualization: three daily trend lines plus a weekday profile
fig, axes = plt.subplots(2, 2, figsize=(15, 10))
fig.suptitle('Time Series Analysis', fontsize=16, fontweight='bold')

# Panels 1-3 share one layout; only the column, labels and line style differ.
# The first panel sets no colour so it keeps the default line colour.
trend_panels = (
    (axes[0, 0], 'Total_Sales', 'Daily Sales Trend', 'Total Sales ($)',
     {'marker': 'o'}),
    (axes[0, 1], 'Transactions', 'Daily Transaction Count', 'Number of Transactions',
     {'marker': 's', 'color': 'green'}),
    (axes[1, 0], 'Avg_Sales', 'Average Transaction Value Over Time', 'Average Sales ($)',
     {'marker': '^', 'color': 'red'}),
)
for ax, column, title, y_label, line_style in trend_panels:
    ax.plot(daily_sales.index, daily_sales[column], linewidth=2, **line_style)
    ax.set_title(title)
    ax.set_xlabel('Date')
    ax.set_ylabel(y_label)
    ax.tick_params(axis='x', rotation=45)
    ax.grid(True, alpha=0.3)

# Panel 4: mean sale per weekday. This adds a 'DayOfWeek' column to df;
# weekdays that never occur in the data are shown as 0.
df['DayOfWeek'] = df['Date'].dt.day_name()
day_order = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday']
daily_pattern = df.groupby('DayOfWeek')['Sales'].mean().reindex(day_order, fill_value=0)

weekday_ax = axes[1, 1]
tick_positions = range(len(daily_pattern))
weekday_ax.bar(tick_positions, daily_pattern.values, color='purple', alpha=0.7)
weekday_ax.set_title('Average Sales by Day of Week')
weekday_ax.set_xlabel('Day of Week')
weekday_ax.set_ylabel('Average Sales ($)')
weekday_ax.set_xticks(tick_positions)
weekday_ax.set_xticklabels([day[:3] for day in daily_pattern.index], rotation=45)

plt.tight_layout()
plt.show()

print("📅 Time series analysis completed!")

## 🔍 Step 7: Advanced Analysis & Insights

Let's perform some advanced analysis to generate deeper business insights.

In [None]:
# Advanced analysis
print("🔍 ADVANCED ANALYSIS & INSIGHTS")
print("=" * 50)

# 1. Product-Region performance matrix (mean sale per product/region cell)
product_region_matrix = pd.pivot_table(
    df, values='Sales', index='Product', columns='Region', aggfunc='mean'
).round(2)
print("Average Sales by Product and Region:")
print(product_region_matrix)

# 2. Top performing combinations, ranked by total sales
print("\n🏆 Top Product-Region Combinations:")
product_region_totals = (
    df.groupby(['Product', 'Region'])['Sales']
    .sum()
    .sort_values(ascending=False)
)
top_combinations = product_region_totals.head(5)
for i, (combination, sales) in enumerate(top_combinations.items(), 1):
    product, region = combination
    print(f"{i}. {product} in {region}: ${sales:,.2f}")

# 3. Salesperson-Product specialization (total sales, 0 where none were made)
print("\n👥 Salesperson Product Specialization:")
salesperson_product = (
    df.groupby(['Salesperson', 'Product'])['Sales']
    .sum()
    .unstack(fill_value=0)
)
print(salesperson_product)

# Find each salesperson's best product: the column holding the row maximum
print("\nEach salesperson's strongest product:")
strongest_products = salesperson_product.idxmax(axis=1)
strongest_sales = salesperson_product.max(axis=1)
for salesperson, best_product, best_sales in zip(
    salesperson_product.index, strongest_products, strongest_sales
):
    print(f"{salesperson}: {best_product} (${best_sales:,.2f})")

In [None]:
# Advanced visualizations: heatmap, stacked product mix, scatter and box plot
fig, axes = plt.subplots(2, 2, figsize=(16, 12))
fig.suptitle('Advanced Analysis Visualizations', fontsize=16, fontweight='bold')

# 1. Heatmap of product-region performance
axes[0, 0].imshow(product_region_matrix.values, cmap='YlOrRd', aspect='auto')
axes[0, 0].set_xticks(range(len(product_region_matrix.columns)))
axes[0, 0].set_yticks(range(len(product_region_matrix.index)))
axes[0, 0].set_xticklabels(product_region_matrix.columns)
axes[0, 0].set_yticklabels(product_region_matrix.index)
axes[0, 0].set_title('Product-Region Performance Heatmap')

# Add values to heatmap. Text is white on cells above the matrix average and
# black otherwise. nanmean is required here: a product/region pair with no
# sales leaves NaN in the matrix, and a plain mean would then be NaN, which
# makes every "value > threshold" comparison False.
contrast_threshold = np.nanmean(product_region_matrix.values)
for i, matrix_row in enumerate(product_region_matrix.values):
    for j, value in enumerate(matrix_row):
        if not pd.isna(value):
            axes[0, 0].text(j, i, f'${value:.0f}', ha='center', va='center',
                            color='white' if value > contrast_threshold else 'black')

# 2. Salesperson product specialization (each person's sales as % by product)
salesperson_product_pct = salesperson_product.div(salesperson_product.sum(axis=1), axis=0) * 100
bottom = np.zeros(len(salesperson_product_pct))
colors = ['#ff9999', '#66b3ff', '#99ff99', '#ffcc99']

for i, product in enumerate(salesperson_product_pct.columns):
    segment_heights = salesperson_product_pct[product].to_numpy()
    axes[0, 1].bar(salesperson_product_pct.index, segment_heights,
                   bottom=bottom, label=product, color=colors[i % len(colors)])
    # Build a new array rather than updating in place, so the baseline
    # already handed to bar() for this segment is never modified.
    bottom = bottom + segment_heights

axes[0, 1].set_title('Salesperson Product Mix (%)')
axes[0, 1].set_ylabel('Percentage of Sales')
axes[0, 1].legend()
axes[0, 1].tick_params(axis='x', rotation=45)

# 3. Sales performance scatter (Total vs Average), one colour per region
region_names = df['Region'].unique()
for region in region_names:
    region_data = df[df['Region'] == region]
    region_stats = region_data.groupby('Product').agg({'Sales': ['sum', 'mean']})
    region_stats.columns = ['Total', 'Average']
    axes[1, 0].scatter(region_stats['Total'], region_stats['Average'],
                       label=region, s=100, alpha=0.7)

axes[1, 0].set_title('Total vs Average Sales by Product & Region')
axes[1, 0].set_xlabel('Total Sales ($)')
axes[1, 0].set_ylabel('Average Sales ($)')
axes[1, 0].legend()
axes[1, 0].grid(True, alpha=0.3)

# 4. Performance distribution
all_performance = [df.loc[df['Region'] == region, 'Sales'].values for region in region_names]
labels = list(region_names)

# Tick labels are applied after drawing rather than through
# boxplot(labels=...): that keyword was deprecated in Matplotlib 3.9 in
# favour of tick_labels, so this form works on old and new releases alike.
axes[1, 1].boxplot(all_performance)
axes[1, 1].set_xticklabels(labels)
axes[1, 1].set_title('Sales Distribution by Region')
axes[1, 1].set_ylabel('Sales ($)')
axes[1, 1].tick_params(axis='x', rotation=45)

plt.tight_layout()
plt.show()

print("🔍 Advanced analysis completed!")

## 💡 Step 8: Key Insights & Recommendations

Based on our analysis, let's generate actionable business insights.

In [None]:
# Generate key insights
print("💡 KEY BUSINESS INSIGHTS & RECOMMENDATIONS")
print("=" * 60)

# Product insights (product_stats is sorted by total sales, best first)
best_product = product_stats.index[0]
best_product_sales = product_stats.loc[best_product, 'Total_Sales']
worst_product = product_stats.index[-1]
worst_product_sales = product_stats.loc[worst_product, 'Total_Sales']

print("🛍️ PRODUCT INSIGHTS:")
print(f"1. {best_product} is the star performer with ${best_product_sales:,.2f} in total sales")
print(f"2. {worst_product} needs attention with only ${worst_product_sales:,.2f} in sales")
print(f"3. Performance gap: {((best_product_sales - worst_product_sales) / worst_product_sales * 100):.1f}% difference")

# Regional insights (regional_stats is sorted by total sales, best first)
best_region = regional_stats.index[0]
best_region_share = regional_stats.loc[best_region, 'Market_Share_%']
worst_region = regional_stats.index[-1]
worst_region_share = regional_stats.loc[worst_region, 'Market_Share_%']

print(f"\n🌍 REGIONAL INSIGHTS:")
print(f"1. {best_region} dominates with {best_region_share}% market share")
print(f"2. {worst_region} region is underperforming at {worst_region_share}% market share")
print(f"3. Market concentration: Top region has {best_region_share/worst_region_share:.1f}x more sales than bottom region")

# Salesperson insights (salesperson_stats is sorted by total sales, best first)
top_salesperson = salesperson_stats.index[0]
top_sales = salesperson_stats.loc[top_salesperson, 'Total_Sales']
bottom_salesperson = salesperson_stats.index[-1]
bottom_sales = salesperson_stats.loc[bottom_salesperson, 'Total_Sales']

print(f"\n👥 SALESPERSON INSIGHTS:")
print(f"1. {top_salesperson} is the top performer with ${top_sales:,.2f} in sales")
print(f"2. {bottom_salesperson} needs coaching with ${bottom_sales:,.2f} in sales")
print(f"3. Performance gap: {((top_sales - bottom_sales) / bottom_sales * 100):.1f}% difference between top and bottom")

# Time-based insights
best_day = daily_sales['Total_Sales'].idxmax()
best_day_sales = daily_sales['Total_Sales'].max()
avg_daily_sales = daily_sales['Total_Sales'].mean()

print(f"\n📅 TEMPORAL INSIGHTS:")
print(f"1. Best sales day: {best_day.strftime('%A, %B %d')} with ${best_day_sales:,.2f}")
print(f"2. Daily average: ${avg_daily_sales:,.2f}")
print(f"3. Best day performed {(best_day_sales/avg_daily_sales):.1f}x better than average")

print(f"\n🎯 STRATEGIC RECOMMENDATIONS:")
print(f"1. 📈 GROWTH: Focus marketing budget on {best_product} to maximize ROI")
print(f"2. 🔧 IMPROVEMENT: Develop action plan for {worst_product} - consider pricing or promotion")
print(f"3. 🌍 EXPANSION: Investigate why {best_region} performs well and replicate in {worst_region}")
print(f"4. 👥 TRAINING: Pair {bottom_salesperson} with {top_salesperson} for mentoring")
print(f"5. 📊 MONITORING: Set up weekly dashboards to track these key metrics")

# Calculate overall business health score.
# Four components, each a fraction that is scaled to 25 points so the total
# lands on the 0-100 scale printed below.
sales_mean = df['Sales'].mean()
# Coefficient of variation: spread of transaction sizes relative to the mean
sales_cv = df['Sales'].std() / sales_mean
# Spread of regional market shares, as a fraction of 100 percentage points
share_spread = regional_stats['Market_Share_%'].std() / 100

health_score = (
    (sales_mean / df['Sales'].max() * 25) +  # Average transaction strength
    (len(df) / (len(df) + 100) * 25) +  # Transaction volume
    # Consistency: (1 - cv) is scaled as a whole. Writing 1 - cv * 25 would
    # multiply only cv by 25 and can push the term far below zero.
    (np.clip(1 - sales_cv, 0, 1) * 25) +
    # Market balance: an even split between regions (low spread) scores
    # high; a larger spread must lower the score, not raise it.
    (np.clip(1 - share_spread, 0, 1) * 25)
)

print(f"\n📊 OVERALL BUSINESS HEALTH SCORE: {health_score:.1f}/100")
if health_score >= 80:
    print("🟢 Excellent performance - maintain current strategies")
elif health_score >= 60:
    print("🟡 Good performance - focus on identified improvement areas")
else:
    print("🔴 Needs improvement - implement recommendations urgently")

## 📋 Step 9: Executive Summary Dashboard

Let's create a final executive summary with key metrics.

In [None]:
# Executive Summary Dashboard: six headline metric boxes above three charts
fig, axes = plt.subplots(3, 3, figsize=(20, 15))
fig.suptitle('📊 EXECUTIVE SALES DASHBOARD', fontsize=20, fontweight='bold', y=0.98)

# Key metrics boxes.
# The three "best" labels are read straight from the sorted stats tables
# rather than from loose variables set in earlier cells: best_product in
# particular is reused as a loop variable in the advanced-analysis cell, so
# its value depends on which cell happened to run last.
metrics = {
    'Total Sales': f"${df['Sales'].sum():,.0f}",
    'Avg Transaction': f"${df['Sales'].mean():.0f}",
    'Total Transactions': f"{len(df):,}",
    'Best Product': product_stats.index[0],
    'Top Region': regional_stats.index[0],
    'Top Salesperson': salesperson_stats.index[0],
}

# Create metric boxes: six entries fill the first two rows of the 3x3 grid
for i, (metric, value) in enumerate(metrics.items()):
    row, col = divmod(i, 3)
    axes[row, col].text(0.5, 0.5, f"{metric}\n{value}",
                        ha='center', va='center', fontsize=14, fontweight='bold',
                        bbox=dict(boxstyle='round,pad=0.5', facecolor='lightblue', alpha=0.8))
    axes[row, col].set_xlim(0, 1)
    axes[row, col].set_ylim(0, 1)
    # Hide ticks and frame so only the text box is visible
    axes[row, col].axis('off')

# Product performance chart
product_totals = product_stats['Total_Sales'].sort_values(ascending=True)
axes[2, 0].barh(product_totals.index, product_totals.values, color='skyblue')
axes[2, 0].set_title('Sales by Product', fontweight='bold')
axes[2, 0].set_xlabel('Total Sales ($)')

# Regional market share
axes[2, 1].pie(regional_stats['Market_Share_%'], labels=regional_stats.index,
               autopct='%1.1f%%', startangle=90)
axes[2, 1].set_title('Market Share by Region', fontweight='bold')

# Sales trend
axes[2, 2].plot(daily_sales.index, daily_sales['Total_Sales'], marker='o', linewidth=2, color='green')
axes[2, 2].set_title('Sales Trend', fontweight='bold')
axes[2, 2].set_ylabel('Daily Sales ($)')
axes[2, 2].tick_params(axis='x', rotation=45)
axes[2, 2].grid(True, alpha=0.3)

plt.tight_layout()
plt.show()

print("📊 Executive dashboard created successfully!")
print("\n" + "="*60)
print("🎉 SALES ANALYSIS PROJECT COMPLETED!")
print("="*60)
print("\n✅ What you accomplished:")
print("   • Loaded and explored sales data")
print("   • Performed comprehensive statistical analysis")
print("   • Analyzed performance by product, region, and salesperson")
print("   • Created time series analysis")
print("   • Generated actionable business insights")
print("   • Built executive dashboard")
print("\n🚀 Next steps:")
print("   • Try Project 2: Student Performance Analysis")
print("   • Experiment with your own datasets")
print("   • Explore advanced visualization techniques")
print("\nGreat job! You're now ready for more complex data analysis projects! 🎯")