## Import Libraries

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime, timedelta
import warnings
# Suppress all warnings for the remainder of the session.
warnings.filterwarnings(action='ignore')

# Plot defaults: seaborn grid theme and a wide default figure size.
sns.set_style(style='whitegrid')
plt.rcParams.update({'figure.figsize': (15, 6)})

print("Libraries loaded!")

## Load Data

# Read the four input tables first, then convert the date columns in one pass.
df_cleaned = pd.read_csv('../data/sales_cleaned.csv')
df_daily = pd.read_csv('../data/sales_daily.csv')
df_forecasts = pd.read_csv('../exports/sales_with_forecasts.csv')
df_monthly_forecast = pd.read_csv('../exports/monthly_forecasts.csv')

# Three of the tables have their 'Date' column converted to datetime in place;
# the monthly forecast table is left as loaded.
for dated_frame in (df_cleaned, df_daily, df_forecasts):
    dated_frame['Date'] = pd.to_datetime(dated_frame['Date'])

# Report the size of each loaded table.
print(f"Historical data: {len(df_cleaned):,} transactions")
print(f"Daily aggregation: {len(df_daily)} days")
print(f"Forecasts: {len(df_forecasts)} periods")
print(f"Monthly forecasts: {len(df_monthly_forecast)} months")

## Calculate Key Performance Indicators (KPIs)

# Historical Performance KPIs
# Trailing window: transactions dated within 365 days of the latest transaction.
latest_date = df_cleaned['Date'].max()
historical_period = df_cleaned[df_cleaned['Date'] >= latest_date - timedelta(days=365)]

kpi_total_revenue = df_cleaned['Sales_Amount'].sum()
kpi_avg_daily = df_daily['Total_Sales'].mean()
kpi_total_transactions = len(df_cleaned)
kpi_avg_transaction = df_cleaned['Sales_Amount'].mean()
kpi_units_sold = df_cleaned['Quantity'].sum()
kpi_recent_year_revenue = historical_period['Sales_Amount'].sum()

# Growth metrics: revenue per calendar year (a year with no rows sums to 0).
transaction_year = df_cleaned['Date'].dt.year
revenue_by_year = {
    year: df_cleaned.loc[transaction_year == year, 'Sales_Amount'].sum()
    for year in (2022, 2023, 2024)
}
first_year = revenue_by_year[2022]
second_year = revenue_by_year[2023]
third_year = revenue_by_year[2024]

# Growth is reported as 0 when the prior year has no revenue, avoiding a
# division by zero.
yoy_growth_2023 = ((second_year - first_year) / first_year * 100) if first_year > 0 else 0
yoy_growth_2024 = ((third_year - second_year) / second_year * 100) if second_year > 0 else 0

# The bullet glyphs below were mis-encoded ("‚Ä¢"); restored to "•".
print("="*70)
print("KEY PERFORMANCE INDICATORS (Historical Data)")
print("="*70)
print("\nRevenue Metrics:")
print(f"  • Total Historical Revenue: ${kpi_total_revenue:,.2f}")
print(f"  • Average Daily Sales: ${kpi_avg_daily:,.2f}")
print(f"  • Average Transaction Value: ${kpi_avg_transaction:.2f}")

print("\nVolume Metrics:")
print(f"  • Total Transactions: {kpi_total_transactions:,}")
print(f"  • Total Units Sold: {kpi_units_sold:,}")

print("\nYear-over-Year Performance:")
print(f"  • 2022 Revenue: ${first_year:,.2f}")
print(f"  • 2023 Revenue: ${second_year:,.2f} ({yoy_growth_2023:+.1f}% YoY)")
print(f"  • 2024 Revenue: ${third_year:,.2f} ({yoy_growth_2024:+.1f}% YoY)")

print("="*70)

## Forecast KPIs

# Forecast summary
forecast_sales = df_monthly_forecast['Forecast_Sales']
forecast_total = forecast_sales.sum()
forecast_avg = forecast_sales.mean()
forecast_min = forecast_sales.min()
forecast_max = forecast_sales.max()

# Compare to historical average
historical_avg_daily = df_daily['Total_Sales'].mean()
# An average calendar month is 365/12 days. Using 30 shrinks a 12-month
# baseline to 360 days and overstates the growth rate.
historical_monthly = historical_avg_daily * 365 / 12

# The baseline spans as many months as the forecast table actually contains,
# so the comparison stays like-for-like if the horizon is not exactly 12 rows.
historical_baseline = historical_monthly * len(df_monthly_forecast)
# Same convention as the YoY metrics: report 0 when there is no baseline.
future_growth_rate = ((forecast_total / historical_baseline) - 1) * 100 if historical_baseline > 0 else 0

# The bullet glyphs below were mis-encoded ("‚Ä¢"); restored to "•".
print("\n" + "="*70)
print("FORECAST METRICS (Next 12 Months)")
print("="*70)
print("\nForecast Summary:")
print(f"  • Projected 12-Month Revenue: ${forecast_total:,.2f}")
print(f"  • Average Monthly Forecast: ${forecast_avg:,.2f}")
print(f"  • Monthly Range: ${forecast_min:,.2f} to ${forecast_max:,.2f}")
print(f"  • Expected Growth vs Historical: {future_growth_rate:+.1f}%")

print("\nMonthly Breakdown:")
# Walk the needed columns in lockstep instead of materialising each row.
for year_month, point, lower, upper in zip(
    df_monthly_forecast['YearMonth'],
    df_monthly_forecast['Forecast_Sales'],
    df_monthly_forecast['Forecast_Lower'],
    df_monthly_forecast['Forecast_Upper'],
):
    print(f"  {year_month}: ${point:,.0f} (CI: ${lower:,.0f} - ${upper:,.0f})")

print("="*70)

## Segment Analysis - By Category

# Category performance: revenue, average ticket, transaction count and units
# per category, ordered from highest to lowest revenue.
# Named aggregation yields flat column names directly, so there is no
# MultiIndex to rename by position afterwards.
category_analysis = (
    df_cleaned.groupby('Category')
    .agg(
        Total_Sales=('Sales_Amount', 'sum'),
        Avg_Transaction=('Sales_Amount', 'mean'),
        Transaction_Count=('Sales_Amount', 'count'),
        Units_Sold=('Quantity', 'sum'),
    )
    .round(2)
    .sort_values('Total_Sales', ascending=False)
)
category_analysis['% of Total'] = (
    category_analysis['Total_Sales'] / category_analysis['Total_Sales'].sum() * 100
).round(1)

print("\n" + "="*70)
print("CATEGORY PERFORMANCE ANALYSIS")
print("="*70)
# Embed the newline in one string: print("\n", table) inserts a separator
# space that shifts the table's header row out of alignment.
print(f"\n{category_analysis}")

# Top category (first row after the descending sort)
top_category = category_analysis.index[0]
top_sales = category_analysis['Total_Sales'].iloc[0]
top_pct = category_analysis['% of Total'].iloc[0]

# The check mark below was mis-encoded ("‚úì"); restored to "✓".
print(f"\n✓ Top Category: {top_category}")
print(f"  Revenue: ${top_sales:,.2f} ({top_pct:.1f}% of total)")

## Segment Analysis - By Store

# Store performance: revenue, average ticket, transaction count and units
# per store, ordered from highest to lowest revenue.
store_analysis = (
    df_cleaned.groupby('Store')
    .agg(
        Total_Sales=('Sales_Amount', 'sum'),
        Avg_Transaction=('Sales_Amount', 'mean'),
        Transaction_Count=('Sales_Amount', 'count'),
        Units_Sold=('Quantity', 'sum'),
    )
    .round(2)
    .sort_values('Total_Sales', ascending=False)
)
store_analysis['% of Total'] = (
    store_analysis['Total_Sales'] / store_analysis['Total_Sales'].sum() * 100
).round(1)

print("\n" + "="*70)
print("STORE PERFORMANCE ANALYSIS")
print("="*70)
# Embed the newline in one string: print("\n", table) inserts a separator
# space that shifts the table's header row out of alignment.
print(f"\n{store_analysis}")

# Identify best and worst performers (first and last rows after the sort)
best_store = store_analysis.index[0]
worst_store = store_analysis.index[-1]
best_store_sales = store_analysis['Total_Sales'].iloc[0]
worst_store_sales = store_analysis['Total_Sales'].iloc[-1]

# Fraction by which the weakest store falls short of the best one. The value
# is printed as "% below best performer", so it must be measured relative to
# the best store (1 - worst/best), not as best/worst - 1, which measures how
# far the best store is above the worst and can exceed 100%.
performance_gap = (1 - worst_store_sales / best_store_sales) if best_store_sales > 0 else 0

# The symbols below were mis-encoded ("‚úì", "‚ö†"); restored to "✓" and "⚠".
print(f"\n✓ Best Performing Store: {best_store}")
print(f"  Revenue: ${best_store_sales:,.2f}")
print(f"\n⚠ Underperforming Store: {worst_store}")
print(f"  Revenue: ${worst_store_sales:,.2f}")
print(f"  Gap: {performance_gap*100:.1f}% below best performer")

## Segment Analysis - By Region

# Region performance: revenue, average ticket, transaction count and units
# per region, ordered from highest to lowest revenue.
region_analysis = (
    df_cleaned.groupby('Region')
    .agg(
        Total_Sales=('Sales_Amount', 'sum'),
        Avg_Transaction=('Sales_Amount', 'mean'),
        Transaction_Count=('Sales_Amount', 'count'),
        Units_Sold=('Quantity', 'sum'),
    )
    .round(2)
    .sort_values('Total_Sales', ascending=False)
)
region_analysis['% of Total'] = (
    region_analysis['Total_Sales'] / region_analysis['Total_Sales'].sum() * 100
).round(1)

print("\n" + "="*70)
print("REGIONAL PERFORMANCE ANALYSIS")
print("="*70)
# Embed the newline in one string: print("\n", table) inserts a separator
# space that shifts the table's header row out of alignment.
print(f"\n{region_analysis}")

# Insights (the leading region is the first row after the descending sort)
total_regions = len(region_analysis)
leading_region = region_analysis.index[0]
leading_sales = region_analysis['Total_Sales'].iloc[0]

# The check mark below was mis-encoded ("‚úì"); restored to "✓".
print(f"\n✓ Leading Region: {leading_region}")
print(f"  Revenue: ${leading_sales:,.2f}")

## Seasonal Insights

# Monthly seasonality: pool all years and aggregate sales by calendar month.
# Note that this adds a 'Month' column to df_cleaned.
df_cleaned['Month'] = df_cleaned['Date'].dt.month
monthly_pattern = (
    df_cleaned.groupby('Month')['Sales_Amount']
    .agg(['sum', 'mean', 'count'])
    .round(2)
)
monthly_pattern.columns = ['Total_Sales', 'Avg_Sales', 'Transaction_Count']

month_names = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']
# Look each label up by month number. Assigning the full 12-item list raises a
# length mismatch whenever the data does not contain all twelve months.
# int() covers a float-typed index, which appears if any date is missing.
monthly_pattern['Month'] = [month_names[int(month_number) - 1] for month_number in monthly_pattern.index]

print("\n" + "="*70)
print("SEASONAL PATTERNS")
print("="*70)
print("\nMonthly Sales Patterns:")
print(monthly_pattern.to_string())

# Peak and low seasons (index labels of the largest / smallest monthly total)
peak_month_idx = monthly_pattern['Total_Sales'].idxmax()
low_month_idx = monthly_pattern['Total_Sales'].idxmin()

peak_month = monthly_pattern.loc[peak_month_idx, 'Month']
peak_sales = monthly_pattern.loc[peak_month_idx, 'Total_Sales']
low_month = monthly_pattern.loc[low_month_idx, 'Month']
low_sales = monthly_pattern.loc[low_month_idx, 'Total_Sales']
# How far the peak month exceeds the low month; reported as 0 when the low
# month has no sales, avoiding a division by zero.
seasonality_factor = (peak_sales / low_sales) - 1 if low_sales > 0 else 0

# The emoji below were mis-encoded ("üîù", "üìâ"); restored to "🔝" and "📉".
print(f"\n🔝 Peak Season: {peak_month}")
print(f"   Sales: ${peak_sales:,.2f}")
print(f"\n📉 Low Season: {low_month}")
print(f"   Sales: ${low_sales:,.2f}")
print(f"\nSeasonality Factor: {seasonality_factor*100:.1f}% variation")

## Visualize Key Insights

# Create comprehensive dashboard view: a 3x3 grid with three segment bar
# charts on the top row and two full-width panels below.
fig = plt.figure(figsize=(18, 12))
gs = fig.add_gridspec(3, 3, hspace=0.3, wspace=0.3)

# 1. Revenue by Category (ascending sort puts the largest bar at the top)
ax1 = fig.add_subplot(gs[0, 0])
category_analysis_sorted = category_analysis.sort_values('Total_Sales')
colors = plt.cm.Set3(np.linspace(0, 1, len(category_analysis_sorted)))
ax1.barh(category_analysis_sorted.index, category_analysis_sorted['Total_Sales'], color=colors)
ax1.set_title('Revenue by Category', fontweight='bold')
ax1.set_xlabel('Sales ($)')

# 2. Revenue by Store
ax2 = fig.add_subplot(gs[0, 1])
store_analysis_sorted = store_analysis.sort_values('Total_Sales')
colors = plt.cm.Set2(np.linspace(0, 1, len(store_analysis_sorted)))
ax2.barh(store_analysis_sorted.index, store_analysis_sorted['Total_Sales'], color=colors)
ax2.set_title('Revenue by Store', fontweight='bold')
ax2.set_xlabel('Sales ($)')

# 3. Revenue by Region
ax3 = fig.add_subplot(gs[0, 2])
region_analysis_sorted = region_analysis.sort_values('Total_Sales')
colors = plt.cm.Pastel1(np.linspace(0, 1, len(region_analysis_sorted)))
ax3.barh(region_analysis_sorted.index, region_analysis_sorted['Total_Sales'], color=colors)
ax3.set_title('Revenue by Region', fontweight='bold')
ax3.set_xlabel('Sales ($)')

# 4. Monthly Seasonality
ax4 = fig.add_subplot(gs[1, :])
# Order by the numeric month index. In monthly_pattern the label 'Month' names
# both the index and a column of month abbreviations, so sort_values('Month')
# cannot produce calendar order (the abbreviations sort alphabetically).
monthly_sorted = monthly_pattern.sort_index()
colors_seasonal = plt.cm.RdYlGn(np.linspace(0.3, 0.7, len(monthly_sorted)))
ax4.bar(range(len(monthly_sorted)), monthly_sorted['Total_Sales'], color=colors_seasonal, edgecolor='black')
ax4.set_xticks(range(len(monthly_sorted)))
ax4.set_xticklabels(monthly_sorted['Month'])
ax4.set_title('Monthly Sales Seasonality', fontweight='bold', fontsize=12)
ax4.set_ylabel('Sales ($)')
ax4.grid(True, alpha=0.3, axis='y')

# 5. Actual vs Forecast
ax5 = fig.add_subplot(gs[2, :])
# Only the last 100 'Actual' rows are drawn; both series are read from the
# 'Forecast' column and distinguished by the 'Type' column.
actual_data = df_forecasts[df_forecasts['Type'] == 'Actual'].tail(100)
forecast_data = df_forecasts[df_forecasts['Type'] == 'Forecast']

ax5.plot(actual_data['Date'], actual_data['Forecast'], label='Historical Actuals', linewidth=2, color='steelblue')
ax5.plot(forecast_data['Date'], forecast_data['Forecast'], label='12-Month Forecast', linewidth=2, color='darkorange', linestyle='--')
ax5.fill_between(forecast_data['Date'], forecast_data['Forecast_Lower'], forecast_data['Forecast_Upper'],
                  alpha=0.2, color='orange', label='95% Confidence Interval')

ax5.set_title('Recent History & Forecast', fontweight='bold', fontsize=12)
ax5.set_xlabel('Date')
ax5.set_ylabel('Daily Sales ($)')
ax5.legend(loc='upper left')
ax5.grid(True, alpha=0.3)

plt.suptitle('Sales Analytics Dashboard - Key Insights', fontsize=16, fontweight='bold', y=0.995)
plt.tight_layout()
plt.show()

## Business Recommendations

# Prints the recommendation report from the metrics computed earlier.
# The status symbols were mis-encoded ("‚úì", "‚ö†", "üîù", "üìâ") and are
# restored to "✓", "⚠", "🔝" and "📉".
print("\n" + "="*70)
print("ACTIONABLE BUSINESS RECOMMENDATIONS")
print("="*70)

# Not populated or read anywhere in this section; kept so the name still exists.
recommendations = []

# 1. Category focus
print("\n1. CATEGORY OPTIMIZATION")
print(f"   ✓ Focus on {top_category} (${top_sales:,.0f}, {top_pct:.1f}% of revenue)")
print("   ✓ Increase inventory allocation for top-performing categories")
print("   ✓ Develop targeted marketing campaigns for underperforming categories")

# 2. Regional strategy
print("\n2. REGIONAL EXPANSION")
print(f"   ✓ Replicate success from {leading_region} in other regions")
print("   ✓ Investigate underperforming regions for improvement opportunities")
# region_analysis is sorted by revenue descending, so row 0 is the leader and
# every later row is a lagging region.
leader_sales = region_analysis['Total_Sales'].iloc[0]
for region_name, region_sales in region_analysis['Total_Sales'].iloc[1:].items():
    # Shortfall relative to the leader. Dividing by the lagging region's own
    # sales instead is unbounded and fails when that region has no sales.
    gap = (1 - region_sales / leader_sales) * 100 if leader_sales > 0 else 0
    print(f"   ⚠ {region_name} lagging by {gap:.1f}% - needs growth strategy")

# 3. Store optimization
print("\n3. STORE OPERATIONS")
print(f"   ✓ {best_store}: Maintain excellence and increase staffing/inventory")
print(f"   ⚠ {worst_store}: Needs operational review - {performance_gap*100:.1f}% underperformance")
print("   ✓ Implement best practices from top store to others")

# 4. Seasonal planning
print("\n4. SEASONAL INVENTORY PLANNING")
print(f"   🔝 {peak_month}: Increase inventory by 30-40% for peak season")
print(f"   📉 {low_month}: Reduce inventory, focus on clearance sales")
print("   ✓ Plan Black Friday/Holiday campaigns early (Q3 prep for Q4 peak)")
print(f"   ✓ Seasonality factor of {seasonality_factor*100:.1f}% indicates strong seasonal patterns")

# 5. Forecast insights: the wording depends on the sign of the projected growth
print("\n5. FORECAST-DRIVEN ACTIONS")
if future_growth_rate > 0:
    print(f"   ✓ Positive outlook: Expected {future_growth_rate:+.1f}% growth in next 12 months")
    print("   ✓ Prepare for increased demand - secure supplier commitments")
    print("   ✓ Invest in supply chain capacity expansion")
else:
    print(f"   ⚠ Revenue decline forecasted: {future_growth_rate:.1f}%")
    print("   ✓ Implement cost optimization initiatives")
    print("   ✓ Develop marketing initiatives to boost sales")

print("\n6. MARKETING & PROMOTIONS")
print("   ✓ Target Q4 (Oct-Dec): Highest seasonal period - maximize marketing spend")
print(f"   ✓ Run counter-seasonal promotions during {low_month}: Stimulate demand")
print(f"   ✓ Focus promotions on {top_category} to maintain momentum")
print("   ✓ Consider bundle deals across categories to boost underperformers")

print("\n" + "="*70)

## Export Summary Report

# Create KPI summary export: one (metric, formatted value) pair per row.
kpi_rows = [
    ('Total Historical Revenue', f"${kpi_total_revenue:,.2f}"),
    ('Average Daily Sales', f"${kpi_avg_daily:,.2f}"),
    ('Total Transactions', f"{kpi_total_transactions:,}"),
    ('Units Sold', f"{kpi_units_sold:,}"),
    ('Forecast 12-Month Revenue', f"${forecast_total:,.2f}"),
    ('Expected Growth Rate', f"{future_growth_rate:+.1f}%"),
    ('Top Category', top_category),
    ('Top Store', best_store),
    ('Top Region', leading_region),
    ('Peak Month', peak_month),
    ('Seasonality Factor', f"{seasonality_factor*100:.1f}%"),
]
kpi_summary = pd.DataFrame(kpi_rows, columns=['Metric', 'Value'])

kpi_summary.to_csv('../exports/kpi_summary.csv', index=False)

# The symbols below were mis-encoded ("‚úì", "‚Ä¢"); restored to "✓" and "•".
print("✓ KPI Summary exported")
# Embed the newline in one string: print("\n", text) inserts a separator
# space that shifts the table's header row out of alignment.
print("\n" + kpi_summary.to_string(index=False))

# Export segment analysis (the index is kept so the segment names are written)
category_analysis.to_csv('../exports/category_analysis.csv')
store_analysis.to_csv('../exports/store_analysis.csv')
region_analysis.to_csv('../exports/region_analysis.csv')

print("\n✓ Segment analysis exported:")
print("  • category_analysis.csv")
print("  • store_analysis.csv")
print("  • region_analysis.csv")

## Executive Summary

# Prints the closing summary from the metrics computed in earlier sections.
# Every heading emoji, bullet and arrow was mis-encoded (e.g. "üìä", "‚Ä¢",
# "‚Üí"); the intended characters are restored below.
print("\n" + "="*70)
print("EXECUTIVE SUMMARY")
print("="*70)

print("\n📊 HISTORICAL PERFORMANCE (3-Year Period)")
print(f"   • Total Revenue: ${kpi_total_revenue:,.2f}")
print(f"   • Transactions: {kpi_total_transactions:,}")
print(f"   • Average Daily: ${kpi_avg_daily:,.2f}")
print(f"   • YoY Growth 2022→2023: {yoy_growth_2023:+.1f}%")
print(f"   • YoY Growth 2023→2024: {yoy_growth_2024:+.1f}%")

print("\n📈 12-MONTH FORECAST")
print(f"   • Projected Revenue: ${forecast_total:,.2f}")
print(f"   • Growth vs Historical Average: {future_growth_rate:+.1f}%")
print(f"   • Monthly Range: ${forecast_min:,.2f} to ${forecast_max:,.2f}")

print("\n🎯 SEGMENT HIGHLIGHTS")
print(f"   • Leading Category: {top_category} ({top_pct:.1f}% of revenue)")
print(f"   • Best Store: {best_store} (${store_analysis.iloc[0]['Total_Sales']:,.2f})")
print(f"   • Leading Region: {leading_region}")
print(f"   • Peak Season: {peak_month}")

print("\n⚠️  KEY CHALLENGES")
print(f"   • Store performance gap: {performance_gap*100:.1f}%")
print(f"   • Monthly seasonality: {seasonality_factor*100:.1f}% variation")
print("   • Need focused attention on underperforming stores/regions")

print("\n✅ KEY OPPORTUNITIES")
print("   • Leverage strong Q4 (Oct-Dec) seasonality for maximum impact")
print("   • Replicate best store practices to underperformers")
print(f"   • Expand {top_category} offerings")
print(f"   • Implement counter-seasonal promotions for {low_month}")

print("\n💡 RECOMMENDED ACTIONS")
print("   1. Increase Q4 inventory by 30-40%")
print("   2. Deploy marketing resources to peak selling periods")
print("   3. Conduct operational review of underperforming stores")
print("   4. Develop category-specific growth strategies")
print("   5. Plan supply chain capacity for forecasted growth")

print("\n" + "="*70)