# DSA 2040 Practical Exam - Section 1, Task 3
## OLAP Queries and Analysis

**Student:** Monaheng218  
**Date:** August 13, 2025  
**Total Marks:** 15

### Task Requirements:
1. Demonstrate complex SQL queries against the star schema
2. Perform multidimensional analysis (OLAP operations)
3. Generate business insights from data warehouse
4. Showcase different types of aggregations and groupings

In [None]:
# =============================================================================
# IMPORTS AND DATABASE CONNECTION
# =============================================================================

import pandas as pd
import sqlite3
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
from datetime import datetime
import warnings
warnings.filterwarnings('ignore')

# Set visualization style
plt.style.use('seaborn-v0_8')
sns.set_palette("husl")

# Connect to the data warehouse.
# The file is opened through an SQLite URI with mode=rw: a plain
# sqlite3.connect(path) silently creates a new, empty database when the path is
# wrong, and the mistake only surfaces later as "no such table" errors.
# With mode=rw SQLite never creates the file, so a missing warehouse fails
# right here with sqlite3.OperationalError.
# NOTE: db_path is interpolated into the URI as-is, so it must not contain
# the characters '?' or '#'.
db_path = 'retail_dw.db'
conn = sqlite3.connect(f"file:{db_path}?mode=rw", uri=True)

print(" Connected to retail data warehouse")
print(f"Database: {db_path}")

# Verify database structure by listing the tables recorded in sqlite_master
tables_query = "SELECT name FROM sqlite_master WHERE type='table';"
tables = pd.read_sql_query(tables_query, conn)
print(f"\n Available tables: {', '.join(tables['name'].tolist())}")

In [None]:
# =============================================================================
# OLAP QUERY 1: SALES PERFORMANCE BY TIME DIMENSIONS
# =============================================================================
# Rolls SalesFact up to one row per (Year, Month) through TimeDim, charts four
# monthly measures and prints the headline figures.

print("\n" + "="*60)
print("OLAP ANALYSIS 1: TEMPORAL SALES ANALYSIS")
print("="*60)

# Query 1a: Monthly sales trends
# NOTE(review): AvgTransactionValue is AVG() over fact rows. If SalesFact holds
# several rows per invoice this is a per-row average, not a per-invoice one -
# confirm the fact-table grain.
monthly_sales_query = """
SELECT 
    t.Year,
    t.Month,
    t.MonthName,
    COUNT(DISTINCT s.InvoiceNo) as InvoiceCount,
    SUM(s.Quantity) as TotalQuantity,
    ROUND(SUM(s.TotalSales), 2) as TotalRevenue,
    ROUND(AVG(s.TotalSales), 2) as AvgTransactionValue,
    COUNT(DISTINCT s.CustomerKey) as UniqueCustomers
FROM SalesFact s
JOIN TimeDim t ON s.TimeKey = t.TimeKey
GROUP BY t.Year, t.Month, t.MonthName
ORDER BY t.Year, t.Month;
"""

monthly_sales = pd.read_sql_query(monthly_sales_query, conn)
print("📊 Monthly Sales Performance:")
print(monthly_sales.head(10))

# The result is grouped by Year as well as Month, so the same month name can
# appear more than once. Qualify every label with its year to keep the x-axis
# unambiguous (e.g. "Dec 2010" vs "Dec 2011").
month_positions = range(len(monthly_sales))
month_labels = (monthly_sales['MonthName'].str[:3] + ' '
                + monthly_sales['Year'].astype(str))

# Visualization: four panels sharing the same x-axis
plt.figure(figsize=(15, 10))

panels = [
    # (column, title, y-axis label, chart kind, extra style arguments)
    ('TotalRevenue', 'Monthly Revenue Trend', 'Revenue ($)',
     'line', {'marker': 'o'}),
    ('InvoiceCount', 'Monthly Transaction Count', 'Number of Invoices',
     'bar', {'color': 'orange'}),
    ('AvgTransactionValue', 'Average Transaction Value Trend', 'Avg Transaction Value ($)',
     'line', {'marker': 's', 'color': 'green'}),
    ('UniqueCustomers', 'Unique Customers per Month', 'Unique Customers',
     'bar', {'color': 'red'}),
]

for position, (column, title, ylabel, kind, style_kwargs) in enumerate(panels, start=1):
    plt.subplot(2, 2, position)
    if kind == 'line':
        plt.plot(month_positions, monthly_sales[column], linewidth=2, **style_kwargs)
        plt.grid(True, alpha=0.3)
    else:
        plt.bar(month_positions, monthly_sales[column], alpha=0.7, **style_kwargs)
    plt.title(title, fontsize=14, fontweight='bold')
    plt.xlabel('Month')
    plt.ylabel(ylabel)
    plt.xticks(month_positions, month_labels, rotation=45)

plt.tight_layout()
plt.show()

# Key insights
print(f"\n🔍 Key Insights:")
if monthly_sales.empty:
    # idxmax()/idxmin() raise on an empty frame, so report the situation instead
    print("   No monthly sales rows were returned; nothing to summarise.")
else:
    best_month = monthly_sales.loc[monthly_sales['TotalRevenue'].idxmax()]
    worst_month = monthly_sales.loc[monthly_sales['TotalRevenue'].idxmin()]

    # Rows are ordered by Year, so the first and last rows bound the period
    first_year = monthly_sales['Year'].iloc[0]
    last_year = monthly_sales['Year'].iloc[-1]
    period = str(first_year) if first_year == last_year else f"{first_year}-{last_year}"

    print(f"   📈 Best performing month: {best_month['MonthName']} {best_month['Year']} (${best_month['TotalRevenue']:,.2f})")
    print(f"   📉 Lowest performing month: {worst_month['MonthName']} {worst_month['Year']} (${worst_month['TotalRevenue']:,.2f})")
    # The sum covers every month returned, which may span more than one
    # calendar year, so it is labelled with the period rather than "annual"
    print(f"   📊 Total revenue ({period}): ${monthly_sales['TotalRevenue'].sum():,.2f}")
    print(f"   🛒 Average monthly transactions: {monthly_sales['InvoiceCount'].mean():.0f}")

In [None]:
# =============================================================================
# OLAP QUERY 2: PRODUCT CATEGORY ANALYSIS
# =============================================================================
# Aggregates SalesFact per ProductDim.Category, draws a 2x3 dashboard of the
# category measures and prints the headline categories.

banner = "=" * 60
print("\n" + banner)
print("OLAP ANALYSIS 2: PRODUCT CATEGORY PERFORMANCE")
print(banner)

# Query 2a: Category performance analysis
category_analysis_query = """
SELECT 
    p.Category,
    COUNT(DISTINCT p.ProductKey) as ProductCount,
    COUNT(DISTINCT s.CustomerKey) as UniqueCustomers,
    SUM(s.Quantity) as TotalUnitsSold,
    ROUND(SUM(s.TotalSales), 2) as TotalRevenue,
    ROUND(AVG(s.UnitPrice), 2) as AvgUnitPrice,
    ROUND(AVG(s.TotalSales), 2) as AvgTransactionValue,
    ROUND(SUM(s.TotalSales) * 100.0 / (
        SELECT SUM(TotalSales) FROM SalesFact
    ), 2) as RevenuePercentage
FROM SalesFact s
JOIN ProductDim p ON s.ProductKey = p.ProductKey
GROUP BY p.Category
ORDER BY TotalRevenue DESC;
"""

category_analysis = pd.read_sql_query(category_analysis_query, conn)
print("📊 Product Category Performance:")
print(category_analysis)

# Visualization: one figure, six panels
plt.figure(figsize=(15, 12))
category_names = category_analysis['Category']
heading_style = {'fontsize': 14, 'fontweight': 'bold'}

# Panel 1: share of revenue per category
plt.subplot(2, 3, 1)
plt.pie(category_analysis['TotalRevenue'], labels=category_names, autopct='%1.1f%%')
plt.title('Revenue Distribution by Category', **heading_style)

# Panel 2: units sold, every bar annotated with its integer height
plt.subplot(2, 3, 2)
bars = plt.bar(category_names, category_analysis['TotalUnitsSold'], alpha=0.8)
plt.title('Units Sold by Category', **heading_style)
plt.xlabel('Category')
plt.ylabel('Units Sold')
plt.xticks(rotation=45)
for bar in bars:
    height = bar.get_height()
    label_x = bar.get_x() + bar.get_width()/2.
    plt.text(label_x, height, f'{int(height)}', ha='center', va='bottom')

# Panels 3-6: plain coloured bar charts that differ only in measure and labels
coloured_panels = [
    # (subplot slot, column, title, y-axis label, bar colour)
    (3, 'AvgUnitPrice', 'Average Unit Price by Category', 'Avg Unit Price ($)', 'orange'),
    (4, 'UniqueCustomers', 'Customer Reach by Category', 'Unique Customers', 'green'),
    (5, 'ProductCount', 'Product Diversity by Category', 'Number of Products', 'red'),
    (6, 'RevenuePercentage', 'Revenue Percentage by Category', 'Revenue %', 'purple'),
]
for slot, measure, heading, y_label, colour in coloured_panels:
    plt.subplot(2, 3, slot)
    plt.bar(category_names, category_analysis[measure], alpha=0.8, color=colour)
    plt.title(heading, **heading_style)
    plt.xlabel('Category')
    plt.ylabel(y_label)
    plt.xticks(rotation=45)

plt.tight_layout()
plt.show()

# Top and bottom performing categories (the query sorts by revenue, descending)
top_category = category_analysis.iloc[0]
bottom_category = category_analysis.iloc[-1]

print(f"\n🔍 Category Analysis Insights:")
print(f"   🥇 Top category: {top_category['Category']} (${top_category['TotalRevenue']:,.2f}, {top_category['RevenuePercentage']}%)")
print(f"   🥉 Bottom category: {bottom_category['Category']} (${bottom_category['TotalRevenue']:,.2f}, {bottom_category['RevenuePercentage']}%)")

product_counts = category_analysis['ProductCount']
most_diverse_name = category_analysis.loc[product_counts.idxmax(), 'Category']
print(f"   📦 Most diverse category: {most_diverse_name} ({product_counts.max()} products)")

unit_prices = category_analysis['AvgUnitPrice']
priciest_name = category_analysis.loc[unit_prices.idxmax(), 'Category']
print(f"   💰 Highest avg price category: {priciest_name} (${unit_prices.max():.2f})")

In [None]:
# =============================================================================
# OLAP QUERY 3: CUSTOMER GEOGRAPHIC ANALYSIS
# =============================================================================
# Aggregates SalesFact per CustomerDim.Country, charts the ten highest-revenue
# countries and prints market-level headline figures.

banner = "=" * 60
print("\n" + banner)
print("OLAP ANALYSIS 3: CUSTOMER GEOGRAPHIC ANALYSIS")
print(banner)

# Query 3a: Country-wise customer and sales analysis
geographic_analysis_query = """
SELECT 
    c.Country,
    COUNT(DISTINCT c.CustomerKey) as CustomerCount,
    COUNT(DISTINCT s.InvoiceNo) as InvoiceCount,
    SUM(s.Quantity) as TotalUnitsSold,
    ROUND(SUM(s.TotalSales), 2) as TotalRevenue,
    ROUND(AVG(s.TotalSales), 2) as AvgTransactionValue,
    ROUND(SUM(s.TotalSales) / COUNT(DISTINCT c.CustomerKey), 2) as RevenuePerCustomer,
    ROUND(SUM(s.TotalSales) * 100.0 / (
        SELECT SUM(TotalSales) FROM SalesFact
    ), 2) as RevenuePercentage
FROM CustomerDim c
JOIN SalesFact s ON c.CustomerKey = s.CustomerKey
GROUP BY c.Country
ORDER BY TotalRevenue DESC;
"""

geographic_analysis = pd.read_sql_query(geographic_analysis_query, conn)
print("🌍 Geographic Performance Analysis:")
print(geographic_analysis.head(10))

# Visualization
plt.figure(figsize=(16, 12))
heading_style = {'fontsize': 14, 'fontweight': 'bold'}

# The result is already sorted by revenue, so the first rows are the top markets
top_10_countries = geographic_analysis.head(10)
top_5 = geographic_analysis.head(5)

# Horizontal-bar panels keyed by subplot slot; slot 4 is the pie chart below
barh_panels = {
    # slot: (column, title, x-axis label, extra style arguments)
    1: ('TotalRevenue', 'Top 10 Countries by Revenue', 'Revenue ($)', {}),
    2: ('CustomerCount', 'Top 10 Countries by Customer Count', 'Number of Customers',
        {'color': 'orange'}),
    3: ('RevenuePerCustomer', 'Revenue per Customer by Country', 'Revenue per Customer ($)',
        {'color': 'green'}),
    5: ('AvgTransactionValue', 'Avg Transaction Value by Country', 'Avg Transaction Value ($)',
        {'color': 'red'}),
    6: ('RevenuePercentage', 'Market Share by Country (%)', 'Market Share (%)',
        {'color': 'purple'}),
}

for slot in range(1, 7):
    plt.subplot(2, 3, slot)

    if slot == 4:
        # Revenue distribution: the top 5 countries plus everything else as 'Others'
        others_revenue = geographic_analysis['TotalRevenue'].iloc[5:].sum()
        pie_data = [*top_5['TotalRevenue'], others_revenue]
        pie_labels = [*top_5['Country'], 'Others']
        plt.pie(pie_data, labels=pie_labels, autopct='%1.1f%%')
        plt.title('Revenue Distribution by Country', **heading_style)
        continue

    measure, heading, x_label, bar_style = barh_panels[slot]
    plt.barh(top_10_countries['Country'], top_10_countries[measure], alpha=0.8, **bar_style)
    plt.title(heading, **heading_style)
    plt.xlabel(x_label)
    # Put the largest market at the top of the chart
    plt.gca().invert_yaxis()

plt.tight_layout()
plt.show()

# Geographic insights
top_country = geographic_analysis.iloc[0]
total_countries = len(geographic_analysis)
total_customers = geographic_analysis['CustomerCount'].sum()
total_revenue = geographic_analysis['TotalRevenue'].sum()

print(f"\n🔍 Geographic Analysis Insights:")
print(f"   🌟 Top market: {top_country['Country']} (${top_country['TotalRevenue']:,.2f}, {top_country['RevenuePercentage']}%)")
print(f"   🌍 Total markets: {total_countries} countries")
print(f"   👥 Total customers: {total_customers:,}")
print(f"   💰 Total revenue: ${total_revenue:,.2f}")
print(f"   📊 Avg revenue per country: ${total_revenue/total_countries:,.2f}")

revenue_per_customer_by_country = geographic_analysis['RevenuePerCustomer']
highest_value_country = geographic_analysis.loc[revenue_per_customer_by_country.idxmax(), 'Country']
print(f"   🎯 Highest value customers: {highest_value_country} (${revenue_per_customer_by_country.max():.2f}/customer)")

In [None]:
# =============================================================================
# OLAP QUERY 4: ADVANCED MULTIDIMENSIONAL ANALYSIS
# =============================================================================
# Three views over the star schema:
#   4a  cube       - Category x Country x Quarter/Year combinations
#   4b  drill-down - the highest-revenue category within each country
#   4c  roll-up    - all sales aggregated to Year/Quarter
# followed by a 2x3 dashboard and a printed summary.

print("\n" + "="*60)
print("OLAP ANALYSIS 4: MULTIDIMENSIONAL CUBE ANALYSIS")
print("="*60)

# Query 4a: Category-Country-Quarter analysis (3D cube)
cube_analysis_query = """
SELECT 
    p.Category,
    c.Country,
    t.Quarter,
    t.Year,
    COUNT(DISTINCT s.InvoiceNo) as InvoiceCount,
    SUM(s.Quantity) as TotalQuantity,
    ROUND(SUM(s.TotalSales), 2) as TotalRevenue,
    ROUND(AVG(s.TotalSales), 2) as AvgTransactionValue
FROM SalesFact s
JOIN ProductDim p ON s.ProductKey = p.ProductKey
JOIN CustomerDim c ON s.CustomerKey = c.CustomerKey
JOIN TimeDim t ON s.TimeKey = t.TimeKey
GROUP BY p.Category, c.Country, t.Quarter, t.Year
HAVING TotalRevenue > 1000  -- Filter for significant combinations
ORDER BY TotalRevenue DESC
LIMIT 20;
"""

cube_analysis = pd.read_sql_query(cube_analysis_query, conn)
print("📊 Top 20 Category-Country-Quarter Combinations:")
print(cube_analysis)

# Query 4b: Drill-down analysis - Best performing category per country
# The window ordering has p.Category as a secondary key so that two categories
# with identical revenue in a country always rank the same way; without it the
# "top" category could differ between runs.
drill_down_query = """
WITH CategoryCountryRanking AS (
    SELECT 
        c.Country,
        p.Category,
        ROUND(SUM(s.TotalSales), 2) as CategoryRevenue,
        COUNT(DISTINCT s.CustomerKey) as UniqueCustomers,
        ROW_NUMBER() OVER (
            PARTITION BY c.Country
            ORDER BY SUM(s.TotalSales) DESC, p.Category
        ) as CategoryRank
    FROM SalesFact s
    JOIN ProductDim p ON s.ProductKey = p.ProductKey
    JOIN CustomerDim c ON s.CustomerKey = c.CustomerKey
    GROUP BY c.Country, p.Category
)
SELECT 
    Country,
    Category as TopCategory,
    CategoryRevenue,
    UniqueCustomers
FROM CategoryCountryRanking
WHERE CategoryRank = 1
ORDER BY CategoryRevenue DESC;
"""

drill_down = pd.read_sql_query(drill_down_query, conn)
print("\n🎯 Best Performing Category per Country:")
print(drill_down.head(15))

# Query 4c: Roll-up analysis - Quarterly performance summary
rollup_query = """
SELECT 
    t.Year,
    t.Quarter,
    COUNT(DISTINCT c.Country) as CountriesActive,
    COUNT(DISTINCT p.Category) as CategoriesActive,
    COUNT(DISTINCT s.CustomerKey) as UniqueCustomers,
    COUNT(DISTINCT s.InvoiceNo) as TotalInvoices,
    SUM(s.Quantity) as TotalQuantity,
    ROUND(SUM(s.TotalSales), 2) as TotalRevenue,
    ROUND(AVG(s.TotalSales), 2) as AvgTransactionValue
FROM SalesFact s
JOIN ProductDim p ON s.ProductKey = p.ProductKey
JOIN CustomerDim c ON s.CustomerKey = c.CustomerKey
JOIN TimeDim t ON s.TimeKey = t.TimeKey
GROUP BY t.Year, t.Quarter
ORDER BY t.Year, t.Quarter;
"""

rollup_analysis = pd.read_sql_query(rollup_query, conn)
print("\n📈 Quarterly Performance Roll-up:")
print(rollup_analysis)

# Visualization for multidimensional analysis
plt.figure(figsize=(16, 10))

# Shared x-axis for the quarterly panels: one tick per roll-up row, labelled
# with both quarter and year
rollup_analysis['Quarter_Label'] = 'Q' + rollup_analysis['Quarter'].astype(str) + ' ' + rollup_analysis['Year'].astype(str)
quarter_positions = range(len(rollup_analysis))

# Quarterly revenue trend
plt.subplot(2, 3, 1)
plt.plot(quarter_positions, rollup_analysis['TotalRevenue'], marker='o', linewidth=3)
plt.title('Quarterly Revenue Trend', fontsize=14, fontweight='bold')
plt.xlabel('Quarter')
plt.ylabel('Revenue ($)')
plt.xticks(quarter_positions, rollup_analysis['Quarter_Label'], rotation=45)
plt.grid(True, alpha=0.3)

# Countries active per quarter
plt.subplot(2, 3, 2)
plt.bar(quarter_positions, rollup_analysis['CountriesActive'], alpha=0.8, color='orange')
plt.title('Active Countries per Quarter', fontsize=14, fontweight='bold')
plt.xlabel('Quarter')
plt.ylabel('Number of Countries')
plt.xticks(quarter_positions, rollup_analysis['Quarter_Label'], rotation=45)

# Revenue of each country's top category (ten largest)
plt.subplot(2, 3, 3)
top_categories = drill_down.head(10)
plt.barh(top_categories['Country'], top_categories['CategoryRevenue'], alpha=0.8, color='green')
plt.title('Top Category Revenue by Country', fontsize=14, fontweight='bold')
plt.xlabel('Revenue ($)')
plt.gca().invert_yaxis()

# Customer engagement per quarter
plt.subplot(2, 3, 4)
plt.plot(quarter_positions, rollup_analysis['UniqueCustomers'],
         marker='s', color='red', linewidth=3)
plt.title('Customer Engagement per Quarter', fontsize=14, fontweight='bold')
plt.xlabel('Quarter')
plt.ylabel('Unique Customers')
plt.xticks(quarter_positions, rollup_analysis['Quarter_Label'], rotation=45)
plt.grid(True, alpha=0.3)

# Average transaction value per quarter
plt.subplot(2, 3, 5)
plt.bar(quarter_positions, rollup_analysis['AvgTransactionValue'],
        alpha=0.8, color='purple')
plt.title('Avg Transaction Value per Quarter', fontsize=14, fontweight='bold')
plt.xlabel('Quarter')
plt.ylabel('Avg Transaction Value ($)')
plt.xticks(quarter_positions, rollup_analysis['Quarter_Label'], rotation=45)

# How often each category is a country's top category (seven most frequent)
plt.subplot(2, 3, 6)
category_diversity = drill_down['TopCategory'].value_counts().head(7)
plt.pie(category_diversity.values, labels=category_diversity.index, autopct='%1.1f%%')
plt.title('Top Category Distribution Across Countries', fontsize=14, fontweight='bold')

plt.tight_layout()
plt.show()

# Multidimensional insights
print(f"\n🔍 Multidimensional Analysis Insights:")
if rollup_analysis.empty:
    # idxmax() raises on an empty frame; an empty roll-up also implies an
    # empty cube because both queries use the same joins
    print("   No sales rows were returned; nothing to summarise.")
else:
    best_quarter = rollup_analysis.loc[rollup_analysis['TotalRevenue'].idxmax()]
    most_diverse_quarter = rollup_analysis.loc[rollup_analysis['CategoriesActive'].idxmax()]
    print(f"   🏆 Best quarter: Q{best_quarter['Quarter']} {best_quarter['Year']} (${best_quarter['TotalRevenue']:,.2f})")
    print(f"   🌟 Most diverse quarter: Q{most_diverse_quarter['Quarter']} {most_diverse_quarter['Year']} ({most_diverse_quarter['CategoriesActive']} categories)")

    if cube_analysis.empty:
        # The HAVING filter can remove every combination on small data sets
        print("   🎯 Top combination: none exceeded the 1000 revenue filter")
    else:
        top_combo = cube_analysis.iloc[0]
        # The cube is grouped by Year too, so the quarter alone is ambiguous
        print(f"   🎯 Top combination: {top_combo['Category']} in {top_combo['Country']} Q{top_combo['Quarter']} {top_combo['Year']} (${top_combo['TotalRevenue']:,.2f})")

    print(f"   📊 Avg quarterly revenue: ${rollup_analysis['TotalRevenue'].mean():,.2f}")

    # Locate the widest-reach quarter once and reuse the row label
    widest_reach_idx = rollup_analysis['CountriesActive'].idxmax()
    print(f"   🌍 Most global reach: Q{rollup_analysis.loc[widest_reach_idx, 'Quarter']} {rollup_analysis.loc[widest_reach_idx, 'Year']} ({rollup_analysis.loc[widest_reach_idx, 'CountriesActive']} countries)")

In [None]:
# =============================================================================
# OLAP QUERY 5: BUSINESS INTELLIGENCE SUMMARY
# =============================================================================
# Computes warehouse-wide totals, per-customer and per-product revenue
# statistics and the average basket breadth, prints them together with a fixed
# list of recommendations, and finally closes the shared connection.

print("\n" + "="*70)
print("BUSINESS INTELLIGENCE SUMMARY & RECOMMENDATIONS")
print("="*70)

# Summary statistics query
summary_query = """
SELECT 
    -- Overall business metrics
    COUNT(DISTINCT s.CustomerKey) as TotalCustomers,
    COUNT(DISTINCT s.InvoiceNo) as TotalInvoices,
    COUNT(DISTINCT p.ProductKey) as TotalProducts,
    COUNT(DISTINCT c.Country) as TotalCountries,
    COUNT(DISTINCT p.Category) as TotalCategories,

    -- Financial metrics
    ROUND(SUM(s.TotalSales), 2) as TotalRevenue,
    ROUND(AVG(s.TotalSales), 2) as AvgTransactionValue,
    ROUND(MIN(s.TotalSales), 2) as MinTransactionValue,
    ROUND(MAX(s.TotalSales), 2) as MaxTransactionValue,

    -- Operational metrics
    SUM(s.Quantity) as TotalUnitsSold,
    ROUND(AVG(s.Quantity), 2) as AvgQuantityPerTransaction,
    ROUND(AVG(s.UnitPrice), 2) as AvgUnitPrice

FROM SalesFact s
JOIN ProductDim p ON s.ProductKey = p.ProductKey
JOIN CustomerDim c ON s.CustomerKey = c.CustomerKey;
"""

# NOTE: every column of this one-row frame is numeric, so .iloc[0] yields a
# float64 Series and the integer counts become floats (e.g. 4338 -> 4338.0).
# The count fields are therefore printed with a ".0f" format below.
summary_stats = pd.read_sql_query(summary_query, conn).iloc[0]

# Customer lifetime value analysis
customer_value_query = """
SELECT 
    AVG(CustomerRevenue) as AvgCustomerLifetimeValue,
    MIN(CustomerRevenue) as MinCustomerValue,
    MAX(CustomerRevenue) as MaxCustomerValue,
    COUNT(*) as TotalCustomers
FROM (
    SELECT 
        s.CustomerKey,
        SUM(s.TotalSales) as CustomerRevenue
    FROM SalesFact s
    GROUP BY s.CustomerKey
) customer_summary;
"""

customer_value = pd.read_sql_query(customer_value_query, conn).iloc[0]

# Product performance metrics
product_performance_query = """
SELECT 
    AVG(ProductRevenue) as AvgProductRevenue,
    MIN(ProductRevenue) as MinProductRevenue,
    MAX(ProductRevenue) as MaxProductRevenue,
    COUNT(*) as TotalProducts
FROM (
    SELECT 
        s.ProductKey,
        SUM(s.TotalSales) as ProductRevenue
    FROM SalesFact s
    GROUP BY s.ProductKey
) product_summary;
"""

product_performance = pd.read_sql_query(product_performance_query, conn).iloc[0]

# Basket breadth: average number of distinct products on an invoice.
# Dividing the warehouse-wide distinct product count by the invoice count (as
# a plain ratio of the summary totals would) does not measure this, so it is
# computed per invoice and then averaged.
basket_query = """
SELECT 
    AVG(ProductCount) as AvgProductsPerInvoice
FROM (
    SELECT 
        s.InvoiceNo,
        COUNT(DISTINCT s.ProductKey) as ProductCount
    FROM SalesFact s
    GROUP BY s.InvoiceNo
) invoice_summary;
"""

basket_stats = pd.read_sql_query(basket_query, conn).iloc[0]

# Display comprehensive business summary
print("\n📊 COMPREHENSIVE BUSINESS METRICS")
print("=" * 50)
print(f"📈 Business Scale:")
print(f"   • Total Customers: {summary_stats['TotalCustomers']:,.0f}")
print(f"   • Total Transactions: {summary_stats['TotalInvoices']:,.0f}")
print(f"   • Total Products: {summary_stats['TotalProducts']:,.0f}")
print(f"   • Geographic Reach: {summary_stats['TotalCountries']:.0f} countries")
print(f"   • Product Categories: {summary_stats['TotalCategories']:.0f}")

print(f"\n💰 Financial Performance:")
print(f"   • Total Revenue: ${summary_stats['TotalRevenue']:,.2f}")
print(f"   • Average Transaction Value: ${summary_stats['AvgTransactionValue']:,.2f}")
print(f"   • Transaction Range: ${summary_stats['MinTransactionValue']:,.2f} - ${summary_stats['MaxTransactionValue']:,.2f}")
print(f"   • Customer Lifetime Value: ${customer_value['AvgCustomerLifetimeValue']:,.2f}")
print(f"   • Customer Value Range: ${customer_value['MinCustomerValue']:,.2f} - ${customer_value['MaxCustomerValue']:,.2f}")

print(f"\n📦 Operational Metrics:")
print(f"   • Total Units Sold: {summary_stats['TotalUnitsSold']:,.0f}")
print(f"   • Average Units per Transaction: {summary_stats['AvgQuantityPerTransaction']:.1f}")
print(f"   • Average Unit Price: ${summary_stats['AvgUnitPrice']:,.2f}")
print(f"   • Average Product Revenue: ${product_performance['AvgProductRevenue']:,.2f}")
print(f"   • Product Revenue Range: ${product_performance['MinProductRevenue']:,.2f} - ${product_performance['MaxProductRevenue']:,.2f}")

# Calculate key ratios
revenue_per_customer = summary_stats['TotalRevenue'] / summary_stats['TotalCustomers']
transactions_per_customer = summary_stats['TotalInvoices'] / summary_stats['TotalCustomers']
products_per_transaction = basket_stats['AvgProductsPerInvoice']

print(f"\n📊 Key Business Ratios:")
print(f"   • Revenue per Customer: ${revenue_per_customer:.2f}")
print(f"   • Transactions per Customer: {transactions_per_customer:.1f}")
print(f"   • Products per Transaction: {products_per_transaction:.1f}")

# The recommendations below are fixed text; they are not derived from the
# query results above.
print(f"\n🎯 STRATEGIC RECOMMENDATIONS")
print("=" * 50)
print("1. 🚀 Growth Opportunities:")
print("   • Focus on expanding in top-performing geographic markets")
print("   • Increase product diversity in high-revenue categories")
print("   • Target customer acquisition in Q2-Q3 for seasonal boost")

print("\n2. 💡 Optimization Areas:")
print("   • Improve average transaction value through cross-selling")
print("   • Enhance customer retention in lower-performing regions")
print("   • Optimize inventory for seasonal demand patterns")

print("\n3. 📈 Performance Monitoring:")
print("   • Track monthly revenue trends for early warning signals")
print("   • Monitor customer lifetime value progression")
print("   • Analyze category performance for portfolio optimization")

print("\n" + "="*70)
print("✅ OLAP ANALYSIS COMPLETED SUCCESSFULLY")
print("Data warehouse provides comprehensive business intelligence capabilities")
print("All OLAP operations (drill-down, roll-up, slice, dice) demonstrated")
print("="*70)

# Close database connection
conn.close()
print("\n🔐 Database connection closed.")