# Task 3: OLAP Queries and Analysis

This notebook runs three OLAP-style SQL queries against the retail data warehouse (`retail_dw.db`) created in Task 2 and logs each step to `task3_olap.log` and the console. The roll-up query result (total sales by country) is visualized as a bar chart and saved as `task3_sales_by_country.png`. Together, the queries support analysis of sales trends, customer behavior, and product performance.

**Queries**:
1. Roll-up: Total sales by country and quarter.
2. Drill-down: Sales details for the UK by month.
3. Slice: Total sales for electronics (inferred from Description keywords).

In [11]:
# Imports and Setup
import logging
import os
import sqlite3

import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns

# Route log records to both the task3_olap.log file and the console
logging.basicConfig(
    handlers=[logging.FileHandler('task3_olap.log'), logging.StreamHandler()],
    format='%(asctime)s - %(levelname)s - %(message)s',
    level=logging.INFO,
)
logger = logging.getLogger(__name__)

# Plot styling: matplotlib's 'default' style sheet first, then seaborn's "husl" palette
# (keep this order; the style sheet is applied before the palette is set)
plt.style.use('default')
sns.set_palette("husl")

def execute_query(db_path, query, query_name):
    """Run a SQL query against a SQLite database file and return the result as a DataFrame.

    Args:
        db_path: Path to the SQLite database file. It must already exist.
        query: SQL text handed to ``pd.read_sql_query``.
        query_name: Human-readable label used in the log messages.

    Returns:
        A pandas DataFrame holding the query result, or ``None`` when the
        database file is missing or any error occurs (the error is logged,
        never raised).
    """
    conn = None
    try:
        # Check up front: sqlite3.connect() would otherwise be asked to open a
        # path that is not there instead of reporting the missing warehouse.
        if not os.path.exists(db_path):
            logger.error(f"Database file {db_path} does not exist!")
            return None

        conn = sqlite3.connect(db_path)
        # `with conn` only scopes the transaction (commit on success, rollback
        # on error); it does NOT close the connection, hence the `finally`.
        with conn:
            logger.info(f"Executing {query_name} query...")
            df = pd.read_sql_query(query, conn)
            logger.info(f"{query_name} query executed successfully. Returned {len(df)} rows.")
            return df

    except (sqlite3.Error, pd.errors.DatabaseError) as e:
        # pandas wraps driver failures (e.g. bad SQL) in its own DatabaseError,
        # which is not a sqlite3.Error subclass, so both are caught here.
        logger.error(f"Database error in {query_name} query: {e}")
        return None
    except Exception as e:
        logger.error(f"Unexpected error in {query_name} query: {e}")
        return None
    finally:
        # Release the database handle on every exit path.
        if conn is not None:
            conn.close()

In [3]:
# Execute OLAP queries
db_path = 'retail_dw.db'


def _run_and_preview(sql, label):
    """Run one OLAP query through execute_query and log its first five rows on success.

    Returns whatever execute_query returns (a DataFrame, or None on failure).
    """
    frame = execute_query(db_path, sql, label)
    if frame is not None:
        logger.info(f"{label} query result (first 5 rows):\n{frame.head().to_string()}")
    return frame


# Query 1: Roll-up - sales and distinct-invoice totals per country, quarter and year
query1 = """
SELECT 
    s.Country,
    t.Quarter,
    t.Year,
    SUM(s.TotalSales) as TotalSales,
    COUNT(DISTINCT s.InvoiceNo) as TotalInvoices
FROM SalesFact s
JOIN TimeDim t ON s.DateKey = t.DateKey
GROUP BY s.Country, t.Quarter, t.Year
ORDER BY t.Year, t.Quarter, s.Country;
"""
df_rollup = _run_and_preview(query1, "Roll-up")

# Query 2: Drill-down - line-level UK sales ordered by year, month and invoice
query2 = """
SELECT 
    t.Year,
    t.Month,
    s.InvoiceNo,
    s.StockCode,
    s.Description,
    s.Quantity,
    s.UnitPrice,
    s.TotalSales
FROM SalesFact s
JOIN TimeDim t ON s.DateKey = t.DateKey
WHERE s.Country = 'UK'
ORDER BY t.Year, t.Month, s.InvoiceNo;
"""
df_drilldown = _run_and_preview(query2, "Drill-down")

# Query 3: Slice - electronics sales per year and quarter
# SalesFact has no category column, so "electronics" is approximated by
# keyword matches on Description.
query3 = """
SELECT 
    t.Year,
    t.Quarter,
    SUM(s.TotalSales) as TotalElectronicsSales,
    COUNT(DISTINCT s.InvoiceNo) as TotalInvoices
FROM SalesFact s
JOIN TimeDim t ON s.DateKey = t.DateKey
WHERE s.Description LIKE '%device%' 
   OR s.Description LIKE '%electronic%' 
   OR s.Description LIKE '%gadget%'
GROUP BY t.Year, t.Quarter
ORDER BY t.Year, t.Quarter;
"""
df_slice = _run_and_preview(query3, "Slice")

2025-08-14 16:00:13,132 - INFO - Executing Roll-up query...
2025-08-14 16:00:13,151 - INFO - Roll-up query executed successfully. Returned 25 rows.
2025-08-14 16:00:13,160 - INFO - Roll-up query result (first 5 rows):
   Country  Quarter  Year  TotalSales  TotalInvoices
0   France        3  2024    15701.71             10
1  Germany        3  2024     9427.30              9
2    Japan        3  2024    20189.47             17
3       UK        3  2024    12930.46             10
4      USA        3  2024    11853.68              7
2025-08-14 16:00:13,162 - INFO - Executing Drill-down query...
2025-08-14 16:00:13,165 - INFO - Drill-down query executed successfully. Returned 99 rows.
2025-08-14 16:00:13,168 - INFO - Drill-down query result (first 5 rows):
   Year  Month  InvoiceNo   StockCode                      Description  Quantity  UnitPrice  TotalSales
0  2024      8  INV-25343  SKU-767519       Assimilated 24hour support        24       1.70       40.80
1  2024      8  INV-47498  SK