-- In [0]:
-- Exploratory Data Analysis (EDA) on Bronze

-- Sample up to 10 rows of bronze_products (no ORDER BY, so which rows come back is arbitrary)
SELECT *
FROM bronze_products
LIMIT 10;

-- Sample up to 10 rows of bronze_orders (no ORDER BY, so which rows come back is arbitrary)
SELECT *
FROM bronze_orders
LIMIT 10;

-- Data-quality probe on bronze_orders: total row count plus the number of
-- NULLs in product_id, order_date and sales_rep.
-- Each `IS NULL` test yields a boolean; casting it to INT gives 1/0 to sum.
SELECT
  COUNT(*)                             AS total_rows,
  SUM(CAST(product_id IS NULL AS INT)) AS null_product_id,
  SUM(CAST(order_date IS NULL AS INT)) AS null_order_date,
  SUM(CAST(sales_rep  IS NULL AS INT)) AS null_sales_rep
FROM bronze_orders;

-- How many bronze orders fall under each status value
SELECT
  bo.status,
  COUNT(*) AS count
FROM bronze_orders AS bo
GROUP BY bo.status;

-- Preview of orders enriched with product name and category.
-- LEFT JOIN keeps orders whose product_id has no match in bronze_products
-- (their product columns come back NULL).
SELECT
  ord.order_id,
  ord.order_date,
  prod.product_name,
  prod.category,
  ord.quantity,
  ord.unit_price,
  ord.status,
  ord.sales_rep
FROM bronze_orders AS ord
LEFT JOIN bronze_products AS prod
  ON prod.product_id = ord.product_id
LIMIT 15;

-- Silver: clean and transform.
-- Builds silver_orders_products: one row per bronze order row that has a
-- matching product, with typed columns and derived date parts / total_value.
--
-- Casting happens once, in typed_orders, so every derived column
-- (order_day/month/year, total_value) is computed from the same typed values
-- that are stored in order_date, quantity and unit_price.
CREATE OR REPLACE TABLE silver_orders_products AS
WITH typed_orders AS (
  -- Bronze orders with order_date, quantity and unit_price cast to their target types
  SELECT
    order_id,
    customer_id,
    product_id,
    CAST(order_date AS DATE)   AS order_date,
    CAST(quantity AS INT)      AS quantity,
    CAST(unit_price AS DOUBLE) AS unit_price,
    status,
    sales_rep
  FROM bronze_orders
)
SELECT
  o.order_id,
  o.customer_id,
  o.product_id,
  p.product_name,
  p.category,
  p.subcategory,
  o.order_date,
  DAY(o.order_date)   AS order_day,
  MONTH(o.order_date) AS order_month,
  YEAR(o.order_date)  AS order_year,
  o.quantity,
  o.unit_price,
  o.status,
  o.sales_rep,
  o.quantity * o.unit_price AS total_value
FROM typed_orders AS o
-- The inner-join equality already rejects rows with a NULL product_id,
-- so no separate product_id IS NOT NULL filter is needed.
INNER JOIN bronze_products AS p
  ON o.product_id = p.product_id
-- Filter on the CAST result, not the raw value: this drops both NULL inputs and
-- values that cast to NULL (e.g. unparseable strings when ANSI mode is off).
WHERE o.order_date IS NOT NULL;


-- Sample up to 10 rows of the freshly built silver table.
-- The trailing semicolon keeps this statement from running into the next query.
SELECT * FROM silver_orders_products LIMIT 10;


-- Analysis queries (using silver_orders_products )

-- Silver rows for the selected category whose order_date lies in the
-- inclusive [:start_date, :end_date] range.
SELECT *
FROM silver_orders_products AS sop
WHERE sop.order_date >= CAST(:start_date AS DATE)
  AND sop.order_date <= CAST(:end_date AS DATE)
  AND sop.category = :selected_category;


-- Total revenue per sales rep for the selected category and date range,
-- highest revenue first.
WITH scoped_orders AS (
  -- Silver rows restricted to the selected category and inclusive date range
  SELECT *
  FROM silver_orders_products
  WHERE order_date >= CAST(:start_date AS DATE)
    AND order_date <= CAST(:end_date AS DATE)
    AND category = :selected_category
)
SELECT
  so.sales_rep,
  so.category,
  SUM(so.total_value) AS total_revenue
FROM scoped_orders AS so
GROUP BY
  so.sales_rep,
  so.category
ORDER BY total_revenue DESC;


/* Pivot example: order-status counts per sales rep.
   A pivot rotates row values into columns: each listed status value becomes
   its own column holding the aggregated count for that sales rep.
   COALESCE replaces a NULL count with 0. */
WITH rep_status AS (
  -- One (sales_rep, status) pair per silver row inside the inclusive date range
  SELECT
    sales_rep,
    status
  FROM silver_orders_products
  WHERE order_date >= CAST(:start_date AS DATE)
    AND order_date <= CAST(:end_date AS DATE)
)
SELECT
  sales_rep,
  COALESCE(`Shipped`, 0)   AS shipped_count,
  COALESCE(`Pending`, 0)   AS pending_count,
  COALESCE(`Cancelled`, 0) AS canceled_count
FROM rep_status
PIVOT (
  COUNT(status)
  FOR status IN ('Shipped', 'Pending', 'Cancelled')
)
ORDER BY sales_rep;


/* Window function: rank products by revenue within their category.
   A window function computes a value across a set of rows related to the
   current row without collapsing them, so every product row stays visible
   alongside its rank. RANK() leaves gaps after ties. */
WITH scoped_orders AS (
  -- Silver rows restricted to the selected category and inclusive date range
  SELECT *
  FROM silver_orders_products
  WHERE order_date >= CAST(:start_date AS DATE)
    AND order_date <= CAST(:end_date AS DATE)
    AND category = :selected_category
),
product_revenue AS (
  -- Revenue per product within each category
  SELECT
    category,
    product_name,
    SUM(total_value) AS revenue
  FROM scoped_orders
  GROUP BY
    category,
    product_name
)
SELECT
  category,
  product_name,
  revenue,
  RANK() OVER (PARTITION BY category ORDER BY revenue DESC) AS rank_in_category
FROM product_revenue
ORDER BY
  category,
  rank_in_category;


-- Full, unfiltered dump of the silver table (no LIMIT)
SELECT *
FROM silver_orders_products;


-- Gold: summary statistics per (category, sales_rep).
--
-- order_count is a count of distinct orders, so the avg/min/max "order value"
-- columns must be measured at the same grain. Silver rows are first collapsed
-- to one total per (category, sales_rep, order_id); if order_id is already
-- unique per silver row this yields the same numbers as aggregating rows directly.
CREATE OR REPLACE TABLE sales_summary_gold AS
WITH order_totals AS (
  -- One row per order within each category / sales rep
  SELECT
    category,
    sales_rep,
    order_id,
    SUM(total_value) AS order_value
  FROM silver_orders_products
  GROUP BY category, sales_rep, order_id
)
SELECT
  category,
  sales_rep,
  -- order_id is distinct per group here; COUNT skips a NULL order_id,
  -- matching what COUNT(DISTINCT order_id) would report
  COUNT(order_id) AS order_count,
  SUM(order_value) AS total_sales,
  ROUND(AVG(order_value), 2) AS avg_order_value,
  MIN(order_value) AS min_order_value,
  MAX(order_value) AS max_order_value
FROM order_totals
GROUP BY category, sales_rep
-- NOTE(review): this sort applies to the write only; readers that need an
-- order should still ORDER BY when querying the table.
ORDER BY category, total_sales DESC;

-- visualisations

-- Total sales per category, largest first (feeds a visualization)
SELECT
  sop.category,
  SUM(sop.total_value) AS total_sales
FROM silver_orders_products AS sop
GROUP BY sop.category
ORDER BY total_sales DESC;


-- Order status distribution

-- Number of silver rows per (sales_rep, status), listed by sales rep
SELECT
  sop.sales_rep,
  sop.status,
  COUNT(*) AS order_count
FROM silver_orders_products AS sop
GROUP BY
  sop.sales_rep,
  sop.status
ORDER BY sop.sales_rep;








-- Databricks visualization. Run in Databricks to view.

-- Databricks visualization. Run in Databricks to view.