Create a report that shows the total number of sales and the total revenue for each state.

In [0]:
# Per-state sales report: one row per customer state/province, showing how many
# fact rows carry an order ID and the summed revenue, highest revenue first.
#
# NOTE(review): the captured output has a row with a blank state and lists both
# 'Indiana' and 'IN'. The blank row presumably comes from orders with no
# matching customer or no state on file, and the two spellings are grouped
# separately -- confirm whether the report should label/normalize these.
# NOTE(review): COUNT(f.order_id) counts fact rows. If fact_orders has one row
# per order line rather than per order, COUNT(DISTINCT f.order_id) may be what
# "number of sales" means -- confirm the table grain.
spark.sql("""
SELECT
    c.cust_address_state_province AS states,
    -- Number of sales: COUNT(col) skips NULLs, so only fact rows with an order_id are counted
    COUNT(f.order_id) AS total_amount_of_sales,
    -- Revenue per fact row is unit_price * quantity; the sum is rounded to two decimals.
    -- Rows where either factor is NULL add nothing, so a state can end up with NULL revenue.
    ROUND(SUM(f.unit_price * f.quantity), 2) AS total_revenue
FROM stoyan.fact_orders AS f
-- LEFT JOIN keeps orders without a matching customer; they land in the NULL-state group
LEFT JOIN stoyan.dim_customers AS c
    ON f.cust_nk = c.cust_nk
GROUP BY
    c.cust_address_state_province
ORDER BY
    -- NULLS LAST is spelled out so states with no computable revenue stay at the bottom
    total_revenue DESC NULLS LAST,
    -- Tie-breaker: states with equal revenue always come back in the same order
    states ASC
""").display()

states,total_amount_of_sales,total_revenue
WI,16,110529.4
Indiana,3,89409.2
,28,27530.7
IN,4,14080.0
Colorado,3,13443.2
Kansas,3,6237.0
IA,3,5252.0
South Carolina,3,5252.0
Michigan,3,1440.0
North Dakota,1,
