
Create a report that shows the top 3 customers in each state, ranked by  
total amount spent, together with the most expensive item each of them bought.


In [0]:
# Report: the top 3 customers in each state by total spend, together with the
# highest unit price each of them paid. The query result is passed to .display().
spark.sql("""
WITH cust_spend AS (
    -- One row per (state, customer): total spend and highest unit price paid.
    SELECT
        c.cust_address_state_province AS state,
        f.cust_nk,
        c.cust_first_name,
        c.cust_last_name,
        ROUND(SUM(f.unit_price * f.quantity), 2) AS total_spent,
        MAX(f.unit_price) AS most_exp_item       -- a price, not the item's identity
    FROM
        stoyan.fact_orders AS f
    INNER JOIN
        stoyan.dim_customers AS c
        ON f.cust_sk = c.cust_sk
        -- NOTE(review): row_num is unqualified; presumably it is a dim_customers
        -- column flagging the latest customer version. Confirm, then qualify it.
        AND row_num = 1
    GROUP BY
        c.cust_address_state_province,
        f.cust_nk,
        c.cust_first_name,
        c.cust_last_name
    -- No ORDER BY here: row order inside a CTE does not carry over to the final
    -- result, which is sorted explicitly by the outer query.
),
customers_rank AS (
    -- Position of each customer within their state, highest spender first.
    SELECT
        state,
        cust_nk,
        cust_first_name,
        cust_last_name,
        total_spent,
        most_exp_item,
        ROW_NUMBER() OVER (
            PARTITION BY state
            -- NULLS LAST keeps customers with unknown spend behind everyone else;
            -- cust_nk breaks ties so the same customers are picked on every run.
            ORDER BY total_spent DESC NULLS LAST, cust_nk
        ) AS spend_rank
    FROM
        cust_spend
)
SELECT
    state,
    cust_nk,
    cust_first_name,
    cust_last_name,
    total_spent,
    most_exp_item
FROM
    customers_rank
WHERE
    spend_rank <= 3          -- keep only the top 3 customers of each state
ORDER BY
    state,
    spend_rank               -- unique within a state, so no further sort key is needed
""").display()

state,cust_nk,cust_first_name,cust_last_name,total_spent,most_exp_item
IA,144,Sivaji,Landis,5252.0,74.0
IN,102,Harrison,Pacino,14080.0,880.0
IN,101,Constantin,Welles,,
WI,149,Markus,Rampling,89409.2,949.3
WI,146,Elia,Fawcett,13443.2,488.4
WI,145,Mammutti,Pacino,6237.0,115.5
