In [0]:
spark.sql("""
WITH grouped_customers AS (
    SELECT
        c.cust_sk,                                                              -- Customer surrogate key
        c.cust_nk,                                                              -- Customer natural key
        c.cust_first_name,                                                      -- Customer's first name
        c.cust_last_name,                                                       -- Customer's last name
        c.cust_address_state_province,                                          -- Customer's state or province
        c.date_of_birth,                                                        -- Customer's date of birth
        CASE
            WHEN YEAR(CURRENT_DATE) - YEAR(c.date_of_birth) <= 30 THEN '0-30'   -- Age group 0-30 years
            WHEN YEAR(CURRENT_DATE) - YEAR(c.date_of_birth) <= 60 THEN '30-60'  -- Age group 30-60 years
            ELSE '60+'                                                          -- Age group 60+ years
        END AS age_group                                                        -- Determining the age group
    FROM
        stoyan.dim_customers c
    WHERE
        c.row_num = 1                                                           -- Only the most recent row for each customer
        AND  c.cust_address_state_province = 'NY'                               -- Filter customers from New York
),
 sales_by_categ AS (
    SELECT
        f.cust_sk,                                                              -- Customer surrogate key
        f.cust_nk,                                                              -- Customer natural key
        f.order_id,                                                             -- Order identifier
        f.order_date,                                                           -- Date of the order
        f.prod_nk,                                                              -- Product natural key
        p.category_name,                                                        -- Product category name
        f.unit_price * f.quantity AS sales_amount,                              -- Calculating the sales amount
        c.age_group                                                             -- Customer's age group 
    FROM
        stoyan.fact_orders f
    LEFT JOIN
        grouped_customers c ON f.cust_sk = c.cust_sk                            -- Join on customer surrogate key
    LEFT JOIN
        stoyan.dim_products p ON f.prod_sk = p.prod_sk AND row_num = 1          -- Join on product surrogate key, ensuring the most recent product info
)
SELECT
    category_name,-- Product category
    DATE_FORMAT(order_date, 'yyyy-MM') AS month,                                -- Extracting year-month format from order date
    age_group,                                                                  -- Customer's age group
    ROUND(SUM(sales_amount),2) AS total_sales                                   -- Total sales amount 
FROM
    sales_by_categ                                                              -- Data from the sales_by_categ CTE
GROUP BY
    category_name,                                                              -- Grouping by product category
    month,                                                                      -- Grouping by month
    age_group                                                                   -- Grouping by age group
ORDER BY
    category_name DESC,                                                         -- Sorting by product category 
    month ASC,                                                                  -- Sorting by month
    age_group                                                                   -- Sorting by age group
    
""").display()

category_name,month,age_group,total_sales
hardware4,2007-03,,6644.0
hardware4,2007-08,,1440.0
hardware2,2007-05,,11233.2
hardware2,2007-06,,2795.1
hardware1,2006-01,,12012.0
hardware1,2007-09,,79741.2
,2006-01,,3572.0
,2006-02,,
,2007-02,,1684.0
,2007-05,,465.0
