# 🧪 TPC-DS Benchmark Queries 1–5
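Runs on Databricks SQL with Unity Catalog; the `specify_catalog_schema` setup notebook (run in the first cell) is expected to select the catalog and schema.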

In [0]:
%run ./specify_catalog_schema

In [0]:
%sql
-- Query 1: Top Customers by Return Amount
-- TPC-DS Query 1
--
-- customer_total_return sums sr_return_amt per (customer, store) for returns
-- whose return date falls in d_year = 2000. The final SELECT keeps rows for
-- stores in state 'TN' whose total is strictly greater than 1.2x the average
-- total of the same store, and returns the first 100 customer ids in
-- ascending order.
WITH customer_total_return AS (
    SELECT
        sr_customer_sk AS ctr_customer_sk,
        sr_store_sk AS ctr_store_sk,
        SUM(sr_return_amt) AS ctr_total_return
    FROM store_returns
    INNER JOIN date_dim
        ON sr_returned_date_sk = d_date_sk
    WHERE d_year = 2000
    GROUP BY
        sr_customer_sk,
        sr_store_sk
)
SELECT c_customer_id
FROM customer_total_return AS cust_ret
INNER JOIN store
    ON s_store_sk = cust_ret.ctr_store_sk
INNER JOIN customer
    ON cust_ret.ctr_customer_sk = c_customer_sk
WHERE s_state = 'TN'
  -- Correlated threshold: 120% of the average per-customer total at this store.
  AND cust_ret.ctr_total_return > (
        SELECT AVG(store_ret.ctr_total_return) * 1.2
        FROM customer_total_return AS store_ret
        WHERE store_ret.ctr_store_sk = cust_ret.ctr_store_sk
    )
ORDER BY c_customer_id
LIMIT 100;

In [0]:
%sql
-- Query 2: Revenue Ratio by Item Class
-- TPC-DS Query 2
-- NOTE(review): the shape of this query (store_sales revenue share within
-- i_class) looks like the TPC-DS Q98 template rather than Q2 — confirm the label.
--
-- For store sales dated in d_year = 1999 and items in the three listed
-- categories, reports each item's revenue and that revenue as a percentage
-- of the total revenue of its i_class (window over the grouped rows).
-- i_item_id is a grouping and sort key but is not part of the output.
SELECT
    i_item_desc,
    i_category,
    i_class,
    i_current_price,
    SUM(ss_ext_sales_price) AS itemrevenue,
    SUM(ss_ext_sales_price) * 100
        / SUM(SUM(ss_ext_sales_price)) OVER (PARTITION BY i_class) AS revenueratio
FROM store_sales
INNER JOIN item
    ON ss_item_sk = i_item_sk
INNER JOIN date_dim
    ON ss_sold_date_sk = d_date_sk
WHERE d_year = 1999
  AND i_category IN ('Books', 'Children', 'Electronics')
GROUP BY
    i_item_id,
    i_item_desc,
    i_category,
    i_class,
    i_current_price
ORDER BY
    i_category,
    i_class,
    i_item_id,
    i_item_desc,
    revenueratio
LIMIT 100;

In [0]:
%sql
-- Query 3: Brand Revenue in November
-- TPC-DS Query 3
--
-- Sums ss_ext_sales_price per (year, brand) for items with
-- i_manufact_id = 128 sold in month 11 (d_moy), and returns the 100 rows
-- that sort first by year, then by revenue descending, then by brand id.
SELECT
    d_year,
    i_brand_id AS brand_id,
    i_brand AS brand,
    SUM(ss_ext_sales_price) AS sum_agg
FROM store_sales
INNER JOIN date_dim
    ON d_date_sk = ss_sold_date_sk
INNER JOIN item
    ON ss_item_sk = i_item_sk
WHERE i_manufact_id = 128
  AND d_moy = 11
GROUP BY
    d_year,
    i_brand,
    i_brand_id
ORDER BY
    d_year,
    sum_agg DESC,
    brand_id
LIMIT 100;

In [0]:
%sql
-- Query 4: Top Customers by Net Profit
-- TPC-DS Query 4
-- NOTE(review): this is a store + web net-profit roll-up per customer and
-- year; it does not look like the official Q4 template — confirm the label.
--
-- customer_channel_profit holds one row per (customer, year, channel):
-- store sales ('s') and web sales ('w') net profit, stacked with UNION ALL.
-- The final SELECT adds the channels together per customer and year and
-- returns the first 100 rows by year, then profit descending, then customer id.
WITH customer_channel_profit AS (
    -- Store channel: net profit per customer and year.
    SELECT
        c_customer_id AS customer_id,
        c_first_name AS customer_first_name,
        c_last_name AS customer_last_name,
        c_preferred_cust_flag AS customer_preferred_cust_flag,
        c_birth_country AS customer_birth_country,
        c_login AS customer_login,
        c_email_address AS customer_email_address,
        d_year AS dyear,
        SUM(ss_net_profit) AS channel_profit,
        's' AS sale_type  -- channel tag; not read by the final SELECT
    FROM customer
    INNER JOIN store_sales
        ON c_customer_sk = ss_customer_sk
    INNER JOIN date_dim
        ON ss_sold_date_sk = d_date_sk
    GROUP BY
        c_customer_id,
        c_first_name,
        c_last_name,
        c_preferred_cust_flag,
        c_birth_country,
        c_login,
        c_email_address,
        d_year

    UNION ALL

    -- Web channel: same shape, keyed on the billing customer.
    SELECT
        c_customer_id,
        c_first_name,
        c_last_name,
        c_preferred_cust_flag,
        c_birth_country,
        c_login,
        c_email_address,
        d_year,
        SUM(ws_net_profit),
        'w'
    FROM customer
    INNER JOIN web_sales
        ON c_customer_sk = ws_bill_customer_sk
    INNER JOIN date_dim
        ON ws_sold_date_sk = d_date_sk
    GROUP BY
        c_customer_id,
        c_first_name,
        c_last_name,
        c_preferred_cust_flag,
        c_birth_country,
        c_login,
        c_email_address,
        d_year
)
SELECT
    customer_id,
    customer_first_name,
    customer_last_name,
    customer_preferred_cust_flag,
    customer_birth_country,
    customer_login,
    customer_email_address,
    dyear,
    SUM(channel_profit) AS year_total
FROM customer_channel_profit
GROUP BY
    customer_id,
    customer_first_name,
    customer_last_name,
    customer_preferred_cust_flag,
    customer_birth_country,
    customer_login,
    customer_email_address,
    dyear
ORDER BY
    dyear,
    year_total DESC,
    customer_id
LIMIT 100;

In [0]:
%sql
-- Query 5: Frequent Customer Transactions
-- TPC-DS Query 5
-- NOTE(review): this ticket-count query does not look like the official Q5
-- template (it resembles the Q34/Q73 pattern) — confirm the label.
--
-- Lists (customer, ticket) pairs whose ticket has more than one store_sales
-- line, with the line count, returning the first 100 rows by count
-- descending, then customer name, then ticket number.
--
-- The aggregate is joined straight to customer. Joining it back to
-- store_sales on its own grouping keys (as the previous version did) emits
-- every qualifying ticket cnt times, so the LIMIT 100 result was mostly
-- duplicate rows and the fact table was scanned twice.
WITH multi_line_tickets AS (
    SELECT
        ss_ticket_number,
        ss_customer_sk,
        COUNT(*) AS cnt
    FROM store_sales
    -- Keeps the earlier behaviour of excluding NULL ticket numbers, which the
    -- removed equality self-join filtered out implicitly.
    WHERE ss_ticket_number IS NOT NULL
    GROUP BY
        ss_ticket_number,
        ss_customer_sk
    HAVING COUNT(*) > 1
)
SELECT
    c_last_name,
    c_first_name,
    c_salutation,
    c_preferred_cust_flag,
    ss_ticket_number,
    cnt
FROM multi_line_tickets
INNER JOIN customer
    ON ss_customer_sk = c_customer_sk
ORDER BY
    cnt DESC,
    c_last_name,
    c_first_name,
    ss_ticket_number
LIMIT 100;