# 🧪 TPC-DS Benchmark Queries 21–25
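Databricks SQL notebook. The first cell runs `%run ./specify_catalog_schema`, which is expected to select the catalog and schema that the queries below run against.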

In [0]:
%run ./specify_catalog_schema

In [0]:
%sql
-- Query 21: Inventory Evaluation
-- TPC-DS Query 21
-- For every warehouse/item pair, totals the on-hand quantity recorded within
-- 30 days either side of 2000-05-13 (inclusive), split into "before the pivot
-- date" and "on or after it". Only items priced between 0.99 and 1.49 are
-- considered, and only pairs whose after/before ratio lies between 2/3 and
-- 3/2 are returned.
WITH inv_window AS (
    -- One row per warehouse/item with the two conditional totals.
    SELECT
        w.w_warehouse_name,
        i.i_item_id,
        SUM(CASE WHEN d.d_date < '2000-05-13' THEN inv.inv_quantity_on_hand ELSE 0 END) AS inv_before,
        SUM(CASE WHEN d.d_date >= '2000-05-13' THEN inv.inv_quantity_on_hand ELSE 0 END) AS inv_after
    FROM inventory AS inv
    INNER JOIN date_dim AS d
        ON inv.inv_date_sk = d.d_date_sk
    INNER JOIN item AS i
        ON inv.inv_item_sk = i.i_item_sk
    INNER JOIN warehouse AS w
        ON inv.inv_warehouse_sk = w.w_warehouse_sk
    WHERE i.i_current_price BETWEEN 0.99 AND 1.49
      AND d.d_date BETWEEN DATE_SUB('2000-05-13', 30) AND DATE_ADD('2000-05-13', 30)
    GROUP BY
        w.w_warehouse_name,
        i.i_item_id
)
SELECT
    w_warehouse_name,
    i_item_id,
    inv_before,
    inv_after
FROM inv_window
-- The CASE guards the division: a pair with no positive "before" total
-- produces NULL, fails the BETWEEN test, and is dropped.
WHERE CASE WHEN inv_before > 0 THEN inv_after / inv_before ELSE NULL END
      BETWEEN 2.0/3.0 AND 3.0/2.0
ORDER BY
    w_warehouse_name,
    i_item_id
LIMIT 100;

In [0]:
%sql
-- Query 22: Average Inventory Quantity on Hand
-- TPC-DS Query 22
-- Averages inv_quantity_on_hand for inventory rows whose date falls in
-- d_month_seq 1205 through 1216 (inclusive), at every ROLLUP level of
-- product name, brand, class and category. The 100 rows with the smallest
-- averages (ties broken by the grouping columns) are returned.
SELECT
    i.i_product_name,
    i.i_brand,
    i.i_class,
    i.i_category,
    AVG(inv.inv_quantity_on_hand) AS qoh
FROM inventory AS inv
-- No warehouse column is selected; the inner join only keeps inventory rows
-- that have a matching warehouse row.
INNER JOIN warehouse AS w
    ON inv.inv_warehouse_sk = w.w_warehouse_sk
INNER JOIN item AS i
    ON inv.inv_item_sk = i.i_item_sk
INNER JOIN date_dim AS d
    ON inv.inv_date_sk = d.d_date_sk
WHERE d.d_month_seq >= 1205
  AND d.d_month_seq <= 1216
GROUP BY ROLLUP(
    i.i_product_name,
    i.i_brand,
    i.i_class,
    i.i_category
)
ORDER BY
    qoh,
    i_product_name,
    i_brand,
    i_class,
    i_category
LIMIT 100;

In [0]:
%sql
-- Query 23: Frequent Store Sales Items
-- TPC-DS Query 23
-- Lists items that were sold at least five times on a single day between
-- 2000-01-27 and 30 days later (inclusive), most frequent first.
-- NOTE(review): this covers only the "frequent items" step; the official
-- Query 23 template appears to contain further steps - confirm that the
-- simplification is intentional.
WITH daily_item_sales AS (
    -- One row per item and sale date, with the number of store_sales rows.
    SELECT
        SUBSTR(i.i_item_desc, 1, 30) AS itemdesc,
        i.i_item_sk AS item_sk,
        d.d_date AS solddate,
        COUNT(*) AS cnt
    FROM store_sales AS ss
    INNER JOIN item AS i
        ON ss.ss_item_sk = i.i_item_sk
    INNER JOIN date_dim AS d
        ON ss.ss_sold_date_sk = d.d_date_sk
    WHERE d.d_date BETWEEN '2000-01-27' AND DATE_ADD('2000-01-27', 30)
    GROUP BY
        i.i_item_desc,
        i.i_item_sk,
        d.d_date
    -- Same threshold as "more than 4" on an integer count.
    HAVING COUNT(*) >= 5
)
SELECT
    dis.itemdesc,
    dis.item_sk,
    dis.solddate,
    dis.cnt
FROM daily_item_sales AS dis
ORDER BY
    dis.cnt DESC,
    dis.itemdesc,
    dis.item_sk,
    dis.solddate
LIMIT 100;

In [0]:
%sql
-- Query 24: Sales and Returns Analysis
-- TPC-DS Query 24
-- Finds customers whose total net paid on returned, 'pale'-coloured store
-- purchases exceeds 5% of the average net paid across all qualifying
-- customer/store/item-attribute groups.
WITH ssales AS (
    -- Net paid for store sales that were later returned, restricted to
    -- stores in market 8 whose zip matches the customer's current address
    -- and to customers whose birth country differs from the upper-cased
    -- address country.
    SELECT
        c.c_last_name,
        c.c_first_name,
        s.s_store_name,
        ca.ca_state,
        s.s_state,
        i.i_color,
        i.i_current_price,
        i.i_manager_id,
        i.i_units,
        i.i_size,
        SUM(ss.ss_net_paid) AS netpaid
    FROM store_sales AS ss
    INNER JOIN store_returns AS sr
        ON ss.ss_ticket_number = sr.sr_ticket_number
       AND ss.ss_item_sk = sr.sr_item_sk
    INNER JOIN customer AS c
        ON ss.ss_customer_sk = c.c_customer_sk
    INNER JOIN customer_address AS ca
        ON c.c_current_addr_sk = ca.ca_address_sk
    INNER JOIN item AS i
        ON ss.ss_item_sk = i.i_item_sk
    INNER JOIN store AS s
        ON ss.ss_store_sk = s.s_store_sk
    WHERE s.s_market_id = 8
      AND s.s_zip = ca.ca_zip
      AND c.c_birth_country <> UPPER(ca.ca_country)
    GROUP BY
        c.c_last_name,
        c.c_first_name,
        s.s_store_name,
        ca.ca_state,
        s.s_state,
        i.i_color,
        i.i_current_price,
        i.i_manager_id,
        i.i_units,
        i.i_size
),
paid_threshold AS (
    -- Single-row cut-off: 5% of the average netpaid over every ssales group,
    -- regardless of colour.
    SELECT 0.05 * AVG(netpaid) AS min_paid
    FROM ssales
)
SELECT
    c_last_name,
    c_first_name,
    s_store_name,
    SUM(netpaid) AS paid
FROM ssales
WHERE i_color = 'pale'
GROUP BY
    c_last_name,
    c_first_name,
    s_store_name
HAVING SUM(netpaid) > (SELECT min_paid FROM paid_threshold)
ORDER BY
    c_last_name,
    c_first_name,
    s_store_name;

In [0]:
%sql
-- Query 25: Profit and Loss Analysis
-- TPC-DS Query 25
-- For items sold in a store in April 2001, returned between April and
-- October 2001, and bought by the same customer through the catalog in that
-- same April-October window, reports per item and store the maximum store
-- sale profit, store return loss and catalog sale profit.
SELECT
    i.i_item_id,
    i.i_item_desc,
    s.s_store_id,
    s.s_store_name,
    MAX(ss.ss_net_profit) AS store_sales_profit,
    MAX(sr.sr_net_loss) AS store_returns_loss,
    MAX(cs.cs_net_profit) AS catalog_sales_profit
FROM store_sales AS ss
-- Store sale date: April 2001.
INNER JOIN date_dim AS d1
    ON ss.ss_sold_date_sk = d1.d_date_sk
   AND d1.d_year = 2001
   AND d1.d_moy = 4
INNER JOIN item AS i
    ON i.i_item_sk = ss.ss_item_sk
INNER JOIN store AS s
    ON ss.ss_store_sk = s.s_store_sk
-- The return must be for the same customer, item and ticket as the sale.
INNER JOIN store_returns AS sr
    ON ss.ss_customer_sk = sr.sr_customer_sk
   AND ss.ss_item_sk = sr.sr_item_sk
   AND ss.ss_ticket_number = sr.sr_ticket_number
-- Return date: April through October 2001.
INNER JOIN date_dim AS d2
    ON sr.sr_returned_date_sk = d2.d_date_sk
   AND d2.d_year = 2001
   AND d2.d_moy BETWEEN 4 AND 10
-- Catalog purchase of the same item billed to the same customer.
INNER JOIN catalog_sales AS cs
    ON sr.sr_customer_sk = cs.cs_bill_customer_sk
   AND sr.sr_item_sk = cs.cs_item_sk
-- Catalog sale date: April through October 2001.
INNER JOIN date_dim AS d3
    ON cs.cs_sold_date_sk = d3.d_date_sk
   AND d3.d_year = 2001
   AND d3.d_moy BETWEEN 4 AND 10
GROUP BY
    i.i_item_id,
    i.i_item_desc,
    s.s_store_id,
    s.s_store_name
ORDER BY
    i_item_id,
    i_item_desc,
    s_store_id,
    s_store_name
LIMIT 100;