# 🧪 TPC-DS Benchmark Queries 31–35
Databricks SQL queries. Run the `%run ./specify_catalog_schema` cell first; the queries below refer to tables by unqualified name.

In [0]:
%run ./specify_catalog_schema

In [0]:
%sql
-- Query 31: Sales Comparison by County and Quarter
-- TPC-DS Query 31
-- For every (county, year, quarter) that has both store and web sales, return
-- the two sales totals and the web-to-store ratio; first 100 rows by county,
-- year, quarter.

-- ss: store extended sales price summed per county / quarter / year, where the
-- county comes from the address referenced by ss_addr_sk.
WITH ss AS (
    SELECT
        ca_county,
        d_qoy,
        d_year,
        SUM(ss_ext_sales_price) AS store_sales
    FROM store_sales
    INNER JOIN date_dim ON ss_sold_date_sk = d_date_sk
    INNER JOIN customer_address ON ss_addr_sk = ca_address_sk
    GROUP BY ca_county, d_qoy, d_year
),
-- ws: web extended sales price summed per county / quarter / year, where the
-- county comes from the billing address (ws_bill_addr_sk).
ws AS (
    SELECT
        ca_county,
        d_qoy,
        d_year,
        SUM(ws_ext_sales_price) AS web_sales
    FROM web_sales
    INNER JOIN date_dim ON ws_sold_date_sk = d_date_sk
    INNER JOIN customer_address ON ws_bill_addr_sk = ca_address_sk
    GROUP BY ca_county, d_qoy, d_year
)
SELECT
    ss.ca_county,
    ss.d_year,
    ss.d_qoy,
    ss.store_sales,
    ws.web_sales,
    -- NULLIF turns a zero store total into NULL so the ratio is NULL instead of
    -- a divide-by-zero error when ANSI mode is enabled.
    ws.web_sales / NULLIF(ss.store_sales, 0) AS ratio
FROM ss
-- Inner join: only county/quarter/year combinations present in both channels
-- are returned.
INNER JOIN ws
    ON ss.ca_county = ws.ca_county
   AND ss.d_qoy = ws.d_qoy
   AND ss.d_year = ws.d_year
ORDER BY ss.ca_county, ss.d_year, ss.d_qoy
LIMIT 100;

In [0]:
%sql
-- Query 32: Excess Discount Amount
-- TPC-DS Query 32
-- Sums the extended discount amount of catalog sales lines for items of
-- manufacturer 269 sold between 1998-03-18 and 90 days later, counting only
-- lines whose discount exceeds 1.3 times the average discount of the same item
-- over the same date window.
SELECT
    SUM(cs.cs_ext_discount_amt) AS excess_discount_amount
FROM catalog_sales AS cs
INNER JOIN item AS i ON cs.cs_item_sk = i.i_item_sk
INNER JOIN date_dim AS d ON cs.cs_sold_date_sk = d.d_date_sk
WHERE i.i_manufact_id = 269
  AND d.d_date BETWEEN DATE '1998-03-18' AND DATE_ADD(DATE '1998-03-18', 90)
  AND cs.cs_ext_discount_amt > (
      -- Per-item threshold: the subquery is correlated on the outer row's
      -- item. Without this correlation the average would be taken over every
      -- catalog sale in the window, regardless of item or manufacturer.
      SELECT 1.3 * AVG(cs_avg.cs_ext_discount_amt)
      FROM catalog_sales AS cs_avg
      INNER JOIN date_dim AS d_avg ON cs_avg.cs_sold_date_sk = d_avg.d_date_sk
      WHERE cs_avg.cs_item_sk = i.i_item_sk
        AND d_avg.d_date BETWEEN DATE '1998-03-18' AND DATE_ADD(DATE '1998-03-18', 90)
  );

In [0]:
%sql
-- Query 33: Sales by Manufacturer
-- TPC-DS Query 33
-- Total extended sales price per manufacturer across the store, catalog and
-- web channels for March 1999, limited to addresses with GMT offset -5 and to
-- manufacturers that have at least one item in the 'Books' category.
-- Returns the 100 smallest totals.

-- ss: store channel, address taken from ss_addr_sk.
WITH ss AS (
    SELECT
        i.i_manufact_id,
        SUM(s.ss_ext_sales_price) AS total_sales
    FROM store_sales AS s
    INNER JOIN date_dim AS d ON s.ss_sold_date_sk = d.d_date_sk
    INNER JOIN customer_address AS ca ON s.ss_addr_sk = ca.ca_address_sk
    INNER JOIN item AS i ON s.ss_item_sk = i.i_item_sk
    WHERE d.d_year = 1999
      AND d.d_moy = 3
      AND ca.ca_gmt_offset = -5
      AND i.i_manufact_id IN (
          SELECT bk.i_manufact_id
          FROM item AS bk
          WHERE bk.i_category = 'Books'
      )
    GROUP BY i.i_manufact_id
),
-- cs: catalog channel, address taken from the billing address.
cs AS (
    SELECT
        i.i_manufact_id,
        SUM(c.cs_ext_sales_price) AS total_sales
    FROM catalog_sales AS c
    INNER JOIN date_dim AS d ON c.cs_sold_date_sk = d.d_date_sk
    INNER JOIN customer_address AS ca ON c.cs_bill_addr_sk = ca.ca_address_sk
    INNER JOIN item AS i ON c.cs_item_sk = i.i_item_sk
    WHERE d.d_year = 1999
      AND d.d_moy = 3
      AND ca.ca_gmt_offset = -5
      AND i.i_manufact_id IN (
          SELECT bk.i_manufact_id
          FROM item AS bk
          WHERE bk.i_category = 'Books'
      )
    GROUP BY i.i_manufact_id
),
-- ws: web channel, address taken from the billing address.
ws AS (
    SELECT
        i.i_manufact_id,
        SUM(w.ws_ext_sales_price) AS total_sales
    FROM web_sales AS w
    INNER JOIN date_dim AS d ON w.ws_sold_date_sk = d.d_date_sk
    INNER JOIN customer_address AS ca ON w.ws_bill_addr_sk = ca.ca_address_sk
    INNER JOIN item AS i ON w.ws_item_sk = i.i_item_sk
    WHERE d.d_year = 1999
      AND d.d_moy = 3
      AND ca.ca_gmt_offset = -5
      AND i.i_manufact_id IN (
          SELECT bk.i_manufact_id
          FROM item AS bk
          WHERE bk.i_category = 'Books'
      )
    GROUP BY i.i_manufact_id
)
SELECT
    i_manufact_id,
    SUM(total_sales) AS total_sales
FROM (
    -- UNION ALL keeps one row per (channel, manufacturer); the outer SUM then
    -- folds the channels together.
    SELECT i_manufact_id, total_sales FROM ss
    UNION ALL
    SELECT i_manufact_id, total_sales FROM cs
    UNION ALL
    SELECT i_manufact_id, total_sales FROM ws
) all_sales
GROUP BY i_manufact_id
ORDER BY total_sales
LIMIT 100;

In [0]:
%sql
-- Query 34: Frequent Shoppers Analysis
-- TPC-DS Query 34
-- Lists customers together with store tickets that contain between 15 and 20
-- sales rows, considering only sales that pass the date, store-county and
-- household filters below.

-- dn: number of store_sales rows per (ticket, customer) after filtering.
WITH dn AS (
    SELECT
        ss_ticket_number,
        ss_customer_sk,
        COUNT(*) AS cnt
    FROM store_sales
    INNER JOIN date_dim ON ss_sold_date_sk = d_date_sk
    INNER JOIN store ON ss_store_sk = s_store_sk
    INNER JOIN household_demographics ON ss_hdemo_sk = hd_demo_sk
    WHERE d_year IN (1999, 2000, 2001)
      -- first three days or days 25-28 of the month
      AND (d_dom BETWEEN 1 AND 3 OR d_dom BETWEEN 25 AND 28)
      AND hd_buy_potential IN ('>10000', 'unknown')
      AND hd_vehicle_count > 0
      -- dependents per vehicle above 1.2; the CASE keeps the division from
      -- ever seeing a non-positive vehicle count
      AND (CASE WHEN hd_vehicle_count > 0 THEN hd_dep_count / hd_vehicle_count ELSE NULL END) > 1.2
      -- one distinct county value
      AND s_county IN ('Williamson County')
    GROUP BY ss_ticket_number, ss_customer_sk
)
SELECT
    c_last_name,
    c_first_name,
    c_salutation,
    c_preferred_cust_flag,
    ss_ticket_number,
    cnt
FROM dn
INNER JOIN customer ON ss_customer_sk = c_customer_sk
WHERE cnt BETWEEN 15 AND 20
ORDER BY
    c_last_name,
    c_first_name,
    c_salutation,
    c_preferred_cust_flag DESC;

In [0]:
%sql
-- Query 35: Customer Demographics Analysis
-- TPC-DS Query 35
-- For customers whose current demographics are female, married and
-- 'Advanced Degree', groups by state and the three dependent counts and
-- reports the row count plus STDDEV_SAMP / AVG / MAX of each dependent count.
-- First 100 groups in grouping-key order.
SELECT
    ca.ca_state,
    cd.cd_gender,
    cd.cd_marital_status,
    -- total dependents
    cd.cd_dep_count,
    COUNT(*) AS cnt1,
    STDDEV_SAMP(cd_dep_count),
    AVG(cd_dep_count),
    MAX(cd_dep_count),
    -- employed dependents
    cd.cd_dep_employed_count,
    COUNT(*) AS cnt2,
    STDDEV_SAMP(cd_dep_employed_count),
    AVG(cd_dep_employed_count),
    MAX(cd_dep_employed_count),
    -- dependents in college
    cd.cd_dep_college_count,
    COUNT(*) AS cnt3,
    STDDEV_SAMP(cd_dep_college_count),
    AVG(cd_dep_college_count),
    MAX(cd_dep_college_count)
FROM customer AS c
INNER JOIN customer_address AS ca
    ON c.c_current_addr_sk = ca.ca_address_sk
INNER JOIN customer_demographics AS cd
    ON c.c_current_cdemo_sk = cd.cd_demo_sk
-- NOTE(review): no hd_ column is selected or filtered; this join only requires
-- a matching household_demographics row -- confirm it is intentional.
INNER JOIN household_demographics AS hd
    ON c.c_current_hdemo_sk = hd.hd_demo_sk
WHERE cd.cd_gender = 'F'
  AND cd.cd_marital_status = 'M'
  AND cd.cd_education_status = 'Advanced Degree'
GROUP BY
    ca.ca_state,
    cd.cd_gender,
    cd.cd_marital_status,
    cd.cd_dep_count,
    cd.cd_dep_employed_count,
    cd.cd_dep_college_count
ORDER BY
    ca.ca_state,
    cd.cd_gender,
    cd.cd_marital_status,
    cd.cd_dep_count,
    cd.cd_dep_employed_count,
    cd.cd_dep_college_count
LIMIT 100;