# 🧪 TPC-DS Benchmark Queries 46–50
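Databricks SQL notebook. Run the first cell (`%run ./specify_catalog_schema`) before the queries; it is expected to set the catalog and schema that the unqualified table names below resolve against.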

In [0]:
%run ./specify_catalog_schema

In [0]:
%sql
-- Query 46: Identifying Customer Purchase Patterns
-- TPC-DS Query 46
--
-- For each (ticket, customer, city of sale address) combination, total the
-- coupon amount and net profit, then keep only tickets where the customer's
-- current city differs from the city the purchase was made in.
WITH ticket_totals AS (
    -- One row per ticket / customer / purchase city with summed amounts.
    SELECT
        ss.ss_ticket_number,
        ss.ss_customer_sk,
        sold_addr.ca_city AS bought_city,
        SUM(ss.ss_coupon_amt) AS amt,
        SUM(ss.ss_net_profit) AS profit
    FROM store_sales AS ss
    INNER JOIN date_dim AS d
        ON d.d_date_sk = ss.ss_sold_date_sk
    -- No store column is filtered on; the inner join only drops sales rows
    -- that have no matching store.
    INNER JOIN store AS s
        ON s.s_store_sk = ss.ss_store_sk
    INNER JOIN household_demographics AS hd
        ON hd.hd_demo_sk = ss.ss_hdemo_sk
    INNER JOIN customer_address AS sold_addr
        ON sold_addr.ca_address_sk = ss.ss_addr_sk
    WHERE d.d_dow = 1
      AND d.d_year IN (1998, 1999, 2000)
      AND (hd.hd_dep_count = 5 OR hd.hd_vehicle_count = 3)
    GROUP BY
        ss.ss_ticket_number,
        ss.ss_customer_sk,
        sold_addr.ca_city
)
SELECT
    c.c_last_name,
    c.c_first_name,
    cur_addr.ca_city,
    tt.bought_city,
    tt.ss_ticket_number,
    tt.amt,
    tt.profit
FROM ticket_totals AS tt
INNER JOIN customer AS c
    ON c.c_customer_sk = tt.ss_customer_sk
-- Second use of customer_address: the customer's current address.
INNER JOIN customer_address AS cur_addr
    ON cur_addr.ca_address_sk = c.c_current_addr_sk
WHERE cur_addr.ca_city <> tt.bought_city
ORDER BY
    c.c_last_name,
    c.c_first_name,
    cur_addr.ca_city,
    tt.bought_city,
    tt.ss_ticket_number
LIMIT 100;

In [0]:
%sql
-- Query 47: Analyzing Customer Demographics and Sales
-- TPC-DS Query 47
--
-- Per customer, calendar period (year / quarter / month), store and ticket:
-- total wholesale cost, list price, coupon amount and sales price for
-- customers born in the United States, sales dated 2001, stores in TN.
--
-- NOTE(review): the official TPC-DS Query 47 appears to be a windowed
-- monthly-sales comparison rather than this aggregate; confirm that the
-- query number in this header is the intended one.
SELECT
    c.c_last_name,
    c.c_first_name,
    c.c_birth_country,
    c.c_login,
    c.c_email_address,
    d.d_year,
    d.d_qoy,
    d.d_moy,
    s.s_store_name,
    s.s_company_name,
    ss.ss_ticket_number,
    SUM(ss.ss_wholesale_cost) AS total_wholesale_cost,
    SUM(ss.ss_list_price) AS total_list_price,
    SUM(ss.ss_coupon_amt) AS total_coupon_amt,
    SUM(ss.ss_sales_price) AS total_sales_price
FROM store_sales AS ss
INNER JOIN customer AS c
    ON c.c_customer_sk = ss.ss_customer_sk
INNER JOIN store AS s
    ON s.s_store_sk = ss.ss_store_sk
INNER JOIN date_dim AS d
    ON d.d_date_sk = ss.ss_sold_date_sk
WHERE c.c_birth_country = 'United States'
  AND d.d_year = 2001
  -- The original IN list repeated 'TN' eight times; a single equality
  -- matches exactly the same rows.
  AND s.s_state = 'TN'
GROUP BY
    c.c_last_name,
    c.c_first_name,
    c.c_birth_country,
    c.c_login,
    c.c_email_address,
    d.d_year,
    d.d_qoy,
    d.d_moy,
    s.s_store_name,
    s.s_company_name,
    ss.ss_ticket_number
ORDER BY
    c.c_last_name,
    c.c_first_name,
    c.c_birth_country,
    c.c_login,
    c.c_email_address,
    d.d_year,
    d.d_qoy,
    d.d_moy,
    s.s_store_name,
    s.s_company_name,
    ss.ss_ticket_number
LIMIT 100;

In [0]:
%sql
-- Query 48: Sales Analysis by Customer Demographics and Geography
-- TPC-DS Query 48
--
-- Total quantity sold in 1998 per (marital status, education status, state),
-- restricted to two demographic / price-band combinations and three states.
--
-- The grouping keys are projected alongside the sum: the query groups and
-- orders by them, and without them each returned sum could not be attributed
-- to its group.
-- NOTE(review): the official TPC-DS Query 48 appears to return one ungrouped
-- total; confirm that the per-group breakdown is what is wanted here.
SELECT
    cd.cd_marital_status,
    cd.cd_education_status,
    ca.ca_state,
    SUM(ss.ss_quantity) AS total_quantity
FROM store_sales AS ss
-- No store column is filtered on; the inner join only drops sales rows that
-- have no matching store.
INNER JOIN store AS s
    ON s.s_store_sk = ss.ss_store_sk
INNER JOIN customer_demographics AS cd
    ON cd.cd_demo_sk = ss.ss_cdemo_sk
INNER JOIN customer_address AS ca
    ON ca.ca_address_sk = ss.ss_addr_sk
INNER JOIN date_dim AS d
    ON d.d_date_sk = ss.ss_sold_date_sk
WHERE d.d_year = 1998
  AND (
      (
          cd.cd_marital_status = 'M'
          AND cd.cd_education_status = '4 yr Degree'
          AND ss.ss_sales_price BETWEEN 100.00 AND 150.00
      )
      OR
      (
          cd.cd_marital_status = 'S'
          AND cd.cd_education_status = 'Secondary'
          AND ss.ss_sales_price BETWEEN 50.00 AND 100.00
      )
  )
  AND ca.ca_state IN ('TX', 'OH', 'NJ')
GROUP BY
    cd.cd_marital_status,
    cd.cd_education_status,
    ca.ca_state
ORDER BY
    cd.cd_marital_status,
    cd.cd_education_status,
    ca.ca_state
LIMIT 100;

In [0]:
%sql
-- Query 49: Returns Analysis by Channel and Item -- NOT YET IMPLEMENTED
-- TPC-DS Query 49 (placeholder)
-- The real query text is not included in this notebook. The statement below
-- only returns a single row whose `note` column holds a constant marker
-- string, so its runtime must not be reported as a Query 49 result.
-- TODO: replace with the full TPC-DS Query 49.
SELECT 'query_49_placeholder' AS note;

In [0]:
%sql
-- Query 50: NOT YET IMPLEMENTED (to be filled in once Query 49 is in place)
-- TPC-DS Query 50 (placeholder)
-- The statement below only returns a single row whose `note` column holds a
-- constant marker string; it does not touch any TPC-DS table.
-- TODO: replace with the full TPC-DS Query 50.
SELECT 'query_50_placeholder' AS note;