In [1]:
import duckdb
import time
import pandas as pd
import numpy as np

from tqdm.auto import tqdm

In [2]:
# Open the on-disk DuckDB database; every later cell issues its queries through `con`.
# NOTE(review): the file name suggests TPC-H data at scale factor 100 — confirm.
con = duckdb.connect("tpch_sf100.db")

In [3]:
# 6D version of the benchmark query: six named parameters, i.e. a lower
# ($quantityNa) and an upper ($quantityNb) bound on l_quantity for each of the
# three OR-ed branches. The query joins lineitem with part on the part key and
# sums l_extendedprice * (1 - l_discount) over the rows matching any branch.
# NOTE(review): looks modeled on TPC-H Q19 — confirm.
# NOTE(review): the next cell assigns `query_template` again (3D version), so when
# the cells run top to bottom this 6D text is overwritten before anything uses it.
query_template = """select
	sum(l_extendedprice* (1 - l_discount)) as revenue
from
	lineitem,
	part
where
	(
		p_partkey = l_partkey
		and p_brand = 'Brand#24'
		and p_container in ('SM CASE', 'SM BOX', 'SM PACK', 'SM PKG')
		and l_quantity >= $quantity1a and l_quantity <= $quantity1b
		and p_size between 1 and 5
		and l_shipmode in ('AIR', 'AIR REG')
		and l_shipinstruct = 'DELIVER IN PERSON'
	)
	or
	(
		p_partkey = l_partkey
		and p_brand = 'Brand#32'
		and p_container in ('MED BAG', 'MED BOX', 'MED PKG', 'MED PACK')
		and l_quantity >= $quantity2a and l_quantity <= $quantity2b
		and p_size between 1 and 10
		and l_shipmode in ('AIR', 'AIR REG')
		and l_shipinstruct = 'DELIVER IN PERSON'
	)
	or
	(
		p_partkey = l_partkey
		and p_brand = 'Brand#31'
		and p_container in ('LG CASE', 'LG BOX', 'LG PACK', 'LG PKG')
		and l_quantity >= $quantity3a and l_quantity <= $quantity3b
		and p_size between 1 and 15
		and l_shipmode in ('AIR', 'AIR REG')
		and l_shipinstruct = 'DELIVER IN PERSON'
	);"""


In [4]:
# 3D version of the benchmark query: three named parameters ($quantity1,
# $quantity2, $quantity3), each a lower bound on l_quantity for one of the three
# OR-ed branches (no upper bound). These are the parameter names the timing loop
# further down supplies.
# Assigning to `query_template` here replaces the 6D text from the previous cell.
query_template = """select
	sum(l_extendedprice* (1 - l_discount)) as revenue
from
	lineitem,
	part
where
	(
		p_partkey = l_partkey
		and p_brand = 'Brand#24'
		and p_container in ('SM CASE', 'SM BOX', 'SM PACK', 'SM PKG')
		and l_quantity >= $quantity1
		and p_size between 1 and 5
		and l_shipmode in ('AIR', 'AIR REG')
		and l_shipinstruct = 'DELIVER IN PERSON'
	)
	or
	(
		p_partkey = l_partkey
		and p_brand = 'Brand#32'
		and p_container in ('MED BAG', 'MED BOX', 'MED PKG', 'MED PACK')
		and l_quantity >= $quantity2
		and p_size between 1 and 10
		and l_shipmode in ('AIR', 'AIR REG')
		and l_shipinstruct = 'DELIVER IN PERSON'
	)
	or
	(
		p_partkey = l_partkey
		and p_brand = 'Brand#31'
		and p_container in ('LG CASE', 'LG BOX', 'LG PACK', 'LG PKG')
		and l_quantity >= $quantity3
		and p_size between 1 and 15
		and l_shipmode in ('AIR', 'AIR REG')
		and l_shipinstruct = 'DELIVER IN PERSON'
	);"""

In [5]:
# Read both ends of the observed l_quantity range with a single aggregate query
# instead of two separate ones; each bound is converted to float because the
# benchmark grid is built from them with np.arange.
min_quantity, max_quantity = (
    float(bound)
    for bound in con.sql(
        "SELECT MIN(l_quantity), MAX(l_quantity) FROM lineitem"
    ).fetchone()
)
(min_quantity, max_quantity)

(1.0, 50.0)

In [6]:
# Benchmark: run the 3-parameter query for every (quantity1, quantity2, quantity3)
# combination on the grid, NUM_TRIALS times each, and collect one record per
# combination holding the parameters and the elapsed time of every trial.
NUM_TRIALS = 3
# Set to True to also store each trial's fetched rows (res_<trial>) for a sanity check.
SAVE_RESULTS = False

# Loop-invariant: the same grid is swept in all three dimensions, so build it once.
# NOTE(review): the stored cell outputs show only two grid points per dimension
# (1.0 and 26.0), so they were presumably produced with a coarser step than 1.00.
quantity_grid = list(np.arange(min_quantity, max_quantity + 1.00, 1.00))

records = []
for quantity1 in tqdm(quantity_grid):
    for quantity2 in tqdm(quantity_grid, leave=False):
        for quantity3 in tqdm(quantity_grid, leave=False):
            params = {'quantity1': quantity1, 'quantity2': quantity2, 'quantity3': quantity3}
            row = dict()
            for trial in range(NUM_TRIALS):
                # perf_counter is a monotonic, high-resolution clock meant for
                # measuring intervals; time.time() can jump with wall-clock changes.
                start = time.perf_counter()
                # Fetch inside the timed region so the measurement is guaranteed to
                # include query execution even if con.sql hands back a lazily
                # evaluated relation (NOTE(review): depends on the DuckDB version —
                # confirm). The query yields a single aggregate row, so the fetch
                # itself adds negligible cost.
                rows = con.sql(query_template, params=params).fetchall()
                elapsed = time.perf_counter() - start
                row[f'elapsed_{trial}'] = elapsed
                if SAVE_RESULTS:
                    row[f'res_{trial}'] = rows
            records.append({**params, **row})

# Build the frame once from the accumulated records (kept under a separate name so
# re-running this cell never depends on `table` already being a list).
table = pd.DataFrame(records)

  0%|          | 0/2 [00:00<?, ?it/s]

  0%|          | 0/2 [00:00<?, ?it/s]

  0%|          | 0/2 [00:00<?, ?it/s]

  0%|          | 0/2 [00:00<?, ?it/s]

  0%|          | 0/2 [00:00<?, ?it/s]

  0%|          | 0/2 [00:00<?, ?it/s]

  0%|          | 0/2 [00:00<?, ?it/s]

In [7]:
# Display the collected timings: one row per parameter combination, holding the
# three quantity parameters followed by one elapsed_<trial> column per trial.
table

Unnamed: 0,quantity1,quantity2,quantity3,elapsed_0,res_0,elapsed_1,res_1,elapsed_2,res_2
0,1.0,1.0,1.0,0.573817,"[(1838703674.2484,)]",0.275053,"[(1838703674.2484,)]",0.275232,"[(1838703674.2484,)]"
1,1.0,1.0,26.0,0.664166,"[(1599730890.4616,)]",0.646265,"[(1599730890.4616,)]",0.659423,"[(1599730890.4616,)]"
2,1.0,26.0,1.0,0.656994,"[(1681281545.3527,)]",0.6587,"[(1681281545.3527,)]",0.65485,"[(1681281545.3527,)]"
3,1.0,26.0,26.0,0.665326,"[(1442308761.5659,)]",0.666998,"[(1442308761.5659,)]",0.672645,"[(1442308761.5659,)]"
4,26.0,1.0,1.0,0.664994,"[(1758115285.8340,)]",0.654283,"[(1758115285.8340,)]",0.654265,"[(1758115285.8340,)]"
5,26.0,1.0,26.0,0.672497,"[(1519142502.0472,)]",0.664206,"[(1519142502.0472,)]",0.669655,"[(1519142502.0472,)]"
6,26.0,26.0,1.0,0.664853,"[(1600693156.9383,)]",0.659455,"[(1600693156.9383,)]",0.664877,"[(1600693156.9383,)]"
7,26.0,26.0,26.0,0.272752,"[(1361720373.1515,)]",0.274995,"[(1361720373.1515,)]",0.267837,"[(1361720373.1515,)]"


In [8]:
# Release the DuckDB connection opened at the top of the notebook.
con.close()