In [1]:
import duckdb
import time
import pandas as pd
from tqdm.auto import tqdm
import numpy as np

In [2]:
# Open the on-disk DuckDB database file that every query in this notebook runs against.
con = duckdb.connect("tpch_sf1.db")

In [3]:
# Parameterised benchmark query ("2D": presumably because it takes the two
# parameters $receiptdate1 and $receiptdate2 -- TODO confirm the label's meaning).
#
# Joins orders to lineitem on the order key and, for the ship modes 'AIR' and
# 'REG AIR', counts per ship mode:
#   * high_line_count -- rows whose order priority is '1-URGENT' or '2-HIGH'
#   * low_line_count  -- rows with any other order priority
# Only rows with l_shipdate < l_commitdate < l_receiptdate are considered, and
# l_receiptdate must fall in the half-open range [$receiptdate1, $receiptdate2).
# NOTE(review): this looks like a variant of TPC-H Q12 with the receipt-date
# window turned into two free parameters -- confirm against the spec.
query_template = """select
	l_shipmode,
	sum(case
		when o_orderpriority = '1-URGENT'
			or o_orderpriority = '2-HIGH'
			then 1
		else 0
	end) as high_line_count,
	sum(case
		when o_orderpriority <> '1-URGENT'
			and o_orderpriority <> '2-HIGH'
			then 1
		else 0
	end) as low_line_count
from
	orders,
	lineitem
where
	o_orderkey = l_orderkey
	and l_shipmode in ('AIR', 'REG AIR')
	and l_commitdate < l_receiptdate
	and l_shipdate < l_commitdate
	and l_receiptdate >= $receiptdate1
	and l_receiptdate < $receiptdate2
group by
	l_shipmode
order by
	l_shipmode;"""


In [4]:
# Bounds of the receipt-date sweep. Both aggregates are computed by a single
# query (one pass over lineitem) instead of one query per aggregate; the single
# result row is unpacked into the two bounds.
min_receiptdate, max_receiptdate = con.sql(
    "SELECT MIN(l_receiptdate), MAX(l_receiptdate) FROM lineitem"
).fetchone()
(min_receiptdate, max_receiptdate)

(datetime.date(1992, 1, 4), datetime.date(1998, 12, 31))

In [5]:
# Benchmark sweep: time query_template for every ($receiptdate1, $receiptdate2)
# pair taken from a 30-day grid between min_receiptdate and max_receiptdate.
# Each pair is run NUM_TRIALS times; the result is one row per pair with the
# two parameters plus one elapsed_<trial> column (seconds) per repetition.
NUM_TRIALS = 3

# The grid is identical for both parameters, so build it once instead of
# re-creating it on every iteration of the outer loop.
receiptdate_grid = list(pd.date_range(min_receiptdate, max_receiptdate, freq='30D'))

# One dict per parameter pair; converted to a DataFrame once at the end.
records = []

for receiptdate1 in tqdm(receiptdate_grid):
    for receiptdate2 in tqdm(receiptdate_grid, leave=False):
        params = {'receiptdate1': receiptdate1, 'receiptdate2': receiptdate2}
        row = dict()
        for trial in range(NUM_TRIALS):
            # perf_counter() is a monotonic, high-resolution clock meant for
            # measuring short durations; time.time() can jump if the system
            # clock is adjusted.
            start = time.perf_counter()
            res = con.sql(query_template, params=params)
            # con.sql() hands back a relation object, which DuckDB documents as
            # lazily evaluated. Fetching the (at most two-row) result inside
            # the timed region guarantees that query execution is measured.
            rows = res.fetchall()
            elapsed = time.perf_counter() - start
            row[f'elapsed_{trial}'] = elapsed
            # If we want to, save the result for a sanity check
            # row[f'res_{trial}'] = rows
        records.append({**params, **row})

table = pd.DataFrame(records)
table

  0%|          | 0/86 [00:00<?, ?it/s]

  0%|          | 0/86 [00:00<?, ?it/s]

  0%|          | 0/86 [00:00<?, ?it/s]

  0%|          | 0/86 [00:00<?, ?it/s]

  0%|          | 0/86 [00:00<?, ?it/s]

  0%|          | 0/86 [00:00<?, ?it/s]

  0%|          | 0/86 [00:00<?, ?it/s]

  0%|          | 0/86 [00:00<?, ?it/s]

  0%|          | 0/86 [00:00<?, ?it/s]

  0%|          | 0/86 [00:00<?, ?it/s]

  0%|          | 0/86 [00:00<?, ?it/s]

  0%|          | 0/86 [00:00<?, ?it/s]

  0%|          | 0/86 [00:00<?, ?it/s]

  0%|          | 0/86 [00:00<?, ?it/s]

  0%|          | 0/86 [00:00<?, ?it/s]

  0%|          | 0/86 [00:00<?, ?it/s]

  0%|          | 0/86 [00:00<?, ?it/s]

  0%|          | 0/86 [00:00<?, ?it/s]

  0%|          | 0/86 [00:00<?, ?it/s]

  0%|          | 0/86 [00:00<?, ?it/s]

  0%|          | 0/86 [00:00<?, ?it/s]

  0%|          | 0/86 [00:00<?, ?it/s]

  0%|          | 0/86 [00:00<?, ?it/s]

  0%|          | 0/86 [00:00<?, ?it/s]

  0%|          | 0/86 [00:00<?, ?it/s]

  0%|          | 0/86 [00:00<?, ?it/s]

  0%|          | 0/86 [00:00<?, ?it/s]

  0%|          | 0/86 [00:00<?, ?it/s]

  0%|          | 0/86 [00:00<?, ?it/s]

  0%|          | 0/86 [00:00<?, ?it/s]

  0%|          | 0/86 [00:00<?, ?it/s]

  0%|          | 0/86 [00:00<?, ?it/s]

  0%|          | 0/86 [00:00<?, ?it/s]

  0%|          | 0/86 [00:00<?, ?it/s]

  0%|          | 0/86 [00:00<?, ?it/s]

RuntimeError: Query interrupted

In [None]:
# Close the DuckDB connection held in `con` now that the benchmark is done.
con.close()