From 7d7974d36dfe2c255bc1f1d5526bc77fd33ad792 Mon Sep 17 00:00:00 2001 From: Caetano Sauer Date: Tue, 11 Nov 2025 23:56:16 +0100 Subject: [PATCH] Fix Hyper scripts to perform hot runs A recent commit aimed at "Prevent cheating by Munich-based databases" (4a2f3fa) incorrectly modified Hyper's `query.py` scripts to restart the database server on each iteration, essentially making every run a cold run. This unfairly brought our results down by a large margin. This commit restores the previous behavior, which was already correct; there was absolutely no cheating involved. --- hyper-parquet/query.py | 20 ++++++++++---------- hyper/query.py | 8 ++++---- 2 files changed, 14 insertions(+), 14 deletions(-) diff --git a/hyper-parquet/query.py b/hyper-parquet/query.py index f2351b364..2df4fb3b3 100755 --- a/hyper-parquet/query.py +++ b/hyper-parquet/query.py @@ -7,17 +7,17 @@ query = sys.stdin.read() -for try_num in range(3): - if try_num == 0: - # Flush OS page cache before first run of each query - subprocess.run(['sync'], check=True) - subprocess.run(['sudo', 'tee', '/proc/sys/vm/drop_caches'], input=b'3', check=True, stdout=subprocess.DEVNULL) +with HyperProcess(telemetry=Telemetry.DO_NOT_SEND_USAGE_DATA_TO_TABLEAU) as hyper: + with Connection(hyper.endpoint) as connection: + # Hyper only supports temporary external tables, so we need to create them on every query + connection.execute_command(open("create.sql").read()) + for try_num in range(3): + if try_num == 0: + # Flush OS page cache before first run of each query + subprocess.run(['sync'], check=True) + subprocess.run(['sudo', 'tee', '/proc/sys/vm/drop_caches'], input=b'3', check=True, stdout=subprocess.DEVNULL) - start = timeit.default_timer() - with HyperProcess(telemetry=Telemetry.DO_NOT_SEND_USAGE_DATA_TO_TABLEAU) as hyper: - with Connection(hyper.endpoint) as connection: - # Hyper only supports temporary external tables, so we need to create them on every query - 
connection.execute_command(open("create.sql").read()) + start = timeit.default_timer() try: connection.execute_list_query(query) print(round(timeit.default_timer() - start, 3)) diff --git a/hyper/query.py b/hyper/query.py index fd8179635..e1833c0e4 100755 --- a/hyper/query.py +++ b/hyper/query.py @@ -6,10 +6,10 @@ query = sys.stdin.read() -for _ in range(3): - start = timeit.default_timer() - with HyperProcess(telemetry=Telemetry.DO_NOT_SEND_USAGE_DATA_TO_TABLEAU) as hyper: - with Connection(hyper.endpoint, 'hits.hyper', CreateMode.NONE) as connection: +with HyperProcess(telemetry=Telemetry.DO_NOT_SEND_USAGE_DATA_TO_TABLEAU) as hyper: + with Connection(hyper.endpoint, 'hits.hyper', CreateMode.NONE) as connection: + for _ in range(3): + start = timeit.default_timer() try: connection.execute_list_query(query) print(round(timeit.default_timer() - start, 3))