# setup clickhouse client

In [1]:
pip install clickhouse-connect

Defaulting to user installation because normal site-packages is not writeable

[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m23.1.2[0m[39;49m -> [0m[32;49m24.0[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpython3 -m pip install --upgrade pip[0m
Note: you may need to restart the kernel to use updated packages.


In [2]:
import clickhouse_connect
import glob
import time
from collections import defaultdict

In [3]:
ch_host='localhost'
ch_port=8123

In [4]:
client = clickhouse_connect.get_client(host=ch_host, port=ch_port)

# Execute Queries

In [9]:
activate_optimizer_setting = " SETTINGS yannakakis_optimizer = 1"
deactivate_optimizer_setting = " SETTINGS yannakakis_optimizer = 0"
test_query = "SELECT * FROM system.tables"

In [10]:
# Execute Test Query with and without optimization
client.query((test_query + activate_optimizer_setting)).result_rows
client.query((test_query + deactivate_optimizer_setting)).result_rows

[('INFORMATION_SCHEMA',
  'COLUMNS',
  UUID('00000000-0000-0000-0000-000000000000'),
  'View',
  0,
  [],
  '',
  datetime.datetime(1970, 1, 1, 2, 0, tzinfo=datetime.timezone(datetime.timedelta(seconds=7200), 'CEST')),
  [],
  [],
  "CREATE VIEW INFORMATION_SCHEMA.COLUMNS (`table_catalog` String, `table_schema` String, `table_name` String, `column_name` String, `ordinal_position` UInt64, `column_default` String, `is_nullable` String, `data_type` String, `character_maximum_length` Nullable(UInt64), `character_octet_length` Nullable(UInt64), `numeric_precision` Nullable(UInt64), `numeric_precision_radix` Nullable(UInt64), `numeric_scale` Nullable(UInt64), `datetime_precision` Nullable(UInt64), `character_set_catalog` Nullable(String), `character_set_schema` Nullable(String), `character_set_name` Nullable(String), `collation_catalog` Nullable(String), `collation_schema` Nullable(String), `collation_name` Nullable(String), `domain_catalog` Nullable(String), `domain_schema` Nullable(String)

In [11]:
directory_path = 'queries/imdb/'
#directory_path = 'queries/snap/'

# Use glob to find all files with a .sql extension in the specified directory
sql_files = glob.glob(directory_path + '*.sql')
queries = []

# Loop through each SQL file and read its contents
for sql_file in sql_files:
    with open(sql_file, 'r') as file:
        queries.append([file.read(), sql_file])

In [12]:
# key = filename
# value = [elapsed_time, query_result, optimizer_used, run_number]
results = defaultdict(list)

excluded = []
runs = 1

## Run without optimizer

In [13]:
for i in range(runs):
    for query, filename in queries:
        if any(excluded_file in filename for excluded_file in excluded):
            continue
        print(i, filename)
        start_time = time.time()
        result = client.query(query + deactivate_optimizer_setting)
        end_time = time.time()

        elapsed_time = end_time - start_time
        results[filename].append([elapsed_time, result.result_rows, deactivate_optimizer_setting, i])

0 queries/imdb/q2d.sql
0 queries/imdb/20a.sql
0 queries/imdb/q2c.sql
0 queries/imdb/3b.sql
0 queries/imdb/q5b.sql
0 queries/imdb/17d.sql
0 queries/imdb/3a.sql
0 queries/imdb/q5a.sql
0 queries/imdb/17e.sql
0 queries/imdb/q2a.sql
0 queries/imdb/17a.sql
0 queries/imdb/20b.sql


DatabaseError: :HTTPDriver for http://localhost:8123 returned response code 500)
 Code: 49. DB::Exception: Query FROM section cannot have more than 1 root table expression. (LOGICAL_ERROR) (version 23.9.1.1)


## Run with optimizer

In [17]:
for i in range(runs):
    for query, filename in queries:
        if any(excluded_file in filename for excluded_file in excluded):
            continue
        print(i, filename)
        start_time = time.time()
        result = client.query(query + activate_optimizer_setting)
        end_time = time.time()

        elapsed_time = end_time - start_time
        results[filename].append([elapsed_time, result.result_rows, activate_optimizer_setting, i])        

0 queries/imdb/q2d.sql
0 queries/imdb/20a.sql
0 queries/imdb/q2c.sql
0 queries/imdb/3b.sql
0 queries/imdb/q5b.sql
0 queries/imdb/17d.sql
0 queries/imdb/3a.sql
0 queries/imdb/q5a.sql
0 queries/imdb/17e.sql
0 queries/imdb/q2a.sql
0 queries/imdb/17a.sql
0 queries/imdb/20b.sql


DatabaseError: :HTTPDriver for http://localhost:8123 returned response code 500)
 Code: 49. DB::Exception: Query FROM section cannot have more than 1 root table expression. (LOGICAL_ERROR) (version 23.9.1.1)


In [None]:
for filename, result_objects in results.items():
    print(filename)
    for result_object in result_objects:
        print(result_object)   

In [None]:
result_adapted = defaultdict(list)
for filename, result_objects in results.items():
    for result_object in result_objects:
        time_elaps, result, optimizer, run = result_object  
        result_adapted[filename+str(optimizer)].append(time_elaps)

In [None]:
for filename, result_objects in result_adapted.items():
    print(filename)
    print(sum(result_objects)/len(result_objects))