In [1]:
# Start from a clean slate: tear down any stack left over from a previous run
# (per the Docker Compose CLI, --remove-orphans also removes containers not
# defined in the compose file and -v removes the attached volumes), then bring
# the stack from clickhouse_basic.yml back up in detached mode.
!docker compose -f clickhouse_basic.yml down --remove-orphans -v && docker compose -f clickhouse_basic.yml up -d

[1A[1B[0G[?25l[+] Running 0/0
 ⠋ Network storage_research_default  [39mCreating[0m                              [34m0.1s [0m
[?25h[1A[1A[0G[?25l[34m[+] Running 1/1[0m
 [32m✔[0m Network storage_research_default  [32mCreated[0m                               [34m0.1s [0m
 ⠿ Container clickhouse_ugc          [39mStartin...[0m                            [34m0.1s [0m
[?25h[1A[1A[1A[0G[?25l[+] Running 1/2
 [32m✔[0m Network storage_research_default  [32mCreated[0m                               [34m0.1s [0m
 ⠿ Container clickhouse_ugc          [39mStartin...[0m                            [34m0.2s [0m
[?25h[1A[1A[1A[0G[?25l[+] Running 1/2
 [32m✔[0m Network storage_research_default  [32mCreated[0m                               [34m0.1s [0m
 ⠿ Container clickhouse_ugc          [39mStartin...[0m                            [34m0.3s [0m
[?25h[1A[1A[1A[0G[?25l[34m[+] Running 2/2[0m
 [32m✔[0m Network storage_research_default  [32mCreat

In [2]:
from asynch import connect

# Notebook-wide connection that every helper below picks up as the global `conn`.
# NOTE(review): 9090 is presumably the host port that clickhouse_basic.yml maps
# to the ClickHouse native-protocol port — confirm against the compose file.
conn = await connect(
    host='127.0.0.1',
    port=9090,
    database='default',
    user='default',
    password='',
)

In [3]:
from typing import Iterable
from asynch.cursors import DictCursor
from more_itertools import chunked


async def create_table():
    """Create the ``view_history`` table unless it already exists.

    Runs one DDL statement on the notebook-level ``conn`` connection. The
    table is a MergeTree ordered by ``created_at`` with the columns
    ``created_at``, ``user_id``, ``film_id`` and ``timestamp``.
    """
    ddl = """
    CREATE TABLE IF NOT EXISTS view_history
    (
        created_at DateTime,
        user_id UUID,
        film_id UUID,
        timestamp UInt16
    )
    ENGINE = MergeTree
    ORDER BY created_at
    """
    async with conn.cursor() as cur:
        await cur.execute(ddl)


async def insert_data(data: Iterable[dict], chunk: int = 1000):
    """Insert ``data`` into ``view_history`` in batches of ``chunk`` rows.

    Args:
        data: Rows to insert, one dict per row (per the type hint; the keys
            are presumably the four column names in the INSERT — verify
            against the data generator).
        chunk: Maximum number of rows passed to a single ``execute`` call.

    A single ``DictCursor`` on the notebook-level ``conn`` is reused for all
    batches.
    """
    insert_stmt = """
    INSERT INTO view_history (created_at, user_id, film_id, timestamp)
    VALUES
    """
    async with conn.cursor(cursor=DictCursor) as cur:
        batches = chunked(data, chunk)
        for batch in batches:
            await cur.execute(insert_stmt, batch)


async def select_data():
    """Run the read-side benchmark query against ``view_history``.

    Executes an aggregate (average ``timestamp`` per ``user_id``) on the
    notebook-level ``conn``. The result is neither fetched explicitly nor
    returned by this function; it returns ``None``.
    """
    query = """
    SELECT user_id, avg(timestamp) FROM view_history
    GROUP BY user_id
    """
    async with conn.cursor() as cur:
        await cur.execute(query)


async def clear_table():
    """Remove every row from ``view_history`` via ``TRUNCATE TABLE``.

    Uses the notebook-level ``conn`` connection.
    """
    statement = """
    TRUNCATE TABLE view_history
    """
    async with conn.cursor() as cur:
        await cur.execute(statement)


In [4]:
import time
from collections import defaultdict


async def bench(data: list[dict], chunk: int, n: int):
    """Time ``n`` write/read rounds against the ``view_history`` table.

    Each round truncates the table, inserts ``data`` in batches of ``chunk``
    rows, and then runs the read query. The truncate is not timed.

    Args:
        data: Rows to insert in every round.
        chunk: Batch size forwarded to ``insert_data``.
        n: Number of rounds to run.

    Returns:
        A ``defaultdict(list)`` with keys ``'write'`` and ``'read'``, each
        holding ``n`` durations in seconds (empty if ``n`` is 0).
    """
    elapsed = defaultdict(list)
    for _ in range(n):
        # Start every round from an empty table so rounds are comparable.
        await clear_table()

        # perf_counter() is a monotonic, high-resolution clock intended for
        # measuring intervals; time.time() can jump if the system clock is
        # adjusted while the benchmark is running.
        started = time.perf_counter()
        await insert_data(data, chunk)
        elapsed['write'].append(time.perf_counter() - started)

        started = time.perf_counter()
        await select_data()
        elapsed['read'].append(time.perf_counter() - started)

    return elapsed


In [5]:
# Make sure the target table exists before any benchmark round touches it.
await create_table()

In [6]:
from data import ViewHistoryCollection

# Build the largest dataset once; smaller benchmark inputs are sliced from it.
# NOTE(review): the second and third arguments are presumably the numbers of
# distinct users and films — confirm against data.ViewHistoryCollection.
total = 10_000_000
views = ViewHistoryCollection(total, total // 2, total // 4).to_dict()

In [7]:
totals = [1_000, 10_000, 100_000, 1_000_000, 10_000_000]
elapsed = dict()
for total in totals:
    elapsed[total] = await bench(views[: total + 1], 500, 3)

In [8]:
# Show the raw timings: per dataset size, the 'write' and 'read' durations
# (in seconds) of each benchmark round.
elapsed

{1000: defaultdict(list,
             {'write': [0.01697707176208496,
               0.014722347259521484,
               0.014729499816894531],
              'read': [0.006701231002807617,
               0.003679513931274414,
               0.0031075477600097656]}),
 10000: defaultdict(list,
             {'write': [0.13099288940429688,
               0.14143872261047363,
               0.13764238357543945],
              'read': [0.012050151824951172,
               0.009212017059326172,
               0.012189626693725586]}),
 100000: defaultdict(list,
             {'write': [1.3720324039459229,
               1.3244221210479736,
               1.3215491771697998],
              'read': [0.0740363597869873,
               0.06766724586486816,
               0.07837438583374023]}),
 1000000: defaultdict(list,
             {'write': [12.65037226676941,
               12.645774841308594,
               13.020787239074707],
              'read': [0.7417571544647217,
               0.7697

In [9]:
import json
from pathlib import Path

# Persist the timings as JSON. The integer dataset sizes used as keys are
# written as strings, since JSON object keys are always strings.
result_path = Path('result/clickhouse_basic.json')
# Create the output directory if needed, so a long benchmark run is not lost
# to a FileNotFoundError when `result/` does not exist yet.
result_path.parent.mkdir(parents=True, exist_ok=True)
with result_path.open('w') as f:
    json.dump(elapsed, f)


In [10]:
# Close the ClickHouse connection before the container is torn down.
await conn.close()

In [11]:
# Tear the benchmark stack down again; per the Docker Compose CLI, -v also
# removes the volumes, so no data from this run is kept.
!docker compose -f clickhouse_basic.yml down --remove-orphans -v

[1A[1B[0G[?25l[+] Running 0/0
 ⠋ Container clickhouse_ugc  [39mStopping[0m                                      [34m0.1s [0m
[?25h[1A[1A[0G[?25l[+] Running 0/1
 ⠙ Container clickhouse_ugc  [39mStopping[0m                                      [34m0.2s [0m
[?25h[1A[1A[0G[?25l[+] Running 0/1
 ⠹ Container clickhouse_ugc  [39mStopping[0m                                      [34m0.3s [0m
[?25h[1A[1A[0G[?25l[+] Running 0/1
 ⠸ Container clickhouse_ugc  [39mStopping[0m                                      [34m0.4s [0m
[?25h[1A[1A[0G[?25l[+] Running 0/1
 ⠼ Container clickhouse_ugc  [39mStopping[0m                                      [34m0.5s [0m
[?25h[1A[1A[0G[?25l[+] Running 0/1
 ⠴ Container clickhouse_ugc  [39mStopping[0m                                      [34m0.6s [0m
[?25h[1A[1A[0G[?25l[+] Running 0/1
 ⠦ Container clickhouse_ugc  [39mStopping[0m                                      [34m0.7s [0m
[?25h[1A[1A[0G[?25l[+] Runni