In [1]:
# Start from a clean slate: tear down any previous stack from clickhouse_basic.yml
# (including orphan containers and volumes), then bring it back up in detached mode.
# NOTE(review): `up -d` returns once the container is started; whether ClickHouse is
# already accepting connections by the time the next cell runs is not guaranteed here.
!docker compose -f clickhouse_basic.yml down --remove-orphans -v && docker compose -f clickhouse_basic.yml up -d

[1A[1B[0G[?25l[+] Running 1/0
 [32m✔[0m Network storage_research_default  [32mCreated[0m                               [34m0.1s [0m
 ⠋ Container clickhouse_ugc          [39mCreatin...[0m                            [34m0.0s [0m
[?25h[1A[1A[1A[0G[?25l[+] Running 1/2
 [32m✔[0m Network storage_research_default  [32mCreated[0m                               [34m0.1s [0m
 ⠿ Container clickhouse_ugc          [39mStartin...[0m                            [34m0.1s [0m
[?25h[1A[1A[1A[0G[?25l[+] Running 1/2
 [32m✔[0m Network storage_research_default  [32mCreated[0m                               [34m0.1s [0m
 ⠿ Container clickhouse_ugc          [39mStartin...[0m                            [34m0.2s [0m
[?25h[1A[1A[1A[0G[?25l[+] Running 1/2
 [32m✔[0m Network storage_research_default  [32mCreated[0m                               [34m0.1s [0m
 ⠿ Container clickhouse_ugc          [39mStartin...[0m                            [34m0.3s [0m
[?25h

In [2]:
from asynch import connect

# Notebook-wide connection object; the helper functions defined below read this
# global `conn` directly instead of taking it as a parameter.
conn = await connect(
    host='127.0.0.1',
    # NOTE(review): presumably the host port that clickhouse_basic.yml maps to the
    # ClickHouse native-protocol port — confirm against the compose file.
    port=9090,
    database='default',
    user='default',
    password='',
)

In [3]:
from typing import Iterable
from asynch.cursors import DictCursor
from more_itertools import chunked


async def create_table() -> None:
    """Create the ``view_history`` table unless it already exists.

    The DDL is executed through a cursor opened on the notebook-level ``conn``.
    The table uses the MergeTree engine and is ordered by ``created_at``.
    """
    ddl = """
    CREATE TABLE IF NOT EXISTS view_history
    (
        created_at DateTime,
        user_id UUID,
        film_id UUID,
        timestamp UInt16
    )
    ENGINE = MergeTree
    ORDER BY created_at
    """
    async with conn.cursor() as cur:
        await cur.execute(ddl)


async def insert_data(data: Iterable[dict], chunk: int = 1000):
    """Insert rows into ``view_history`` in batches of at most ``chunk`` rows.

    Args:
        data: Rows to insert. Each row is a dict; presumably keyed by the column
            names listed in the INSERT statement (``created_at``, ``user_id``,
            ``film_id``, ``timestamp``) — confirm against the data generator.
        chunk: Maximum number of rows sent with a single INSERT. Must be >= 1.

    Raises:
        ValueError: If ``chunk`` is smaller than 1.
    """
    # Fail loudly up front: a non-positive batch size cannot form batches, and
    # letting it through would surface as an obscure error (or no insert at all)
    # further down, which would silently skew benchmark timings.
    if chunk < 1:
        raise ValueError(f"chunk must be a positive integer, got {chunk!r}")

    sql = """
    INSERT INTO view_history (created_at, user_id, film_id, timestamp)
    VALUES
    """
    # One cursor is reused for all batches; each batch is one execute() call.
    async with conn.cursor(cursor=DictCursor) as cursor:
        for ch in chunked(data, chunk):
            await cursor.execute(sql, ch)


async def select_data() -> None:
    """Run the per-user aggregate query against ``view_history``.

    Executes a ``GROUP BY user_id`` query computing ``avg(timestamp)`` on the
    notebook-level ``conn``. The function does not return the rows; it only
    executes the statement.
    """
    query = """
    SELECT user_id, avg(timestamp) FROM view_history
    GROUP BY user_id
    """
    async with conn.cursor() as cur:
        await cur.execute(query)


async def clear_table() -> None:
    """Remove every row from ``view_history`` by issuing ``TRUNCATE TABLE``.

    Uses a cursor opened on the notebook-level ``conn``.
    """
    statement = """
    TRUNCATE TABLE view_history
    """
    async with conn.cursor() as cur:
        await cur.execute(statement)


In [4]:
import time
from collections import defaultdict


async def _timed(awaitable) -> float:
    """Await ``awaitable`` and return how many seconds that took."""
    # perf_counter() is monotonic and high-resolution, unlike time.time(), which
    # follows the system clock and can jump (NTP adjustments, manual changes).
    started = time.perf_counter()
    await awaitable
    return time.perf_counter() - started


async def bench(data: list[dict], chunk: int, n: int):
    """Time ``n`` write/read rounds against the ``view_history`` table.

    Each round truncates the table (not timed), then times inserting ``data``
    in batches of ``chunk`` rows, then times the aggregate select.

    Args:
        data: Rows passed to ``insert_data`` on every round.
        chunk: Batch size forwarded to ``insert_data``.
        n: Number of rounds to run.

    Returns:
        A ``defaultdict(list)`` with keys ``'write'`` and ``'read'``, each
        holding one elapsed-seconds float per round (empty if ``n`` is 0).
    """
    elapsed = defaultdict(list)
    for _ in range(n):
        # Start every round from an empty table so write timings are comparable.
        await clear_table()
        elapsed['write'].append(await _timed(insert_data(data, chunk)))
        elapsed['read'].append(await _timed(select_data()))

    return elapsed


In [5]:
# Make sure the target table exists before any benchmark round truncates or fills it.
await create_table()

In [6]:
from data import ViewHistoryCollection

# Size of the generated data set; the second and third constructor arguments are
# derived from it as one half and one quarter.
# NOTE(review): presumably these are the numbers of distinct users and films —
# confirm against ViewHistoryCollection in data.py.
total = 1_000_000
views = ViewHistoryCollection(total, total // 2, total // 4).to_dict()

In [7]:
# totals = [1_000, 10_000, 100_000, 1_000_000, 10_000_000]
totals = [1_000, 10_000, 100_000, 1_000_000]
elapsed = dict()
for total in totals:
    elapsed[total] = await bench(views[: total + 1], 500, 3)

In [8]:
# Show the raw timings: {row count: {'write': [seconds per round], 'read': [seconds per round]}}.
elapsed

{1000: defaultdict(list,
             {'write': [0.024312257766723633,
               0.01767444610595703,
               0.014029264450073242],
              'read': [0.004565238952636719,
               0.0031099319458007812,
               0.0029039382934570312]}),
 10000: defaultdict(list,
             {'write': [0.12547922134399414,
               0.12379240989685059,
               0.12362360954284668],
              'read': [0.011844158172607422,
               0.009453773498535156,
               0.008905172348022461]}),
 100000: defaultdict(list,
             {'write': [1.2888517379760742,
               1.2554771900177002,
               1.2521629333496094],
              'read': [0.08154726028442383,
               0.07345175743103027,
               0.08325028419494629]}),
 1000000: defaultdict(list,
             {'write': [12.72580885887146,
               12.6996488571167,
               13.123623609542847],
              'read': [1.4218800067901611,
               1.3551

In [9]:
# Release the ClickHouse connection before the container is torn down.
await conn.close()

In [10]:
# Tear down the stack defined in clickhouse_basic.yml, including orphan containers
# and volumes (-v), so no benchmark data is left behind.
!docker compose -f clickhouse_basic.yml down --remove-orphans -v

[1A[1B[0G[?25l[+] Running 0/0
 ⠋ Container clickhouse_ugc  [39mStopping[0m                                      [34m0.1s [0m
[?25h[1A[1A[0G[?25l[+] Running 0/1
 ⠙ Container clickhouse_ugc  [39mStopping[0m                                      [34m0.2s [0m
[?25h[1A[1A[0G[?25l[+] Running 0/1
 ⠹ Container clickhouse_ugc  [39mStopping[0m                                      [34m0.3s [0m
[?25h[1A[1A[0G[?25l[+] Running 0/1
 ⠸ Container clickhouse_ugc  [39mStopping[0m                                      [34m0.4s [0m
[?25h[1A[1A[0G[?25l[+] Running 0/1
 ⠼ Container clickhouse_ugc  [39mStopping[0m                                      [34m0.5s [0m
[?25h[1A[1A[0G[?25l[+] Running 0/1
 ⠴ Container clickhouse_ugc  [39mStopping[0m                                      [34m0.6s [0m
[?25h[1A[1A[0G[?25l[+] Running 0/1
 ⠦ Container clickhouse_ugc  [39mStopping[0m                                      [34m0.7s [0m
[?25h[1A[1A[0G[?25l[+] Runni