In [32]:
# Tear the cluster down and start it again in detached mode. `--remove-orphans`
# drops containers no longer listed in the compose file; `-v` also removes the
# volumes declared there, so the new containers do not reuse them.
!docker compose -f clickhouse_cluster.yml down --remove-orphans -v && docker compose -f clickhouse_cluster.yml up -d

[1A[1B[0G[?25l[+] Running 0/0
 ⠋ Container clickhouse-node2  [39mStopping[0m                                    [34m0.1s [0m
 ⠋ Container clickhouse-node1  [39mStopping[0m                                    [34m0.1s [0m
 ⠋ Container clickhouse-node4  [39mStopping[0m                                    [34m0.1s [0m
 ⠋ Container clickhouse-node3  [39mStopping[0m                                    [34m0.1s [0m
[?25h[1A[1A[1A[1A[1A[0G[?25l[+] Running 0/4
 ⠙ Container clickhouse-node2  [39mStopping[0m                                    [34m0.2s [0m
 ⠙ Container clickhouse-node1  [39mStopping[0m                                    [34m0.2s [0m
 ⠙ Container clickhouse-node4  [39mStopping[0m                                    [34m0.2s [0m
 ⠙ Container clickhouse-node3  [39mStopping[0m                                    [34m0.2s [0m
[?25h[1A[1A[1A[1A[1A[0G[?25l[+] Running 0/4
 ⠹ Container clickhouse-node2  [39mStopping[0m                      

In [33]:
from asynch import connect

conn1 = await connect(
    host='127.0.0.1',
    port=9091,
    database='default',
    user='default',
    password='',
)
conn3 = await connect(
    host='127.0.0.1',
    port=9093,
    database='default',
    user='default',
    password='',
)

In [34]:
from asynch.connection import Connection
from asynch.cursors import DictCursor
from more_itertools import chunked
from typing import Iterable


async def create_table(connection: Connection, shard: int, replica: int):
    """Create the ``view_history`` databases and tables on one cluster node.

    Executes, in order, on ``connection``:

    1. ``CREATE DATABASE`` for ``shard`` and ``replica``;
    2. ``shard.view_history`` - ReplicatedMergeTree registered under
       ``/clickhouse/tables/shard{shard}/view_history`` as ``replica_1``;
    3. ``replica.view_history`` - ReplicatedMergeTree registered under
       ``/clickhouse/tables/shard{replica}/view_history`` as ``replica_2``;
    4. ``default.view_history`` - Distributed table over ``company_cluster``
       with ``rand()`` as the sharding key.

    Every statement uses ``IF NOT EXISTS`` so the cell can be re-run against a
    node that is already initialised without raising "already exists" errors.

    Args:
        connection: open asynch connection to the node being initialised.
        shard: shard number interpolated into the path of ``shard.view_history``.
        replica: shard number interpolated into the path of
            ``replica.view_history`` (despite the name, this is the number of
            the *shard* this node holds a second replica of).
    """
    # Single source of truth for the schema shared by all three tables.
    columns = (
        'created_at DateTime, '
        'user_id UUID, '
        'film_id UUID, '
        'timestamp UInt16'
    )

    def replicated_ddl(database: str, shard_number: int, replica_name: str) -> str:
        # Build the ReplicatedMergeTree DDL for `<database>.view_history`.
        return (
            f'CREATE TABLE IF NOT EXISTS {database}.view_history ({columns}) '
            f"ENGINE = ReplicatedMergeTree('/clickhouse/tables/shard{shard_number}/view_history', '{replica_name}') "
            'PARTITION BY toYYYYMMDD(created_at) '
            'ORDER BY created_at'
        )

    statements = [
        'CREATE DATABASE IF NOT EXISTS shard',
        'CREATE DATABASE IF NOT EXISTS replica',
        replicated_ddl('shard', shard, 'replica_1'),
        replicated_ddl('replica', replica, 'replica_2'),
        # The empty database argument is kept exactly as in the original DDL.
        # NOTE(review): presumably relies on a per-replica default database in
        # the `company_cluster` definition - confirm in the server config.
        f'CREATE TABLE IF NOT EXISTS default.view_history ({columns}) '
        "ENGINE = Distributed('company_cluster', '', view_history, rand())",
    ]
    async with connection.cursor() as cursor:
        # DDL order matters: databases first, local tables before the Distributed one.
        for statement in statements:
            await cursor.execute(statement)


async def insert_data(connection: Connection, data: Iterable[dict], chunk: int = 1000):
    """Insert ``data`` into ``default.view_history`` in batches.

    Args:
        connection: open asynch connection used for the inserts.
        data: rows to insert, one dict per row.
            NOTE(review): keys presumably match the column names listed in the
            INSERT statement - confirm against the data generator.
        chunk: maximum number of rows sent per ``execute`` call.
    """
    insert_stmt = """
    INSERT INTO default.view_history (created_at, user_id, film_id, timestamp)
    VALUES
    """
    async with connection.cursor(cursor=DictCursor) as cur:
        # One execute() per batch; batches are produced lazily by `chunked`.
        for batch in chunked(data, chunk):
            await cur.execute(insert_stmt, batch)


async def select_data(connection: Connection):
    """Execute the per-user ``avg(timestamp)`` aggregation on ``view_history``.

    The query is only executed; nothing is fetched from the cursor and the
    function returns ``None``.

    Args:
        connection: open asynch connection to run the query on.
    """
    query = """
    SELECT user_id, avg(timestamp) FROM view_history
    GROUP BY user_id
    """
    async with connection.cursor() as cur:
        await cur.execute(query)


async def clear_table(connection: Connection):
    """Execute ``TRUNCATE TABLE view_history`` on ``connection``.

    The table name is unqualified, so it is resolved in the connection's
    current database.

    NOTE(review): with the connections used in this notebook that is the
    Distributed table ``default.view_history``. Confirm that truncating it
    also empties the underlying ``shard.*`` / ``replica.*`` tables; if it does
    not, rows accumulate between benchmark iterations.

    Args:
        connection: open asynch connection to run the statement on.
    """
    statement = """
    TRUNCATE TABLE view_history
    """
    async with connection.cursor() as cur:
        await cur.execute(statement)


In [35]:
await create_table(conn1, 1, 2)
await create_table(conn3, 2, 1)


In [36]:
from data import ViewHistoryCollection

# Generate the benchmark rows once; later cells slice prefixes of `views`.
# NOTE(review): the three arguments are presumably (rows, users, films) -
# confirm against data.ViewHistoryCollection.
total = 1_000_000
views = ViewHistoryCollection(total, total // 2, total // 4).to_dict()

In [37]:
import time
from collections import defaultdict


async def bench(data: list[dict], chunk: int, n: int):
    """Time ``n`` write/read rounds against the cluster.

    Each round truncates the table through ``conn1``, inserts ``data`` through
    ``conn1`` in batches of ``chunk`` rows, then runs the aggregation query
    through ``conn3``. Only the insert and the select are timed; the truncate
    is not.

    Args:
        data: rows passed unchanged to ``insert_data``.
        chunk: batch size passed to ``insert_data``.
        n: number of rounds.

    Returns:
        ``defaultdict(list)`` with keys ``'write'`` and ``'read'``, each holding
        ``n`` durations in seconds (one per round, in execution order).
    """
    elapsed = defaultdict(list)
    for _ in range(n):
        await clear_table(conn1)

        # perf_counter() is monotonic and high-resolution, so intervals cannot
        # be skewed by system clock adjustments the way time.time() can.
        started = time.perf_counter()
        await insert_data(conn1, data, chunk)
        elapsed['write'].append(time.perf_counter() - started)

        # NOTE(review): the read goes through a different node immediately
        # after the write; confirm all inserted rows are visible by then.
        started = time.perf_counter()
        await select_data(conn3)
        elapsed['read'].append(time.perf_counter() - started)

    return elapsed


In [38]:
# totals = [1_000, 10_000, 100_000, 1_000_000, 10_000_000]
totals = [1_000, 10_000, 100_000, 1_000_000]
elapsed = dict()
for total in totals:
    elapsed[total] = await bench(views[: total + 1], 500, 3)


In [39]:
# Show the collected timings: {row count: {'write': [...], 'read': [...]}}, in seconds.
elapsed

{1000: defaultdict(list,
             {'write': [0.05670523643493652,
               0.03279995918273926,
               0.028614044189453125],
              'read': [0.007543325424194336,
               0.03269243240356445,
               0.008972406387329102]}),
 10000: defaultdict(list,
             {'write': [0.3017909526824951,
               0.2987844944000244,
               0.3058438301086426],
              'read': [0.018959999084472656,
               0.015421628952026367,
               0.019049644470214844]}),
 100000: defaultdict(list,
             {'write': [3.221774101257324,
               2.823084592819214,
               2.911376476287842],
              'read': [0.13977479934692383,
               0.13204145431518555,
               0.1908555030822754]}),
 1000000: defaultdict(list,
             {'write': [29.22710609436035,
               30.136340141296387,
               30.28061556816101],
              'read': [1.6178691387176514,
               1.59577751159667

In [26]:
# Close both ClickHouse connections before the containers are stopped.
await conn1.close()
await conn3.close()

In [27]:
# Stop and remove the cluster containers. No `-v` here, so volumes are left in
# place (the first cell removes them on the next run).
!docker compose -f clickhouse_cluster.yml down

[1A[1B[0G[?25l[+] Running 0/0
 ⠋ Container clickhouse-node4  [39mStopping[0m                                    [34m0.1s [0m
 ⠋ Container clickhouse-node2  [39mStopping[0m                                    [34m0.1s [0m
 ⠋ Container clickhouse-node3  [39mStopping[0m                                    [34m0.1s [0m
 ⠋ Container clickhouse-node1  [39mStopping[0m                                    [34m0.1s [0m
[?25h[1A[1A[1A[1A[1A[0G[?25l[+] Running 0/4
 ⠙ Container clickhouse-node4  [39mStopping[0m                                    [34m0.2s [0m
 ⠙ Container clickhouse-node2  [39mStopping[0m                                    [34m0.2s [0m
 ⠙ Container clickhouse-node3  [39mStopping[0m                                    [34m0.2s [0m
 ⠙ Container clickhouse-node1  [39mStopping[0m                                    [34m0.2s [0m
[?25h[1A[1A[1A[1A[1A[0G[?25l[+] Running 0/4
 ⠹ Container clickhouse-node4  [39mStopping[0m                      