In [1]:
from time import time
from uuid import uuid4

from clickhouse_connect import get_client
from clickhouse_connect.driver import Client
from faker import Faker

In [2]:
fake: Faker = Faker()

client: Client = get_client(host='localhost', port=8123)

In [3]:
BATCH_SIZE: int = 10000
BATCHES: int = 1000
TOTAL_RECORDS: float = BATCH_SIZE * BATCHES

In [4]:
client.command(
    """
    CREATE TABLE IF NOT EXISTS test_user_progress
    (
        user_id UUID,
        film_id UUID,
        viewed_frame Int64,
        ts DateTime
    ) ENGINE MergeTree()
    ORDER BY (viewed_frame)
    """
)

''

In [5]:
start_time: float = time()

for batch in range(BATCHES):
    data = [
        (uuid4(), uuid4(), fake.random_int(min=0, max=1000), fake.date_time_between(start_date="-1y", end_date="now")) for _ in range(BATCH_SIZE)
    ]
    client.insert('test_user_progress', data, column_names=['user_id', 'film_id', 'viewed_frame', 'ts'])

insertion_time: float  = time() - start_time

insertion_speed: float  = round(TOTAL_RECORDS / insertion_time, 2)

print('Insertion speed: {:,} records/sec'.format(insertion_speed))

Insertion speed: 70,401.82 records/sec


In [6]:
start_time: float  = time()

client.query('SELECT * FROM test_user_progress')

reading_time: float  = time() - start_time

reading_speed: float  = round(TOTAL_RECORDS / reading_time, 2)

print('Reading speed: {:,} records/sec'.format(reading_speed))

Reading speed: 1,145,159.75 records/sec


In [7]:
client.command('DROP TABLE IF EXISTS test_user_progress')


''