In [1]:
import json
import time
from typing import Optional, Text

import pandas as pd
from clickhouse_driver import Client
from pydantic import BaseModel, ValidationError

In [2]:
# Connection handle used by every later cell to talk to the ClickHouse server.
# NOTE(review): "0.0.0.0" is a wildcard address rather than a specific host;
# confirm it resolves to the local server on the platform this runs on.
clickhouse_host = "0.0.0.0"
client = Client(host=clickhouse_host)

In [3]:
# Create the working database; IF NOT EXISTS makes the cell safe to re-run.
create_database_sql = "CREATE DATABASE IF NOT EXISTS test_db"
client.execute(create_database_sql)

[]

In [4]:
# Create the target MergeTree table, ordered and keyed by Id.
# IF NOT EXISTS makes the cell safe to re-run against an existing table.
create_table_sql = (
    "CREATE TABLE IF NOT EXISTS test_db.test_table (Id UInt64, user_id TEXT, movie_id TEXT, "
    "                stars smallint, viewed_frame Float64, likes boolean, event_time DateTime) Engine=MergeTree() ORDER BY Id PRIMARY KEY Id"
)
client.execute(create_table_sql)

[]

In [5]:
# Load the whole test data set into memory. low_memory=False makes pandas read
# the file in one pass instead of in chunks when inferring column dtypes.
csv_path = "./data/test.csv"
df = pd.read_csv(csv_path, sep=",", encoding="utf-8", low_memory=False)

In [6]:
class Data(BaseModel):
    """Validation schema for one CSV record destined for ``test_db.test_table``.

    Field names mirror the table's column names. ``Id`` must be an integer;
    every other field also accepts ``None``.
    """

    Id: int
    user_id: Optional[Text]
    movie_id: Optional[Text]
    stars: Optional[int]
    # Declared as float to match the Float64 ``viewed_frame`` column. With an
    # int annotation, fractional values are truncated or rejected (depending on
    # the pydantic version) before they ever reach the database.
    viewed_frame: Optional[float]
    likes: Optional[Text]
    event_time: Optional[Text]

In [7]:
# Round-trip the frame through JSON to obtain a list of plain dicts, one per
# row (orient="records"), built only from JSON-native Python values.
records_json = df.to_json(orient="records")
payload = json.loads(records_json)

In [8]:
# Validate every record with the Data model and insert the valid ones into
# test_db.test_table, one statement per row. Records that fail validation are
# still skipped, but they are counted and a few examples are printed so that
# dropped data is visible instead of silent.
# NOTE(review): one INSERT per row is slow on large inputs; consider sending
# rows in batches once the column values are converted to native types.
insert_sql = (
    "INSERT INTO test_db.test_table (Id, user_id, movie_id, stars, viewed_frame, likes, event_time) "
    "            VALUES (%(Id)s, %(user_id)s, %(movie_id)s, %(stars)s, %(viewed_frame)s, %(likes)s, %(event_time)s)"
)
max_reported = 5  # cap on how many validation failures are printed in detail
inserted_count = 0
skipped_count = 0
skipped_examples = []

for position, record in enumerate(payload):
    # Only validation is guarded: database errors must still surface.
    try:
        row = Data(**record)
    except ValidationError as error:
        skipped_count += 1
        if len(skipped_examples) < max_reported:
            skipped_examples.append((position, error))
        continue

    client.execute(
        insert_sql,
        {
            "Id": row.Id,
            "user_id": row.user_id,
            "movie_id": row.movie_id,
            "stars": row.stars,
            "viewed_frame": row.viewed_frame,
            "likes": row.likes,
            "event_time": row.event_time,
        },
    )
    inserted_count += 1

print("Inserted:", inserted_count, "rows; skipped (failed validation):", skipped_count)
for position, error in skipped_examples:
    print(f"  record {position}: {error}")

KeyboardInterrupt: 

In [9]:
# Time a query that returns a single row chosen by ORDER BY rand(). The figure
# covers the whole client.execute() call as seen from this process.
# perf_counter() is used instead of time() because it is monotonic and
# high-resolution, so system clock adjustments cannot distort the interval.
start = time.perf_counter()
client.execute("SELECT * FROM test_db.test_table ORDER BY rand() LIMIT 1")
end = time.perf_counter()
print("Elapsed:", (end - start) * 1000, "ms.")

Elapsed: 40.01164436340332 ms.


In [10]:
# Time a lookup by Id, the table's ORDER BY / PRIMARY KEY column. The figure
# covers the whole client.execute() call as seen from this process.
# perf_counter() is used instead of time() because it is monotonic and
# high-resolution, so system clock adjustments cannot distort the interval.
start = time.perf_counter()
result_1 = client.execute("SELECT * FROM test_db.test_table  WHERE Id='234000'")
end = time.perf_counter()
print("Elapsed:", (end - start) * 1000, "ms.")

Elapsed: 8.498907089233398 ms.
