In [None]:
from sqlalchemy import create_engine
from dotenv import load_dotenv
from sqlalchemy import text
import pandas as pd
import os

load_dotenv()

DB_USER = os.getenv('DB_USER')
DB_PASS = os.getenv('DB_PASS')
DB_HOST = os.getenv('DB_HOST')
DB_PORT = os.getenv('DB_PORT')
DB_NAME = os.getenv('DB_NAME')
PATH_TO_CERT = os.getenv('PATH_TO_CERT')

connection_str = f"postgresql+psycopg2://{DB_USER}:{DB_PASS}@{DB_HOST}:{DB_PORT}/{DB_NAME}"

engine = create_engine(
    connection_str,
    connect_args={
        "sslmode": "verify-full",
        "sslrootcert": PATH_TO_CERT,
        "target_session_attrs": "read-write"
    }
)

In [12]:
create_table_query = """
drop table if exists posts;
"""

try:
    with engine.begin() as conn:
        conn.execute(text(create_table_query))
except Exception as e:
    print("–û—à–∏–±–∫–∞:", e)


In [4]:
with engine.connect() as conn:
    df = pd.read_sql('''
select * from posts
    ''', conn)

df

Unnamed: 0,id,text,created_at


In [15]:
rbc = pd.read_csv("src/dataset/rbc/channel_rbc_news_posts.csv")
rbc["message_dt"] = pd.to_datetime(rbc["message_dt"]).dt.date
rbc = rbc.drop(columns=["original_author"])
rbc = rbc[["message_id", "channel_id", "message_dt", "views", "content"]]

In [16]:
rbc

Unnamed: 0,message_id,channel_id,message_dt,views,content
0,137230,rbc_news,2025-12-03,15453.0,–û—Ç–ø—Ä–∞–≤–∏—Ç—å –¥–µ–Ω—å–≥–∏ –∑–∞ —Ä—É–±–µ–∂ –∏ –ø–æ–º–æ—á—å –±–ª–∏–∑–∫–∏–º ‚Äî —Ç...
1,137228,rbc_news,2025-12-03,40045.0,–°—É–¥ –ø—Ä–∏–∑–Ω–∞–ª –ø–∏—Å–∞—Ç–µ–ª—è –ë–æ—Ä–∏—Å–∞ –ê–∫—É–Ω–∏–Ω–∞ (–Ω–∞—Å—Ç–æ—è—â–µ–µ...
2,137226,rbc_news,2025-12-03,53463.0,"–ù–∞ —Ö–∞—Ä–∞–∫—Ç–µ—Ä–µ –ø–µ—Ä–µ–≥–æ–≤–æ—Ä–æ–≤ —Å –°–®–ê, –ø—Ä–æ—à–µ–¥—à–∏—Ö –Ω–∞–∫–∞..."
3,137224,rbc_news,2025-12-03,56667.0,–ï–≤—Ä–æ–∫–æ–º–∏—Å—Å–∏—è –Ω–∞–º–µ—Ä–µ–Ω–∞ –∑–∞–ø—Ä–µ—Ç–∏—Ç—å –∏—Å–ø–æ–ª–Ω–µ–Ω–∏–µ –≤–Ω—É...
4,137223,rbc_news,2025-12-03,55604.0,üéô –ü—Ä—è–º–æ —Å–µ–π—á–∞—Å –≤ —ç—Ñ–∏—Ä–µ –†–∞–¥–∏–æ –†–ë–ö –æ–±—Å—É–∂–¥–∞–µ–º –ø–ª–∞...
...,...,...,...,...,...
4842,116045,rbc_news,2025-04-15,108803.0,–°—É–¥ –Ω–∞ –°–∞—Ö–∞–ª–∏–Ω–µ –≤—ã–Ω–µ—Å –ø–µ—Ä–≤–æ–µ —Ä–µ—à–µ–Ω–∏–µ –ø–æ –¥–µ–ª—É –æ...
4843,116044,rbc_news,2025-04-15,108074.0,–û–ø–µ—Ä–∞—Ç–æ—Ä –Ω–∞—Ü–∏–æ–Ω–∞–ª—å–Ω—ã—Ö –ª–æ—Ç–µ—Ä–µ–π –§—Ä–∞–Ω—Ü–∏–∏ —Å—Ç–∞–ª –æ—Ç–≤...
4844,116043,rbc_news,2025-04-15,143013.0,üêò –°–ª–æ–Ω—ã –≤–æ –≤—Ä–µ–º—è –∑–µ–º–ª–µ—Ç—Ä—è—Å–µ–Ω–∏—è –≤ –∑–æ–æ–ø–∞—Ä–∫–µ –°–∞–Ω-...
4845,116041,rbc_news,2025-04-15,125020.0,–ê—ç—Ä–æ–ø–æ—Ä—Ç —é–∂–Ω–æ–∫–æ—Ä–µ–π—Å–∫–æ–≥–æ –≥–æ—Ä–æ–¥–∞ –ú—É–∞–Ω —Ä–µ–≥—É–ª—è—Ä–Ω–æ ...


In [17]:
rbc.to_sql('posts', engine, if_exists='replace', index=False)

847

In [18]:
with engine.connect() as conn:
    df = pd.read_sql('''
select * from posts
    ''', conn)

df

Unnamed: 0,message_id,channel_id,message_dt,views,content
0,137230,rbc_news,2025-12-03,15453.0,–û—Ç–ø—Ä–∞–≤–∏—Ç—å –¥–µ–Ω—å–≥–∏ –∑–∞ —Ä—É–±–µ–∂ –∏ –ø–æ–º–æ—á—å –±–ª–∏–∑–∫–∏–º ‚Äî —Ç...
1,137228,rbc_news,2025-12-03,40045.0,–°—É–¥ –ø—Ä–∏–∑–Ω–∞–ª –ø–∏—Å–∞—Ç–µ–ª—è –ë–æ—Ä–∏—Å–∞ –ê–∫—É–Ω–∏–Ω–∞ (–Ω–∞—Å—Ç–æ—è—â–µ–µ...
2,137226,rbc_news,2025-12-03,53463.0,"–ù–∞ —Ö–∞—Ä–∞–∫—Ç–µ—Ä–µ –ø–µ—Ä–µ–≥–æ–≤–æ—Ä–æ–≤ —Å –°–®–ê, –ø—Ä–æ—à–µ–¥—à–∏—Ö –Ω–∞–∫–∞..."
3,137224,rbc_news,2025-12-03,56667.0,–ï–≤—Ä–æ–∫–æ–º–∏—Å—Å–∏—è –Ω–∞–º–µ—Ä–µ–Ω–∞ –∑–∞–ø—Ä–µ—Ç–∏—Ç—å –∏—Å–ø–æ–ª–Ω–µ–Ω–∏–µ –≤–Ω—É...
4,137223,rbc_news,2025-12-03,55604.0,üéô –ü—Ä—è–º–æ —Å–µ–π—á–∞—Å –≤ —ç—Ñ–∏—Ä–µ –†–∞–¥–∏–æ –†–ë–ö –æ–±—Å—É–∂–¥–∞–µ–º –ø–ª–∞...
...,...,...,...,...,...
4842,116045,rbc_news,2025-04-15,108803.0,–°—É–¥ –Ω–∞ –°–∞—Ö–∞–ª–∏–Ω–µ –≤—ã–Ω–µ—Å –ø–µ—Ä–≤–æ–µ —Ä–µ—à–µ–Ω–∏–µ –ø–æ –¥–µ–ª—É –æ...
4843,116044,rbc_news,2025-04-15,108074.0,–û–ø–µ—Ä–∞—Ç–æ—Ä –Ω–∞—Ü–∏–æ–Ω–∞–ª—å–Ω—ã—Ö –ª–æ—Ç–µ—Ä–µ–π –§—Ä–∞–Ω—Ü–∏–∏ —Å—Ç–∞–ª –æ—Ç–≤...
4844,116043,rbc_news,2025-04-15,143013.0,üêò –°–ª–æ–Ω—ã –≤–æ –≤—Ä–µ–º—è –∑–µ–º–ª–µ—Ç—Ä—è—Å–µ–Ω–∏—è –≤ –∑–æ–æ–ø–∞—Ä–∫–µ –°–∞–Ω-...
4845,116041,rbc_news,2025-04-15,125020.0,–ê—ç—Ä–æ–ø–æ—Ä—Ç —é–∂–Ω–æ–∫–æ—Ä–µ–π—Å–∫–æ–≥–æ –≥–æ—Ä–æ–¥–∞ –ú—É–∞–Ω —Ä–µ–≥—É–ª—è—Ä–Ω–æ ...


In [40]:
from qdrant_client import QdrantClient, models
from dotenv import load_dotenv
import numpy as np
import os

load_dotenv()

QDRANT_URL = os.getenv('QDRANT_URL')

client = QdrantClient(
    url=QDRANT_URL,
)

In [2]:
client.create_collection(
    collection_name="my_collection",
    vectors_config=models.VectorParams(
        size=384, 
        distance=models.Distance.COSINE
    )
)


True

In [None]:
points = [
    models.PointStruct(
        id=1,
        vector=np.ones(384).tolist(),
        payload={"text": "–ü—Ä–∏–º–µ—Ä –¥–æ–∫—É–º–µ–Ω—Ç–∞ 1"}
    ),
    models.PointStruct(
        id=2,
        vector=(-np.ones(384)).tolist(),
        payload={"text": "–ü—Ä–∏–º–µ—Ä –¥–æ–∫—É–º–µ–Ω—Ç–∞ 2"}
    )
]

client.upsert(
    collection_name="my_collection",
    points=points,
    wait=True
)

UpdateResult(operation_id=3, status=<UpdateStatus.COMPLETED: 'completed'>)

In [42]:
results = client.query_points(
    collection_name="my_collection",
    query=(-np.ones(384)).tolist(),
    limit=1
)

In [43]:
results.points[0]

ScoredPoint(id=2, version=3, score=0.9999998, payload={'text': '–ü—Ä–∏–º–µ—Ä –¥–æ–∫—É–º–µ–Ω—Ç–∞ 2'}, vector=None, shard_key=None, order_value=None)