In [2]:
# =============================
# 1. Imports
# =============================
import psycopg2
from qdrant_client import QdrantClient
from qdrant_client.http.models import VectorParams, Distance
from langchain_qdrant import QdrantVectorStore
from langchain_openai import OpenAIEmbeddings
import os

In [3]:
# =============================
# 2. Connect to Postgres
# =============================
DB_CONFIG = {
    "host": os.getenv("DB_HOST"),
    "port": int(os.getenv("DB_PORT")),
    "dbname": os.getenv("DB_NAME"),
    "user": os.getenv("DB_USER"),
    "password": os.getenv("DB_PASSWORD"),
}
conn = psycopg2.connect(**DB_CONFIG)
cur = conn.cursor()

# Example: fetch first 100 rows for testing
cur.execute("SELECT random_key, persian_name FROM base_products LIMIT 100")
rows = cur.fetchall()

In [4]:
# Choose embedding model
# - text-embedding-3-small → 1536 dims
# - text-embedding-3-large → 3072 dims
embedding_model = "text-embedding-3-small"
dim = 1536 if "small" in embedding_model else 3072

# Create / open a Qdrant database on disk
client = QdrantClient(path="./qdrant_db_test")   # any folder path works

# Create collection (example with OpenAI small embeddings)
client.recreate_collection(
    collection_name="products",
    vectors_config=VectorParams(size=1536, distance=Distance.COSINE),
)



  client.recreate_collection(


True

In [5]:
# =============================
# 4. Initialize embeddings + QdrantVectorStore
# =============================
OPENAI_API_KEY = os.getenv("API_KEY")
BASE_URL = os.getenv("BASE_URL")
embeddings = OpenAIEmbeddings(model=embedding_model, 
                              api_key=OPENAI_API_KEY, base_url=BASE_URL)

vector_store = QdrantVectorStore(
    client=client,
    collection_name="products",
    embedding=embeddings,
)

In [5]:
# =============================
# 5. Insert rows into Qdrant
# =============================
texts = [row[1] for row in rows]          # persian_name
metadatas = [{"random_key": row[0], "persian_name": row[1]} for row in rows]

vector_store.add_texts(texts=texts, metadatas=metadatas)

print(f"Inserted {len(texts)} rows into Qdrant.")

Inserted 100 rows into Qdrant.


In [3]:
main()

Resuming from checkpoint: aafnep


Batches: 1batch [00:06,  6.66s/batch]

Inserted 10 rows (last random_key=aafrhl)

Batches: 2batch [00:08,  4.02s/batch]

Inserted 20 rows (last random_key=aafufe)

Batches: 3batch [00:11,  3.36s/batch]

Inserted 30 rows (last random_key=aagayk)

Batches: 4batch [00:15,  3.55s/batch]

Inserted 40 rows (last random_key=aagdpm)

Batches: 5batch [00:17,  3.24s/batch]

Inserted 50 rows (last random_key=aagjei)

Batches: 6batch [00:20,  3.07s/batch]

Inserted 60 rows (last random_key=aagoaz)

Batches: 7batch [00:22,  2.66s/batch]

Inserted 70 rows (last random_key=aagthn)

Batches: 8batch [00:25,  2.64s/batch]

Inserted 80 rows (last random_key=aagygu)

Batches: 8batch [00:26,  3.27s/batch]


KeyboardInterrupt: 