In [10]:
import pandas as pd
import numpy as np
from qdrant_client import QdrantClient
from sentence_transformers import SentenceTransformer
import os

# -------------------- CONFIG --------------------
CSV_PATH = "/media/sandeep/363843093842C7971/project_ITM_samsher/My_Startup_Projects/qdrant/data.csv"
COLLECTION_NAME = "restaurant_menu"
QDRANT_HOST = "localhost"
QDRANT_PORT = 6333
EMBEDDING_MODEL = "all-MiniLM-L6-v2"
# ------------------------------------------------

# Step 1: Load CSV
if not os.path.exists(CSV_PATH):
    raise FileNotFoundError(f"CSV file not found: {CSV_PATH}")

df = pd.read_csv(CSV_PATH, quotechar='"')

if "description" not in df.columns:
    raise ValueError("CSV must contain a 'description' column.")

# Step 2: Load embedding model
print("🔄 Loading embedding model...")
model = SentenceTransformer(EMBEDDING_MODEL)

# Step 3: Generate real embeddings from the 'description' column
print("🧠 Generating embeddings from item descriptions...")
embeddings = model.encode(df["description"].tolist(), convert_to_numpy=True)

# Step 4: Connect to Qdrant
client = QdrantClient(host=QDRANT_HOST, port=QDRANT_PORT)

# Step 5: Create/Recreate collection
VECTOR_SIZE = embeddings.shape[1]
print(f"📦 Creating Qdrant collection with vector size: {VECTOR_SIZE}")

client.recreate_collection(
    collection_name=COLLECTION_NAME,
    vectors_config={"size": VECTOR_SIZE, "distance": "Cosine"}
)

# Step 6: Upload data with payload
print("📤 Uploading data to Qdrant...")
client.upload_collection(
    collection_name=COLLECTION_NAME,
    vectors=embeddings.tolist(),
    payload=df.to_dict(orient="records"),
    ids=df["id"].tolist()
)

print(f"✅ Successfully uploaded {len(df)} menu items to Qdrant collection '{COLLECTION_NAME}'")


🔄 Loading embedding model...
🧠 Generating embeddings from item descriptions...
📦 Creating Qdrant collection with vector size: 384


  client.recreate_collection(


📤 Uploading data to Qdrant...
✅ Successfully uploaded 50 menu items to Qdrant collection 'restaurant_menu'
