In [6]:
from qdrant_client import QdrantClient, models

In [7]:
# Connect to the Qdrant instance served at http://localhost:6333.
client = QdrantClient("http://localhost:6333")

In [8]:
import requests

# Location of the course documents (a JSON file in the workshop repository).
docs_url = 'https://github.com/alexeygrigorev/llm-rag-workshop/raw/main/notebooks/documents.json'

# Bound the request so a stalled connection cannot hang the kernel, and fail
# loudly on an HTTP error status instead of handing an error page to the JSON
# parser (which would surface as a confusing decode error).
docs_response = requests.get(docs_url, timeout=30)
docs_response.raise_for_status()

# Parsed JSON: iterated later as a list of courses, each with a 'course' name
# and a list of 'documents'.
documents_raw = docs_response.json()

In [9]:
# Size of the vectors stored in the collection; passed as `size` to
# models.VectorParams when the collection is created.
# NOTE(review): presumably the output dimension of the embedding model used for
# the documents — confirm against the model's documentation.
EMBEDDING_DIMENSIONALITY = 512

In [10]:
from qdrant_client import QdrantClient, models

# Initialize Qdrant client
client = QdrantClient("http://localhost:6333")

# Collection details
collection_name = "zoomcamp-rag"
embedding_dimension = EMBEDDING_DIMENSIONALITY
model_handle = "jinaai/jina-embeddings-v2-small-en"

# Create the collection only when it is missing, so re-running the cell is safe.
# The flag records whether this run created it.
created_new_collection = not client.collection_exists(collection_name)

if created_new_collection:
    client.create_collection(
        collection_name=collection_name,
        vectors_config=models.VectorParams(
            size=embedding_dimension,
            distance=models.Distance.COSINE,
        ),
    )
    print(f"✅ Created collection '{collection_name}'")
else:
    print(f"ℹ️ Collection '{collection_name}' already exists. Skipping creation.")

# Ingest whenever the collection holds no points, not only when this run
# created it: if an earlier run created the collection but failed before the
# upsert finished, keying on "freshly created" would leave it empty forever.
stored_point_count = client.count(collection_name=collection_name, exact=True).count

if stored_point_count == 0:
    points = []

    for course in documents_raw:
        for doc in course['documents']:
            points.append(
                models.PointStruct(
                    # Sequential ids 0, 1, 2, ... in document order.
                    id=len(points),
                    # The raw text is sent as a Document so it is embedded
                    # with the model named by model_handle.
                    vector=models.Document(text=doc['text'], model=model_handle),
                    payload={
                        "text": doc['text'],
                        "section": doc['section'],
                        "course": course['course'],
                    },
                )
            )

    client.upsert(collection_name=collection_name, points=points)
    client.create_payload_index(
        collection_name=collection_name,
        field_name="course",
        field_schema="keyword",  # exact matching on string metadata fields
    )
    print(f"🚀 Upserted {len(points)} documents into '{collection_name}'")
else:
    print(
        f"ℹ️ Collection '{collection_name}' already holds "
        f"{stored_point_count} points. Skipping upsert."
    )


✅ Created collection 'zoomcamp-rag'
🚀 Upserted 948 documents into 'zoomcamp-rag'
