In [14]:
import requests 

docs_url = 'https://github.com/alexeygrigorev/llm-rag-workshop/raw/main/notebooks/documents.json'
# A timeout keeps the cell from hanging indefinitely on a stalled connection.
docs_response = requests.get(docs_url, timeout=30)
# Fail loudly on an HTTP error instead of trying to parse an error page as JSON.
docs_response.raise_for_status()
documents_raw = docs_response.json()

# Flatten the per-course structure into a single list of records, tagging each
# record with the course it belongs to. Records are copied rather than mutated
# so documents_raw keeps its original shape and the cell is safe to re-run.
documents = [
    {**doc, 'course': course['course']}
    for course in documents_raw
    for doc in course['documents']
]

In [15]:
# Spot-check one flattened record; it should carry the added 'course' key.
documents[2]

{'text': "Yes, even if you don't register, you're still eligible to submit the homeworks.\nBe aware, however, that there will be deadlines for turning in the final projects. So don't leave everything for the last minute.",
 'section': 'General course-related questions',
 'question': 'Course - Can I still join the course after the start date?',
 'course': 'data-engineering-zoomcamp'}

In [16]:
import os

In [17]:
from google import genai
# The key is read from the GEMINI_API_KEY environment variable so it is never
# hardcoded in the notebook; os.getenv yields None when the variable is unset.
gemini_client = genai.Client(api_key=os.getenv("GEMINI_API_KEY"))

In [18]:
def build_prompt(query, search_results):
    """Render the teaching-assistant prompt for a question and its retrieved FAQ entries.

    Args:
        query: The user's question, inserted after ``QUESTION:``.
        search_results: Iterable of dicts with ``section``, ``question`` and
            ``text`` keys; each becomes one entry under ``CONTEXT:``.

    Returns:
        The formatted prompt with leading/trailing whitespace removed.
    """
    template = (
        "You're a course teaching assistant. Answer the QUESTION based on the CONTEXT from the FAQ database.\n"
        "Use only the facts from the CONTEXT when answering the QUESTION.\n"
        "\n"
        "QUESTION: {question}\n"
        "\n"
        "CONTEXT: \n"
        "{context}"
    )

    # Every entry ends with a blank line; the final strip() drops the trailing one.
    context_block = "".join(
        f"section: {hit['section']}\nquestion: {hit['question']}\nanswer: {hit['text']}\n\n"
        for hit in search_results
    )

    return template.format(question=query, context=context_block).strip()

In [19]:
def llm(prompt, model="gemini-2.0-flash"):
    """Send a prompt to Gemini through ``gemini_client`` and return the generated text.

    Args:
        prompt: The prompt string passed as ``contents`` to ``generate_content``.
        model: Name of the Gemini model to call. Defaults to the model the
            notebook originally hard-coded, so existing ``llm(prompt)`` calls
            behave as before.

    Returns:
        The ``text`` attribute of the ``generate_content`` response.
    """
    response = gemini_client.models.generate_content(
        model=model,
        contents=prompt,
    )
    return response.text


In [20]:
from qdrant_client import QdrantClient, models

In [21]:
# Client for the Qdrant server addressed at localhost:6333; the server must be
# reachable before any of the collection cells below are run.
qd_client = QdrantClient("http://localhost:6333")

In [22]:
# Vector size used when the collection is created. It has to agree with the
# output size of the embedding model named below
# (assumed to be 512 for this model — TODO confirm against the model card).
EMBEDDING_DIMENSIONALITY = 512
# Embedding model identifier handed to models.Document for indexing and querying.
model_handle = "jinaai/jina-embeddings-v2-small-en"

In [23]:
# Name of the Qdrant collection that the cells below create, fill and query.
collection_name = "zoomcamp-faq"

In [24]:
# Drop the collection before it is (re)created in the next cell, so that
# re-running the notebook starts from an empty collection.
qd_client.delete_collection(collection_name=collection_name)

True

In [25]:
# Create the collection with a single unnamed vector per point, sized by
# EMBEDDING_DIMENSIONALITY and compared with cosine distance.
qd_client.create_collection(
    collection_name=collection_name,
    vectors_config=models.VectorParams(
        size=EMBEDDING_DIMENSIONALITY,
        distance=models.Distance.COSINE
    )
)

True

In [26]:
# Index the "course" payload field as a keyword. vector_search filters on this
# same field, which is presumably what the index is meant to support.
qd_client.create_payload_index(
    collection_name=collection_name,
    field_name="course",
    field_schema="keyword"
)

UpdateResult(operation_id=1, status=<UpdateStatus.COMPLETED: 'completed'>)

In [27]:
# Build one point per FAQ record. The point id is the record's position in
# `documents`, the vector is described as a models.Document (question and answer
# text joined by a space, plus the embedding model name), and the whole record
# is stored as the payload.
points = [
    models.PointStruct(
        id=idx,
        vector=models.Document(
            text=record['question'] + ' ' + record['text'],
            model=model_handle,
        ),
        payload=record,
    )
    for idx, record in enumerate(documents)
]

In [28]:
from tqdm import tqdm

def upsert_with_progress(client, collection_name, points, batch_size=100):
    """Upsert points into a collection in fixed-size batches, showing a progress bar.

    Args:
        client: Object exposing ``upsert(collection_name=..., points=...)``.
        collection_name: Target collection name, forwarded to ``client.upsert``.
        points: Points to upload; any non-list iterable is materialized into a list.
        batch_size: Maximum number of points per ``upsert`` call. Must be >= 1.

    Raises:
        ValueError: If ``batch_size`` is smaller than 1. A negative step would
            otherwise produce an empty range, upload nothing, and still print
            the success message.
    """
    if batch_size < 1:
        raise ValueError(f"batch_size must be a positive integer, got {batch_size}")

    # len() and slicing below need a real sequence, so materialize other iterables.
    if not isinstance(points, list):
        points = list(points)

    for start in tqdm(range(0, len(points), batch_size), desc="Uploading batches"):
        client.upsert(
            collection_name=collection_name,
            points=points[start:start + batch_size]
        )

    print(f"✅ Successfully upserted {len(points)} points")

# Upload every prepared point; batch_size=50 overrides the function's default of 100.
upsert_with_progress(qd_client, collection_name, points, batch_size=50)

Uploading batches: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 19/19 [01:46<00:00,  5.63s/it]

✅ Successfully upserted 948 points





In [29]:
# Sample user question for trying out the search functions.
# NOTE(review): no later cell visible here reads this variable — confirm it is still needed.
question = 'I just discovered the course. Can I still join it?'

In [30]:
def vector_search(question, course='data-engineering-zoomcamp', limit=5):
    """Query the FAQ collection for the entries closest to ``question`` within one course.

    Args:
        question: Query text; wrapped in a models.Document with ``model_handle``.
        course: Value the ``course`` payload field must match. Defaults to the
            course the notebook originally hard-coded.
        limit: Maximum number of points to request. Defaults to the original 5.

    Returns:
        A list with the payload of each returned point, in the order returned.
    """
    print('vector_search is used')

    # Restrict the search to points whose "course" payload equals `course`.
    course_filter = models.Filter(
        must=[
            models.FieldCondition(
                key="course",
                match=models.MatchValue(value=course)
            )
        ]
    )

    query_points = qd_client.query_points(
        collection_name=collection_name,
        query=models.Document(
            text=question,
            model=model_handle
        ),
        query_filter=course_filter,
        limit=limit,
        with_payload=True
    )

    return [point.payload for point in query_points.points]

In [31]:
def rag(query):
    """Answer ``query`` in three steps: vector search, prompt building, LLM call.

    Returns whatever ``llm`` returns for the prompt built from the search results.
    """
    retrieved = vector_search(query)
    return llm(build_prompt(query, retrieved))

In [32]:
# End-to-end check of the pipeline: search, prompt building, generation.
rag('how do I run kafka?')

vector_search is used


'To run kafka, use `docker ps` to confirm that your kafka broker docker container is working. If it is not, navigate to the docker compose yaml file folder and run `docker compose up -d` to start all the instances.\n\nTo run producer/consumer/kstreams in the terminal, in the project directory, run:\n`java -cp build/libs/<jar_name>-1.0-SNAPSHOT.jar:out src/main/java/org/example/JsonProducer.java`\n'