### RAG with Vector Search

In [1]:
pip install -q "qdrant-client[fastembed]>=1.14.2"

Note: you may need to restart the kernel to use updated packages.


In [2]:
from qdrant_client import QdrantClient, models

In [3]:
# Client for the Qdrant server reachable at localhost:6333; every collection
# operation in the cells below goes through this object.
client = QdrantClient("http://localhost:6333")

In [4]:
import requests

docs_url = 'https://github.com/alexeygrigorev/llm-rag-workshop/raw/main/notebooks/documents.json'

# Bound the wait and fail loudly on an HTTP error status: otherwise a failed
# download only shows up later as a confusing JSON decoding error.
docs_response = requests.get(docs_url, timeout=30)
docs_response.raise_for_status()
raw_doc = docs_response.json()

# Flatten the per-course structure into one list of FAQ entries, tagging every
# entry with the name of the course it belongs to. Each entry is copied so the
# parsed response in `raw_doc` is left untouched.
documents = [
    {**doc, 'course': course['course']}
    for course in raw_doc
    for doc in course['documents']
]

In [5]:
#pip install minsearch

In [6]:
import minsearch

# Text index over the FAQ entries: `question`, `text` and `section` are
# registered as text fields and `course` as a keyword field.
# NOTE(review): no later cell visible in this notebook reads `index`; the
# retrieval below goes through Qdrant. Confirm whether this cell is still needed.
index = minsearch.Index(
    text_fields=["question", "text", "section"],
    keyword_fields=["course"]
)

index.fit(documents)

<minsearch.minsearch.Index at 0x7c018e548280>

In [7]:
# Vector size given to Qdrant when the collection is created.
# NOTE(review): presumably the output size of the model named below — confirm
# against the model card before changing either value.
EMBEDDING_DIMENSIONALITY = 512
# Embedding model identifier, used for both the stored documents and the queries.
model_handle = "jinaai/jina-embeddings-v2-small-en"

In [8]:
# Name of the Qdrant collection that all collection calls in this notebook target.
collection_name = "FAQ"

In [9]:
# Drop the collection before it is created again in the next cell.
# NOTE(review): presumably here so the notebook can be re-run against the same
# server without a name clash — confirm.
client.delete_collection(collection_name=collection_name)

True

In [10]:
# Create the collection with vectors of EMBEDDING_DIMENSIONALITY components,
# compared by cosine distance.
client.create_collection(
    collection_name=collection_name,
    vectors_config=models.VectorParams(
        size=EMBEDDING_DIMENSIONALITY,
        distance=models.Distance.COSINE
    )
)

True

In [11]:
# Index the `course` payload field with the keyword schema; `course` is the
# field the search filter in `course_related_search` matches on.
client.create_payload_index(
    collection_name=collection_name,
    field_name="course",
    field_schema="keyword"
)

UpdateResult(operation_id=1, status=<UpdateStatus.COMPLETED: 'completed'>)

In [12]:
# One point per FAQ entry: the id is the entry's position in `documents`, the
# vector is the question and answer joined by a space (wrapped in a Document
# tagged with the embedding model), and the payload is the entry itself.
points = [
    models.PointStruct(
        id=position,
        vector=models.Document(
            text=entry['question'] + ' ' + entry['text'],
            model=model_handle,
        ),
        payload=entry,
    )
    for position, entry in enumerate(documents)
]

In [13]:
# Send every point to the collection in a single upsert call.
# NOTE(review): the vectors are models.Document objects rather than raw numbers,
# so the text is presumably embedded as part of this call — confirm.
client.upsert(
    collection_name=collection_name,
    points=points
)

UpdateResult(operation_id=2, status=<UpdateStatus.COMPLETED: 'completed'>)

In [14]:
# Sample question.
# NOTE(review): no later cell visible here reads this global; the functions
# below take their own `query` argument — confirm whether it is still needed.
query = 'I just discovered the course. Can I still join it?'

In [15]:
def course_related_search(query, course='data-engineering-zoomcamp', limit=5):
    """Return the payloads of the FAQ entries closest to ``query`` for one course.

    The query text is wrapped in a ``models.Document`` tagged with
    ``model_handle``, the same model identifier used for the stored points, and
    the search is restricted to points whose ``course`` payload field equals
    ``course``.

    Args:
        query: Free-text question to search for.
        course: Value the ``course`` payload field must match.
        limit: Maximum number of results to request (defaults to 5).

    Returns:
        A list with the payload of each returned point, in the order the
        points were returned.
    """
    response = client.query_points(
        collection_name=collection_name,
        query=models.Document(
            text=query,
            model=model_handle,
        ),
        # Only points belonging to the requested course are eligible.
        query_filter=models.Filter(
            must=[
                models.FieldCondition(
                    key="course",
                    match=models.MatchValue(value=course),
                )
            ]
        ),
        limit=limit,
        with_payload=True,
    )

    return [point.payload for point in response.points]

In [16]:
def build_prompt(query, search_results):
    """Assemble the LLM prompt for ``query`` from the retrieved FAQ entries.

    Args:
        query: The user's question; substituted for ``{question}``.
        search_results: Iterable of dicts, each providing the keys
            ``section``, ``question`` and ``text``.

    Returns:
        The filled-in prompt with leading and trailing whitespace stripped.
    """
    template = (
        "You're a course teaching assistant. Answer the QUESTION based on the CONTEXT from the FAQ database.\n"
        "Use only the facts from the CONTEXT when answering the QUESTION.\n"
        "\n"
        "QUESTION: {question}\n"
        "\n"
        "CONTEXT: \n"
        "{context}"
    )

    # One "section / question / answer" block per entry, each followed by a blank line.
    context_blocks = (
        f"section: {entry['section']}\nquestion: {entry['question']}\nanswer: {entry['text']}\n\n"
        for entry in search_results
    )
    filled = template.format(question=query, context="".join(context_blocks))

    # Stripping the outer whitespace also removes the blank line after the last entry.
    return filled.strip()

In [17]:
import ollama

In [18]:
def llm(prompt):
    """Send ``prompt`` as a single user message to the ``tinyllama`` model.

    Args:
        prompt: Full prompt text for the model.

    Returns:
        The ``content`` of the ``message`` entry in the chat response.
    """
    user_messages = [{"role": "user", "content": prompt}]
    reply = ollama.chat(model='tinyllama', messages=user_messages)
    return reply['message']['content']

In [19]:
def rag(query, course=None):
    """Answer ``query`` by retrieving FAQ entries and passing them to the LLM.

    The pipeline is: ``course_related_search`` -> ``build_prompt`` -> ``llm``.

    Args:
        query: The user's question.
        course: Optional course to restrict retrieval to. When omitted, the
            default course of ``course_related_search`` is used, which is the
            behaviour of the original one-argument call.

    Returns:
        The answer text produced by ``llm``.
    """
    if course is None:
        search_results = course_related_search(query)
    else:
        search_results = course_related_search(query, course=course)

    prompt = build_prompt(query, search_results)
    return llm(prompt)

In [20]:
# Run the whole pipeline (search -> prompt -> LLM) on a sample question.
rag('how do I run kafka?')

'To answer the question based on the FAQ database, in Section 2: Module 6: streaming with KaFka, when running producer/consumer/etc. Java scripts, no results retrieved or no message sent, the solution is to make sure that the scripts are being run (e.g. JsonConsumer.java, JsonProducer.java) in the src/main/java/org/example/ folder and are running correctly as a StreamsConfig.BOOTSTRAP_SERVERS_CONFIG in the src/main/java/org/example/Secret.java file. In Section 3: Worksheets 2 - RisingWave, you can rectify minor errors by using Git Bash from Windows and activating the PyThon venv from the terminal. To connect to the RisingWave cluster from PowerShell, use the command psql -h localhost -p 4566 -d dev -U root. The equivalent of sourcse commands.sh in PowerShell is . \\ .\\commands.sh from the worksheets directory. Hope this helps!'