In [1]:
import json
import os

from groq import Groq
import minsearch
from IPython.display import Markdown, display

In [2]:
# Load the raw FAQ export. The encoding is pinned explicitly: without it,
# open() decodes with the platform's locale encoding, which is not
# guaranteed to be UTF-8.
with open('documents.json', 'rt', encoding='utf-8') as f_in:
    docs_raw = json.load(f_in)

In [3]:
# Flatten the per-course groups into a single list, tagging every FAQ entry
# with the course it belongs to. Each entry is copied rather than modified,
# so `docs_raw` stays exactly as it was loaded.
documents = [
    {**doc, 'course': course_dict['course']}
    for course_dict in docs_raw
    for doc in course_dict['documents']
]

In [4]:
# `course` is registered as a keyword field (search() filters on it);
# the question, answer text and section title are the free-text fields.
index = minsearch.Index(
    keyword_fields=["course"],
    text_fields=["question", "text", "section"],
)

In [5]:
# Fit the index on the flattened FAQ entries. The call's return value is
# echoed as the cell output (the Index repr).
index.fit(documents)

<minsearch.Index at 0x74c839785300>

In [6]:
# The API key is read from the environment so it never appears in the notebook.
client = Groq(api_key=os.getenv("GROQ_API_KEY"))

In [7]:
def search(
    index: minsearch.Index,
    query: str,
    num_results: int = 10,
    course: str = 'data-engineering-zoomcamp'
) -> list[dict[str, str]]:
    """Search the FAQ index for entries matching ``query`` within one course.

    Args:
        index: The index to query.
        query: Free-text question to search for.
        num_results: Number of results to request from the index.
        course: Value the ``course`` keyword field is filtered on. The
            default is the course that was previously hard-coded, so
            existing callers behave as before.

    Returns:
        The result of ``index.search`` for these arguments.
    """
    # Relative weights per text field: matches in the question are weighted
    # up, matches in the section title are weighted down.
    # NOTE(review): 'text' has no entry here and presumably keeps the
    # library's default weight; confirm against minsearch.
    boost = {
        'question': 3.0,
        'section': 0.5
    }
    return index.search(
        query=query,
        filter_dict={'course': course},
        boost_dict=boost,
        num_results=num_results
    )

In [8]:
def build_prompt(query: str, search_results: list[dict[str, str]]) -> str:
    """Assemble the LLM prompt from the user's question and retrieved FAQ entries.

    Args:
        query: The user's question, inserted after ``QUESTION:``.
        search_results: Retrieved documents; each must provide the keys
            ``section``, ``question`` and ``text``.

    Returns:
        The formatted prompt with surrounding whitespace stripped. With no
        search results the prompt ends right after ``CONTEXT:``.

    Raises:
        KeyError: If a document lacks one of the required keys.
    """
    # Spelled out line by line so the trailing space after "CONTEXT:" is
    # visible and cannot be silently removed by an editor.
    prompt_template = (
        "You're a course teaching assistant. Answer the QUESTION based on the CONTEXT from the FAQ database.\n"
        "Use only the facts from the CONTEXT when answering the QUESTION.\n"
        "\n"
        "QUESTION: {question}\n"
        "\n"
        "CONTEXT: \n"
        "{context}"
    )

    # One entry per document, each terminated by a blank line; joined in a
    # single pass instead of growing a string inside a loop.
    context = ''.join(
        f'section: {doc["section"]}\nquestion: {doc["question"]}\nanswer: {doc["text"]}\n\n'
        for doc in search_results
    )

    # The final strip() drops the blank line left after the last entry.
    return prompt_template.format(question=query, context=context).strip()

In [9]:
def llm(client: Groq, prompt: str, model: str = "llama3-8b-8192") -> str:
    """Send ``prompt`` as a single user message and return the model's reply.

    Args:
        client: Groq client used to issue the chat-completion request.
        prompt: Full prompt text, sent as the content of one ``user`` message.
        model: Model identifier passed to the API. The default is the value
            that was previously hard-coded, so existing callers behave as
            before. NOTE(review): confirm this id is still served by Groq.

    Returns:
        The ``message.content`` of the first choice in the response.
    """
    chat_completion = client.chat.completions.create(
        messages=[
            {
                "role": "user",
                "content": prompt,
            }
        ],
        model=model,
    )
    return chat_completion.choices[0].message.content

In [10]:
def rag(question: str) -> str:
    """Answer ``question`` with the retrieve -> prompt -> generate pipeline.

    Uses the notebook-level ``index`` and ``client`` objects.
    """
    # Retrieve: the five best-matching FAQ entries.
    hits = search(index, question, num_results=5)
    # Augment: embed the question and the hits in the prompt template.
    rag_prompt = build_prompt(question, hits)
    # Generate: hand the prompt to the LLM and return its reply as-is.
    return llm(client, rag_prompt)

In [11]:
# Sample question used to exercise the pipeline end to end.
question = 'the course has already started, can I still enroll?'

# rag() ends in llm(), so running this cell issues a live chat-completion request.
answer = rag(question)

In [12]:
# Render the reply as Markdown instead of showing the raw string.
display(Markdown(answer))

Based on the context from the FAQ database, I can answer the QUESTION:

QUESTION: the course has already started, can I still enroll?

Answer: Yes, according to the FAQ, even if you don't register, you're still eligible to submit the homeworks.