In [26]:
import minsearch
import json
from openai import OpenAI

In [14]:
# Load the raw FAQ dump. The encoding is passed explicitly so the read does
# not depend on the platform's default locale encoding.
with open('documents.json', encoding='utf-8') as f_in:
    docs_raw = json.load(f_in)

# Flatten the per-course groups into a single list of FAQ entries, tagging
# each entry with the course it came from.
# NOTE: entries are tagged in place, so the dicts inside docs_raw also gain
# a "course" key.
documents = []

for course_dict in docs_raw:
    for doc in course_dict["documents"]:
        doc["course"] = course_dict["course"]
        documents.append(doc)

In [15]:
# Prompt skeleton with two placeholders, {question} and {context}, to be
# filled in with str.format.
# NOTE(review): build_prompt defines its own local template, so this
# module-level copy appears to be unused in the visible cells — confirm
# before relying on it.
prompt_template = (
    "You're a course teaching assistant. \n"
    "Answer the QUESTION based on the context from the FAQ database. \n"
    "Use only the facts from the CONTEXT when answering the QUESTION.\n"
    "If the CONTEXT doesn't contain the answer, output None.\n"
    "\n"
    "QUESTION: {question}\n"
    "CONTEXT: {context}"
)

In [16]:
# Build the search index over the flattened FAQ entries.
# "course" is registered as a keyword field; search() later filters on it
# through filter_dict.
index = minsearch.Index(
    keyword_fields=["course"],
    text_fields=["question", "text", "section"],
)
# Kept as the cell's final expression so the fitted index is echoed as output.
index.fit(documents)

<minsearch.Index at 0x760e14fe43e0>

In [27]:
# Shared OpenAI client used by llm(). No credentials are passed here;
# presumably the SDK picks them up from the environment (e.g. OPENAI_API_KEY)
# — TODO confirm.
client = OpenAI()

In [17]:
def search(query, index, course='data-engineering-zoomcamp', num_results=5):
    """Retrieve the FAQ entries that best match ``query`` for one course.

    Args:
        query: Free-text question to look up.
        index: Object exposing ``search(query=..., filter_dict=...,
            boost_dict=..., num_results=...)``, such as the fitted
            ``minsearch.Index`` built earlier in this notebook.
        course: Value the ``'course'`` field is filtered on. Defaults to
            ``'data-engineering-zoomcamp'``, the value that used to be
            hard-coded.
        num_results: Maximum number of entries requested from the index.
            Defaults to 5, the previous hard-coded value.

    Returns:
        Whatever ``index.search`` returns for these arguments.
    """
    # Relative weights handed to the index as boost_dict: 'question' is
    # weighted 3.0 and 'section' 0.5.
    boost = {'question': 3.0, 'section': 0.5}

    return index.search(
        query=query,
        filter_dict={'course': course},
        boost_dict=boost,
        num_results=num_results,
    )

In [23]:
def build_prompt(query, search_results):
    """Render the LLM prompt for ``query`` from the retrieved FAQ entries.

    Args:
        query: The user's question; substituted for ``{question}``.
        search_results: Iterable of dicts, each providing ``'section'``,
            ``'question'`` and ``'text'`` keys.

    Returns:
        The filled-in prompt with leading and trailing whitespace stripped.

    Raises:
        KeyError: If a search result lacks one of the three keys.
    """
    # Written as explicit pieces so the trailing spaces on the first two
    # lines stay visible and intact.
    prompt_template = (
        "You're a course teaching assistant. \n"
        "Answer the QUESTION based on the context from the FAQ database. \n"
        "Use only the facts from the CONTEXT when answering the QUESTION.\n"
        "If the CONTEXT doesn't contain the answer, output None.\n"
        "\n"
        "QUESTION: {question}\n"
        "CONTEXT: {context}"
    )

    # One "section / question / answer" stanza per entry, each followed by a
    # blank line. Every label ends with a colon so the three fields read
    # consistently.
    context = "".join(
        f"section: {doc['section']}\nquestion: {doc['question']}\nanswer: {doc['text']}\n\n"
        for doc in search_results
    )

    # The final strip() removes the blank line left after the last stanza.
    return prompt_template.format(question=query, context=context).strip()

In [28]:
def llm(prompt, model="gpt-4o"):
    """Send ``prompt`` as a single user message and return the reply text.

    Args:
        prompt: Full prompt text; sent as the only message, with role
            ``"user"``.
        model: Name of the chat model to request. Defaults to ``"gpt-4o"``,
            the value that used to be hard-coded.

    Returns:
        The ``message.content`` of the first choice in the response.
    """
    # Uses the module-level `client` created earlier in the notebook.
    response = client.chat.completions.create(
        model=model,
        messages=[{"role": "user", "content": prompt}],
    )
    return response.choices[0].message.content

In [33]:
def rag(query):
    """Answer ``query`` by chaining retrieval, prompt building and the LLM call.

    The query is looked up in the module-level ``index`` via ``search``, the
    hits are rendered into a prompt by ``build_prompt``, and that prompt is
    passed to ``llm``.

    Args:
        query: The user's question.

    Returns:
        Whatever ``llm`` returns for the generated prompt.
    """
    retrieved = search(query, index)
    return llm(build_prompt(query, retrieved))

In [34]:
# Sample question used to exercise the pipeline end to end.
query = "Can I follow the course once it finishes?"

In [35]:
# Left as the cell's final expression so the returned answer is shown as output.
rag(query)

'Yes, you can follow the course once it finishes. We will keep all the materials available so you can follow the course at your own pace. You can also continue to look at the homeworks and prepare for the next cohort, as well as start working on your final capstone project.'