In [102]:
import json
import os

import minsearch

In [103]:
# Load the raw FAQ dump. The loop further down reads a 'course' and a
# 'documents' key from every element of the loaded list.
# The encoding is pinned to UTF-8: without it, open() decodes with the
# platform's locale encoding, which can turn non-ASCII characters (curly
# quotes, etc.) into mojibake on systems whose default is not UTF-8.
with open('documents-llm.json', 'rt', encoding='utf-8') as f_in:
    docs_raw = json.load(f_in)

In [104]:
# Flat list that holds the FAQ entries of all courses (one dict per entry).
documents = []

In [105]:
# Flatten the per-course structure into one list of FAQ entries, tagging each
# entry with the course it belongs to.
# `documents` is rebuilt from scratch here so the cell can be re-run without
# appending duplicates, and every entry is a copy so `docs_raw` stays exactly
# as it was loaded.
documents = [
    {**doc, 'course': course_dict['course']}
    for course_dict in docs_raw
    for doc in course_dict['documents']
]

In [106]:
# Peek at the first flattened entry to check which keys each document carries.
documents[0]

{'text': 'Yes, but if you want to receive a certificate, you need to submit your project while we’re still accepting submissions.',
 'section': 'General course-related questions',
 'question': 'I just discovered the course. Can I still join?',
 'course': 'llm-zoomcamp'}

In [107]:
# Create the minsearch index, mirroring the structure of the documents:
# 'question', 'section' and 'text' are registered as free-text fields to search
# over, and 'course' is registered as a keyword field (search() uses it as a
# filter).
index = minsearch.Index(
    keyword_fields=["course"],
    text_fields=["question", "section", "text"],
)

In [108]:
# Sample user question; sent to the model as-is in the no-retrieval call below.
q = 'the course has already started, can I still enroll?'

In [109]:
# Fit the index on the flattened FAQ entries so that it can be queried.
index.fit(documents)

<minsearch.minsearch.Index at 0x700ed28737a0>

In [110]:
from openai import OpenAI

In [111]:
# The stock OpenAI client is pointed at Groq's OpenAI-compatible endpoint.
# The API key is read from the environment instead of being written into the
# notebook; if GROQ_API_KEY is not set this raises KeyError right here rather
# than failing later with an authentication error on the first request.
client = OpenAI(
    api_key=os.environ["GROQ_API_KEY"],
    base_url="https://api.groq.com/openai/v1",
)

In [None]:
# Baseline without retrieval: the raw question `q` is the only message sent to
# the model, so no FAQ context is included in the prompt.
response = client.chat.completions.create(
    model='llama3-70b-8192',
    messages=[{"role":"user", "content": q}]
)
# The reply text lives on the first choice of the response.
print(response.choices[0].message.content)

In [113]:
def search(query, course='llm-zoomcamp', num_results=5):
    """Look up the FAQ entries that best match ``query`` in the global ``index``.

    Args:
        query: Free-text question to search for.
        course: Value passed as the 'course' filter, so only entries of that
            course are considered. Defaults to 'llm-zoomcamp'.
        num_results: Maximum number of entries requested from the index.
            Defaults to 5.

    Returns:
        The result of ``index.search``. ``build_prompt`` iterates over it and
        reads the 'section', 'question' and 'text' keys of each entry.
    """
    # Matches in 'question' are weighted up and matches in 'section' are
    # weighted down; fields not listed here keep minsearch's default weight.
    boost = {'question': 3.0, 'section': 0.5}

    return index.search(
        query=query,
        filter_dict={'course': course},
        boost_dict=boost,
        num_results=num_results,
    )

In [None]:
# Try the retriever on its own; search() filters on the 'llm-zoomcamp' course.
search('what is data warehouse')

In [115]:
def build_prompt(query, search_results):
    """Render the prompt that asks the model to answer ``query`` from FAQ context.

    Args:
        query: The user's question; substituted for the QUESTION slot.
        search_results: Iterable of dicts, each providing the 'section',
            'question' and 'text' keys.

    Returns:
        The filled-in prompt with leading and trailing whitespace removed.
        Each search result becomes a "section / question / answer" paragraph
        in the CONTEXT part, in the order given.
    """
    prompt_template = """
You're a course teaching assistant. Answer the QUESTION based on the CONTEXT from the FAQ database.
Use only the facts from the CONTEXT when answering the QUESTION.

QUESTION: {question}

CONTEXT: 
{context}
""".strip()

    # One paragraph per hit, each terminated by a blank line; the final
    # strip() below removes the separator left after the last paragraph.
    paragraphs = [
        f"section: {hit['section']}\nquestion: {hit['question']}\nanswer: {hit['text']}\n\n"
        for hit in search_results
    ]

    filled = prompt_template.format(question=query, context="".join(paragraphs))
    return filled.strip()

In [116]:
def llm(prompt, model='llama3-70b-8192'):
    """Send ``prompt`` to the chat model as a single user message.

    Args:
        prompt: Full prompt text; sent as the content of one 'user' message.
        model: Name of the model to request from the global ``client``.
            Defaults to 'llama3-70b-8192'.

    Returns:
        The message content of the first choice in the completion response.
    """
    response = client.chat.completions.create(
        model=model,
        messages=[{"role": "user", "content": prompt}],
    )

    return response.choices[0].message.content

In [117]:
# Example question for trying out the pipeline.
query = 'can I still join the course?'

def rag(query):
    """Answer ``query`` by retrieving FAQ entries and prompting the model with them.

    The steps are: search() fetches the matching entries, build_prompt() turns
    the question and those entries into a prompt, and llm() returns the
    model's reply to that prompt.
    """
    return llm(build_prompt(query, search(query)))

In [118]:
# Run the full retrieve -> prompt -> answer pipeline on the sample `query`.
rag(query)

'According to the context, the answer to the question "can I still join the course?" is simply "Yes". There are no restrictions mentioned in the provided context that would prevent someone from joining the course.'

In [119]:
# Same pipeline on a differently worded question (the same text as `q`).
rag('the course has already started, can I still enroll?')

'According to the FAQ database, yes, you can still enroll in the course even though it has already started. However, if you want to receive a certificate, you need to submit your project while the course is still accepting submissions.'

In [None]:
# Show the first flattened entry again for reference.
documents[0]