In [1]:
from openai import OpenAI

# Chat client pointed at a server on localhost:11434 (presumably a local Ollama
# instance exposing an OpenAI-compatible API - TODO confirm). The api_key value
# looks like a placeholder rather than a real credential.
client = OpenAI(base_url="http://localhost:11434/v1/", api_key="ollama")

In [2]:
import requests 
import minsearch

# Location of the FAQ corpus: a JSON file fetched over HTTP.
docs_url = 'https://github.com/DataTalksClub/llm-zoomcamp/blob/main/01-intro/documents.json?raw=1'

# The timeout stops the cell from hanging forever on a stalled connection, and
# raise_for_status() reports an HTTP error here rather than as a confusing
# JSON decode failure on the following line.
docs_response = requests.get(docs_url, timeout=30)
docs_response.raise_for_status()
documents_raw = docs_response.json()

# Flatten the per-course structure into a single list, tagging each document
# with the name of the course it belongs to so it can be filtered on later.
documents = []

for course in documents_raw:
    course_name = course['course']

    for doc in course['documents']:
        doc['course'] = course_name
        documents.append(doc)

# "question", "text" and "section" are registered as text fields and "course"
# as a keyword field; search() filters on "course" through filter_dict.
index = minsearch.Index(
    text_fields=["question", "text", "section"],
    keyword_fields=["course"]
)

index.fit(documents)

<minsearch.Index at 0x7476f3ea8a00>

In [3]:
def search(query, course='data-engineering-zoomcamp', num_results=5):
    """Query the module-level ``index`` for FAQ entries matching ``query``.

    Args:
        query: Search string forwarded to ``index.search``.
        course: Value the ``course`` field must equal. Defaults to
            'data-engineering-zoomcamp', the value that used to be hard-coded.
        num_results: Number of results requested from the index. Defaults
            to 5, the value that used to be hard-coded.

    Returns:
        The value returned by ``index.search``. ``rag`` passes it straight to
        ``build_prompt``, which iterates over it and reads the 'section',
        'question' and 'text' keys of each item.
    """
    # Per-field weights handed to the index; 'text' has no entry here
    # (presumably it keeps the library's default weight - TODO confirm).
    boost = {'question': 3.0, 'section': 0.5}

    return index.search(
        query=query,
        filter_dict={'course': course},
        boost_dict=boost,
        num_results=num_results,
    )

In [4]:
def build_prompt(query, search_results):
    """Render the LLM prompt for ``query`` from the retrieved FAQ entries.

    Args:
        query: The user's question; substituted into the QUESTION slot.
        search_results: Iterable of mappings, each providing the 'section',
            'question' and 'text' keys.

    Returns:
        The filled-in prompt with leading and trailing whitespace removed.

    Raises:
        KeyError: If an entry lacks one of the three keys read here.
    """
    template = """
You're a course teaching assistant. Answer the QUESTION based on the CONTEXT from the FAQ database.
Use only the facts from the CONTEXT when answering the QUESTION.

QUESTION: {question}

CONTEXT:
{context}
""".strip()

    # One block per hit, each followed by a blank line; the final strip()
    # below removes the blank line left after the last block.
    entries = (
        f"section: {hit['section']}\nquestion: {hit['question']}\nanswer: {hit['text']}\n\n"
        for hit in search_results
    )

    filled = template.format(question=query, context="".join(entries))
    return filled.strip()

In [5]:
def llm(prompt, model='phi3'):
    """Send ``prompt`` as a single user message and return the reply text.

    Args:
        prompt: Text placed in the one user message sent to the chat endpoint.
        model: Model name passed to the chat-completions call. Defaults to
            'phi3', the value that used to be hard-coded.

    Returns:
        The ``content`` of the first choice's message in the response.
    """
    response = client.chat.completions.create(
        model=model,
        messages=[{"role": "user", "content": prompt}],
    )
    # Only the first choice is read; any others in the response are ignored.
    return response.choices[0].message.content

In [6]:
def rag(query):
    """Answer ``query`` by chaining the three steps defined in this notebook.

    The results of ``search(query)`` are turned into a prompt by
    ``build_prompt``, and that prompt is passed to ``llm``.

    Args:
        query: The user's question.

    Returns:
        Whatever ``llm`` returns for the generated prompt.
    """
    return llm(build_prompt(query, search(query)))

In [7]:
# Run the full pipeline once; as the last expression of the cell, the returned
# answer is displayed as the cell output.
rag(query="Course is already started, can I still join?")

' Answering QUESTION based on CONTEXT from FAQ database:\n\nAs an AI, I don\'t have access to real-time systems or databases like DataTalks.Club, but assuming this hypothetical course scenario follows common practices for online courses with start and end dates mentioned in the context: Generally speaking (and using standard conventions), if you find yourself registered after the official start date of a university or platform\'s academic term, it would depend on how that institution handles such cases.\n\nBased on your provided CONTEXT from an FAQ database related to general course questions at DataTalks.Club: If someone inquires whether they can join a course once started and the answer in this context is "Yes," even if not registered, it suggests some leniency might be applied regarding homework submission deadlines while registration happens later—but always check specific guidelines as these may vary widely across institutions and courses.\n\nNevertheless, given that no direct men

In [8]:
# `_` is IPython's name for the most recent cell output, so this prints the
# answer only when it is run directly after the rag(...) cell above. print()
# renders the newlines that the raw string repr shows as "\n".
print(_)

 Answering QUESTION based on CONTEXT from FAQ database:

As an AI, I don't have access to real-time systems or databases like DataTalks.Club, but assuming this hypothetical course scenario follows common practices for online courses with start and end dates mentioned in the context: Generally speaking (and using standard conventions), if you find yourself registered after the official start date of a university or platform's academic term, it would depend on how that institution handles such cases.

Based on your provided CONTEXT from an FAQ database related to general course questions at DataTalks.Club: If someone inquires whether they can join a course once started and the answer in this context is "Yes," even if not registered, it suggests some leniency might be applied regarding homework submission deadlines while registration happens later—but always check specific guidelines as these may vary widely across institutions and courses.

Nevertheless, given that no direct mention of p