In [1]:
import minsearch
import json

In [2]:
# Load the raw FAQ data. As used below, this is a list of per-course entries,
# each with a "course" name and a list of "documents".
# JSON is UTF-8 by specification, so the encoding is given explicitly instead of
# relying on the platform default (which is not UTF-8 on every system).
with open('documents.json', encoding='utf-8') as f_in:
    docs_raw = json.load(f_in)

In [4]:
# Flatten the per-course FAQ groups into one list of documents, tagging each
# document with the course it belongs to so that it can be filtered on later.
# Every entry is a shallow copy, so docs_raw is left unmodified and this cell
# can be re-run without side effects on the raw data.
documents = [
    {**doc, "course": course_dict["course"]}
    for course_dict in docs_raw
    for doc in course_dict["documents"]
]

In [9]:
# Create the (not yet fitted) minsearch index and declare how each document
# field is used:
#   - text_fields: free-text fields that may contain information relevant to
#     answering the question
#   - keyword_fields: fields used to filter the documents
index = minsearch.Index(
    keyword_fields=["course"],
    text_fields=["question", "text", "section"],
)

In [8]:
# Example user question used for every query and prompt in this notebook.
q = "the course has already started, can I still enroll?"

In [10]:
# Build the search index over the flattened FAQ documents.
# fit() evidently returns the index object itself, which is why the cell
# echoes its repr as output.
index.fit(documents)

<minsearch.Index at 0x7277cc230170>

In [20]:
# Per-field weights for the search: matches in "question" are weighted 3.0 and
# matches in "section" 0.5; "text" gets no explicit boost.
boost = {"question": 3.0, "section": 0.5}

# Fetch the five best matches for the question, restricted to the
# data-engineering-zoomcamp course via the "course" keyword field.
results = index.search(
    query=q,
    boost_dict=boost,
    filter_dict={"course": "data-engineering-zoomcamp"},
    num_results=5,
)

In [13]:
from openai import OpenAI

In [14]:
# Create the OpenAI API client with default settings.
# NOTE(review): no API key is passed here, so it is presumably picked up from
# the environment (OPENAI_API_KEY) — confirm before running.
client = OpenAI()

In [15]:
# Baseline without retrieval: send the bare question to the model so that its
# answer can be compared with the context-grounded answer produced later.
response = client.chat.completions.create(
    messages=[
        {"role": "user", "content": q},
    ],
    model="gpt-4o",
)

In [17]:
# Display the text of the first completion choice. Without any FAQ context the
# model can only give a generic answer about course enrollment.
response.choices[0].message.content

"Whether you can still enroll in a course after it has started depends on several factors, including the policies of the institution offering the course, the type of course, and how much of the course has already been completed. Here are a few steps you can take:\n\n1. **Contact the Instructor or Department:**\n   Reach out to the course instructor or the academic department offering the course to inquire about late enrollment policies. They may have the authority to make exceptions.\n\n2. **Check Institutional Policies:**\n   Review the institution's enrollment policies regarding adding courses after the start date. These policies are often found on the institution's website or in the academic catalog.\n\n3. **Consider the Course Format:**\n   Some courses, especially online ones, may be more flexible with enrollment dates. Self-paced courses, in particular, might allow for late enrollment.\n\n4. **Assess the Impact:**\n   Consider how much material you will have missed and whether yo

In [25]:
# Prompt sent to the model. {question} is the user's query and {context} holds
# the FAQ entries returned by the search; both are filled in with str.format().
# strip() removes the leading and trailing newlines of the triple-quoted literal.
prompt_template = """
You're a course teaching assistant. 
Answer the QUESTION based on the context from the FAQ database. 
Use only the facts from the CONTEXT when answering the QUESTION.
If the CONTEXT doesn't contain the answer, output None.

QUESTION: {question}
CONTEXT: {context}
""".strip()

In [23]:
# Render the search results as plain text for the prompt: one
# section/question/answer entry per document, each followed by a blank line.
# Every field label is followed by a colon so the entries are formatted
# uniformly, and the pieces are joined once rather than concatenated in a loop.
context = "".join(
    f"section: {doc['section']}\nquestion: {doc['question']}\nanswer: {doc['text']}\n\n"
    for doc in results
)

In [24]:
# Now we use the context in the prompt template


"section: General course-related questions\nquestion Course - Can I still join the course after the start date?\nanswer: Yes, even if you don't register, you're still eligible to submit the homeworks.\nBe aware, however, that there will be deadlines for turning in the final projects. So don't leave everything for the last minute.\n\nsection: General course-related questions\nquestion Course - Can I follow the course after it finishes?\nanswer: Yes, we will keep all the materials after the course finishes, so you can follow the course at your own pace after it finishes.\nYou can also continue looking at the homeworks and continue preparing for the next cohort. I guess you can also start working on your final capstone project.\n\nsection: General course-related questions\nquestion Course - When will the course start?\nanswer: The purpose of this document is to capture frequently asked technical questions\nThe exact day and hour of the course will be 15th Jan 2024 at 17h00. The course wil

In [26]:
# Fill the template with the user question and the retrieved FAQ context.
# The final strip() drops the blank lines that the last context entry leaves
# at the end of the prompt.
prompt = prompt_template.format(question=q, context=context).strip()

In [29]:
# print() renders the newlines, so the prompt is shown exactly as the model
# will receive it (a bare expression would show the escaped repr instead).
print(prompt)

You're a course teaching assistant. 
Answer the QUESTION based on the context from the FAQ database. 
Use only the facts from the CONTEXT when answering the QUESTION.
If the CONTEXT doesn't contain the answer, ouput None.

QUESTION: the course has already started, can I still enroll?
CONTEXT: section: General course-related questions
question Course - Can I still join the course after the start date?
answer: Yes, even if you don't register, you're still eligible to submit the homeworks.
Be aware, however, that there will be deadlines for turning in the final projects. So don't leave everything for the last minute.

section: General course-related questions
question Course - Can I follow the course after it finishes?
answer: Yes, we will keep all the materials after the course finishes, so you can follow the course at your own pace after it finishes.
You can also continue looking at the homeworks and continue preparing for the next cohort. I guess you can also start working on your fina

In [31]:
# Put it all together: send the prompt that embeds the retrieved FAQ entries,
# then display the model's answer, which is now grounded in that context.
response = client.chat.completions.create(
    messages=[
        {"role": "user", "content": prompt},
    ],
    model="gpt-4o",
)
response.choices[0].message.content

"Yes, even if you don't register, you're still eligible to submit the homeworks. Be aware, however, that there will be deadlines for turning in the final projects. So don't leave everything for the last minute."