In [1]:
import os
import openai
import minsearch
from openai import OpenAI

In [2]:
# Create the OpenAI API client; no credentials or options are passed explicitly.
# NOTE(review): presumably the API key is read from the environment (e.g. OPENAI_API_KEY) — confirm.
client = OpenAI()

In [6]:
# Baseline request: the user's question is sent to the model on its own,
# as a single "user" message with no additional context.
response = client.chat.completions.create(
    model="gpt-4o",
    messages=[
        {
            "role": "user",
            "content": "Quand est-ce que je vais commencer mon nouveau boulot ?",
        },
    ],
)

In [7]:
# Display the text content of the first choice in the response.
response.choices[0].message.content

"Je n'ai pas d'informations spécifiques sur votre situation personnelle. Pour connaître la date de début de votre nouveau travail, je vous recommande de vérifier votre contrat ou de contacter votre employeur ou le service des ressources humaines de l'entreprise. Ils seront en mesure de vous fournir les détails exacts concernant votre commencement. Bonne chance dans votre nouveau poste !"

In [8]:
import minsearch
import json

In [10]:
# Load the raw course documents from disk.
# The encoding is given explicitly: without it, open() uses the platform's locale
# encoding, which can mis-decode non-ASCII text on systems where that is not UTF-8.
with open('documents.json', 'rt', encoding='utf-8') as f_in:
    docs_raw = json.load(f_in)

In [11]:
# Flatten the per-course structure into one list of documents, tagging each
# document with the course it belongs to.
# Each entry is a fresh dict ({**doc, ...}) so the dicts inside docs_raw are left
# untouched; the loaded raw data stays as it was read, and re-running this cell
# always starts from the same input.
documents = [
    {**doc, 'course': course_dict['course']}
    for course_dict in docs_raw
    for doc in course_dict['documents']
]

In [18]:
# Configure the search index: "question", "text" and "section" are registered as
# text fields, and "course" as a keyword field (search() filters on it via filter_dict).
index = minsearch.Index(
    text_fields=["question", "text", "section"],
    keyword_fields=["course"]
)

In [21]:
# Sample question; it is sent to the model on its own in a later cell.
q = 'the course has already started, can I still enroll?'

In [22]:
# Fit the index on the flattened documents (the cell output shows a minsearch.Index object).
index.fit(documents)

<minsearch.Index at 0x7ac06b9c6380>

In [23]:
# Baseline: send the sample question `q` to the model as the only message,
# without any retrieved context.
response = client.chat.completions.create(
    model='gpt-4o',
    messages=[{"role": "user", "content": q}]
)

# Display the text content of the first choice in the response.
response.choices[0].message.content

"If the course has already begun, your ability to enroll may depend on several factors including the institution's policies, the type of course, and the instructor's discretion. Here are a few steps you can take:\n\n1. **Check the Institution’s Policy:** Some universities or platforms have specific guidelines about late enrollment. Look up the policy on the official website or student handbook.\n\n2. **Contact the Instructor:** Reach out directly to the course instructor. They may allow late enrollment or offer guidance on how to catch up with the material you've missed.\n\n3. **Contact the Registrar or Admissions Office:** The administrative office in charge of course enrollments can provide information on whether late enrollment is possible and the necessary steps to take.\n\n4. **Consider Online Courses:** Some online learning platforms offer more flexibility with enrollment dates. Check if there are similar courses available online that offer rolling admissions.\n\n5. **Assess Catc

In [24]:
def search(query, course='data-engineering-zoomcamp', num_results=5):
    """Query the module-level ``index`` for documents matching ``query``.

    Args:
        query: Free-text question to search for.
        course: Value passed in ``filter_dict`` for the ``course`` keyword
            field. Defaults to the course that used to be hard-coded here.
        num_results: Passed to the index as ``num_results``. Defaults to the
            previously hard-coded 5.

    Returns:
        Whatever ``index.search`` returns for these arguments.
    """
    # Field weights handed to the index: "question" gets 3.0, "section" gets 0.5.
    boost = {'question': 3.0, 'section': 0.5}

    return index.search(
        query=query,
        filter_dict={'course': course},
        boost_dict=boost,
        num_results=num_results,
    )

In [26]:
def build_prompt(query, search_results):
    """Assemble the prompt text from a question and the retrieved documents.

    Args:
        query: The user's question; substituted for ``{question}``.
        search_results: Iterable of dicts with ``section``, ``question`` and
            ``text`` keys; each one is rendered as a three-line entry followed
            by a blank line, and the entries are concatenated into ``{context}``.

    Returns:
        The filled-in template with leading/trailing whitespace stripped.
    """
    # The template is written out line by line so its whitespace is explicit:
    # every line after the first carries a four-space indent.
    template_lines = [
        "You're a course teaching assistant. Answer the QUESTION based on the CONTEXT from the FAQ database.",
        "    Use only the facts from the CONTEXT when answering the QUESTION.",
        "    ",
        "    QUESTION: {question}",
        "    ",
        "    CONTEXT: ",
        "    {context}",
    ]
    prompt_template = "\n".join(template_lines)

    entries = [
        f"section: {doc['section']}\nquestion: {doc['question']}\nanswer: {doc['text']}\n\n"
        for doc in search_results
    ]
    context = "".join(entries)

    return prompt_template.format(question=query, context=context).strip()

In [27]:
def llm(prompt, model='gpt-4o'):
    """Send ``prompt`` to the chat model as a single user message and return the reply text.

    Args:
        prompt: Full prompt text to send.
        model: Name of the chat model to request. Defaults to 'gpt-4o', the
            value that used to be hard-coded here.

    Returns:
        The ``message.content`` of the first choice in the response.
    """
    response = client.chat.completions.create(
        model=model,
        messages=[{"role": "user", "content": prompt}],
    )

    return response.choices[0].message.content

In [28]:
# Example question; passed to rag() in the next cell.
query = 'how do I run kafka?'

def rag(query):
    """Answer ``query`` by chaining retrieval, prompt construction and the LLM call.

    The query goes to ``search``; the query and the search results go to
    ``build_prompt``; the resulting prompt goes to ``llm``, whose return value
    is returned unchanged.
    """
    retrieved = search(query)
    return llm(build_prompt(query, retrieved))

In [29]:
# Run the full pipeline on the example question and display the returned answer.
rag(query)

'To run Kafka, follow these steps based on the provided context for Java and Python environments:\n\n### For Java:\n1. Navigate to the project directory.\n2. Run the following command in the terminal:\n\n```bash\njava -cp build/libs/<jar_name>-1.0-SNAPSHOT.jar:out src/main/java/org/example/JsonProducer.java\n```\nReplace `<jar_name>` with the actual name of your jar file.\n\n### For Python:\n1. Create and activate a virtual environment:\n\n    ```bash\n    python -m venv env\n    source env/bin/activate\n    ```\n\n    For Windows, use:\n    ```bash\n    env\\Scripts\\activate\n    ```\n\n2. Install required packages from `requirements.txt`:\n\n    ```bash\n    pip install -r ../requirements.txt\n    ```\n\n3. Run your Python Kafka scripts within this virtual environment to ensure dependencies are correctly addressed.\n\n### If you face permission issues (like `./build.sh: Permission denied` Error):\nRun this command in the terminal in the directory where your `build.sh` script is loca

In [30]:
# Same question text as `q`, which was earlier sent to the model without retrieved context;
# here it goes through rag() instead.
rag('the course has already started, can I still enroll?')

"Yes, you can still enroll in the course even after it has started. You are eligible to submit the homework. However, please keep in mind that there will be deadlines for turning in the final projects, so it's advisable not to leave everything for the last minute."