In [1]:
import minsearch

In [2]:
import json

In [3]:
# Load the raw FAQ dump. The encoding is pinned to UTF-8 instead of relying on the
# platform default: the document text contains non-ASCII characters (curly quotes
# and apostrophes), which a non-UTF-8 default locale would mis-decode or reject.
with open('documents.json', 'rt', encoding='utf-8') as f_in:
    docs_raw = json.load(f_in)

In [4]:
# Flatten the per-course structure into one list of FAQ records.
# Each record is tagged (in place) with the name of the course it came from.
documents = []

for course_entry in docs_raw:
    course_faqs = course_entry['documents']
    for faq in course_faqs:
        faq['course'] = course_entry['course']
    documents.extend(course_faqs)

In [5]:
# Show the first flattened record to check that it now carries the added 'course' key.
documents[0]

{'text': "The purpose of this document is to capture frequently asked technical questions\nThe exact day and hour of the course will be 15th Jan 2024 at 17h00. The course will start with the first  “Office Hours'' live.1\nSubscribe to course public Google Calendar (it works from Desktop only).\nRegister before the course starts using this link.\nJoin the course Telegram channel with announcements.\nDon’t forget to register in DataTalks.Club's Slack and join the channel.",
 'section': 'General course-related questions',
 'question': 'Course - When will the course start?',
 'course': 'data-engineering-zoomcamp'}

In [6]:
# Create the search index: 'question', 'text' and 'section' are registered as text
# fields and 'course' as a keyword field (the field `search` passes in filter_dict).
index = minsearch.Index(
    text_fields=["question", "text", "section"],
    keyword_fields=["course"]
)

In [8]:
# Fit the index on the flattened FAQ records so that `search` can query it.
index.fit(documents)

<minsearch.Index at 0x7ddc5c442290>

In [11]:
!pip install mistralai



In [12]:
from mistralai.client import MistralClient
from mistralai.models.chat_completion import ChatMessage

In [13]:
import os

# Access the API key from the environment variable (raises KeyError if it is unset,
# which is preferable to sending requests with an empty key).
api_key = os.environ['MISTRAL_AI_API_KEY']
model = "mistral-large-latest"

client = MistralClient(api_key=api_key)

# NOTE(review): `q` was not defined anywhere in the notebook, so this cell raised
# NameError on a fresh kernel. The wording below is inferred from the recorded
# answer; confirm it matches the question that was originally asked.
q = 'the course has already started, can I still enroll?'

# Baseline: ask the model directly, without any FAQ context, to compare with `rag`.
chat_response = client.chat(
    model=model,
    messages=[ChatMessage(role="user", content=q)]
)

print(chat_response.choices[0].message.content)

Whether or not you can still enroll in a course that has already started depends on the policies of the institution or platform offering the course. Here are a few possibilities:

1. **Open Enrollment**: Some courses, especially online ones, have open enrollment, which means you can join at any time.

2. **Late Enrollment Period**: Some institutions have a late enrollment period, usually within the first week or two of the course starting. After this period, you might not be able to enroll.

3. **Instructor Permission**: In certain cases, you might be able to enroll late with the instructor's permission.

4. **Wait for the Next Offering**: If the course has strict deadlines for enrollment, you might have to wait for the next offering of the course.

It's best to contact the institution or the course instructor directly to ask about their specific policies regarding late enrollment.


In [19]:
def search(query, course='data-engineering-zoomcamp', num_results=10):
    """Query the FAQ index and return the matching documents.

    Args:
        query: Free-text question to look up.
        course: Value passed as the 'course' entry of ``filter_dict``. Defaults to
            'data-engineering-zoomcamp', the value that was previously hard-coded.
        num_results: Value forwarded as ``num_results`` to ``index.search``.
            Defaults to 10, the value that was previously hard-coded.

    Returns:
        Whatever ``index.search`` returns for these arguments; ``build_prompt``
        consumes it as an iterable of dicts with 'section', 'question' and 'text'.
    """
    # Boost weights handed to the index: 'question' gets 3.0, 'section' gets 0.5.
    boost = {'question': 3.0, 'section': 0.5}

    return index.search(
        query=query,
        filter_dict={'course': course},
        boost_dict=boost,
        num_results=num_results,
    )

In [43]:
def build_prompt(query, search_results):
    """Assemble the LLM prompt for ``query`` from retrieved FAQ entries.

    Args:
        query: The user's question; substituted for ``{question}`` in the template.
        search_results: Iterable of dicts, each providing the keys 'section',
            'question' and 'text'. An empty iterable yields an empty CONTEXT.

    Returns:
        The filled-in prompt string with leading/trailing whitespace stripped.

    Raises:
        KeyError: If a result is missing one of the three expected keys.
    """
    prompt_template = """
You're a course teaching assistant. Answer the QUESTION based on the CONTEXT from the FAQ database. 
Use only the facts from the CONTEXT when answering the QUESTION.

QUESTION: {question}

CONTEXT: {context}

""".strip()

    # Iterate over the *argument*, not a notebook-level `results` variable: the
    # previous loop read a global, so it either raised NameError on a fresh kernel
    # or silently built the context from the results of some earlier search.
    context = "".join(
        f"section: {doc['section']}\nquestion: {doc['question']}\nanswer: {doc['text']}\n\n"
        for doc in search_results
    )

    # The final strip() drops the blank lines left after the last context entry.
    return prompt_template.format(question=query, context=context).strip()

In [44]:
def llm(prompt):
    """Send ``prompt`` to the chat model as a single user message.

    Uses the notebook-level ``client`` and ``model``.

    Returns:
        The ``message.content`` of the first choice in the chat response.
    """
    user_message = ChatMessage(role="user", content=prompt)
    completion = client.chat(model=model, messages=[user_message])
    first_choice = completion.choices[0]
    return first_choice.message.content

In [50]:
# Sample question used to exercise the RAG pipeline end to end.
query = 'How do I run kafka?'

def rag(query):
    """Answer ``query`` by retrieving FAQ entries and prompting the LLM with them.

    Steps: ``search`` fetches the matching documents, ``build_prompt`` turns the
    question and those documents into a prompt, and ``llm`` returns the answer.
    """
    retrieved = search(query)
    return llm(build_prompt(query, retrieved))

In [51]:
# Run the full pipeline (search -> build_prompt -> llm) on the sample question.
rag (query)

'To run Kafka, you can follow these instructions based on the context provided:\n\nFor Java Kafka:\n- Navigate to the project directory.\n- Run the following command in the terminal:\n  ```\n  java -cp build/libs/<jar_name>-1.0-SNAPSHOT.jar:out src/main/java/org/example/JsonProducer.java\n  ```\n\nFor Python Kafka:\n- Create a virtual environment and install the required packages.\n- Run the following commands in the terminal:\n  ```\n  python -m venv env\n  source env/bin/activate  # For Windows, use env\\Scripts\\activate\n  pip install -r ../requirements.txt\n  ```\n- Ensure that the Docker images are up and running before executing the Python files.\n- To run the producer, activate the virtual environment and then run the producer.py script.'