In [1]:
import os
os.environ["SSL_CERT_FILE"] = "Fortinet_CA_SSL(15).cer"


In [2]:
pip install minsearch --trusted-host pypi.org --trusted-host files.pythonhosted.org

Note: you may need to restart the kernel to use updated packages.


In [3]:
import requests 

docs_url = 'https://github.com/alexeygrigorev/llm-rag-workshop/raw/main/notebooks/documents.json'
docs_response = requests.get(docs_url)
documents_raw = docs_response.json()

documents = []

for course in documents_raw:
    course_name = course['course']

    for doc in course['documents']:
        doc['course'] = course_name
        documents.append(doc)

In [4]:
documents[:3]

[{'text': "The purpose of this document is to capture frequently asked technical questions\nThe exact day and hour of the course will be 15th Jan 2024 at 17h00. The course will start with the first  “Office Hours'' live.1\nSubscribe to course public Google Calendar (it works from Desktop only).\nRegister before the course starts using this link.\nJoin the course Telegram channel with announcements.\nDon’t forget to register in DataTalks.Club's Slack and join the channel.",
  'section': 'General course-related questions',
  'question': 'Course - When will the course start?',
  'course': 'data-engineering-zoomcamp'},
 {'text': 'GitHub - DataTalksClub data-engineering-zoomcamp#prerequisites',
  'section': 'General course-related questions',
  'question': 'Course - What are the prerequisites for this course?',
  'course': 'data-engineering-zoomcamp'},
 {'text': "Yes, even if you don't register, you're still eligible to submit the homeworks.\nBe aware, however, that there will be deadlines 

# The RAG flow

## Search

In [5]:
from minsearch import AppendableIndex

index = AppendableIndex(
    text_fields=["question", "text", "section"], 
    keyword_fields=["course"]
)
index.fit(documents)

<minsearch.append.AppendableIndex at 0x7f81c9ea7b00>

In [6]:
index.search('how I can study the course')

[{'text': "Yes! We'll cover some linear algebra in the course, but in general, there will be very few formulas, mostly code.\nHere are some interesting videos covering linear algebra that you can already watch: ML Zoomcamp 1.8 - Linear Algebra Refresher from Alexey Grigorev or the excellent playlist from 3Blue1Brown Vectors | Chapter 1, Essence of linear algebra. Never hesitate to ask the community for help if you have any question.\n(Mélanie Fouesnard)",
  'section': 'General course-related questions',
  'question': "I don't know math. Can I take the course?",
  'course': 'machine-learning-zoomcamp'},
 {'text': "No, you can only get a certificate if you finish the course with a “live” cohort. We don't award certificates for the self-paced mode. The reason is you need to peer-review capstone(s) after submitting a project. You can only peer-review projects at the time the course is running.",
  'section': 'General course-related questions',
  'question': 'Certificate - Can I follow the 

In [7]:
def search(query):
    boost = {'question': 3.0, 'section': 0.5}

    results = index.search(
        query=query,
        filter_dict={'course': 'data-engineering-zoomcamp'},
        boost_dict=boost,
        num_results=5,
        output_ids=True
    )

    return results

In [8]:
search("How can I Join the course")

[{'text': "Yes, even if you don't register, you're still eligible to submit the homeworks.\nBe aware, however, that there will be deadlines for turning in the final projects. So don't leave everything for the last minute.",
  'section': 'General course-related questions',
  'question': 'Course - Can I still join the course after the start date?',
  'course': 'data-engineering-zoomcamp',
  '_id': 2},
 {'text': "No, you can only get a certificate if you finish the course with a “live” cohort. We don't award certificates for the self-paced mode. The reason is you need to peer-review capstone(s) after submitting a project. You can only peer-review projects at the time the course is running.",
  'section': 'General course-related questions',
  'question': 'Certificate - Can I follow the course in a self-paced mode and get a certificate?',
  'course': 'data-engineering-zoomcamp',
  '_id': 11},
 {'text': 'Yes, we will keep all the materials after the course finishes, so you can follow the cou

In [9]:
question="Can I still join the course?"

## Prompt

In [10]:
prompt_template = """
You're a course teaching assistant. Answer the QUESTION based on the CONTEXT from the FAQ database.
Use only the facts from the CONTEXT when answering the QUESTION.

<QUESTION>
{question}
</QUESTION>

<CONTEXT>
{context}
</CONTEXT>
""".strip()

In [11]:
def build_prompt(query, search_results):
    context = ""

    for doc in search_results:
        context = context + f"section: {doc['section']}\nquestion: {doc['question']}\nanswer: {doc['text']}\n\n"
    
    prompt = prompt_template.format(question=query, context=context).strip()
    return prompt

In [12]:
search_results = search(question)
prompt = build_prompt(question, search_results)

In [13]:
print(prompt)

You're a course teaching assistant. Answer the QUESTION based on the CONTEXT from the FAQ database.
Use only the facts from the CONTEXT when answering the QUESTION.

<QUESTION>
Can I still join the course?
</QUESTION>

<CONTEXT>
section: General course-related questions
question: Course - Can I still join the course after the start date?
answer: Yes, even if you don't register, you're still eligible to submit the homeworks.
Be aware, however, that there will be deadlines for turning in the final projects. So don't leave everything for the last minute.

section: General course-related questions
question: Certificate - Can I follow the course in a self-paced mode and get a certificate?
answer: No, you can only get a certificate if you finish the course with a “live” cohort. We don't award certificates for the self-paced mode. The reason is you need to peer-review capstone(s) after submitting a project. You can only peer-review projects at the time the course is running.

section: General

## LLM

In [14]:
from mistralai import Mistral
from mistralai.models import UserMessage
from dotenv import load_dotenv


In [15]:
load_dotenv()

True

In [16]:
api_key = os.getenv("API_KEY")

In [17]:
client = Mistral(api_key = api_key )

In [18]:
def llm(prompt):
    response = client.chat.complete(
        model = "mistral-medium-latest",
        messages = [UserMessage(content=prompt)]
    )
    return response.choices[0].message.content
    

## Put all togather 

- Search
- Prompt
- LLM

In [19]:
def rag(query):
    search_results = search(query)
    prompt = build_prompt(query, search_results)
    answer = llm(prompt)
    return answer

In [20]:
rag(question)

"Yes, you can still join the course even after the start date. You don't need to register to submit homeworks, but be aware of the deadlines for turning in the final projects. Don't leave everything for the last minute."

In [21]:
rag("How I can use chatgpt")

'The provided context does not contain any information about how to use ChatGPT. Therefore, I cannot answer your question based on the given context.'

In [22]:
print(llm("How I can use chatgpt"))

ChatGPT is a versatile AI tool that can assist you in various ways, from answering questions to generating creative content. Here’s how you can use it effectively:

### **1. Basic Usage**
- **Ask Questions**: Type your question or request in the chatbox, and ChatGPT will respond with relevant information.
  *Example*: "What is the capital of France?" or "Explain quantum computing in simple terms."

- **Conversational Mode**: Engage in a back-and-forth dialogue for more detailed or follow-up questions.

### **2. Creative Writing & Content Generation**
- **Storytelling**: Ask for a short story, poem, or even a script.
  *Example*: "Write a sci-fi story about a robot discovering emotions."

- **Blog Posts & Articles**: Request outlines, drafts, or full articles on a topic.
  *Example*: "Write a blog post about the benefits of meditation."

- **Social Media Content**: Get help with captions, tweets, or LinkedIn posts.
  *Example*: "Write a catchy Instagram caption for a travel photo."

###

### RAG is good when have a knowledge base, but if the data not in our DB it is recommended to use another tool such as Agentic RAG which can use both our database or extraernal knowledge 

# Part 1: Agentic RAG

In [23]:
prompt_template = """
You're a course teaching assistant.

You're given a QUESTION from a course student and that you need to answer with your own knowledge and provided CONTEXT.
At the beginning the context is EMPTY.

<QUESTION>
{question}
</QUESTION>

<CONTEXT> 
{context}
</CONTEXT>

If CONTEXT is EMPTY, you can use our FAQ database.
In this case, use the following output template:

{{
"action": "SEARCH",
"reasoning": "<add your reasoning here>"
}}

If you can answer the QUESTION using CONTEXT, use this template:

{{
"action": "ANSWER",
"answer": "<your answer>",
"source": "CONTEXT"
}}

If the context doesn't contain the answer, use your own knowledge to answer the question

{{
"action": "ANSWER",
"answer": "<your answer>",
"source": "OWN_KNOWLEDGE"
}}
""".strip()

In [24]:
question = "how do I run docker on gentoo?"
context = "EMPTY"

prompt = prompt_template.format(question=question, context=context)
print(prompt)

You're a course teaching assistant.

You're given a QUESTION from a course student and that you need to answer with your own knowledge and provided CONTEXT.
At the beginning the context is EMPTY.

<QUESTION>
how do I run docker on gentoo?
</QUESTION>

<CONTEXT> 
EMPTY
</CONTEXT>

If CONTEXT is EMPTY, you can use our FAQ database.
In this case, use the following output template:

{
"action": "SEARCH",
"reasoning": "<add your reasoning here>"
}

If you can answer the QUESTION using CONTEXT, use this template:

{
"action": "ANSWER",
"answer": "<your answer>",
"source": "CONTEXT"
}

If the context doesn't contain the answer, use your own knowledge to answer the question

{
"action": "ANSWER",
"answer": "<your answer>",
"source": "OWN_KNOWLEDGE"
}


In [25]:
answer = llm(prompt)
print(answer)

```json
{
"action": "SEARCH",
"reasoning": "The context provided is empty, and the question requires specific steps or instructions on how to run Docker on Gentoo. Since this information is not available in the context, I need to search the FAQ database for relevant details on installing and running Docker on Gentoo Linux."
}
```


In [26]:
question = "how do I join the course?"
context = "EMPTY"

prompt = prompt_template.format(question=question, context=context)
answer = llm(prompt)
print(answer)

```json
{
"action": "SEARCH",
"reasoning": "The context provided is empty, and the question pertains to course enrollment procedures, which are likely detailed in the FAQ database. Therefore, I need to search the FAQ database for the relevant information."
}
```


In [27]:
def build_context(search_results):
    context = ""

    for doc in search_results:
        context = context + f"section: {doc['section']}\nquestion: {doc['question']}\nanswer: {doc['text']}\n\n"

    return context.strip()

In [28]:
search_results = search(question)
context = build_context(search_results)
prompt = prompt_template.format(question=question, context=context)
print(prompt)

You're a course teaching assistant.

You're given a QUESTION from a course student and that you need to answer with your own knowledge and provided CONTEXT.
At the beginning the context is EMPTY.

<QUESTION>
how do I join the course?
</QUESTION>

<CONTEXT> 
section: General course-related questions
question: Course - Can I still join the course after the start date?
answer: Yes, even if you don't register, you're still eligible to submit the homeworks.
Be aware, however, that there will be deadlines for turning in the final projects. So don't leave everything for the last minute.

section: General course-related questions
question: Course - When will the course start?
answer: The purpose of this document is to capture frequently asked technical questions
The exact day and hour of the course will be 15th Jan 2024 at 17h00. The course will start with the first  “Office Hours'' live.1
Subscribe to course public Google Calendar (it works from Desktop only).
Register before the course start

In [29]:
answer = llm(prompt)

In [30]:
print(answer)

```json
{
"action": "ANSWER",
"answer": "To join the course, you need to register before the course starts using the provided registration link. Additionally, you should subscribe to the course's public Google Calendar, join the course Telegram channel for announcements, and register in DataTalks.Club's Slack to join the relevant channel.",
"source": "CONTEXT"
}
```


# Part 2: Agentic search

In [31]:
def dedup(seq):
    seen = set()
    result = []
    for el in seq:
        _id = el['_id']
        if _id in seen:
            continue
        seen.add(_id)
        result.append(el)
    return result

search_results = dedup(search_results)

In [32]:
prompt_template = """
You're a course teaching assistant.

You're given a QUESTION from a course student and that you need to answer with your own knowledge and provided CONTEXT.

The CONTEXT is build with the documents from our FAQ database.
SEARCH_QUERIES contains the queries that were used to retrieve the documents
from FAQ to and add them to the context.
PREVIOUS_ACTIONS contains the actions you already performed.

At the beginning the CONTEXT is empty.

You can perform the following actions:

- Search in the FAQ database to get more data for the CONTEXT
- Answer the question using the CONTEXT
- Answer the question using your own knowledge

For the SEARCH action, build search requests based on the CONTEXT and the QUESTION.
Carefully analyze the CONTEXT and generate the requests to deeply explore the topic. 

Don't use search queries used at the previous iterations.

Don't repeat previously performed actions.

Don't perform more than {max_iterations} iterations for a given student question.
The current iteration number: {iteration_number}. If we exceed the allowed number 
of iterations, give the best possible answer with the provided information.

Output templates:

If you want to perform search, use this template:

{{
"action": "SEARCH",
"reasoning": "<add your reasoning here>",
"keywords": ["search query 1", "search query 2", ...]
}}

If you can answer the QUESTION using CONTEXT, use this template:

{{
"action": "ANSWER_CONTEXT",
"answer": "<your answer>",
"source": "CONTEXT"
}}

If the context doesn't contain the answer, use your own knowledge to answer the question

{{
"action": "ANSWER",
"answer": "<your answer>",
"source": "OWN_KNOWLEDGE"
}}

<QUESTION>
{question}
</QUESTION>

<SEARCH_QUERIES>
{search_queries}
</SEARCH_QUERIES>

<CONTEXT> 
{context}
</CONTEXT>

<PREVIOUS_ACTIONS>
{previous_actions}
</PREVIOUS_ACTIONS>
""".strip()

In [33]:
question = 'How do I do well on module 1'
max_iterations = 3
iteration_number = 0 
search_queries = []
search_results = []
previous_actions = []

In [34]:
context = build_context(search_results)

prompt = prompt_template.format(
    question=question,
    context=context,
    search_queries="\n".join(search_queries),
    previous_actions='\n'.join([json.dumps(a) for a in previous_actions]),
    max_iterations=3,
    iteration_number=1
)
print(prompt)

You're a course teaching assistant.

You're given a QUESTION from a course student and that you need to answer with your own knowledge and provided CONTEXT.

The CONTEXT is build with the documents from our FAQ database.
SEARCH_QUERIES contains the queries that were used to retrieve the documents
from FAQ to and add them to the context.
PREVIOUS_ACTIONS contains the actions you already performed.

At the beginning the CONTEXT is empty.

You can perform the following actions:

- Search in the FAQ database to get more data for the CONTEXT
- Answer the question using the CONTEXT
- Answer the question using your own knowledge

For the SEARCH action, build search requests based on the CONTEXT and the QUESTION.
Carefully analyze the CONTEXT and generate the requests to deeply explore the topic. 

Don't use search queries used at the previous iterations.

Don't repeat previously performed actions.

Don't perform more than 3 iterations for a given student question.
The current iteration number

In [35]:
import json

In [36]:
    def extract_json(text):
        """
        Extracts the JSON string from a string that may include ```json ``` markers.

        Args:
            text: The input string.

        Returns:
            The extracted JSON string, or None if no JSON is found.
        """
        import re
        match = re.search(r"```json\s*([\s\S]*?)\s*```", text)
        if match:
            return match.group(1)
        else:
            return None

    text_with_json = "Here is some text. ```json {\"key\": \"value\"} ``` And some more text."
    json_string = extract_json(text_with_json)

    if json_string:
        print(f"Extracted JSON: {json_string}")
    else:
        print("No JSON found in the text.")

Extracted JSON: {"key": "value"}


In [37]:
raw_answer = llm(prompt)

In [38]:
answer_json = extract_json(raw_answer)

In [39]:
answer = json.loads(answer_json)

In [40]:
previous_actions.append(answer)

In [41]:
previous_actions

[{'action': 'SEARCH',
  'reasoning': 'The question asks for advice on performing well in module 1 of a course. Since the context is currently empty, I need to gather relevant information about module 1, including its content, structure, and any specific tips or guidelines provided for success.',
  'keywords': ['module 1 overview',
   'tips for success in module 1',
   'module 1 content',
   'how to excel in module 1']}]

In [42]:
keywords = answer['keywords']

In [43]:
for kw in keywords:
    search_queries.append(kw)
    sr=search(kw)
    search_results.extend(sr)

In [44]:
search_results = dedup(search_results)

In [45]:
iteration_number = 2
context = build_context(search_results)

prompt = prompt_template.format(
    question=question,
    context=context,
    search_queries="\n".join(search_queries),
    previous_actions='\n'.join([json.dumps(a) for a in previous_actions]),
    max_iterations=3,
    iteration_number=1
)
print(prompt)

You're a course teaching assistant.

You're given a QUESTION from a course student and that you need to answer with your own knowledge and provided CONTEXT.

The CONTEXT is build with the documents from our FAQ database.
SEARCH_QUERIES contains the queries that were used to retrieve the documents
from FAQ to and add them to the context.
PREVIOUS_ACTIONS contains the actions you already performed.

At the beginning the CONTEXT is empty.

You can perform the following actions:

- Search in the FAQ database to get more data for the CONTEXT
- Answer the question using the CONTEXT
- Answer the question using your own knowledge

For the SEARCH action, build search requests based on the CONTEXT and the QUESTION.
Carefully analyze the CONTEXT and generate the requests to deeply explore the topic. 

Don't use search queries used at the previous iterations.

Don't repeat previously performed actions.

Don't perform more than 3 iterations for a given student question.
The current iteration number

In [46]:
answer_json = extract_json(llm(prompt))

In [47]:
print(answer_json)

{
"action": "SEARCH",
"reasoning": "The current context does not provide specific advice on how to do well in Module 1. To address the student's question, I need to search for more detailed information about the structure, content, and best practices for succeeding in Module 1.",
"keywords": ["Module 1 study guide", "key topics in Module 1", "best practices for Module 1", "Module 1 success strategies"]
}


In [52]:
question = "what do I need to do to be successful at module 1?"

search_queries = []
search_results = []
previous_actions = []


iteration = 0

while True:
    print(f'ITERATION #{iteration}...')

    context = build_context(search_results)
    prompt = prompt_template.format(
        question=question,
        context=context,
        search_queries="\n".join(search_queries),
        previous_actions='\n'.join([json.dumps(a) for a in previous_actions]),
        max_iterations=3,
        iteration_number=iteration
    )

    print(prompt)

    answer_json = extract_json(llm(prompt))
    answer = json.loads(answer_json)
    print(json.dumps(answer, indent=2))

    previous_actions.append(answer)

    action = answer['action']
    if action != 'SEARCH':
        break

    keywords = answer['keywords']
    search_queries = list(set(search_queries) | set(keywords))
    
    for k in keywords:
        res = search(k)
        search_results.extend(res)

    search_results = dedup(search_results)
    
    iteration = iteration + 1
    if iteration >= 4:
        break

    print()

ITERATION #0...
You're a course teaching assistant.

You're given a QUESTION from a course student and that you need to answer with your own knowledge and provided CONTEXT.

The CONTEXT is build with the documents from our FAQ database.
SEARCH_QUERIES contains the queries that were used to retrieve the documents
from FAQ to and add them to the context.
PREVIOUS_ACTIONS contains the actions you already performed.

At the beginning the CONTEXT is empty.

You can perform the following actions:

- Search in the FAQ database to get more data for the CONTEXT
- Answer the question using the CONTEXT
- Answer the question using your own knowledge

For the SEARCH action, build search requests based on the CONTEXT and the QUESTION.
Carefully analyze the CONTEXT and generate the requests to deeply explore the topic. 

Don't use search queries used at the previous iterations.

Don't repeat previously performed actions.

Don't perform more than 3 iterations for a given student question.
The current 

JSONDecodeError: Invalid control character at: line 3 column 179 (char 200)

In [53]:
iteration

1

### Same code as before add all in function

In [54]:
def agentic_search(question):
    search_queries = []
    search_results = []
    previous_actions = []

    iteration = 0
    
    while True:
        print(f'ITERATION #{iteration}...')
    
        context = build_context(search_results)
        prompt = prompt_template.format(
            question=question,
            context=context,
            search_queries="\n".join(search_queries),
            previous_actions='\n'.join([json.dumps(a) for a in previous_actions]),
            max_iterations=3,
            iteration_number=iteration
        )
    
        print(prompt)
    
        answer_json = extract_json(llm(prompt))
        answer = json.loads(answer_json)
        print(json.dumps(answer, indent=2))

        previous_actions.append(answer)
    
        action = answer['action']
        if action != 'SEARCH':
            break
    
        keywords = answer['keywords']
        search_queries = list(set(search_queries) | set(keywords))

        for k in keywords:
            res = search(k)
            search_results.extend(res)
    
        search_results = dedup(search_results)
        
        iteration = iteration + 1
        if iteration >= 4:
            break
    
        print()

    return answer

In [55]:
agentic_search('how do I prepare for the course?')

ITERATION #0...
You're a course teaching assistant.

You're given a QUESTION from a course student and that you need to answer with your own knowledge and provided CONTEXT.

The CONTEXT is build with the documents from our FAQ database.
SEARCH_QUERIES contains the queries that were used to retrieve the documents
from FAQ to and add them to the context.
PREVIOUS_ACTIONS contains the actions you already performed.

At the beginning the CONTEXT is empty.

You can perform the following actions:

- Search in the FAQ database to get more data for the CONTEXT
- Answer the question using the CONTEXT
- Answer the question using your own knowledge

For the SEARCH action, build search requests based on the CONTEXT and the QUESTION.
Carefully analyze the CONTEXT and generate the requests to deeply explore the topic. 

Don't use search queries used at the previous iterations.

Don't repeat previously performed actions.

Don't perform more than 3 iterations for a given student question.
The current 

{'action': 'ANSWER_CONTEXT',
 'answer': "To prepare for the course, you should:\n\n1. Install and set up all the required dependencies and tools:\n   - Google Cloud account\n   - Google Cloud SDK\n   - Python 3 (preferably version 3.9 for compatibility, though 3.10 and 3.11 should work)\n   - Terraform\n   - Git\n\n2. Review the course prerequisites and syllabus to ensure you're comfortable with the topics covered.\n\n3. Familiarize yourself with Git and GitHub by:\n   - Creating a GitHub account\n   - Cloning the course repository\n   - Learning how to set up your own repositories\n   - Understanding how to use .gitignore for sensitive files\n\n4. Register for the course using the provided link before it starts on January 15, 2024.\n\n5. Join the course communication channels:\n   - Subscribe to the public Google Calendar\n   - Join the Telegram channel for announcements\n   - Register in DataTalks.Club's Slack and join the relevant channel\n\n6. Consider looking over the course mater

# Part 3: Function calling

In [56]:
def search(query):
    boost = {'question': 3.0, 'section': 0.5}

    results = index.search(
        query=query,
        filter_dict={'course': 'data-engineering-zoomcamp'},
        boost_dict=boost,
        num_results=5,
        output_ids=True
    )

    return results

In [63]:
search_tool = {
    "type": "function",
    "function": {           
        "name": "search",
        "description": "Search the FAQ database",
        "parameters": {
            "type": "object",
            "properties": {
                "query": {
                    "type": "string",
                    "description": "Search query text to look up in the course FAQ."
                }
            },
            "required": ["query"],
            "additionalProperties": False
        }
    }
}



In [72]:
question = "How do I do well in module 1?"

developer_prompt = """
You're a course teaching assistant. 
You're given a question from a course student and your task is to answer it.
""".strip()

tools = [search_tool]

chat_messages = [
    {"role": "system", "content": developer_prompt},
    {"role": "user", "content": question}
]

response = client.chat.complete(
    model = "mistral-medium-latest",
    messages = chat_messages, 
    tools = tools,
    tool_choice = "any", 
    parallel_tool_calls = False,
)
response.choices



[ChatCompletionChoice(index=0, message=AssistantMessage(content='', tool_calls=[ToolCall(function=FunctionCall(name='search', arguments='{"query": "how to do well in module 1"}'), id='QPYKnZpVA', type=None, index=0)], prefix=False, role='assistant'), finish_reason='tool_calls')]

In [73]:
calls = response.choices

In [75]:
call = calls[0]

In [76]:
call

ChatCompletionChoice(index=0, message=AssistantMessage(content='', tool_calls=[ToolCall(function=FunctionCall(name='search', arguments='{"query": "how to do well in module 1"}'), id='QPYKnZpVA', type=None, index=0)], prefix=False, role='assistant'), finish_reason='tool_calls')

In [87]:
f_name = call.message.tool_calls[0].function.name


In [88]:
arguments = json.loads(call.message.tool_calls[0].function.arguments)

In [89]:
arguments

{'query': 'how to do well in module 1'}

In [91]:
globals()['search']

<function __main__.search(query)>

In [92]:
globals()['search_tool']

{'type': 'function',
 'function': {'name': 'search',
  'description': 'Search the FAQ database',
  'parameters': {'type': 'object',
   'properties': {'query': {'type': 'string',
     'description': 'Search query text to look up in the course FAQ.'}},
   'required': ['query'],
   'additionalProperties': False}}}

In [93]:
f = globals()[f_name]

In [108]:
search_results = f(**arguments)

In [96]:
results

[{'text': 'Even after installing pyspark correctly on linux machine (VM ) as per course instructions, faced a module not found error in jupyter notebook .\nThe solution which worked for me(use following in jupyter notebook) :\n!pip install findspark\nimport findspark\nfindspark.init()\nThereafter , import pyspark and create spark contex<<t as usual\nNone of the solutions above worked for me till I ran !pip3 install pyspark instead !pip install pyspark.\nFilter based on conditions based on multiple columns\nfrom pyspark.sql.functions import col\nnew_final.filter((new_final.a_zone=="Murray Hill") & (new_final.b_zone=="Midwood")).show()\nKrishna Anand',
  'section': 'Module 5: pyspark',
  'question': 'Module Not Found Error in Jupyter Notebook .',
  'course': 'data-engineering-zoomcamp',
  '_id': 322},
 {'text': 'You need to look for the Py4J file and note the version of the filename. Once you know the version, you can update the export command accordingly, this is how you check yours:\n`

In [114]:
search_results = json.dumps(results, indent=2)
print(search_results)

[
  {
    "text": "Even after installing pyspark correctly on linux machine (VM ) as per course instructions, faced a module not found error in jupyter notebook .\nThe solution which worked for me(use following in jupyter notebook) :\n!pip install findspark\nimport findspark\nfindspark.init()\nThereafter , import pyspark and create spark contex<<t as usual\nNone of the solutions above worked for me till I ran !pip3 install pyspark instead !pip install pyspark.\nFilter based on conditions based on multiple columns\nfrom pyspark.sql.functions import col\nnew_final.filter((new_final.a_zone==\"Murray Hill\") & (new_final.b_zone==\"Midwood\")).show()\nKrishna Anand",
    "section": "Module 5: pyspark",
    "question": "Module Not Found Error in Jupyter Notebook .",
    "course": "data-engineering-zoomcamp",
    "_id": 322
  },
  {
    "text": "You need to look for the Py4J file and note the version of the filename. Once you know the version, you can update the export command accordingly, th

In [119]:
tool_call = call.message.tool_calls[0]

chat_messages.append({
    "role": "tool",                
    "tool_call_id": tool_call.id,    
    "name": tool_call.function.name,
    "content": search_results
})


In [122]:
# response = client.chat.complete(
#     model = "mistral-medium-latest",
#     messages = chat_messages, 
#     tools = tools,
#     tool_choice = "any", 
#     parallel_tool_calls = False,
# )
# response.choices
