In [1]:
from openai import OpenAI
# OpenAI API client, built with no explicit arguments — presumably credentials
# come from the environment (TODO confirm). Not referenced by the visible cells below.
openai_client = OpenAI()

In [2]:
import requests

# Download the FAQ corpus as JSON.
docs_url = 'https://github.com/alexeygrigorev/llm-rag-workshop/raw/main/notebooks/documents.json'
# A timeout keeps a stalled connection from hanging the kernel indefinitely.
docs_response = requests.get(docs_url, timeout=30)
# Fail loudly on an HTTP error instead of trying to parse an error page as JSON.
docs_response.raise_for_status()
documents_raw = docs_response.json()

In [3]:
# Flat list of FAQ entries (one dict per question), later passed to index.fit().
documents = []

In [4]:
# Flatten the per-course structure into a single list, tagging every entry
# with the name of the course it belongs to.
#
# The list is rebuilt from scratch rather than appended to, so re-running this
# cell does not duplicate entries. Each entry is a copy, which leaves
# documents_raw unmodified.
documents = [
    {**doc, 'course': course['course']}
    for course in documents_raw
    for doc in course['documents']
]
        

In [5]:
from minsearch import AppendableIndex

# Index over the FAQ entries. "question", "text" and "section" are declared as
# text fields and "course" as a keyword field (used as a filter in search()).
index = AppendableIndex(
    text_fields=["question", "text", "section"],
    keyword_fields=["course"]
)

# Last expression of the cell, so its value (the index object) is displayed.
index.fit(documents)

<minsearch.append.AppendableIndex at 0x7400994c3e90>

In [6]:
def search(query):
    """Query the module-level ``index`` for FAQ entries matching ``query``.

    The lookup is restricted to the 'data-engineering-zoomcamp' course, uses
    a boost of 3.0 for the ``question`` field and 0.5 for ``section``, and
    asks for five results.

    Args:
        query: Free-text search string.

    Returns:
        Whatever ``index.search`` returns for these arguments.
    """
    return index.search(
        query=query,
        filter_dict={'course': 'data-engineering-zoomcamp'},
        boost_dict={'question': 3.0, 'section': 0.5},
        num_results=5,
    )
    

In [8]:
# Schema describing the `search` function to the model: a single required
# string argument named "query", with no extra properties allowed.
search_tool = {
    "type": "function",
    "name": "search",
    "description": "Search the FAQ database",
    "parameters": {
        "type": "object",
        "properties": {
            "query": {
                "type": "string",
                "description": "Search query text to look up in the course FAQ.",
            },
        },
        "required": ["query"],
        "additionalProperties": False,
    },
}

In [24]:
def add_entry(question, answer):
    """Append a new question/answer pair to the module-level ``index``.

    The entry is stored under the 'data-engineering-zoomcamp' course with
    its section set to 'user added'.

    Args:
        question: Question text, stored under the ``question`` key.
        answer: Answer text, stored under the ``text`` key.

    Returns:
        None
    """
    index.append({
        'question': question,
        'text': answer,
        'section': 'user added',
        'course': 'data-engineering-zoomcamp',
    })

In [25]:
# Schema describing the `add_entry` function to the model: two required
# string arguments ("question" and "answer"), with no extra properties allowed.
add_entry_tool = {
    "type": "function",
    "name": "add_entry",
    "description": "Add an entry to the FAQ database",
    "parameters": {
        "type": "object",
        "properties": {
            "question": {
                "type": "string",
                "description": "The question to be added to the FAQ database",
            },
            "answer": {
                "type": "string",
                "description": "The answer to the question",
            },
        },
        "required": ["question", "answer"],
        "additionalProperties": False,
    },
}

In [7]:
import json


def make_call(call):
    """Execute a function call requested by the model and package its result.

    Args:
        call: Object exposing ``name`` (the function to run), ``arguments``
            (a JSON string holding the keyword arguments) and ``call_id``.

    Returns:
        dict: An item of type ``function_call_output`` that carries the
        original ``call_id`` and the JSON-serialised return value.

    Raises:
        ValueError: If ``call.name`` is not one of the known functions.
    """
    args = json.loads(call.arguments)
    f_name = call.name

    # Names are resolved only inside the branch that runs, so a function that
    # has not been defined yet does not break calls to the others.
    if f_name == 'search':
        result = search(**args)
    elif f_name == 'add_entry':
        result = add_entry(**args)
    else:
        # Previously `raise Error(...)`, which is an undefined name and so
        # surfaced as a NameError instead of the intended message.
        raise ValueError(f'unknown function {f_name}')

    return {
        "type": "function_call_output",
        "call_id": call.call_id,
        "output": json.dumps(result),
    }

In [9]:
#tools = [search_tool]

In [10]:
#chat_messages=[]

In [6]:
# Developer prompt for the agent (passed to the runner as `developer_prompt`).
# .strip() removes the leading/trailing newlines introduced by the triple quotes.
instructions = """
You're a course teaching assistant. You're given a question from a course student and your task is to answer it.
IF you want to look up the answer, explain why before making the call.
""".strip()

In [7]:
# Sample student question used as the prompt for runner.loop();
# the commented-out line is an alternative question.
#question = 'I just discovered the course. Can I still join it?'
question = 'How much time does the course take per week?'





In [8]:
from toyaikit.llm import OpenAIClient
from toyaikit.chat import IPythonChatInterface
from toyaikit.chat.runners import OpenAIResponsesRunner
from toyaikit.chat.runners import DisplayingRunnerCallback
from toyaikit.tools import Tools

In [14]:
# Tool registry handed to the runner; `search` is registered together with
# its hand-written schema.
agent_tools = Tools()
agent_tools.add_tool(search,search_tool)

In [26]:
# Also register add_entry, which appends new FAQ entries to the index.
agent_tools.add_tool(add_entry,add_entry_tool)

In [27]:
# Display the schemas of all registered tools.
agent_tools.get_tools()

[{'type': 'function',
  'name': 'search',
  'description': 'Search the FAQ database',
  'parameters': {'type': 'object',
   'properties': {'query': {'type': 'string',
     'description': 'Search query text to look up in the course FAQ.'}},
   'required': ['query'],
   'additionalProperties': False}},
 {'type': 'function',
  'name': 'add_entry',
  'description': 'Add an entry to the FAQ database',
  'parameters': {'type': 'object',
   'properties': {'question': {'type': 'string',
     'description': 'The question to be added to the FAQ database'},
    'answer': {'type': 'string', 'description': 'The answer to the question'}},
   'required': ['question', 'answer'],
   'additionalProperties': False}}]

In [16]:
# Chat front-end; also passed to DisplayingRunnerCallback further down.
chat_interface = IPythonChatInterface()

# Runner combining the tool registry, the developer prompt, the chat interface
# and the LLM client.
runner=OpenAIResponsesRunner(
    tools=agent_tools,
    developer_prompt=instructions,
    chat_interface=chat_interface,
    llm_client=OpenAIClient()
)

In [28]:
# Start the interactive chat (in the recorded session, typing "stop" ended it).
# The trailing ';' suppresses the display of the return value.
runner.run();

You: how do I do well in module 1?


You: save it back to FAQ


You: stop


Chat ended.


In [29]:
# Show the last document held by the index; in the recorded output this is
# the entry saved during the chat session.
index.docs[-1]

{'question': 'How do I do well in Module 1?',
 'text': "1. **Familiarize Yourself with Tools**: Understand the tools you'll be using (like Docker, Terraform, and any databases) and ensure they are properly set up. Pay attention to any installation issues others have faced and the suggested solutions.\n\n2. **Follow the Course Materials**: Carefully read through all the course materials and follow the outlined steps. Don’t skip any instructions as they often build upon each other.\n\n3. **Practice Coding**: If programming is required, practice coding outside of the course examples to reinforce your understanding.\n\n4. **Participate in Discussions**: Engage with your peers in discussion forums. They can provide insights and share their experiences, which may help you understand complex concepts better.\n\n5. **Seek Help When Stuck**: If you encounter errors (like the `ModuleNotFoundError` for packages), take the time to research the error or ask for help in class forums.\n\n6. **Schedul

In [18]:
# Callback bound to the chat interface — presumably renders the runner's
# intermediate steps there (TODO confirm against toyaikit).
callback = DisplayingRunnerCallback(chat_interface)

In [19]:
# Run the agent with `question` as the prompt, passing the callback created above.
# The returned object exposes the cost of the run via `results.cost`.
results = runner.loop(
    prompt=question,
    callback=callback
)

In [21]:
# Cost of the run, reported as input, output and total cost.
results.cost

CostInfo(input_cost=0.00015015, output_cost=6.78e-05, total_cost=0.00021794999999999999)

In [9]:
from typing import Any, Dict, List

class SearchTools:
    # Groups the FAQ search/add operations around a single index object.
    #
    # NOTE: when an instance is registered via Tools.add_tools, the method
    # docstrings below become the tool descriptions (see the get_tools()
    # output), so their wording is deliberately left unchanged.

    def __init__(self, index):
        # Index object providing .search(...) and .append(...).
        self.index = index

    def search(self, query: str) -> List[Dict[str, Any]]:
        """
        Search for documents related to the given query within the 'data-engineering-zoomcamp' course.

        This function performs a search on a predefined index, applying a custom boost to certain
        fields to prioritize matches in the 'question' field over others. It limits results to
        the top 5 most relevant entries.

        Args:
            query (str): The search query string to look for.

        Returns:
            List[Dict[str, Any]]: A list of dictionaries representing the top search results.
        """
        return self.index.search(
            query=query,
            filter_dict={'course': 'data-engineering-zoomcamp'},
            boost_dict={'question': 3.0, 'section': 0.5},
            num_results=5,
        )

    def add_entry(self, question: str, answer: str) -> None:
        """
        Add a new question-and-answer pair to the index for the 'data-engineering-zoomcamp' course.

        This function appends a document containing a user-provided question and answer
        to the index, tagging it as 'user added' for traceability.

        Args:
            question (str): The user's question to be added to the index.
            answer (str): The corresponding answer or explanation text.

        Returns:
            None
        """
        self.index.append({
            'question': question,
            'text': answer,
            'section': 'user added',
            'course': 'data-engineering-zoomcamp',
        })


In [10]:
# Start over with an empty tool registry.
agent_tools = Tools()

In [12]:
# Bind the tool class to the index and register the instance in one call;
# no hand-written schema is passed here.
search_tools = SearchTools(index)
agent_tools.add_tools(search_tools)

In [14]:
#agent_tools.add_tool(search)

In [15]:
# Display the registered tool schemas; in the recorded output the descriptions
# are the SearchTools method docstrings.
agent_tools.get_tools()

[{'type': 'function',
  'name': 'add_entry',
  'description': "Add a new question-and-answer pair to the index for the 'data-engineering-zoomcamp' course.\n\nThis function appends a document containing a user-provided question and answer\nto the index, tagging it as 'user added' for traceability.\n\nArgs:\n    question (str): The user's question to be added to the index.\n    answer (str): The corresponding answer or explanation text.\n\nReturns:\n    None",
  'parameters': {'type': 'object',
   'properties': {'question': {'type': 'string',
     'description': 'question parameter'},
    'answer': {'type': 'string', 'description': 'answer parameter'}},
   'required': ['question', 'answer'],
   'additionalProperties': False}},
 {'type': 'function',
  'name': 'search',
  'description': "Search for documents related to the given query within the 'data-engineering-zoomcamp' course.\n\nThis function performs a search on a predefined index, applying a custom boost to certain\nfields to priori