In [56]:
%pip install python-dotenv

Note: you may need to restart the kernel to use updated packages.


In [57]:
from dotenv import load_dotenv

In [58]:
# Load settings from a local .env file; the `True` shown as the cell output is
# load_dotenv's return value.
# NOTE(review): presumably this supplies the API key used by the OpenAI clients
# created in later cells — confirm.
load_dotenv()

True

In [59]:
from openai import OpenAI

# OpenAI SDK client created with no explicit arguments.
# NOTE(review): `openai_client` is not referenced by any later cell shown here
# (the agent runner is given its own `OpenAIClient()`); confirm it is still needed.
openai_client = OpenAI()

In [60]:
import requests

docs_url = 'https://github.com/alexeygrigorev/llm-rag-workshop/raw/main/notebooks/documents.json'

# A timeout keeps the cell from hanging forever on a stalled connection, and
# raise_for_status() turns an HTTP error response into a clear exception
# instead of a confusing JSON decode failure on the next line.
docs_response = requests.get(docs_url, timeout=30)
docs_response.raise_for_status()
documents_raw = docs_response.json()

# Flatten the per-course structure into one list of FAQ entries, tagging each
# entry with the course it came from. Each entry is copied so that
# `documents_raw` stays exactly as downloaded.
documents = [
    {**doc, 'course': course['course']}
    for course in documents_raw
    for doc in course['documents']
]

In [61]:
from minsearch import AppendableIndex

# Build the FAQ index over the flattened documents.
# "question", "text" and "section" are declared as text fields; "course" is
# declared as a keyword field, which the search helpers pass in `filter_dict`.
index = AppendableIndex(
    text_fields=["question", "text", "section"],
    keyword_fields=["course"]
)

# Last expression of the cell: its return value is what the cell displays.
index.fit(documents)

<minsearch.append.AppendableIndex at 0x262d3d839d0>

In [62]:
def rag(query):
    """Answer `query` with a retrieve-then-generate pipeline.

    Runs `search(query)`, feeds the query and the search results to
    `build_prompt`, and returns whatever `llm` produces for that prompt.

    NOTE(review): `build_prompt` and `llm` are not defined in the visible
    cells — confirm they exist before calling this function.
    """
    retrieved = search(query)
    return llm(build_prompt(query, retrieved))

In [63]:
def search(query):
    """Query the module-level `index` for FAQ entries matching `query`.

    The search is filtered to the 'data-engineering-zoomcamp' course, passes
    boost weights of 3.0 for `question` and 0.5 for `section`, and requests
    five results. Returns whatever `index.search` returns.
    """
    return index.search(
        query=query,
        filter_dict={'course': 'data-engineering-zoomcamp'},
        boost_dict={'question': 3.0, 'section': 0.5},
        num_results=5,
    )

In [64]:
# Hand-written function-tool schema for `search`: one required string
# argument, `query`, and no additional properties allowed.
search_tool = {
    'type': 'function',
    'name': 'search',
    'description': 'Search the FAQ database',
    'parameters': {
        'type': 'object',
        'properties': {
            'query': {
                'type': 'string',
                'description': 'Search query text to look up in the course FAQ.',
            },
        },
        'required': ['query'],
        'additionalProperties': False,
    },
}

In [65]:
# Developer prompt for the agent. Built from explicit pieces so that the exact
# whitespace (including the trailing space on the first line) is visible.
instructions = (
    "You're a course teaching assistant. \n"
    "You're given a question from a course student and your task is to answer it.\n"
    "\n"
    "If you want to look up the answer, explain why before making the call"
)

In [66]:
# Sample student question; passed as the prompt to runner.loop() in a later cell.
question = 'I just discovered the course. Can I still join it?'





In [67]:
from toyaikit.llm import OpenAIClient
from toyaikit.chat import IPythonChatInterface
from toyaikit.chat.runners import OpenAIResponsesRunner
from toyaikit.chat.runners import DisplayingRunnerCallback
from toyaikit.tools import Tools


In [68]:
# Tool collection that is handed to the agent runner; `search` is registered
# together with its hand-written schema `search_tool`.
agent_tools = Tools()
agent_tools.add_tool(search, search_tool)

In [69]:
# Inspect the tool schemas currently registered on `agent_tools`.
agent_tools.get_tools()

[{'type': 'function',
  'name': 'search',
  'description': 'Search the FAQ database',
  'parameters': {'type': 'object',
   'properties': {'query': {'type': 'string',
     'description': 'Search query text to look up in the course FAQ.'}},
   'required': ['query'],
   'additionalProperties': False}}]

In [70]:
# Chat interface shared by the runner and the display callback.
chat_interface = IPythonChatInterface()

# Wire the agent together: the registered tools, the `instructions` text as the
# developer prompt, the chat interface, and a toyaikit `OpenAIClient`.
runner = OpenAIResponsesRunner(
    tools=agent_tools,
    developer_prompt=instructions,
    chat_interface=chat_interface,
    llm_client=OpenAIClient()
)

In [72]:
# Callback built on the same chat interface as the runner; passed to
# runner.loop() in a later cell.
callback = DisplayingRunnerCallback(chat_interface)

In [73]:
# Run the agent on the sample question, reporting through `callback`.
# The returned object exposes a `cost` attribute (inspected in a later cell).
results = runner.loop(
    prompt=question,
    callback=callback
    
)

In [76]:
# Cost reported for the loop run: input, output and total cost.
results.cost

CostInfo(input_cost=0.00012555, output_cost=7.68e-05, total_cost=0.00020234999999999999)

In [71]:
# Start an interactive chat session; in the recorded transcript it ends when the
# user types `stop`. The trailing `;` suppresses the cell's output value.
# NOTE(review): this cell's execution count (71) is lower than those of the
# cells placed above it (72-76), so the saved order is not the executed order —
# verify the notebook with Restart Kernel -> Run All.
runner.run();

You: I just discovered the course. Can I still join it?


You: stop


Chat ended.


In [75]:
!uv add toyaikit==0.0.5

[2mResolved [1m153 packages[0m [2min 1.70s[0m[0m
[2mAudited [1m133 packages[0m [2min 506ms[0m[0m


In [78]:
def add_entry(question, answer):
    """Append a question/answer pair to the module-level `index`.

    The new document stores `question` under 'question' and `answer` under
    'text', with section 'user added' and course 'data-engineering-zoomcamp'.
    Returns None.
    """
    index.append(dict(
        question=question,
        text=answer,
        section='user added',
        course='data-engineering-zoomcamp',
    ))




In [79]:
# Hand-written function-tool schema for `add_entry`: two required string
# arguments, `question` and `answer`, and no additional properties allowed.
add_entry_tool = {
    'type': 'function',
    'name': 'add_entry',
    'description': 'Add an entry to the FAQ database',
    'parameters': {
        'type': 'object',
        'properties': {
            'question': {
                'type': 'string',
                'description': 'The question to be added to the FAQ database',
            },
            'answer': {
                'type': 'string',
                'description': 'The answer to the question',
            },
        },
        'required': ['question', 'answer'],
        'additionalProperties': False,
    },
}

In [81]:
# Register `add_entry` on the same Tools instance that `runner` was built with.
# NOTE(review): presumably the runner picks up the new tool without being
# rebuilt — the following chat session, which saves an entry, is consistent
# with that; confirm.
agent_tools.add_tool(add_entry, add_entry_tool)

In [83]:
# Second interactive session, run after `add_entry` was registered; the trailing
# `;` suppresses the cell's output value.
runner.run();

You: how do I do well in module 1?


You: save it back to FAQ


You: stop


Chat ended.


In [84]:
# Check the most recently stored document: after the chat session it should be
# the entry written through `add_entry` (section 'user added').
index.docs[-1]

{'question': 'How do I do well in Module 1?',
 'text': 'To excel in Module 1, which covers Docker and Terraform, follow these tips: 1. **Familiarize Yourself with Concepts**: Understand Docker (containers, images, Dockerfile) and Terraform (infrastructure as code). Complete all readings and videos. 2. **Hands-On Practice**: Set up your environment and work through examples. Create your own Docker containers and Terraform configurations. 3. **Solve Common Errors**: Pay attention to troubleshooting steps for errors in SQLAlchemy and psycopg2, applying solutions shared by peers. 4. **Engage with Peers**: Join forums or study groups to share resources and ask questions. 5. **Utilize Help Resources**: Reach out to instructors or teaching assistants for clarification. 6. **Stay Organized**: Track assignments and deadlines to manage your workload.',
 'section': 'user added',
 'course': 'data-engineering-zoomcamp'}

In [95]:
from typing import Any, Dict, List


class SearchTools:
    """FAQ search and maintenance operations bound to one index and one course.

    The public methods are registered as agent tools, and their docstrings are
    used as the tool descriptions, so those docstrings are written for the
    model rather than for maintainers.
    """

    def __init__(self, index, course: str = 'data-engineering-zoomcamp'):
        """
        Args:
            index: Index object providing ``search(...)`` and ``append(doc)``.
            course (str): Course used both to filter searches and to tag new
                entries, so that added entries match the search filter.
        """
        self.index = index
        # Single source of truth for the course; previously the same literal
        # was repeated in search() and add_entry().
        self.course = course

    def search(self, query: str) -> List[Dict[str, Any]]:
        """
        Search the FAQ database for entries matching the given query.

        Args:
            query (str): Search query text to look up in the course FAQ.

        Returns:
            List[Dict[str, Any]]: A list of search result entries, each containing relevant metadata.
        """
        boost = {'question': 3.0, 'section': 0.5}

        results = self.index.search(
            query=query,
            filter_dict={'course': self.course},
            boost_dict=boost,
            num_results=5,
            output_ids=True
        )

        return results

    def add_entry(self, question: str, answer: str) -> None:
        """
        Add a new entry to the FAQ database.

        Args:
            question (str): The question to be added to the FAQ database.
            answer (str): The corresponding answer to the question.
        """
        doc = {
            'question': question,
            'text': answer,
            'section': 'user added',
            'course': self.course
        }
        self.index.append(doc)


In [96]:
# Start over with an empty tool collection for the class-based tools.
# NOTE(review): rebinding the name does not change the Tools instance that the
# existing `runner` was constructed with; presumably a new runner is built from
# this collection in a later cell — confirm.
agent_tools = Tools()

In [97]:
# Bind the tool methods to the existing index and register them in one call;
# no hand-written schemas are passed here.
search_tools = SearchTools(index)
agent_tools.add_tools(search_tools)

In [98]:
# Inspect the schemas generated from the SearchTools methods.
# NOTE(review): in the recorded output the tool descriptions come from the
# method docstrings, but each parameter description is a generic
# '<name> parameter' placeholder rather than the text from the Args sections,
# unlike the hand-written schemas used earlier — check whether that is acceptable.
agent_tools.get_tools()

[{'type': 'function',
  'name': 'add_entry',
  'description': 'Add a new entry to the FAQ database.\n\nArgs:\n    question (str): The question to be added to the FAQ database.\n    answer (str): The corresponding answer to the question.',
  'parameters': {'type': 'object',
   'properties': {'question': {'type': 'string',
     'description': 'question parameter'},
    'answer': {'type': 'string', 'description': 'answer parameter'}},
   'required': ['question', 'answer'],
   'additionalProperties': False}},
 {'type': 'function',
  'name': 'search',
  'description': 'Search the FAQ database for entries matching the given query.\n\nArgs:\n    query (str): Search query text to look up in the course FAQ.\n\nReturns:\n    List[Dict[str, Any]]: A list of search result entries, each containing relevant metadata.',
  'parameters': {'type': 'object',
   'properties': {'query': {'type': 'string',
     'description': 'query parameter'}},
   'required': ['query'],
   'additionalProperties': False}}]