In [1]:
%pip install python-dotenv

Note: you may need to restart the kernel to use updated packages.


In [2]:
from dotenv import load_dotenv

In [3]:
load_dotenv()


True

In [4]:
from openai import OpenAI

openai_client = OpenAI()

In [5]:
import json

In [6]:
import requests 

# Download the FAQ dataset: a list of courses, each holding its own list of documents.
docs_url = 'https://github.com/alexeygrigorev/llm-rag-workshop/raw/main/notebooks/documents.json'
# A timeout keeps the cell from hanging forever on a stalled connection.
docs_response = requests.get(docs_url, timeout=30)
# Fail loudly on an HTTP error instead of trying to parse an error page as JSON.
docs_response.raise_for_status()
documents_raw = docs_response.json()

# Flatten the per-course nesting into one list, tagging every document with
# the name of the course it belongs to.
documents = []

for course in documents_raw:
    course_name = course['course']

    for doc in course['documents']:
        # Build a tagged copy so `documents_raw` is left untouched and
        # re-running this cell always starts from the same raw data.
        documents.append({**doc, 'course': course_name})

In [7]:
from minsearch import AppendableIndex

# Build the search index over the flattened FAQ documents.
# "course" is declared as a keyword field; it is the field the `search`
# functions in this notebook filter on through `filter_dict`.
index = AppendableIndex(
    text_fields=["question", "text", "section"],
    keyword_fields=["course"]
)

# Index all documents; as the last expression, the fitted index is also displayed.
index.fit(documents)

<minsearch.append.AppendableIndex at 0x2b6030856a0>

In [8]:
def search(query):
    """Query the FAQ index and return what `index.search` produces.

    The lookup is filtered to the 'data-engineering-zoomcamp' course and asks
    for five results.
    """
    return index.search(
        query=query,
        filter_dict={'course': 'data-engineering-zoomcamp'},
        # Per-field boosts: 3.0 for 'question', 0.5 for 'section'.
        boost_dict={'question': 3.0, 'section': 0.5},
        num_results=5,
    )

# Tool description for the `search` function; it is handed to the model via
# the `tools` argument so the model can request a search with a `query` string.
search_tool = dict(
    type="function",
    name="search",
    description="Search the FAQ database",
    parameters=dict(
        type="object",
        properties=dict(
            query=dict(
                type="string",
                description="Search query text to look up in the course FAQ.",
            ),
        ),
        required=["query"],
        additionalProperties=False,
    ),
)

In [9]:
def make_call(call):
    """Execute a function call requested by the model and package its result.

    Args:
        call: A function-call entry from the model response. Its `name`,
            `arguments` (a JSON string) and `call_id` attributes are read.

    Returns:
        dict: A `function_call_output` item carrying the same `call_id` and
        the return value of the function encoded as indented JSON.

    Raises:
        KeyError: If no module-level global named `call.name` exists.
        TypeError: If that global exists but is not callable.
    """
    # Treat missing/empty arguments as "no arguments" rather than handing an
    # empty string to json.loads, which would raise.
    args = json.loads(call.arguments) if call.arguments else {}
    f_name = call.name
    # NOTE(review): the name is chosen by the model and resolved against every
    # module-level global, so any global callable can be invoked this way.
    # Consider an explicit registry of allowed tools.
    f = globals()[f_name]
    if not callable(f):
        raise TypeError(f"'{f_name}' is not a callable tool")
    result = f(**args)
    result_json = json.dumps(result, indent=2)
    return {
        "type": "function_call_output",
        "call_id": call.call_id,
        "output": result_json,
    }

In [None]:
RAG - Retrieval-Augmented Generation

In [10]:
# Instructions for the assistant. The text is sent as the "developer" message
# in the manual loop below and is also passed to the runners and agents built
# later in the notebook. Surrounding blank lines are removed by .strip().
developer_prompt = """
You're a course teaching assistant. 
You're given a question from a course student and your task is to answer it.

If you want to look up the answer, explain why before making the call. Use as many 
keywords from the user question as possible when making first requests.

Make multiple searches. Try to expand your search by using new keywords based on the results you
get from the search.

At the end, make a clarifying question based on what you presented and ask if there are 
other areas that the user wants to explore.
""".strip()

In [11]:
question = "I just discovered the course, can I join it now?"

In [12]:
chat_messages = [
    {"role": "developer", "content": developer_prompt},
    {"role": "user", "content": question}
]

In [13]:
# Agent loop: keep calling the model until it replies without requesting a tool call.
MAX_AGENT_STEPS = 10  # safety cap so a model that keeps requesting tools cannot loop forever

for _ in range(MAX_AGENT_STEPS):
    response = openai_client.responses.create(
        model='gpt-4o-mini',
        input=chat_messages,
        tools=[search_tool]
    )

    # Keep the model output in the history so the next request includes its tool calls.
    chat_messages.extend(response.output)

    has_function_calls = False

    for entry in response.output:
        if entry.type == 'message':
            # Print every content part, not just the first one; parts that
            # carry no `text` attribute are skipped instead of raising.
            for part in entry.content:
                text = getattr(part, 'text', None)
                if text is not None:
                    print(text)
        elif entry.type == 'function_call':
            print(entry)
            # Run the requested function and feed its output back to the model.
            result = make_call(entry)
            chat_messages.append(result)
            has_function_calls = True

    if not has_function_calls:
        break
else:
    # Only reached when the cap was hit without a tool-free reply.
    print(f"Stopped after {MAX_AGENT_STEPS} model calls without a final answer.")

ResponseFunctionToolCall(arguments='{"query":"join course late enrollment"}', call_id='call_EmiHCApHVg8EFYqpbOkkBZ4f', name='search', type='function_call', id='fc_04252df827b592eb0068f6d9ed3b7481a19a176ca86dfa3ea1', status='completed')
ResponseFunctionToolCall(arguments='{"query":"join course after start date late enrollment eligibility"}', call_id='call_xlpbztcHHJlMvlaB4dcXYAEu', name='search', type='function_call', id='fc_04252df827b592eb0068f6d9ef0d2881a19a630c9bb1355c36', status='completed')
Yes, you can still join the course even if you've discovered it after it has started. You are eligible to submit homework without registering, although it's encouraged to do so for better tracking. There are deadlines for final projects that you'll need to keep in mind, so it's best not to procrastinate.

If you have any specific areas you are curious about, or if there’s anything else you'd like to know regarding course materials, project submissions, or deadlines, feel free to ask!


In [14]:
from toyaikit.llm import OpenAIClient
from toyaikit.tools import Tools
from toyaikit.chat import IPythonChatInterface
from toyaikit.chat.runners import OpenAIResponsesRunner
from toyaikit.chat.runners import DisplayingRunnerCallback

In [15]:
agent_tools = Tools()
agent_tools.add_tool(search, search_tool)

In [16]:
chat_interface = IPythonChatInterface()

runner = OpenAIResponsesRunner(
    tools=agent_tools,
    developer_prompt=developer_prompt,
    chat_interface=chat_interface,
    llm_client=OpenAIClient()
)

In [17]:
callback = DisplayingRunnerCallback(chat_interface)

In [21]:
# Run one agent loop for a single prompt, outside the interactive chat.
result = runner.loop(prompt='how do I install kafka', callback=callback)
# Keep this run's messages; the next cell passes them back as `previous_messages`.
messages = result.new_messages  # TODO confirm: `result.messages` may be the right attribute — check the LoopResult attributes

In [22]:
# Continue the conversation by handing the earlier messages back to the runner.
# NOTE(review): despite its name, `new_messages` presumably holds the loop
# result object rather than a message list (the previous cell reads
# `.new_messages` off the value returned by the same method) — confirm.
new_messages = runner.loop(
    prompt='I want to use docker',
    previous_messages=messages,  # Use the messages list directly
    callback=callback,
)

In [23]:
messages = runner.run();

You: join course late enrollment


You: course deadlines final project assignments


You: end


You: end


You: stop


Chat ended.


In [24]:
from typing import List, Dict, Any

def search(query: str) -> List[Dict[str, Any]]:
    """
    Search the FAQ database for entries matching the given query.

    Args:
        query (str): Search query text to look up in the course FAQ.

    Returns:
        List[Dict[str, Any]]: A list of search result entries, each containing relevant metadata.
    """
    # The docstring above is kept word for word: it is reused as the tool
    # description when this function is registered as a tool.
    return index.search(
        query=query,
        filter_dict={'course': 'data-engineering-zoomcamp'},
        # Per-field boosts: 3.0 for 'question', 0.5 for 'section'.
        boost_dict={'question': 3.0, 'section': 0.5},
        num_results=5,
        output_ids=True,
    )

def add_entry(question: str, answer: str) -> None:
    """
    Add a new entry to the FAQ database.

    Args:
        question (str): The question to be added to the FAQ database.
        answer (str): The corresponding answer to the question.
    """
    # The docstring above is kept word for word: it is reused as the tool
    # description when this function is registered as a tool.
    index.append(
        dict(
            question=question,
            text=answer,
            section='user added',
            course='data-engineering-zoomcamp',
        )
    )

In [25]:
agent_tools = Tools()
agent_tools.add_tool(search)
agent_tools.add_tool(add_entry)

In [26]:
runner = OpenAIResponsesRunner(
    tools=agent_tools,
    developer_prompt=developer_prompt,
    chat_interface=chat_interface,
    llm_client=OpenAIClient()
)

In [27]:
runner.run();

You: module 1 success tips


You: module 1 strategies for success


You: How do I do well in Module 1?


You: stop


Chat ended.


In [28]:
index.docs[-1]


{'text': 'Problem description\nInfrastructure created in AWS with CD-Deploy Action needs to be destroyed\nSolution description\nFrom local:\nterraform init -backend-config="key=mlops-zoomcamp-prod.tfstate" --reconfigure\nterraform destroy --var-file vars/prod.tfvars\nAdded by Erick Calderin',
 'section': 'Module 6: Best practices',
 'question': 'How to destroy infrastructure created via GitHub Actions',
 'course': 'mlops-zoomcamp'}

In [29]:
class SearchTools:
    """FAQ search and append tools bound to one index and one course.

    Args:
        index: Object providing `search(...)` and `append(doc)`; stored as-is.
        course (str): Course name used both to filter searches and to tag
            newly added entries. Defaults to 'data-engineering-zoomcamp'.
    """

    def __init__(self, index, course: str = 'data-engineering-zoomcamp'):
        self.index = index
        # Single place for the course name, shared by `search` and `add_entry`.
        self.course = course

    # The method docstrings below are left word for word: they are reused as
    # tool descriptions when the methods are registered as tools.

    def search(self, query: str) -> List[Dict[str, Any]]:
        """
        Search the FAQ database for entries matching the given query.

        Args:
            query (str): Search query text to look up in the course FAQ.

        Returns:
            List[Dict[str, Any]]: A list of search result entries, each containing relevant metadata.
        """
        boost = {'question': 3.0, 'section': 0.5}

        results = self.index.search(
            query=query,
            filter_dict={'course': self.course},
            boost_dict=boost,
            num_results=5,
            output_ids=True
        )

        return results

    def add_entry(self, question: str, answer: str) -> None:
        """
        Add a new entry to the FAQ database.

        Args:
            question (str): The question to be added to the FAQ database.
            answer (str): The corresponding answer to the question.
        """
        doc = {
            'question': question,
            'text': answer,
            'section': 'user added',
            # Tag with the same course that `search` filters on.
            'course': self.course
        }
        self.index.append(doc)

In [30]:
search_tools = SearchTools(index)

agent_tools = Tools()
agent_tools.add_tools(search_tools)

In [31]:
agent_tools.get_tools()

[{'type': 'function',
  'name': 'add_entry',
  'description': 'Add a new entry to the FAQ database.\n\nArgs:\n    question (str): The question to be added to the FAQ database.\n    answer (str): The corresponding answer to the question.',
  'parameters': {'type': 'object',
   'properties': {'question': {'type': 'string',
     'description': 'question parameter'},
    'answer': {'type': 'string', 'description': 'answer parameter'}},
   'required': ['question', 'answer'],
   'additionalProperties': False}},
 {'type': 'function',
  'name': 'search',
  'description': 'Search the FAQ database for entries matching the given query.\n\nArgs:\n    query (str): Search query text to look up in the course FAQ.\n\nReturns:\n    List[Dict[str, Any]]: A list of search result entries, each containing relevant metadata.',
  'parameters': {'type': 'object',
   'properties': {'query': {'type': 'string',
     'description': 'query parameter'}},
   'required': ['query'],
   'additionalProperties': False}}]

In [32]:
question = input('You:')
print(question)

You: howdy


howdy


In [33]:
from agents import Agent, function_tool

In [34]:
# Wrap each bound SearchTools method with function_tool, one by one.
# NOTE: the next cell rebuilds `tools` with wrap_instance_methods, which
# replaces this hand-written list.
tools = [
    function_tool(search_tools.search),
    function_tool(search_tools.add_entry),
    # any further tool method has to be added to this list by hand
]

In [35]:
from toyaikit.tools import wrap_instance_methods
tools = wrap_instance_methods(function_tool, search_tools)

In [36]:
agent = Agent(
    name="faq_agent",
    instructions=developer_prompt,
    tools=tools,
    model='gpt-4o-mini'
)

In [37]:
from toyaikit.chat.runners import OpenAIAgentsSDKRunner

In [38]:
runner = OpenAIAgentsSDKRunner(
    chat_interface=chat_interface,
    agent=agent
)

In [39]:
await runner.run();

You: how to do well in module 1


You: stop


Chat ended.


In [None]:
Pydantic AI

In [40]:
# NOTE: this import rebinds the name `Agent`, which until now referred to the
# class imported from `agents`; from this cell on `Agent` is the pydantic_ai one.
from pydantic_ai import Agent

# The bound methods are collected directly here, without the function_tool
# wrapper used for the earlier agent.
tools = [
    search_tools.search,
    search_tools.add_entry
]

# Display the list to check that both methods were picked up.
tools

[<bound method SearchTools.search of <__main__.SearchTools object at 0x000002B603AA0D70>>,
 <bound method SearchTools.add_entry of <__main__.SearchTools object at 0x000002B603AA0D70>>]

In [42]:
# FAQ agent: same name, prompt and model as before, with the plain bound
# methods collected in `tools`.
agent = Agent(
    name="faq_agent",
    model='gpt-4o-mini',
    instructions=developer_prompt,
    tools=tools,
)

In [43]:
from toyaikit.chat.runners import PydanticAIRunner

In [44]:
runner = PydanticAIRunner(
    chat_interface=chat_interface,
    agent=agent
)

In [45]:
await runner.run()

You: module 1 success tips


You: module 1 docker terraform tips success


You: tips for success module 1 data engineering


You: module 1 homework assignments


You: how to succeed in data engineering course


You: stop


Chat ended.


In [None]:
MCP

In [None]:
agent <-> MCP server -> tool

In [46]:
from toyaikit.mcp import MCPClient, SubprocessMCPTransport

In [50]:
# NOTE(review): this cell fails with ModuleNotFoundError because the installed
# toyaikit has no `toyaikit.mcp.transports` module. The cells that follow use
# MCPClient and SubprocessMCPTransport, both imported from `toyaikit.mcp`
# in the cell above, so nothing from this cell is needed by them.
from toyaikit.mcp.client import MCPClient
from toyaikit.mcp.transports.sse import SSEMCPTransport

ModuleNotFoundError: No module named 'toyaikit.mcp.transports'

In [47]:
# Command line and working directory used to launch the MCP server as a subprocess.
workdir = "mcp_faq"
command = ["uv", "run", "python", "main.py"]

# The client drives the server through the subprocess transport.
client = MCPClient(
    transport=SubprocessMCPTransport(server_command=command, workdir=workdir)
)

In [51]:
client.start_server()

Started server with command: uv run python main.py


In [52]:
client.initialize()

Sending initialize request...
Initialize response: {'protocolVersion': '2024-11-05', 'capabilities': {'experimental': {}, 'prompts': {'listChanged': False}, 'resources': {'subscribe': False, 'listChanged': False}, 'tools': {'listChanged': True}}, 'serverInfo': {'name': 'Demo 🚀', 'version': '1.16.0'}}


{'protocolVersion': '2024-11-05',
 'capabilities': {'experimental': {},
  'prompts': {'listChanged': False},
  'resources': {'subscribe': False, 'listChanged': False},
  'tools': {'listChanged': True}},
 'serverInfo': {'name': 'Demo 🚀', 'version': '1.16.0'}}

In [53]:
client.initialized()

Sending initialized notification...
Handshake completed successfully


In [54]:
client.get_tools()

Retrieving available tools...
Available tools: ['add_entry', 'search']


[{'name': 'add_entry',
  'description': 'Add a new entry to the FAQ database.\n\nArgs:\n    question (str): The question to be added to the FAQ database.\n    answer (str): The corresponding answer to the question.',
  'inputSchema': {'properties': {'question': {'type': 'string'},
    'answer': {'type': 'string'}},
   'required': ['question', 'answer'],
   'type': 'object'},
  '_meta': {'_fastmcp': {'tags': []}}},
 {'name': 'search',
  'description': 'Search the FAQ database for entries matching the given query.\n\nArgs:\n    query (str): Search query text to look up in the course FAQ.\n\nReturns:\n    List[Dict[str, Any]]: A list of search result entries, each containing relevant metadata.',
  'inputSchema': {'properties': {'query': {'type': 'string'}},
   'required': ['query'],
   'type': 'object'},
  'outputSchema': {'properties': {'result': {'items': {'additionalProperties': True,
      'type': 'object'},
     'type': 'array'}},
   'required': ['result'],
   'type': 'object',
   'x

In [55]:
result = client.call_tool('search', {'query': 'how do I run docker?'})

Calling tool 'search' with arguments: {'query': 'how do I run docker?'}


In [56]:
from toyaikit.mcp import MCPTools

In [57]:
mcp_tools = MCPTools(client)

In [58]:
runner = OpenAIResponsesRunner(
    tools=mcp_tools,
    developer_prompt=developer_prompt,
    chat_interface=chat_interface,
    llm_client=OpenAIClient(model='gpt-4o-mini')
)

In [59]:
runner.run();


You: install Kafka


Retrieving available tools...
Available tools: ['add_entry', 'search']
Calling tool 'search' with arguments: {'query': 'install Kafka'}


Calling tool 'search' with arguments: {'query': 'install Apache Kafka'}


You: Kafka installation guide


Calling tool 'add_entry' with arguments: {'question': 'Kafka installation guide', 'answer': '### Apache Kafka Installation Guide\n\n#### Prerequisites:\n1. **Java Installation**:\n   - Kafka requires Java 8 or 11. Check your Java version with:\n     ```bash\n     java --version\n     ```\n   - If you need to set the default Java version, you can run:\n     ```bash\n     export JAVA_HOME=$(/usr/libexec/java_home -v 11.0.x)  # Use the Java version you have installed\n     ```\n\n#### Download Kafka:\n2. **Download the Kafka Distribution**:\n   - Visit the [Apache Kafka downloads page](https://kafka.apache.org/downloads) to get the latest version.\n   - Use the following command to download (update the URL according to the latest version):\n     ```bash\n     wget https://downloads.apache.org/kafka/2.8.0/kafka_2.13-2.8.0.tgz\n     ```\n\n#### Extract Kafka:\n3. **Unzip the Downloaded Files**:\n   - After downloading, extract the contents:\n     ```bash\n     tar -xzf kafka_2.13-2.8.0.tgz\

IndexError: list index out of range

In [60]:
from pydantic_ai.mcp import MCPServerSSE

# pydantic_ai MCP server handle pointing at an SSE endpoint on localhost:8000.
# NOTE(review): no cell shown here starts a server on that port (the earlier
# MCP server is launched as a subprocess) — presumably an SSE server has to be
# started separately before the agent is run; confirm.
faq_mcp_client = MCPServerSSE(
    url='http://localhost:8000/sse'
)

In [62]:
agent = Agent(
    name="faq_agent",
    instructions=developer_prompt,
    model='gpt-4o-mini',
    toolsets=[faq_mcp_client]
)

In [63]:
runner = PydanticAIRunner(
    chat_interface=chat_interface,
    agent=agent
)

In [65]:
await runner.run();


You: how do i install kafka python


  + Exception Group Traceback (most recent call last):
  |   File "C:\Users\ryahj\CascadeProjects\ai-bootcamp\ai-bootcamp\.venv\Lib\site-packages\IPython\core\interactiveshell.py", line 3697, in run_code
  |     await eval(code_obj, self.user_global_ns, self.user_ns)
  |   File "C:\Users\ryahj\AppData\Local\Temp\ipykernel_28068\411691516.py", line 1, in <module>
  |     await runner.run();
  |     ^^^^^^^^^^^^^^^^^^
  |   File "C:\Users\ryahj\CascadeProjects\ai-bootcamp\ai-bootcamp\.venv\Lib\site-packages\toyaikit\chat\runners.py", line 324, in run
  |     result = await self.agent.run(
  |              ^^^^^^^^^^^^^^^^^^^^^
  |         user_prompt=user_input, message_history=message_history
  |         ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  |     )
  |     ^
  |   File "C:\Users\ryahj\CascadeProjects\ai-bootcamp\ai-bootcamp\.venv\Lib\site-packages\pydantic_ai\agent\abstract.py", line 222, in run
  |     async with self.iter(
  |                ~~~~~~~~~^
  |        