In [1]:
# Install the notebook's third-party dependencies with the %pip magic.
# NOTE(review): versions are unpinned, so a fresh install may pull newer releases than
# the ones this notebook was last run with - consider pinning them for reproducibility.
%pip install --quiet langchain langchain_community langgraph langchain_openai langgraph-supervisor tavily-python httpx


[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m23.2.1[0m[39;49m -> [0m[32;49m25.2[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m
Note: you may need to restart the kernel to use updated packages.


In [16]:
# Set API Keys
# Keys already present in the environment are kept as-is; a missing key is requested
# interactively so that no secret (or placeholder) is ever written into the notebook.
import os
from getpass import getpass

if not os.environ.get("OPENAI_API_KEY"):
    os.environ["OPENAI_API_KEY"] = getpass("Enter your OpenAI API key: ")

if not os.environ.get("TAVILY_API_KEY"):
    os.environ["TAVILY_API_KEY"] = getpass("Enter your Tavily API key: ")

In [3]:
import httpx
import json
import os

from typing import Dict, Any
from datetime import datetime

from tavily import TavilyClient
from langgraph.prebuilt import create_react_agent
from langchain_core.tools import tool
from langchain_core.prompts import ChatPromptTemplate
from langchain_openai import ChatOpenAI

In [4]:
# Shared clients used by the tools below; both take their credentials from the environment.
llm = ChatOpenAI(
    model="gpt-4o-mini",
    temperature=0.1,
    api_key=os.environ["OPENAI_API_KEY"],
)
tavily_client = TavilyClient(
    api_key=os.environ["TAVILY_API_KEY"],
)


In [5]:
@tool
def extract_documentation_from_website(url: str) -> str:
    """
    Extract documentation content from a website URL.
    This tool helps the agent to extract documentation content from a website URL and learn about the API endpoints and parameters from the website.

    Args:
        url (str): The URL of the documentation page to extract

    Returns:
        str: The extracted page text, or a message explaining why nothing could be extracted
    """
    try:
        content_response = tavily_client.extract(urls=url)

        if not isinstance(content_response, dict):
            # Unknown response shape: keep the permissive behaviour and return it verbatim.
            if content_response:
                return f"Documentation extracted from {url}:\n\n{content_response}"
            return f"Could not extract content from {url}. Please check the URL and try again."

        # The extract response is a dict holding a "results" list (page text under
        # "raw_content") and a "failed_results" list. The dict itself is never empty,
        # so success has to be decided from the page text, not from len(response).
        pages = content_response.get("results") or []
        failures = content_response.get("failed_results") or []

        page_texts = [
            page["raw_content"]
            for page in pages
            if isinstance(page, dict) and page.get("raw_content")
        ]

        if page_texts:
            joined_text = "\n\n".join(page_texts)
            return f"Documentation extracted from {url}:\n\n{joined_text}"

        message = f"Could not extract content from {url}. Please check the URL and try again."
        if failures:
            # Surface the provider's failure details so the agent can react to them.
            message += f" Details: {failures}"
        return message

    except Exception as e:
        return f"Error extracting documentation from {url}: {str(e)}"

In [6]:
@tool
def create_documentation_summary(content: str) -> str:
    """
    Create a summary of the documentation content.
    This tool helps the agent to create a summary of the documentation content to learn about the API endpoints and parameters from the website.
    It cleans the content to remove all the noise and keep only the most important information.
    Focus on the API endpoints and parameters usage.

    Args:
        content (str): The raw documentation text to summarize

    Returns:
        str: A JSON string with an "endpoints" list (name, description, http_request, url, attributes),
             or an error message if the summary could not be produced
    """
    # with_structured_output needs a real JSON Schema (identified by its top-level
    # "title"), not an example instance of the desired output.
    attribute_schema = {
        "type": "object",
        "properties": {
            "name": {"type": "string", "description": "Name of the parameter or attribute"},
            "description": {"type": "string", "description": "What the parameter or attribute means and how to use it"},
        },
        "required": ["name", "description"],
    }
    endpoint_schema = {
        "type": "object",
        "properties": {
            "name": {"type": "string", "description": "Name of the endpoint"},
            "description": {"type": "string", "description": "What the endpoint returns or does"},
            "http_request": {"type": "string", "description": "HTTP method of the endpoint, e.g. GET, POST, PUT or DELETE"},
            "url": {"type": "string", "description": "Full URL of the endpoint"},
            "attributes": {
                "type": "array",
                "description": "Parameters / attributes accepted or returned by the endpoint",
                "items": attribute_schema,
            },
        },
        "required": ["name", "description", "http_request", "url", "attributes"],
    }
    summary_schema = {
        "title": "ApiDocumentationSummary",
        "description": "Structured summary of the API endpoints described in a piece of documentation.",
        "type": "object",
        "properties": {
            "endpoints": {
                "type": "array",
                "description": "Every API endpoint found in the documentation",
                "items": endpoint_schema,
            },
        },
        "required": ["endpoints"],
    }

    # Plain (non f-) string: the text goes through ChatPromptTemplate, where only
    # {content} in the user message must be treated as a template variable.
    system_prompt_summary_agent = """
    <CONTEXT>
    You are an expert in the creation of API documentation summaries.
    You are given a raw documentation content and you need to create a summary of the documentation that will be used by another agent to learn about the API endpoints and parameters 
    that are necessary to make API calls.
    </CONTEXT>
    
    Return the summary using the provided structured output schema: a list of endpoints, each with its name, description, HTTP method, URL and attributes.
    Do not lose any information and be very specific, your summary must be based only on the content provided.
    """

    prompt = ChatPromptTemplate.from_messages([
        ("system", system_prompt_summary_agent),
        ("user", "Here is the content to be summarized: {content}")
    ])

    try:
        structured_llm = llm.with_structured_output(summary_schema, method="function_calling")
        response = structured_llm.invoke(prompt.invoke({"content": content}))
    except Exception as e:
        return f"Error creating documentation summary: {str(e)}"

    if response is None:
        return "Error creating documentation summary: the model returned no structured output."
    if isinstance(response, str):
        return response

    # Serialize so the declared `str` return type holds and the result can be
    # stored and parsed later as JSON text.
    return json.dumps(response, indent=2, ensure_ascii=False)

In [7]:
@tool
def fetch_api_data(endpoint: str, parameters: Dict[str, Any] = None) -> str:
    """
    Fetch data from an API endpoint using the learned parameters.
    This tool performs HTTP requests to API endpoints after the agent
    has learned how to use them from documentation.
    
    Args:
        endpoint (str): The API endpoint URL to fetch data from
        parameters (Dict[str, Any]): Query parameters to include in the request
        
    Returns:
        str: The API response data as a formatted string
    """
    try:
        # Let httpx build the query string: keys and values get URL-encoded, and the
        # parameters are merged into any query the endpoint already carries instead
        # of appending a second "?".
        request_url = httpx.URL(endpoint)
        if parameters:
            request_url = request_url.copy_merge_params(parameters)
        endpoint = str(request_url)

        # Make the HTTP request
        with httpx.Client() as client:
            response = client.get(request_url, timeout=30.0)
            response.raise_for_status()

            try:
                data = response.json()
            except ValueError:
                # Not JSON (json.JSONDecodeError is a ValueError): return the raw body.
                return f"API Response from {endpoint}:\n\n{response.text}"
            return f"API Response from {endpoint}:\n\n{json.dumps(data, indent=2)}"

    except httpx.HTTPStatusError as e:
        return f"HTTP Error {e.response.status_code} when fetching {endpoint}: {e.response.text}"
    except httpx.TimeoutException:
        return f"Timeout error when fetching {endpoint}. The request took too long."
    except Exception as e:
        return f"Error fetching data from {endpoint}: {str(e)}"


In [8]:
# Path of the JSON file used as the persistent memory scratchpad. It is a relative
# path, so it is resolved against the current working directory of the kernel.
MEMORY_FILE = "memory_scratchpad_docs.json"

def initialize_memory_file():
    """Create the memory file with an empty documentation store, unless it already exists."""
    # Guard clause: an existing file is left untouched.
    if os.path.exists(MEMORY_FILE):
        return

    empty_store = {
        "created_at": datetime.now().isoformat(),
        "last_updated": datetime.now().isoformat(),
        "documentations": {},
    }
    with open(MEMORY_FILE, mode='w', encoding='utf-8') as memory_handle:
        json.dump(empty_store, memory_handle, indent=2, ensure_ascii=False)

    print(f"Initialized memory file: {MEMORY_FILE}")

def load_memory() -> Dict[str, Any]:
    """
    Load the memory file.

    The file is created first when it does not exist yet. Any failure to read or
    parse it is printed and answered with an empty in-memory store, so callers
    always receive a dict that contains a "documentations" dict.

    Returns:
        Dict[str, Any]: The stored memory, or {"documentations": {}} as a fallback
    """
    try:
        # Create the file up front instead of recursing after a FileNotFoundError.
        if not os.path.exists(MEMORY_FILE):
            initialize_memory_file()
        with open(MEMORY_FILE, 'r', encoding='utf-8') as f:
            memory = json.load(f)
    except Exception as e:
        print(f"Error loading memory: {e}")
        return {"documentations": {}}

    # Valid JSON can still have the wrong shape (e.g. a list, or a dict without the
    # "documentations" mapping); normalise it so callers can index it safely.
    if not isinstance(memory, dict):
        print(f"Error loading memory: unexpected content in {MEMORY_FILE}")
        return {"documentations": {}}
    if not isinstance(memory.get("documentations"), dict):
        memory["documentations"] = {}

    return memory

def save_memory(memory_data: Dict[str, Any]):
    """
    Save the memory file.

    Stamps `memory_data["last_updated"]` with the current time (the dict is modified
    in place) and writes it as JSON. The data is written to a temporary file next to
    the memory file and then moved over it, so a failure while writing cannot leave
    a truncated memory file behind.

    Args:
        memory_data (Dict[str, Any]): The complete memory content to persist

    Returns:
        bool: True if the file was written, False if an error occurred (the error is printed)
    """
    temp_path = f"{MEMORY_FILE}.tmp"
    try:
        memory_data["last_updated"] = datetime.now().isoformat()
        with open(temp_path, 'w', encoding='utf-8') as f:
            json.dump(memory_data, f, indent=2, ensure_ascii=False)
        # os.replace swaps the finished file into place in a single step.
        os.replace(temp_path, MEMORY_FILE)
        print(f"Memory saved to {MEMORY_FILE}")
        return True
    except Exception as e:
        print(f"Error saving memory: {e}")
        # Best-effort cleanup of a partially written temporary file.
        try:
            if os.path.exists(temp_path):
                os.remove(temp_path)
        except OSError:
            pass
        return False

# Initialize memory file (only creates it when it does not exist yet, so re-running this cell keeps stored data)
initialize_memory_file()

Initialized memory file: memory_scratchpad_docs.json


In [9]:
@tool
def look_memory(website_url: str = None, api_name: str = None) -> str:
    """
    Look up existing API documentation summaries in the memory scratchpad.
    This tool checks if we already have documentation for a specific website or API,
    avoiding the need to re-extract and re-summarize documentation.
    
    Args:
        website_url (str, optional): The URL of the website to look for
        api_name (str, optional): The name of the API to look for
        
    Returns:
        str: Information about what documentation summaries are available in memory
    """
    summary_preview_chars = 200

    try:
        memory = load_memory()
        documentations = memory.get("documentations", {})
        
        if not documentations:
            return "Memory scratchpad is empty. No API documentation summaries found."
        
        # If specific URL or API name provided, search for it
        if website_url or api_name:
            # Matching is a case-insensitive substring test. The trailing "/" is dropped
            # from the queried URL so "https://host/" still matches a stored "https://host".
            url_query = website_url.lower().rstrip("/") if website_url else ""
            name_query = api_name.lower() if api_name else ""

            found_docs = []
            for doc_data in documentations.values():
                doc_url = str(doc_data.get("source_url") or "").lower()
                doc_name = str(doc_data.get("api_name") or "").lower()
                if (url_query and url_query in doc_url) or (name_query and name_query in doc_name):
                    found_docs.append(doc_data)
            
            if found_docs:
                result = f"Found {len(found_docs)} documentation summary(ies) in memory:\n\n"
                for doc in found_docs:
                    summary_text = str(doc.get("summary", "No summary available"))
                    preview = summary_text[:summary_preview_chars]
                    # Only mark the preview as cut when something was actually cut.
                    if len(summary_text) > summary_preview_chars:
                        preview += "..."
                    result += f"**{doc.get('api_name', 'Unknown')}**\n"
                    result += f"   URL: {doc.get('source_url', 'Unknown')}\n"
                    result += f"   Created: {doc.get('created_at', 'Unknown')}\n"
                    result += f"   Summary: {preview}\n\n"
                return result

            # Name every criterion that was used, separated so the two do not run together.
            criteria = []
            if website_url:
                criteria.append(f"website: {website_url}")
            if api_name:
                criteria.append(f"API: {api_name}")
            return f"No documentation found for {', '.join(criteria)}"
        
        # If no specific search, return overview of all documentation
        result = f"Memory scratchpad contains {len(documentations)} API documentation summary(ies):\n\n"
        for doc_data in documentations.values():
            result += f"**{doc_data.get('api_name', 'Unknown API')}**\n"
            result += f"   URL: {doc_data.get('source_url', 'Unknown')}\n"
            result += f"   Created: {doc_data.get('created_at', 'Unknown')}\n\n"
        
        return result
        
    except Exception as e:
        return f"Error looking up memory: {str(e)}"

@tool
def write_memory(api_name: str, source_url: str, summary: str) -> str:
    """
    Write a new API documentation summary to the memory scratchpad.
    This tool saves structured documentation summaries so they can be reused
    without re-extracting and re-summarizing the same documentation.
    
    Args:
        api_name (str): The name of the API documentation
        source_url (str): The URL of the documentation source
        summary (str): The structured JSON summary of the API documentation
        
    Returns:
        str: Confirmation message about the saved documentation
    """
    try:
        memory = load_memory()
        now = datetime.now()
        
        # Create a unique ID for this documentation
        doc_id = f"{api_name.lower().replace(' ', '_')}_{int(now.timestamp())}"

        # The endpoint count is informational only: a summary that is not valid JSON
        # (or has no "endpoints" list) is still saved, with a count of 0.
        endpoints_count = 0
        try:
            parsed_summary = json.loads(summary)
        except (TypeError, ValueError):
            parsed_summary = None
        if isinstance(parsed_summary, dict) and isinstance(parsed_summary.get("endpoints"), list):
            endpoints_count = len(parsed_summary["endpoints"])
        
        # Create the documentation entry
        doc_entry = {
            "api_name": api_name,
            "source_url": source_url,
            "summary": summary,
            "created_at": now.isoformat(),
            "endpoints_count": endpoints_count
        }
        
        # Add to memory, creating the store if the loaded data does not have a usable one
        if not isinstance(memory.get("documentations"), dict):
            memory["documentations"] = {}
        memory["documentations"][doc_id] = doc_entry
        
        # Save memory
        save_memory(memory)
        
        return f"Successfully saved documentation for **{api_name}** to memory scratchpad!\n\n" \
               f"**Details:**\n" \
               f"   API: {api_name}\n" \
               f"   Source: {source_url}\n" \
               f"   Endpoints: {doc_entry['endpoints_count']}\n" \
               f"   Saved at: {doc_entry['created_at']}\n\n" \
               f"This documentation can now be reused for future API calls without re-extraction."
               
    except Exception as e:
        return f"Error writing to memory: {str(e)}"

@tool
def get_memory_documentation(api_name: str = None, website_url: str = None) -> str:
    """
    Retrieve specific API documentation from memory for use in API calls.
    This tool gets the full documentation summary from memory to provide context
    for making API calls without re-extracting documentation.
    When several stored entries match, the most recently created one is returned.
    
    Args:
        api_name (str, optional): The name of the API to retrieve
        website_url (str, optional): The URL of the website to retrieve
        
    Returns:
        str: The full documentation summary from memory
    """
    try:
        memory = load_memory()
        documentations = memory.get("documentations", {})
        
        if not documentations:
            return "No documentation found in memory."

        # Case-insensitive substring matching; the trailing "/" of the queried URL is
        # dropped so "https://host/" still matches a stored "https://host".
        name_query = api_name.lower() if api_name else ""
        url_query = website_url.lower().rstrip("/") if website_url else ""

        # Find matching documentation
        matches = []
        for doc_data in documentations.values():
            doc_url = str(doc_data.get("source_url") or "").lower()
            doc_name = str(doc_data.get("api_name") or "").lower()
            if (name_query and name_query in doc_name) or (url_query and url_query in doc_url):
                matches.append(doc_data)

        if matches:
            # "created_at" holds ISO-8601 timestamps, which order chronologically as
            # strings, so the maximum is the newest entry.
            latest = max(matches, key=lambda doc: str(doc.get("created_at") or ""))
            return f"**Retrieved from Memory:** {latest.get('api_name', 'Unknown')}\n" \
                   f"Source: {latest.get('source_url', 'Unknown')}\n" \
                   f"Created: {latest.get('created_at', 'Unknown')}\n\n" \
                   f"**Documentation Summary:**\n{latest.get('summary', 'No summary available')}"

        # Name every criterion that was used, separated so the two do not run together.
        criteria = []
        if api_name:
            criteria.append(f"API: {api_name}")
        if website_url:
            criteria.append(f"Website: {website_url}")
        return f"No documentation found for {', '.join(criteria)}"
        
    except Exception as e:
        return f"Error retrieving documentation from memory: {str(e)}"


In [10]:
# Create the enhanced agent with memory system
# Every tool the agent may call; the order here is the order printed below.
all_tools = [
    extract_documentation_from_website, 
    create_documentation_summary,
    fetch_api_data,
    look_memory,
    write_memory,
    get_memory_documentation
]

# Enhanced prompt that includes memory-aware behavior
system_prompt_agent = """You are a specialized API assistant with long-term memory capabilities that helps users learn and interact with APIs efficiently.

## Your Memory System:
You have access to a persistent memory scratchpad that stores API documentation summaries. This allows you to:
- Avoid re-extracting documentation you've already processed
- Provide faster responses by using cached knowledge
- Build up a knowledge base of API documentation over time

## Your Workflow:
1. **ALWAYS start by checking memory** using `look_memory` to see if you already have documentation for the requested API
2. **If documentation exists in memory**: Use `get_memory_documentation` to retrieve it and proceed directly to API calls
3. **If no documentation in memory**: 
   - Extract documentation using `extract_documentation_from_website`
   - Summarize it using `create_documentation_summary` 
   - Save it to memory using `write_memory`

## Memory Management:
- Use `look_memory(website_url="...")` or `look_memory(api_name="...")` to check for existing documentation
- Use `write_memory(api_name="...", source_url="...", summary="...")` to save new documentation
- Use `get_memory_documentation(api_name="...")` to retrieve full documentation from memory

## Key Behaviors:
- **Efficiency First**: Always check memory before extracting new documentation
- **Memory Building**: Save all new documentation summaries to build your knowledge base
- **Context Awareness**: Use the retrieved summary documentation to provide information on how to use the API
- **User Communication**: Always explain what you're doing and whether you're using cached or new information
- **Tool Usage**: You can call the tools as many times as you want to get the information you need

## For OpenF1 API specifically:
- Check memory for "OpenF1" or "openf1.org" documentation first
- If not found, extract from https://openf1.org/
- Save the structured summary to memory for future use

Be helpful, efficient, and always leverage your memory system to provide the best experience."""

# Create the enhanced agent
agent = create_react_agent(
    model=llm, 
    tools=all_tools, 
    prompt=system_prompt_agent
)

print("Available tools:")
# The loop variable must not be called `tool`: that would rebind the `@tool` decorator
# imported at the top of the notebook and break any tool cell that is re-run afterwards.
for i, registered_tool in enumerate(all_tools, 1):
    first_sentence = registered_tool.description.split('.')[0] if registered_tool.description else 'No description'
    print(f"   {i}. {registered_tool.name}: {first_sentence}")


Available tools:
   1. extract_documentation_from_website: Extract documentation content from a website URL
   2. create_documentation_summary: Create a summary of the documentation content
   3. fetch_api_data: Fetch data from an API endpoint using the learned parameters
   4. look_memory: Look up existing API documentation summaries in the memory scratchpad
   5. write_memory: Write a new API documentation summary to the memory scratchpad
   6. get_memory_documentation: Retrieve specific API documentation from memory for use in API calls


In [11]:
# Test the memory system
def test_agent_with_memory(query: str):
    """Send a single user query to the agent and print the last message of its reply."""

    def show_last(messages):
        # Print the content of the final message, or its string form when it has none.
        final_message = messages[-1]
        if hasattr(final_message, 'content'):
            print(final_message.content)
        else:
            print(str(final_message))

    print(f"[AGENT] Query: {query}")
    print("-" * 80)

    payload = {"messages": [{"role": "user", "content": query}]}
    response = agent.invoke(payload)

    print("Agent Response:")
    # LangGraph normally answers with a dict holding a "messages" list; a bare list
    # of messages is accepted as well, anything else is reported as unexpected.
    if isinstance(response, dict) and "messages" in response:
        show_last(response["messages"])
    elif isinstance(response, list):
        show_last(response)
    else:
        print(f"Unexpected response format: {type(response)}")
        print(response)
    print("\n" + "="*80 + "\n")

In [12]:
# Test 1: ask the agent what is stored before anything has been saved
print("Test 1: Checking empty memory...")
empty_memory_query = "Check what API documentation I have in memory"
test_agent_with_memory(empty_memory_query)

Test 1: Checking empty memory...
[AGENT] Query: Check what API documentation I have in memory
--------------------------------------------------------------------------------
Agent Response:
It looks like there is no API documentation stored in memory at the moment. If you have a specific API in mind that you would like to learn about, please let me know, and I can extract the documentation for you!




In [13]:
# Test 2: have the agent learn the OpenF1 API and store it in memory
print("Test 2: Learning OpenF1 API and saving to memory...")
openf1_learning_query = (
    "\n"
    "I want to learn about the OpenF1 API. Please:\n"
    "1. Check if we already have documentation in memory\n"
    "2. If not, extract documentation from https://openf1.org/\n"
)
test_agent_with_memory(openf1_learning_query)


Test 2: Learning OpenF1 API and saving to memory...
[AGENT] Query: 
I want to learn about the OpenF1 API. Please:
1. Check if we already have documentation in memory
2. If not, extract documentation from https://openf1.org/

--------------------------------------------------------------------------------
Memory saved to memory_scratchpad_docs.json
Agent Response:
I have successfully extracted and saved the documentation for the OpenF1 API to memory. Here’s a brief overview:

### OpenF1 API Overview
- **Description**: OpenF1 is an open-source API that provides real-time and historical Formula 1 data, including lap timings, car telemetry, and radio communications. Historical data is freely accessible, while real-time data requires a paid account.
- **Data Formats**: JSON and CSV.

### Key API Endpoints
1. **Car Data**: Information about each car.
2. **Drivers**: Information about drivers for each session.
3. **Intervals**: Real-time interval data between drivers.
4. **Laps**: Detailed 

In [14]:
# Test 3: confirm the stored documentation is reused instead of being extracted again
print("Test 3: Testing memory reuse...")
lap_data_query = (
    "\n"
    "Now I want to get lap data from OpenF1 API. \n"
    "Please check memory first to see if we already have the documentation, \n"
    "and if so, use that instead of re-extracting it.\n"
)
test_agent_with_memory(lap_data_query)

Test 3: Testing memory reuse...
[AGENT] Query: 
Now I want to get lap data from OpenF1 API. 
Please check memory first to see if we already have the documentation, 
and if so, use that instead of re-extracting it.

--------------------------------------------------------------------------------
Agent Response:
I found the documentation for the OpenF1 API in memory. Here's a summary of the relevant information regarding lap data:

### OpenF1 API Overview
OpenF1 is an open-source API that provides real-time and historical Formula 1 data, including lap timings, car telemetry, and more. 

### Endpoint for Lap Data
- **Laps**: Detailed information about individual laps.
  - **Request**: `GET https://api.openf1.org/v1/laps`
  - **Attributes**:
    - `date_start`: Start date of the lap
    - `driver_number`: Number of the driver
    - `duration_sector_1`: Duration of the first sector
    - `duration_sector_2`: Duration of the second sector
    - `duration_sector_3`: Duration of the third sect

In [15]:
# Test 4: list everything currently held in the memory scratchpad
print("Test 4: Checking what's in memory...")
memory_overview_query = "Show me what API documentation summaries are stored in my memory scratchpad"
test_agent_with_memory(memory_overview_query)

Test 4: Checking what's in memory...
[AGENT] Query: Show me what API documentation summaries are stored in my memory scratchpad
--------------------------------------------------------------------------------
Agent Response:
In your memory scratchpad, there is one API documentation summary stored:

- **OpenF1**
  - **URL**: [https://openf1.org/](https://openf1.org/)
  - **Created**: September 16, 2025

If you need to know more about the OpenF1 API or want to make a specific API call, just let me know!


