In [None]:
# Install the notebook's dependencies with the %pip magic.
# NOTE(review): no versions are pinned, so a fresh install may resolve to
# releases that differ from the ones this notebook was written against.
%pip install --quiet langchain langchain_community langgraph langchain_openai
%pip install --quiet langchain-experimental langgraph-supervisor
%pip install --quiet tavily-python httpx pandas tabulate matplotlib seaborn

In [None]:
# Set API Keys
# Keys are never written into the notebook: a value that is already exported in
# the environment is kept as-is, and a missing one is requested interactively
# (getpass does not echo the input, so it does not end up in the cell output).
import os
from getpass import getpass

for _key_name in ("OPENAI_API_KEY", "TAVILY_API_KEY"):
    if not os.environ.get(_key_name):
        os.environ[_key_name] = getpass(f"Enter {_key_name}: ")

In [None]:
import httpx
import json
import os

import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

from typing import Dict, Any
from datetime import datetime

# Simple logging setup
import logging
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
logger = logging.getLogger(__name__)

from tavily import TavilyClient
from langgraph.prebuilt import create_react_agent
from langgraph.checkpoint.memory import InMemorySaver

from langchain_openai import ChatOpenAI
from langchain_core.tools import tool
from langchain_core.prompts import ChatPromptTemplate
from langchain.agents.agent_types import AgentType
from langchain_experimental.agents.agent_toolkits import create_pandas_dataframe_agent

---

### **Set Clients**

In [None]:
# Chat model shared by both agents and by the summary tool.
llm = ChatOpenAI(
    model="gpt-4o-mini",
    temperature=0.1,
    api_key=os.environ["OPENAI_API_KEY"],
)

# Tavily client used by the documentation-extraction tool.
tavily_client = TavilyClient(
    api_key=os.environ["TAVILY_API_KEY"],
)

---

### **Set Utils for Scratchpad Memory**

In [None]:
# Scratchpad Memory Utils
MEMORY_FILE = "memory_scratchpad_docs.json"

def initialize_memory_file():
    """Create the scratchpad JSON file with an empty skeleton unless it already exists."""
    if os.path.exists(MEMORY_FILE):
        return

    # The two timestamps come from separate now() calls.
    skeleton = {
        "created_at": datetime.now().isoformat(),
        "last_updated": datetime.now().isoformat(),
        "documentations": {},
    }
    with open(MEMORY_FILE, 'w', encoding='utf-8') as handle:
        json.dump(skeleton, handle, indent=2, ensure_ascii=False)
    print(f"Initialized memory file: {MEMORY_FILE}")

def load_memory() -> Dict[str, Any]:
    """Return the parsed scratchpad file.

    A missing file is created first and then read back. Any other failure is
    printed and an empty ``{"documentations": {}}`` structure is returned.
    """
    try:
        with open(MEMORY_FILE, 'r', encoding='utf-8') as handle:
            contents = json.load(handle)
    except FileNotFoundError:
        # First use: write the empty skeleton, then retry the read.
        initialize_memory_file()
        return load_memory()
    except Exception as exc:
        print(f"Error loading memory: {exc}")
        return {"documentations": {}}
    return contents

def save_memory(memory_data: Dict[str, Any]):
    """Stamp ``last_updated`` on ``memory_data`` (in place) and write it to the scratchpad file.

    Failures are printed instead of raised, so the caller gets no error signal.
    """
    try:
        memory_data.update({"last_updated": datetime.now().isoformat()})
        with open(MEMORY_FILE, 'w', encoding='utf-8') as handle:
            json.dump(memory_data, handle, indent=2, ensure_ascii=False)
    except Exception as exc:
        print(f"Error saving memory: {exc}")
    else:
        print(f"Memory saved to {MEMORY_FILE}")

# Initialize memory file
# Runs when the cell executes; it does nothing if the file already exists.
initialize_memory_file()

---

### **Set Utils for CSV Memory**

In [None]:
# CSV Memory Utils
CSV_MEMORY_FILE = "csv_memory.json"

def initialize_csv_memory_file():
    """Create the CSV memory JSON file with an empty skeleton unless it already exists."""
    if os.path.exists(CSV_MEMORY_FILE):
        return

    # The two timestamps come from separate now() calls.
    skeleton = {
        "created_at": datetime.now().isoformat(),
        "last_updated": datetime.now().isoformat(),
        "csv_data": {},
    }
    with open(CSV_MEMORY_FILE, 'w', encoding='utf-8') as handle:
        json.dump(skeleton, handle, indent=2, ensure_ascii=False)
    print(f"Initialized CSV memory file: {CSV_MEMORY_FILE}")

def load_csv_memory() -> Dict[str, Any]:
    """Return the parsed CSV memory file.

    A missing file is created first and then read back. Any other failure is
    printed and an empty ``{"csv_data": {}}`` structure is returned.
    """
    try:
        with open(CSV_MEMORY_FILE, 'r', encoding='utf-8') as handle:
            contents = json.load(handle)
    except FileNotFoundError:
        # First use: write the empty skeleton, then retry the read.
        initialize_csv_memory_file()
        return load_csv_memory()
    except Exception as exc:
        print(f"Error loading CSV memory: {exc}")
        return {"csv_data": {}}
    return contents

def save_csv_memory(csv_memory_data: Dict[str, Any]):
    """Stamp ``last_updated`` on ``csv_memory_data`` (in place) and write it to the CSV memory file.

    Failures are printed instead of raised, so the caller gets no error signal.
    """
    try:
        csv_memory_data.update({"last_updated": datetime.now().isoformat()})
        with open(CSV_MEMORY_FILE, 'w', encoding='utf-8') as handle:
            json.dump(csv_memory_data, handle, indent=2, ensure_ascii=False)
    except Exception as exc:
        print(f"Error saving CSV memory: {exc}")
    else:
        print(f"CSV memory saved to {CSV_MEMORY_FILE}")

def store_csv_data(csv_name: str, csv_content: str, source: str = "OpenF1"):
    """Save one CSV payload under ``csv_name`` in the CSV memory file.

    The entry records the raw text, its source label, the storage time and the
    character count. An existing entry with the same name is replaced.
    """
    csv_memory = load_csv_memory()
    entry = {
        "content": csv_content,
        "source": source,
        "stored_at": datetime.now().isoformat(),
        "size": len(csv_content),
    }
    csv_memory["csv_data"][csv_name] = entry
    save_csv_memory(csv_memory)
    print(f"CSV data stored: {csv_name} ({len(csv_content)} characters)")

def get_csv_data(csv_name: str) -> str:
    """Return the stored CSV text for ``csv_name``; returns ``None`` when no such dataset is stored."""
    datasets = load_csv_memory().get("csv_data", {})
    if csv_name not in datasets:
        return None
    return datasets[csv_name]["content"]

def list_available_csvs() -> Dict[str, Any]:
    """Describe every stored CSV dataset without returning its content.

    Returns ``{"message": ...}`` when nothing is stored, otherwise
    ``{"available_datasets": {name: {"source", "stored_at", "size"}}}``.
    """
    datasets = load_csv_memory().get("csv_data", {})
    if not datasets:
        return {"message": "No CSV datasets available"}

    return {
        "available_datasets": {
            name: {
                "source": info["source"],
                "stored_at": info["stored_at"],
                "size": info["size"],
            }
            for name, info in datasets.items()
        }
    }

# Helper function for loading DataFrames
def load_dataframe_from_csv(csv_name: str) -> pd.DataFrame:
    """Parse the stored CSV text for ``csv_name`` into a DataFrame.

    Raises:
        ValueError: if no dataset with that name is stored.
    """
    from io import StringIO

    csv_content = get_csv_data(csv_name)
    if csv_content is None:
        raise ValueError(f"CSV '{csv_name}' not found in persistent storage")

    df = pd.read_csv(StringIO(csv_content))
    row_count, column_count = df.shape[0], df.shape[1]
    print(f"DataFrame loaded: {csv_name} ({row_count} rows, {column_count} columns)")
    return df

# Initialize CSV memory file
# Runs when the cell executes; it does nothing if the file already exists.
initialize_csv_memory_file()


---

### **Tools for Context Agent**

In [None]:
@tool
def extract_documentation_from_website(url: str) -> str:
    """
    Extract documentation content from a given website URL.
    This tool extracts raw documentation text from a website, 
    which can later be processed or analyzed to identify API endpoints, 
    parameters, and other technical details.
    """
    # NOTE: the docstring above is the tool description the agent sees, so it is left unchanged.
    try:
        content_response = tavily_client.extract(urls=url)

        # The Tavily SDK answers with a dict such as
        # {"results": [{"url": ..., "raw_content": ...}], "failed_results": [...], ...}.
        # A dict with keys always has len() > 0, so a failed extraction can only be
        # detected by looking at the "results" list itself.
        if isinstance(content_response, dict):
            pages = content_response.get("results") or []
            texts = [
                page.get("raw_content")
                for page in pages
                if isinstance(page, dict)
            ]
            extracted = "\n\n".join(text for text in texts if text)
        else:
            # Unexpected response shape: fall back to its textual form.
            extracted = str(content_response) if content_response else ""

        if extracted:
            return f"Documentation extracted from {url}:\n\n{extracted}"

        return f"Could not extract content from {url}. Please check the URL and try again."

    except Exception as e:
        return f"Error extracting documentation from {url}: {str(e)}"

In [None]:
# Simplified Debug Tools
@tool
def debug_csv_storage() -> str:
    """
    Debug tool to check the current state of CSV storage.
    This helps diagnose issues with data sharing between agents.
    """
    try:
        # Check persistent storage
        stored = load_csv_memory().get("csv_data", {})

        # Header, item count, then one line per stored dataset.
        pieces = [
            "=== CSV STORAGE DEBUG ===\n\n",
            f"Persistent CSV storage: {len(stored)} items\n",
        ]
        pieces.extend(
            f"  - {name}: {entry['size']} chars, source: {entry['source']}\n"
            for name, entry in stored.items()
        )
        return "".join(pieces)

    except Exception as e:
        return f"Error in debug_csv_storage: {str(e)}"

@tool
def list_available_data() -> str:
    """
    List all available data sources for analysis.
    This provides a comprehensive view of what data is available.
    """
    try:
        # Persistent Storage
        stored = load_csv_memory().get("csv_data", {})

        pieces = ["=== AVAILABLE DATA SOURCES ===\n\n", "CSV Storage:\n"]
        if not stored:
            pieces.append("   - No CSV data available\n")
        else:
            pieces.extend(
                f"   - {name}: {entry['size']} chars, source: {entry['source']}\n"
                for name, entry in stored.items()
            )
        return "".join(pieces)

    except Exception as e:
        return f"Error listing available data: {str(e)}"


In [None]:
@tool
def create_documentation_summary(content: str) -> str:
    """
    Generate a concise summary of documentation content in Markdown format.
    This tool processes raw documentation text, removes irrelevant details, 
    and creates a well-formatted Markdown file with API endpoints and parameters.
    """
    # NOTE: the docstring above is the tool description the agent sees, so it is left unchanged.

    system_prompt_summary_agent = """
    You are an expert in summarizing API documentation.
    You are given raw documentation text and must extract only the relevant information 
    about API endpoints and their parameters. Create a well-formatted Markdown summary.
    
    Format the output as a Markdown document with:
    - Clear headings for each endpoint
    - HTTP method and URL
    - Parameter descriptions in tables
    - Code examples when available
    - Keep it concise but informative
    """

    prompt = ChatPromptTemplate.from_messages([
        ("system", system_prompt_summary_agent),
        ("user", "Here is the content to be summarized: {content}")
    ])

    try:
        # format_messages() keeps the system and user messages separate.
        # prompt.format() would flatten both into one string, which the model
        # then receives as a single human message with no system role.
        messages = prompt.format_messages(content=content)
        response = llm.invoke(messages)
        return response.content
    except Exception as e:
        # Same convention as the other tools: report the failure as text so the
        # agent can react to it instead of the run aborting.
        return f"Error creating documentation summary: {str(e)}"

In [None]:
@tool
def fetch_api_data(endpoint: str, parameters: Dict[str, Any] = None) -> str:
    """
    Fetch data from a specified API endpoint.
    This tool constructs the request URL with optional parameters, 
    performs an HTTP GET request, and returns the response content.
    For OpenF1 API endpoints, automatically requests CSV format and stores in memory.
    """
    # NOTE: the docstring above is the tool description the agent sees, so it is left unchanged.
    try:
        # Check if this is an OpenF1 API endpoint
        is_openf1 = "api.openf1.org" in endpoint

        if parameters:
            # Keys and values are joined verbatim (no re-encoding), as before.
            query = "&".join(f"{key}={value}" for key, value in parameters.items())
            if query:
                # The endpoint may already carry a query string; a second "?"
                # would corrupt the URL, so continue it with "&" in that case.
                separator = "&" if "?" in endpoint else "?"
                endpoint += f"{separator}{query}"

        # For OpenF1 API, automatically append csv=true parameter (only if not already present)
        if is_openf1 and "csv=true" not in endpoint:
            separator = "&" if "?" in endpoint else "?"
            endpoint += f"{separator}csv=true"

        # Make the HTTP request
        with httpx.Client() as client:
            response = client.get(endpoint, timeout=30.0)
            response.raise_for_status()

            # For OpenF1 CSV responses, store in memory and return confirmation
            content_type = response.headers.get('content-type', '')
            if is_openf1 and content_type.startswith('text/csv'):
                # Generate CSV name based on endpoint (including all filters)
                csv_name = generate_csv_name(endpoint, parameters)
                store_csv_data(csv_name, response.text, "OpenF1")
                # Show the first lines as text; interpolating the list itself
                # would print a Python list repr with quotes and escapes.
                preview = "\n".join(response.text.splitlines()[:5])
                return (
                    f"CSV data fetched and stored as '{csv_name}' from {endpoint}\n\n"
                    f"Data preview (first 5 lines):\n{preview}"
                )

            # For other APIs, try JSON first, then fall back to text
            try:
                data = response.json()
                return f"API Response from {endpoint}:\n\n{json.dumps(data, indent=2)}"
            except json.JSONDecodeError:
                return f"API Response from {endpoint}:\n\n{response.text}"

    except httpx.HTTPStatusError as e:
        return f"HTTP Error {e.response.status_code} when fetching {endpoint}: {e.response.text}"
    except httpx.TimeoutException:
        return f"Timeout error when fetching {endpoint}. The request took too long."
    except Exception as e:
        return f"Error fetching data from {endpoint}: {str(e)}"

def generate_csv_name(endpoint: str, parameters: Dict[str, Any] = None) -> str:
    """Build the storage name ``openf1_<resource>[_<key>_<value>...].csv`` for a request.

    Filters come from the URL query string and from ``parameters``; on a key
    clash ``parameters`` wins. The ``csv`` key is ignored, the remaining keys
    are sorted, and ``= & ?`` are stripped from values while ``<`` / ``>``
    become ``lt`` / ``gt``.
    """
    # Resource name: last path segment in front of any query string.
    if endpoint:
        resource = endpoint.split('?')[0].split('/')[-1]
    else:
        resource = "data"

    merged = {}

    # Filters embedded in the URL (the text between the first and second '?').
    if '?' in endpoint:
        for pair in endpoint.split('?')[1].split('&'):
            if '=' not in pair:
                continue
            name, _, raw = pair.partition('=')
            if name != 'csv':
                merged[name] = raw

    # Explicit parameters override URL filters that share a key.
    if parameters:
        merged.update(
            (name, raw) for name, raw in parameters.items() if name != 'csv'
        )

    if not merged:
        return f"openf1_{resource}.csv"

    scrub = str.maketrans({'=': '', '&': '', '?': '', '<': 'lt', '>': 'gt'})
    suffix = "_".join(
        f"{name}_{str(merged[name]).translate(scrub)}" for name in sorted(merged)
    )
    return f"openf1_{resource}_{suffix}.csv"


In [None]:
@tool
def look_memory(website_url: str = None, api_name: str = None) -> str:
    """
    Look up stored API documentation summaries in the memory scratchpad.

    This tool checks if documentation for a given website or API is already 
    available in memory, avoiding the need to re-extract and re-summarize. 
    It can return either specific matches or an overview of all stored summaries.
    """
    # NOTE: the docstring above is the tool description the agent sees, so it is left unchanged.
    try:
        documentations = load_memory().get("documentations", {})

        if not documentations:
            return "Memory scratchpad is empty. No API documentation summaries found."

        # If specific URL or API name provided, search for it
        if website_url or api_name:
            url_needle = website_url.lower() if website_url else None
            name_needle = api_name.lower() if api_name else None

            found_docs = []
            for doc_id, doc_data in documentations.items():
                # `or ""` also covers fields that were stored as null.
                doc_url = (doc_data.get("source_url") or "").lower()
                doc_name = (doc_data.get("api_name") or "").lower()

                matches_url = bool(url_needle) and url_needle in doc_url
                matches_name = bool(name_needle) and name_needle in doc_name
                if matches_url or matches_name:
                    found_docs.append({
                        "id": doc_id,
                        "api_name": doc_data.get("api_name") or "Unknown",
                        "source_url": doc_data.get("source_url") or "Unknown",
                        "created_at": doc_data.get("created_at") or "Unknown",
                        "summary": doc_data.get("summary") or "No summary available",
                    })

            if found_docs:
                result = f"Found {len(found_docs)} documentation summary(ies) in memory:\n\n"
                for doc in found_docs:
                    summary_text = str(doc['summary'])
                    # Only mark the preview as truncated when text was actually cut.
                    preview = summary_text[:200] + ("..." if len(summary_text) > 200 else "")
                    result += f"**{doc['api_name']}**\n"
                    result += f"   URL: {doc['source_url']}\n"
                    result += f"   Created: {doc['created_at']}\n"
                    result += f"   Summary: {preview}\n\n"
                return result

            # List each criterion separately; previously both were concatenated
            # with no separator ("website: <url>API: <name>").
            criteria = []
            if website_url:
                criteria.append('website: ' + website_url)
            if api_name:
                criteria.append('API: ' + api_name)
            return f"No documentation found for {', '.join(criteria)}"

        # If no specific search, return overview of all documentation
        result = f"Memory scratchpad contains {len(documentations)} API documentation summary(ies):\n\n"
        for doc_id, doc_data in documentations.items():
            result += f"**{doc_data.get('api_name', 'Unknown API')}**\n"
            result += f"   URL: {doc_data.get('source_url', 'Unknown')}\n"
            result += f"   Created: {doc_data.get('created_at', 'Unknown')}\n\n"

        return result

    except Exception as e:
        return f"Error looking up memory: {str(e)}"

In [None]:
@tool
def write_memory(api_name: str, source_url: str, summary: str) -> str:
    """
    Save a new API documentation summary into the memory scratchpad.
    This tool stores structured documentation so it can be reused later 
    without re-extracting and re-summarizing the same API source.
    """
    # NOTE: the docstring above is the tool description the agent sees, so it is left unchanged.
    try:
        memory = load_memory()

        # Create a unique ID for this documentation
        doc_id = f"{api_name.lower().replace(' ', '_')}_{int(datetime.now().timestamp())}"

        # The endpoint count is optional metadata. A summary that merely starts
        # with "{" (for example Markdown or malformed JSON) must not abort the
        # save, so any parsing problem simply leaves the count at 0.
        endpoints_count = 0
        if summary.startswith('{'):
            try:
                endpoints_count = len(json.loads(summary).get("endpoints", []))
            except (ValueError, AttributeError, TypeError):
                endpoints_count = 0

        # Create the documentation entry
        doc_entry = {
            "api_name": api_name,
            "source_url": source_url,
            "summary": summary,
            "created_at": datetime.now().isoformat(),
            "endpoints_count": endpoints_count
        }

        # Add to memory
        memory["documentations"][doc_id] = doc_entry

        # Save memory
        save_memory(memory)

        # save_memory() only prints on failure, so read the file back before
        # claiming success.
        if doc_id not in load_memory().get("documentations", {}):
            return f"Error writing to memory: documentation for {api_name} could not be persisted"

        return f"Successfully saved documentation for **{api_name}** to memory scratchpad!\n\n" \
               f"**Details:**\n" \
               f"   API: {api_name}\n" \
               f"   Source: {source_url}\n" \
               f"   Endpoints: {doc_entry['endpoints_count']}\n" \
               f"   Saved at: {doc_entry['created_at']}\n\n" \
               f"This documentation can now be reused for future API calls without re-extraction."

    except Exception as e:
        return f"Error writing to memory: {str(e)}"

---

### **Tools for Analysis Agent**

In [None]:
@tool
def get_memory_documentation(api_name: str = None, website_url: str = None) -> str:
    """
    Retrieve a stored API documentation summary from the memory scratchpad.
    This tool returns the full documentation entry for a given API name 
    or website URL, allowing reuse without re-extracting the source.
    """
    # NOTE: the docstring above is the tool description the agent sees, so it is left unchanged.
    try:
        documentations = load_memory().get("documentations", {})

        if not documentations:
            return "No documentation found in memory."

        name_needle = api_name.lower() if api_name else None
        url_needle = website_url.lower() if website_url else None

        # Find matching documentation (first match in stored order wins)
        for doc_id, doc_data in documentations.items():
            # `or ""` also covers fields that were stored as null.
            doc_url = (doc_data.get("source_url") or "").lower()
            doc_name = (doc_data.get("api_name") or "").lower()

            matches_name = bool(name_needle) and name_needle in doc_name
            matches_url = bool(url_needle) and url_needle in doc_url
            if matches_name or matches_url:
                # .get() keeps a partially populated entry readable; direct
                # indexing would raise KeyError and hide the stored summary.
                return f"**Retrieved from Memory:** {doc_data.get('api_name') or 'Unknown'}\n" \
                       f"Source: {doc_data.get('source_url') or 'Unknown'}\n" \
                       f"Created: {doc_data.get('created_at') or 'Unknown'}\n\n" \
                       f"**Documentation Summary:**\n{doc_data.get('summary') or 'No summary available'}"

        # List each criterion separately; previously both were concatenated
        # with no separator ("API: <name>Website: <url>").
        criteria = []
        if api_name:
            criteria.append('API: ' + api_name)
        if website_url:
            criteria.append('Website: ' + website_url)
        return f"No documentation found for {', '.join(criteria)}"

    except Exception as e:
        return f"Error retrieving documentation from memory: {str(e)}"

In [None]:
@tool
def analyze_data_with_pandas(analysis_query: str, csv_names: str = None) -> str:
    """
    Analyze CSV datasets using a Pandas DataFrame agent.
    This tool loads CSV data from persistent storage and allows natural language queries 
    on the datasets by leveraging a Pandas agent that executes LLM-generated Python code.
    It can work with multiple DataFrames simultaneously for comparative analysis.
    
    Args:
        analysis_query: The analysis query in natural language
        csv_names: Comma-separated list of CSV names to analyze. If None, analyzes all available CSVs.
    """
    # NOTE: the docstring above is the tool description the agent sees, so it is left unchanged.
    try:
        # Get list of available CSVs
        available_csvs = list_available_csvs()

        if "message" in available_csvs:
            return "No CSV datasets available. Please fetch some data from OpenF1 API first."

        # Get available CSV names
        available_names = list(available_csvs["available_datasets"].keys())

        # Determine which CSVs to analyze, remembering unknown names so they
        # can be reported instead of being dropped silently.
        if csv_names:
            requested = [name.strip() for name in csv_names.split(',') if name.strip()]
            csv_list = [name for name in requested if name in available_names]
            missing = [name for name in requested if name not in available_names]
        else:
            csv_list = available_names
            missing = []

        if not csv_list:
            return "No valid CSV names provided or no CSVs available."

        # Load DataFrames directly from persistent storage
        dataframes_list = []
        loaded_names = []

        for csv_name in csv_list:
            try:
                dataframes_list.append(load_dataframe_from_csv(csv_name))
                loaded_names.append(csv_name)
            except Exception as e:
                print(f"Warning: Could not load {csv_name}: {e}")
                continue

        if not dataframes_list:
            return "No DataFrames could be loaded successfully."

        # Create pandas agent with all dataframes
        # SECURITY: allow_dangerous_code=True lets the agent execute
        # LLM-generated Python in this kernel; only run it on trusted data.
        agent = create_pandas_dataframe_agent(
            ChatOpenAI(temperature=0, model="gpt-4o-mini"),
            dataframes_list,
            verbose=True,
            agent_type=AgentType.OPENAI_FUNCTIONS,
            allow_dangerous_code=True
        )

        # The agent only receives the DataFrame objects, so state which dataset
        # each one came from (same order as the list passed above).
        dataset_listing = "\n".join(
            f"{position}. {name}" for position, name in enumerate(loaded_names, 1)
        )
        agent_input = (
            f"{analysis_query}\n\n"
            f"The dataframes were loaded from these datasets, in this order:\n{dataset_listing}"
        )

        result = agent.invoke({"input": agent_input})
        # invoke() returns a dict; return only the answer text rather than the
        # repr of the whole structure.
        output = result.get("output", result) if isinstance(result, dict) else result

        response = f"Analysis of {len(dataframes_list)} CSV datasets:\n\n{output}"
        if missing:
            response += f"\n\nNote: requested datasets not found and skipped: {', '.join(missing)}"
        return response

    except Exception as e:
        return f"Analysis error: {str(e)}"


---

### **Tool Bindings**

In [None]:
# Tools Bindings for Context Agent
tools_context_agent = [
    extract_documentation_from_website, 
    create_documentation_summary,
    fetch_api_data,
    look_memory,
    write_memory,
    get_memory_documentation,
]

# The loop variable must not be called `tool`: that would rebind the imported
# @tool decorator and break any tool-definition cell that is re-run afterwards.
print("Available tools:")
for i, bound_tool in enumerate(tools_context_agent, 1):
    print(f"   {i}. {bound_tool.name}: {bound_tool.description.split('.')[0] if bound_tool.description else 'No description'}")

In [None]:
# Tools Bindings for Analysis Agent
tools_analysis_agent = [
    analyze_data_with_pandas,
    debug_csv_storage,
    list_available_data
]

# The loop variable must not be called `tool`: that would rebind the imported
# @tool decorator and break any tool-definition cell that is re-run afterwards.
print("Available tools:")
for i, bound_tool in enumerate(tools_analysis_agent, 1):
    print(f"   {i}. {bound_tool.name}: {bound_tool.description.split('.')[0] if bound_tool.description else 'No description'}")

---

### **System Prompts**

In [None]:
# System Prompt Context Agent
# Passed as `prompt=` when the context agent is created. It describes the
# memory-first workflow (look_memory -> get_memory_documentation, or
# extract -> summarize -> write_memory) and the fetch_api_data usage rules.
# The tool names mentioned here must match the tools bound to the context agent.
system_prompt_context_agent = """
You are a specialized API assistant with long-term memory capabilities that helps users learn and interact with APIs efficiently.

## Your Memory System:
You have access to a persistent memory scratchpad that stores API documentation summaries. This allows you to:
- Avoid re-extracting documentation you've already processed
- Provide faster responses by using cached knowledge
- Build up a knowledge base of API documentation over time

## Your Workflow:
1. **ALWAYS start by checking memory** using `look_memory` to see if you already have documentation for the requested API
2. **If documentation exists in memory**: Use `get_memory_documentation` to retrieve it and proceed directly to API calls
3. **If no documentation in memory**: 
   - Extract documentation using `extract_documentation_from_website`
   - Summarize it using `create_documentation_summary` 
   - Save it to memory using `write_memory`

## Available Tools and How to Use Them:

### 1. Memory Management Tools:
- `look_memory(website_url="https://example.com")` - Check if documentation exists for a specific URL
- `look_memory(api_name="OpenF1")` - Check if documentation exists for a specific API name
- `get_memory_documentation(api_name="OpenF1")` - Retrieve full documentation from memory
- `write_memory(api_name="OpenF1", source_url="https://openf1.org", summary="...")` - Save new documentation

### 2. Documentation Tools:
- `extract_documentation_from_website(url="https://example.com")` - Extract raw documentation from a website
- `create_documentation_summary(content="...")` - Convert raw documentation into structured summary

### 3. API Data Fetching Tool:
- `fetch_api_data(endpoint="https://api.example.com/v1/data")` - Fetch data from any API endpoint
- `fetch_api_data(endpoint="https://api.example.com/v1/data", parameters={"foo": xxx, "bar": "yyy"})` - Fetch with parameters

**IMPORTANT**: For OpenF1 API, the system automatically adds `csv=true` parameter to get CSV format. You can still add other parameters:
- Example: `fetch_api_data(endpoint="https://api.openf1.org/v1/laps", parameters={"example_parameter_1": example_value_1, "example_parameter_2": example_value_2})`
- This will become: `https://api.openf1.org/v1/laps?example_parameter_1=example_value_1&example_parameter_2=example_value_2&csv=true`

## Key Behaviors:
- **Efficiency First**: Always check memory before extracting new documentation
- **Memory Building**: Save all new documentation summaries to build your knowledge base
- **Context Awareness**: Use the retrieved summary documentation to provide information on how to use the API
- **User Communication**: Always explain what you're doing and whether you're using cached or new information
- **Rate Limit Protection**: Minimize API calls by fetching comprehensive datasets in single calls
- **Tool Usage**: You can call the tools as many times as you want to get the information you need

## CRITICAL: API Rate Limit Rules:
- **AVOID making multiple API calls** when one comprehensive call would work
- **ALWAYS fetch the largest dataset possible** in a single call
- **Only apply specific filters** when user explicitly requests them

## For OpenF1 API specifically:
- Check memory for "OpenF1" or "openf1.org" documentation first
- If not found, extract from https://openf1.org/#api-endpoints
- Save the structured summary to memory for future use

## Data Fetching Strategy - CRITICAL RULES:

### API RATE LIMIT PROTECTION:
- **MAXIMIZE DATA PER CALL**: Always fetch the largest possible dataset in a single API call
- **PREFER COMPLETE DATASETS**: When in doubt, fetch complete datasets without filters to avoid data loss
- **USE FILTERS ONLY WHEN EXPLICITLY REQUESTED**: Only apply filters when the user specifically requests filtered data

### Fetching Examples:
- **GOOD**: `fetch_api_data(endpoint="https://api.openf1.org/v1/laps")` - Gets ALL laps
- **GOOD**: `fetch_api_data(endpoint="https://api.openf1.org/v1/sessions")` - Gets ALL sessions
- **BAD**: Multiple calls like `fetch_api_data(..., parameters={"driver_number": 1})` then `fetch_api_data(..., parameters={"driver_number": 2})`
- **BAD**: Fetching small filtered datasets when user asks for "performance analysis"

### Smart Fetching Strategy:
- **For general analysis requests**: Fetch complete datasets without filters
- **For specific requests**: Only then apply the specific filter if it doesn't limit the analysis capabilities

Be helpful, efficient, and always leverage your memory system to provide the best experience.
"""

In [None]:
# System Prompt Analysis Agent
# Passed as `prompt=` when the analysis agent is created. It tells the agent to
# list the stored datasets before every analysis and to run the analysis via
# analyze_data_with_pandas. The tool names mentioned here must match the tools
# bound to the analysis agent.
system_prompt_analysis_agent = """
You are a specialized data analysis agent that can analyze CSV datasets using pandas and generate visualizations.

## Your Context:
You can analyze CSV data that was fetched by the Context Agent from APIs (like OpenF1). This data is stored persistently and can be accessed directly when needed. The system supports multiple DataFrames simultaneously, allowing for comparative analysis across different CSV files.

## Available Tools and How to Use Them:

### 1. Data Discovery Tools:
- `list_available_data()` - Get a comprehensive view of all available data sources
- `debug_csv_storage()` - Check what CSV data is available in persistent storage (more detailed)

### 2. Analysis Tool:
- `analyze_data_with_pandas(analysis_query="your question here")` - Analyze all available CSV datasets
- `analyze_data_with_pandas(analysis_query="your question here", csv_names="dataset1,dataset2")` - Analyze specific CSV datasets

**IMPORTANT**: The analysis tool can:
- Work with multiple DataFrames simultaneously
- Perform joins and merges between DataFrames
- Generate visualizations (graphs, charts, plots)
- Execute complex pandas operations
- Answer natural language questions about the data

## Your Workflow - MANDATORY STEPS:
1. **ALWAYS start by checking what data is available** using `list_available_data()` or `debug_csv_storage()`
2. **If no data is available**: Ask the user to fetch data using the Context Agent first
3. **BEFORE EVERY ANALYSIS**: Always call `list_available_data()` to get the most current list of datasets
4. **Once data is available**: Use `analyze_data_with_pandas()` to perform analysis

## CRITICAL: Always Check for New Data:
- **NEVER assume** you know what datasets are available
- **ALWAYS call** `list_available_data()` before any analysis
- **New datasets** may have been added by the Context Agent since your last check
- **This prevents** analyzing outdated or incomplete data

## Analysis Capabilities:
- **Multi-Dataset Analysis**: Compare data across different CSV files, perform joins, find relationships
- **Visualization**: Generate graphs, charts, plots to help users understand the data
- **Complex Queries**: Answer natural language questions about the data

## Key Behaviors:
- **Always check data availability first** before attempting analysis
- **MANDATORY: Call `list_available_data()` before every analysis** to get current datasets
- **Use natural language** for your analysis queries - the tool understands complex questions
- **Generate visualizations** when they help explain the data
- **Be specific** about which datasets to analyze when needed
- **You can call tools multiple times** to get the information you need

Be helpful, efficient, and always provide clear insights with visualizations when appropriate.
"""

---

### **ReAct Agents**

In [None]:
# Create Context Agent
# ReAct agent over the documentation/memory/fetch tools, with its own
# in-memory checkpointer.
context_agent = create_react_agent(
    llm,
    tools_context_agent,
    prompt=system_prompt_context_agent,
    checkpointer=InMemorySaver(),
)

In [None]:
# Create Analysis Agent
# ReAct agent over the pandas-analysis and data-listing tools, with its own
# in-memory checkpointer (separate from the context agent's).
analysis_agent = create_react_agent(
    llm,
    tools_analysis_agent,
    prompt=system_prompt_analysis_agent,
    checkpointer=InMemorySaver(),
)

---

### **Testing Agents**

In [None]:
# Run configuration shared by every invoke() call below.
# NOTE(review): presumably the thread_id is what the checkpointer uses to group
# turns into one conversation — confirm against the LangGraph docs. Each agent
# was created with its own InMemorySaver instance.
config = {"configurable": {"thread_id": "test_multi_df_session"}}

In [None]:
# Ask the context agent which documentation summaries it already has.
response = context_agent.invoke(
    input={"messages": [{"role": "user", "content": "What documentation I have in memory?"}]},
    config=config,
)
print(response["messages"][-1].content)

In [None]:
# Have the context agent extract, summarize and store the OpenF1 documentation.
response = context_agent.invoke(
    input={"messages": [{"role": "user", "content": "Get info and save at memory about the OpenF1 API from https://openf1.org/"}]},
    config=config,
)
print(response["messages"][-1].content)

In [None]:
# Ask a usage question about lap data.
response = context_agent.invoke(
    input={"messages": [{"role": "user", "content": "How I can use the OpenF1 API to get the lap data?"}]},
    config=config,
)
print(response["messages"][-1].content)

In [None]:
# Ask a usage question about driver positions during a race.
response = context_agent.invoke(
    input={"messages": [{"role": "user", "content": "How I can use the OpenF1 API to get the positions of a driver during a race?"}]},
    config=config,
)
print(response["messages"][-1].content)

In [None]:
# Ask the analysis agent which CSV datasets are currently stored.
response = analysis_agent.invoke(
    input={"messages": [{"role": "user", "content": "Which datasets I have in memory?"}]},
    config=config,
)
print(response["messages"][-1].content)