In [None]:
import os
from dotenv import load_dotenv
from huggingface_hub import login

#Login() calls an inbuilt login prompt that does not work for me
#login()


# Read key/value pairs from the .env file in the working directory into the
# process environment.
load_dotenv(dotenv_path=".env")

# Look up the Hugging Face access token (None when the variable is not set).
token = os.environ.get("HF_TOKEN")

# Report only whether a token was found; the secret itself is never printed.
print("Token loaded:", not (token is None))




In [None]:
# using smolagents, we can build an agent capable of searching the web using DuckDuckGo.
# To give the agent access to this tool, we include it in the tool list when creating the agent.
# For the model, we’ll rely on InferenceClientModel, which provides access to Hugging Face’s Serverless Inference API. 
# The default model is "Qwen/Qwen2.5-Coder-32B-Instruct"


from smolagents import CodeAgent, DuckDuckGoSearchTool, InferenceClientModel

# Code-writing agent whose only tool is DuckDuckGo web search; the model is an
# InferenceClientModel created with its default arguments.
agent = CodeAgent(
    tools=[DuckDuckGoSearchTool()],
    model=InferenceClientModel(),
)

# Ask the agent for music recommendations for the party.
agent.run(
    "Search for the best music recommendations for a party at the Wayne's mansion."
)



In [None]:
from smolagents import CodeAgent, tool, InferenceClientModel

# Tool to suggest a menu based on the occasion
@tool
def suggest_menu(occasion: str) -> str:
    """
    Suggests a menu based on the occasion.
    Matching ignores letter case and surrounding whitespace.
    Args:
        occasion (str): The type of occasion for the party. Allowed values are:
                        - "casual": Menu for casual party.
                        - "formal": Menu for formal party.
                        - "superhero": Menu for superhero party.
                        - "custom": Custom menu.
    """
    # Lookup table from normalized occasion name to the menu text.
    menus = {
        "casual": "Pizza, snacks, and drinks.",
        "formal": "3-course dinner with wine and dessert.",
        "superhero": "Buffet with high-energy and healthy food.",
    }
    # Normalize so values such as "Formal" or " casual " do not silently fall
    # through to the custom menu. Non-string input keeps the old behavior of
    # returning the custom menu instead of raising.
    key = occasion.strip().lower() if isinstance(occasion, str) else None
    return menus.get(key, "Custom menu for the butler.")

# Give Alfred's agent the menu-suggestion tool as its only tool.
agent = CodeAgent(
    tools=[suggest_menu],
    model=InferenceClientModel(),
)

# Ask for the formal menu.
agent.run(
    "Prepare a formal menu for the party."
)

In [None]:
# Code execution has strict security measures - imports outside a predefined safe list are blocked by default. 
# However, you can authorize additional imports by passing them as strings in additional_authorized_imports. 
# For more details on secure code execution, see the official guide: https://huggingface.co/docs/smolagents/tutorials/secure_code_execution

from smolagents import CodeAgent, InferenceClientModel
import numpy as np
import time
import datetime

# Tool-less agent; `datetime` is explicitly authorized for the code it writes.
agent = CodeAgent(
    tools=[],
    model=InferenceClientModel(),
    additional_authorized_imports=["datetime"],
)

# The prompt lists each preparation task with its duration and asks for the
# resulting ready time.
agent.run("""
    Alfred needs to prepare for the party. Here are the tasks:
    1. Prepare the drinks - 30 minutes
    2. Decorate the mansion - 60 minutes
    3. Set up the menu - 45 minutes
    4. Prepare the music and playlist - 45 minutes

    If we start right now, at what time will the party be ready?
    """)

In [None]:
# The smolagents library makes this possible by allowing you to share a complete agent with the community and download others for immediate use. 
# Hub repository (username/repo) the agent is uploaded to and re-downloaded from.
# Change to your username and repo name.
HUB_NAME = "1729prashant/AlfredAgent"

# Upload the current `agent` to the Hub.
agent.push_to_hub(HUB_NAME)

# Download the agent again; running the downloaded code is explicitly trusted.
alfred_agent = agent.from_hub(
    HUB_NAME,
    trust_remote_code=True,
)

alfred_agent.run(
    "Give me the best playlist for a party at Wayne's mansion. The party idea is a 'villain masquerade' theme"
)

In [None]:
# how did Alfred build such an agent using smolagents? By integrating several tools, he can generate an agent as follows. 
from smolagents import CodeAgent, DuckDuckGoSearchTool, FinalAnswerTool, InferenceClientModel, Tool, tool, VisitWebpageTool

@tool
def suggest_menu(occasion: str) -> str:
    """
    Suggests a menu based on the occasion.
    Matching ignores letter case and surrounding whitespace.
    Args:
        occasion: The type of occasion for the party.
    """
    # Lookup table from normalized occasion name to the menu text.
    menus = {
        "casual": "Pizza, snacks, and drinks.",
        "formal": "3-course dinner with wine and dessert.",
        "superhero": "Buffet with high-energy and healthy food.",
    }
    # Normalize so values such as "Formal" or " casual " do not silently fall
    # through to the custom menu. Non-string input keeps the old behavior of
    # returning the custom menu instead of raising.
    key = occasion.strip().lower() if isinstance(occasion, str) else None
    return menus.get(key, "Custom menu for the butler.")

@tool
def catering_service_tool(query: str) -> str:
    """
    This tool returns the highest-rated catering service in Gotham City.
    
    Args:
        query: A search term for finding catering services.
    """
    # Hard-coded sample data: service name -> rating.
    # `query` is accepted but not used to filter this list.
    ratings_by_service = {
        "Gotham Catering Co.": 4.9,
        "Wayne Manor Catering": 4.8,
        "Gotham City Events": 4.7,
    }

    # Take the (name, rating) pair with the largest rating and return its name.
    top_name, _top_rating = max(
        ratings_by_service.items(), key=lambda pair: pair[1]
    )
    return top_name

class SuperheroPartyThemeTool(Tool):
    # Tool that maps a party category to a canned superhero party theme idea.

    # Identifier and description of the tool.
    name = "superhero_party_theme_generator"
    description = """
    This tool suggests creative superhero-themed party ideas based on a category.
    It returns a unique party theme idea."""
    
    # Single string input: the party category.
    inputs = {
        "category": {
            "type": "string",
            "description": "The type of superhero party (e.g., 'classic heroes', 'villain masquerade', 'futuristic Gotham').",
        }
    }
    
    output_type = "string"

    def forward(self, category: str):
        """Return the theme idea for `category`, or a hint listing the known categories."""
        # Keys are all lowercase because the lookup lowercases the input.
        # The former "futuristic Gotham" key could never match.
        themes = {
            "classic heroes": "Justice League Gala: Guests come dressed as their favorite DC heroes with themed cocktails like 'The Kryptonite Punch'.",
            "villain masquerade": "Gotham Rogues' Ball: A mysterious masquerade where guests dress as classic Batman villains.",
            "futuristic gotham": "Neo-Gotham Night: A cyberpunk-style party inspired by Batman Beyond, with neon decorations and futuristic gadgets."
        }
        
        # Case-insensitive lookup that also tolerates surrounding whitespace.
        return themes.get(category.strip().lower(), "Themed party idea not found. Try 'classic heroes', 'villain masquerade', or 'futuristic Gotham'.")


# Alfred's full agent: web search, page visiting, the custom tools defined
# above, and the final-answer tool. Capped at 10 steps, with verbosity level 2.
agent = CodeAgent(
    model=InferenceClientModel(),
    tools=[
        DuckDuckGoSearchTool(),
        VisitWebpageTool(),
        suggest_menu,
        catering_service_tool,
        SuperheroPartyThemeTool(),
        FinalAnswerTool(),
    ],
    verbosity_level=2,
    max_steps=10,
)

agent.run(
    "Give me the best playlist for a party at the Wayne's mansion. The party idea is a 'villain masquerade' theme"
)

In [None]:
# As Alfred fine-tunes the Party Preparator Agent, he’s growing weary of debugging its runs. 
# Agents, by nature, are unpredictable and difficult to inspect. But since he aims to build 
# the ultimate Party Preparator Agent and deploy it in production, he needs robust traceability 
# for future monitoring and analysis.

# Once again, smolagents comes to the rescue! It embraces the OpenTelemetry standard for 
# instrumenting agent runs, allowing seamless inspection and logging. With the help of Langfuse 
# and the SmolagentsInstrumentor, Alfred can easily track and analyze his agent’s behavior.

# First, we need to install the necessary dependencies:
!pip install opentelemetry-sdk opentelemetry-exporter-otlp openinference-instrumentation-smolagents langfuse


In [None]:
# Next, Alfred has already created an account on Langfuse and has his API keys ready. 
# If you haven’t done so yet, you can sign up for Langfuse Cloud here https://cloud.langfuse.com/ 
# or explore alternatives https://huggingface.co/docs/smolagents/tutorials/inspect_runs

# Once you have your API keys, they need to be properly configured as follows:
import os

# Langfuse credentials and endpoint, exported as environment variables.
# Get keys for your project from the project settings page: https://cloud.langfuse.com
# The key values below are placeholders.
os.environ.update(
    {
        "LANGFUSE_PUBLIC_KEY": "pk-lf-...",
        "LANGFUSE_SECRET_KEY": "sk-lf-...",
        "LANGFUSE_HOST": "https://cloud.langfuse.com",  # 🇪🇺 EU region
        # "LANGFUSE_HOST": "https://us.cloud.langfuse.com",  # 🇺🇸 US region
    }
)



In [None]:
# With the environment variables set, we can now initialize the Langfuse client. 
# get_client() initializes the Langfuse client using the credentials provided in the environment variables.

from langfuse import get_client
 
# Create the Langfuse client.
langfuse = get_client()

# Verify connection: print one of two messages depending on auth_check().
print(
    "Langfuse client is authenticated and ready!"
    if langfuse.auth_check()
    else "Authentication failed. Please check your credentials and host."
)


# Finally, Alfred is ready to initialize the SmolagentsInstrumentor and start tracking his agent’s performance.

from openinference.instrumentation.smolagents import SmolagentsInstrumentor

# Activate the smolagents instrumentation before any agent is run.
SmolagentsInstrumentor().instrument()


from smolagents import CodeAgent, InferenceClientModel

# A plain local agent is built first; `from_hub` is then called on it to
# download the shared agent (running its remote code is explicitly trusted).
agent = CodeAgent(
    tools=[],
    model=InferenceClientModel(),
)
alfred_agent = agent.from_hub(
    "sergiopaniego/AlfredAgent",
    trust_remote_code=True,
)
alfred_agent.run(
    "Give me the best playlist for a party at Wayne's mansion. The party idea is a 'villain masquerade' theme"
)

# Alfred can now access these logs here https://cloud.langfuse.com/project/cm7bq0abj025rad078ak3luwi/traces/995fc019255528e4f48cf6770b0ce27b?timestamp=2025-02-19T10%3A28%3A36.929Z
# to review and analyze them.





In [None]:
import inspect
import smolagents
#print(inspect.getsourcefile(smolagents.DuckDuckGoSearchTool))


In [None]:
#  Example: Running a Tool Calling Agent

from smolagents import ToolCallingAgent, DuckDuckGoSearchTool, InferenceClientModel

# Same search task as before, this time with a ToolCallingAgent.
agent = ToolCallingAgent(
    tools=[DuckDuckGoSearchTool()],
    model=InferenceClientModel(),
)

agent.run(
    "Search for the best music recommendations for a party at the Wayne's mansion."
)



In [None]:
# In smolagents, tools are treated as functions that an LLM can call within an agent system.

# To interact with a tool, the LLM needs an interface description with these key components:

#    Name: What the tool is called
#    Tool description: What the tool does
#    Input types and descriptions: What arguments the tool accepts
#    Output type: What the tool returns

# For instance, while preparing for a party at Wayne Manor, Alfred needs various tools to gather information - 
# from searching for catering services to finding party theme ideas. Here’s how a simple search tool interface might look:

#    Name: web_search
#    Tool description: Searches the web for specific queries
#    Input: query (string) - The search term to look up
#    Output: String containing the search results

# By using these tools, Alfred can make informed decisions and gather all the information needed for planning the perfect party.


# Tool Creation Methods

# In smolagents, tools can be defined in two ways:
#    Using the @tool decorator for simple function-based tools
#    Creating a subclass of Tool for more complex functionality

# The @tool Decorator
# The @tool decorator is the recommended way to define simple tools. Under the hood, 
# smolagents will parse basic information about the function from Python. So if you name 
# your function clearly and write a good docstring, it will be easier for the LLM to use.

# Using this approach, we define a function with:
#    A clear and descriptive function name that helps the LLM understand its purpose.
#    Type hints for both inputs and outputs to ensure proper usage.
#    A detailed description, including an Args: section where each argument is explicitly described. 
#    - These descriptions provide valuable context for the LLM, so it’s important to write them carefully.


# Below is an example of how Alfred can use the @tool decorator to make this happen:

from smolagents import CodeAgent, InferenceClientModel, tool

# Let's pretend we have a function that fetches the highest-rated catering services.
@tool
def catering_service_tool(query: str) -> str:
    """
    This tool returns the highest-rated catering service in Gotham City.

    Args:
        query: A search term for finding catering services.
    """
    # Hard-coded sample data: service name -> rating.
    # `query` is accepted but not used to filter this list.
    ratings_by_service = {
        "Gotham Catering Co.": 4.9,
        "Wayne Manor Catering": 4.8,
        "Gotham City Events": 4.7,
    }

    # Take the (name, rating) pair with the largest rating and return its name.
    top_name, _top_rating = max(
        ratings_by_service.items(), key=lambda pair: pair[1]
    )
    return top_name


# Agent equipped only with the catering lookup tool.
agent = CodeAgent(
    tools=[catering_service_tool],
    model=InferenceClientModel(),
)

# Ask for the top-rated caterer and keep the agent's answer.
result = agent.run(
    "Can you give me the name of the highest-rated catering service in Gotham City?"
)

# Expected answer, per the tool's sample data: Gotham Catering Co.
print(result)








# Defining a Tool as a Python Class

# This approach involves creating a subclass of Tool. For complex tools, we can implement a class instead of a Python function. 
# The class wraps the function with metadata that helps the LLM understand how to use it effectively. In this class, we define:
#    name: The tool’s name.
#    description: A description used to populate the agent’s system prompt.
#    inputs: A dictionary with keys type and description, providing information to help the Python interpreter process inputs.
#    output_type: Specifies the expected output type.
#    forward: The method containing the inference logic to execute.

#Below, we can see an example of a tool built using Tool and how to integrate it within a CodeAgent.
#Generating a tool to generate ideas about the superhero-themed party

# Alfred’s party at the mansion is a superhero-themed event, but he needs some creative ideas to make it truly special. 
# As a fantastic host, he wants to surprise the guests with a unique theme.

# To do this, he can use an agent that generates superhero-themed party ideas based on a given category. 
# This way, Alfred can find the perfect party theme to wow his guests.



from smolagents import Tool, CodeAgent, InferenceClientModel

class SuperheroPartyThemeTool(Tool):
    # Tool that maps a party category to a canned superhero party theme idea.

    # Identifier and description of the tool.
    name = "superhero_party_theme_generator"
    description = """
    This tool suggests creative superhero-themed party ideas based on a category.
    It returns a unique party theme idea."""

    # Single string input: the party category.
    inputs = {
        "category": {
            "type": "string",
            "description": "The type of superhero party (e.g., 'classic heroes', 'villain masquerade', 'futuristic Gotham').",
        }
    }

    output_type = "string"

    def forward(self, category: str):
        """Return the theme idea for `category`, or a hint listing the known categories."""
        # Keys are all lowercase because the lookup lowercases the input.
        # The former "futuristic Gotham" key could never match.
        themes = {
            "classic heroes": "Justice League Gala: Guests come dressed as their favorite DC heroes with themed cocktails like 'The Kryptonite Punch'.",
            "villain masquerade": "Gotham Rogues' Ball: A mysterious masquerade where guests dress as classic Batman villains.",
            "futuristic gotham": "Neo-Gotham Night: A cyberpunk-style party inspired by Batman Beyond, with neon decorations and futuristic gadgets."
        }

        # Case-insensitive lookup that also tolerates surrounding whitespace.
        return themes.get(category.strip().lower(), "Themed party idea not found. Try 'classic heroes', 'villain masquerade', or 'futuristic Gotham'.")

# Create the theme tool and hand it to a fresh agent as its only tool.
party_theme_tool = SuperheroPartyThemeTool()
agent = CodeAgent(
    tools=[party_theme_tool],
    model=InferenceClientModel(),
)

# Ask the agent for a theme idea in the 'villain masquerade' category.
result = agent.run("What would be a good superhero party idea for a 'villain masquerade' theme?")

# Expected, per the tool's table:
# "Gotham Rogues' Ball: A mysterious masquerade where guests dress as classic Batman villains."
print(result)






# smolagents comes with a set of pre-built tools that can be directly injected into your agent. The default toolbox includes:
#    PythonInterpreterTool
#    FinalAnswerTool
#    UserInputTool
#    DuckDuckGoSearchTool
#    GoogleSearchTool
#    VisitWebpageTool

# Alfred could use various tools to ensure a flawless party at Wayne Manor:
#    First, he could use the DuckDuckGoSearchTool to find creative superhero-themed party ideas.
#    For catering, he’d rely on the GoogleSearchTool to find the highest-rated services in Gotham.
#    To manage seating arrangements, Alfred could run calculations with the PythonInterpreterTool.
#    Once everything is gathered, he’d compile the plan using the FinalAnswerTool.
# With these tools, Alfred guarantees the party is both exceptional and seamless. 


# Sharing and Importing Tools
# One of the most powerful features of smolagents is its ability to share custom tools on the Hub 
# and seamlessly integrate tools created by the community. This includes connecting with HF Spaces 
# and LangChain tools, significantly enhancing Alfred’s ability to orchestrate an unforgettable party at Wayne Manor. 🎭

# With these integrations, Alfred can tap into advanced event-planning tools—whether it’s adjusting 
# the lighting for the perfect ambiance, curating the ideal playlist for the party, or coordinating with Gotham’s finest caterers.

# Sharing a Tool to the Hub
# Sharing your custom tool with the community is easy! Simply upload it to your Hugging Face account using the push_to_hub() method.
# For instance, Alfred can share his party_theme_tool to help others come up with superhero party themes. Here’s how to do it:
# NOTE: "{your_username}" and the token value are placeholders; substitute
# real values before running this line.
party_theme_tool.push_to_hub(
    "{your_username}/party_theme_tool",
    token="<YOUR_HUGGINGFACEHUB_API_TOKEN>",
)





# Importing a Tool from the Hub
# You can easily import tools created by other users using the load_tool() function. 
# For example, Alfred might want to generate a promotional image for the party using AI. 
# Instead of building a tool from scratch, he can leverage a predefined one from the community:
from smolagents import load_tool, CodeAgent, InferenceClientModel

# Load a community text-to-image tool from the Hub (its remote code is trusted).
image_generation_tool = load_tool("m-ric/text-to-image", trust_remote_code=True)

# Agent whose only tool is the image generator.
agent = CodeAgent(model=InferenceClientModel(), tools=[image_generation_tool])

agent.run(
    "Generate an image of a luxurious superhero-themed party at Wayne Manor with made-up superheros."
)





# Importing a Hugging Face Space as a Tool
# You can also import a HF Space as a tool using Tool.from_space(). This opens up 
# possibilities for integrating with thousands of spaces from the community for tasks 
# from image generation to data analysis.

# The tool will connect with the spaces Gradio backend using the gradio_client, 
# so make sure to install it via pip if you don’t have it already.

# For the party, Alfred can use an existing HF Space for the generation of the AI-generated 
# image to be used in the announcement (instead of the pre-built tool we mentioned before). Let’s build it!
from smolagents import CodeAgent, InferenceClientModel, Tool

# Wrap the FLUX.1-schnell Space as a tool named "image_generator".
image_generation_tool = Tool.from_space(
    "black-forest-labs/FLUX.1-schnell",
    description="Generate an image from a prompt",
    name="image_generator",
)

# Model is named explicitly here; later statements reuse `model`.
model = InferenceClientModel("Qwen/Qwen2.5-Coder-32B-Instruct")

agent = CodeAgent(model=model, tools=[image_generation_tool])

# The raw prompt is passed through `additional_args` under the key 'user_prompt'.
agent.run(
    "Improve this prompt, then generate an image of it.",
    additional_args=dict(
        user_prompt="A grand superhero-themed party at Wayne Manor, with Alfred overseeing a luxurious gala"
    ),
)





# Importing a LangChain Tool
# We can reuse LangChain tools in your smolagents workflow!
# You can easily load LangChain tools using the Tool.from_langchain() method. Alfred 
# is preparing for a spectacular superhero night at Wayne Manor while the Waynes are away. 
# To make sure every detail exceeds expectations, he taps into LangChain tools to find top-tier entertainment ideas.

# By using Tool.from_langchain(), Alfred effortlessly adds advanced search functionalities to his smolagent, 
# enabling him to discover exclusive party ideas and services with just a few commands.

# Here’s how he does it:
from langchain.agents import load_tools
from smolagents import CodeAgent, InferenceClientModel, Tool

# Load the LangChain "serpapi" tool (first item of the returned list) and wrap
# it as a smolagents tool.
search_tool = Tool.from_langchain(
    load_tools(["serpapi"])[0]
)

# Reuses `model`, which must already be defined by an earlier statement.
agent = CodeAgent(model=model, tools=[search_tool])

agent.run(
    "Search for luxury entertainment ideas for a superhero-themed event, such as live performances and interactive experiences."
)



# Importing a tool collection from any MCP server
# smolagents also allows importing tools from the hundreds of MCP servers available on glama.ai or smithery.ai. If you want to dive deeper into MCP, you can check our free MCP Course.
# -Install mcp client
# --We first need to install the mcp integration for smolagents.
# --pip install "smolagents[mcp]"

# The MCP server's tools can be loaded into a ToolCollection object as follows:

import os
from smolagents import ToolCollection, CodeAgent
from mcp import StdioServerParameters
from smolagents import InferenceClientModel


model = InferenceClientModel("Qwen/Qwen2.5-Coder-32B-Instruct")


# Launch the PubMed MCP server through `uvx`. The child environment starts
# from UV_PYTHON=3.12 and is then overlaid with the current process
# environment, so an existing UV_PYTHON value takes precedence.
server_parameters = StdioServerParameters(
    command="uvx",
    args=["--quiet", "pubmedmcp@0.1.3"],
    env=dict({"UV_PYTHON": "3.12"}, **os.environ),
)

# The tool collection is used as a context manager; the agent is built and run
# inside the `with` block.
with ToolCollection.from_mcp(
    server_parameters, trust_remote_code=True
) as tool_collection:
    agent = CodeAgent(
        tools=list(tool_collection.tools),
        model=model,
        add_base_tools=True,
    )
    agent.run("Please find a remedy for hangover.")

In [None]:
#  Building Agentic RAG Systems


# Retrieval Augmented Generation (RAG) systems combine the capabilities of data retrieval and 
# generation models to provide context-aware responses. For example, a user’s query is passed 
# to a search engine, and the retrieved results are given to the model along with the query. 
# The model then generates a response based on the query and retrieved information.

# Agentic RAG (Retrieval-Augmented Generation) extends traditional RAG systems by combining 
# autonomous agents with dynamic knowledge retrieval.

# While traditional RAG systems use an LLM to answer queries based on retrieved data, agentic 
# RAG enables intelligent control of both retrieval and generation processes, improving efficiency and accuracy.

# Traditional RAG systems face key limitations, such as relying on a single retrieval step and 
# focusing on direct semantic similarity with the user’s query, which may overlook relevant information.

# Agentic RAG addresses these issues by allowing the agent to autonomously formulate search queries, 
# critique retrieved results, and conduct multiple retrieval steps for a more tailored and comprehensive output.


# Custom Knowledge Base Tool
# For specialized tasks, a custom knowledge base can be invaluable. Let’s create a tool that queries a 
# vector database of technical documentation or specialized knowledge. Using semantic search, the agent 
# can find the most relevant information for Alfred’s needs.
# A vector database stores numerical representations (embeddings) of text or other data, created by machine 
# learning models. It enables semantic search by identifying similar meanings in high-dimensional space.
# This approach combines predefined knowledge with semantic search to provide context-aware solutions for 
# event planning. With specialized knowledge access, Alfred can perfect every detail of the party.
# In this example, we’ll create a tool that retrieves party planning ideas from a custom knowledge base. 
# We’ll use a BM25 retriever to search the knowledge base and return the top results, and 
# RecursiveCharacterTextSplitter to split the documents into smaller chunks for more efficient search.


from langchain.docstore.document import Document
from langchain.text_splitter import RecursiveCharacterTextSplitter
from smolagents import Tool
from langchain_community.retrievers import BM25Retriever
from smolagents import CodeAgent, InferenceClientModel

class PartyPlanningRetrieverTool(Tool):
    # Retrieval tool over a caller-supplied document list, backed by BM25Retriever.

    name = "party_planning_retriever"
    description = "Uses semantic search to retrieve relevant party planning ideas for Alfred’s superhero-themed party at Wayne Manor."
    # Single string input: the search query.
    inputs = {
        "query": {
            "type": "string",
            "description": "The query to perform. This should be a query related to party planning or superhero themes.",
        }
    }
    output_type = "string"

    def __init__(self, docs, **kwargs):
        """Build the retriever over `docs`; extra keyword arguments go to the base Tool."""
        super().__init__(**kwargs)
        # k=5: retrieve the top 5 documents per query.
        self.retriever = BM25Retriever.from_documents(docs, k=5)

    def forward(self, query: str) -> str:
        """Return the retrieved documents for `query`, formatted as numbered idea sections."""
        assert isinstance(query, str), "Your search query must be a string"

        hits = self.retriever.invoke(query)

        # One "===== Idea N =====" section per hit, numbered from 0.
        sections = []
        for idx, hit in enumerate(hits):
            sections.append(f"\n\n===== Idea {idx} =====\n" + hit.page_content)
        return "\nRetrieved ideas:\n" + "".join(sections)

# Toy knowledge base: each entry has the idea text and a source label.
party_ideas = [
    {
        "text": "A superhero-themed masquerade ball with luxury decor, including gold accents and velvet curtains.",
        "source": "Party Ideas 1",
    },
    {
        "text": "Hire a professional DJ who can play themed music for superheroes like Batman and Wonder Woman.",
        "source": "Entertainment Ideas",
    },
    {
        "text": "For catering, serve dishes named after superheroes, like 'The Hulk's Green Smoothie' and 'Iron Man's Power Steak.'",
        "source": "Catering Ideas",
    },
    {
        "text": "Decorate with iconic superhero logos and projections of Gotham and other superhero cities around the venue.",
        "source": "Decoration Ideas",
    },
    {
        "text": "Interactive experiences with VR where guests can engage in superhero simulations or compete in themed games.",
        "source": "Entertainment Ideas",
    },
]

# Wrap every entry in a Document, keeping the source label as metadata.
source_docs = [
    Document(page_content=idea["text"], metadata={"source": idea["source"]})
    for idea in party_ideas
]

# Chunking configuration: 500-character chunks with a 50-character overlap.
text_splitter = RecursiveCharacterTextSplitter(
    separators=["\n\n", "\n", ".", " ", ""],
    chunk_size=500,
    chunk_overlap=50,
    strip_whitespace=True,
    add_start_index=True,
)
docs_processed = text_splitter.split_documents(source_docs)

# Retriever tool over the chunked documents.
party_planning_retriever = PartyPlanningRetrieverTool(docs_processed)

# Agent that can only consult the retriever.
agent = CodeAgent(
    tools=[party_planning_retriever],
    model=InferenceClientModel(),
)

# Example query; the agent's answer is printed.
response = agent.run("Find ideas for a luxury superhero-themed party, including entertainment, catering, and decoration options.")

print(response)

In [None]:
import sys
# Run pip through the kernel's own interpreter (sys.executable); -q keeps the output quiet.
!{sys.executable} -m pip install 'smolagents[litellm]' plotly geopandas shapely kaleido -q


In [None]:
# Multi-Agent Systems in Action
# A multi-agent system consists of multiple specialized agents working together under the coordination 
# of an Orchestrator Agent. This approach enables complex workflows by distributing tasks among agents with distinct roles.

# For example, a Multi-Agent RAG system can integrate:
# A Web Agent for browsing the internet.
# A Retriever Agent for fetching information from knowledge bases.
# An Image Generation Agent for producing visuals.
# All of these agents operate under an orchestrator that manages task delegation and interaction.

# Solving a complex task with a multi-agent hierarchy
# You can follow the code in this notebook that you can run using Google Colab.
# The reception is approaching! With your help, Alfred is now nearly finished with the preparations.
# But now there’s a problem: the Batmobile has disappeared. Alfred needs to find a replacement, and find it quickly.
# Fortunately, a few biopics have been done on Bruce Wayne’s life, so maybe Alfred could get a car left behind on one 
# of the movie sets, and re-engineer it up to modern standards, which certainly would include a full self-driving option.
# But this could be anywhere in the filming locations around the world - which could be numerous.

# So Alfred wants your help. Could you build an agent able to solve this task?

# 👉 Find all Batman filming locations in the world, calculate the time to transfer via cargo plane to there, and represent them 
# on a map, with a color varying by cargo plane transfer time. Also represent some supercar factories with the same cargo plane transfer time.



# We first make a tool to get the cargo plane transfer time.
import math
from typing import Optional, Tuple

from smolagents import tool


@tool
def calculate_cargo_travel_time(
    origin_coords: Tuple[float, float],
    destination_coords: Tuple[float, float],
    cruising_speed_kmh: Optional[float] = 750.0,  # Average speed for cargo planes
) -> float:
    """
    Calculate the travel time for a cargo plane between two points on Earth using great-circle distance.

    Args:
        origin_coords: Tuple of (latitude, longitude) for the starting point
        destination_coords: Tuple of (latitude, longitude) for the destination
        cruising_speed_kmh: Optional cruising speed in km/h (defaults to 750 km/h for typical cargo planes)

    Returns:
        float: The estimated travel time in hours

    Raises:
        ValueError: If the cruising speed is not a positive number

    Example:
        >>> # Chicago (41.8781° N, 87.6298° W) to Sydney (33.8688° S, 151.2093° E)
        >>> result = calculate_cargo_travel_time((41.8781, -87.6298), (-33.8688, 151.2093))
    """
    # The annotation allows None for the speed; treat it as "use the default"
    # instead of failing later on the division.
    if cruising_speed_kmh is None:
        cruising_speed_kmh = 750.0
    # Zero would divide by zero and a negative speed would yield a meaningless
    # time, so reject both with a clear message.
    if cruising_speed_kmh <= 0:
        raise ValueError("cruising_speed_kmh must be a positive number of km/h")

    # Convert the (latitude, longitude) pairs from degrees to radians.
    lat1, lon1 = map(math.radians, origin_coords)
    lat2, lon2 = map(math.radians, destination_coords)

    # Earth's radius in kilometers
    EARTH_RADIUS_KM = 6371.0

    # Calculate great-circle distance using the haversine formula
    dlon = lon2 - lon1
    dlat = lat2 - lat1

    a = (
        math.sin(dlat / 2) ** 2
        + math.cos(lat1) * math.cos(lat2) * math.sin(dlon / 2) ** 2
    )
    # Floating-point rounding can push `a` marginally outside [0, 1] for
    # (near-)antipodal points, which would make sqrt/asin raise.
    a = min(1.0, max(0.0, a))
    c = 2 * math.asin(math.sqrt(a))
    distance = EARTH_RADIUS_KM * c

    # Add 10% to account for non-direct routes and air traffic controls
    actual_distance = distance * 1.1

    # Flight time at cruising speed, plus 1 hour for takeoff and landing procedures
    flight_time = (actual_distance / cruising_speed_kmh) + 1.0

    # Round to two decimal places (hours)
    return round(flight_time, 2)


# Sanity check with the coordinates from the docstring example (Chicago to Sydney); prints the estimated hours.
print(calculate_cargo_travel_time((41.8781, -87.6298), (-33.8688, 151.2093)))


# Setting up the agent
# For the model provider, we use Together AI
# The GoogleSearchTool uses the Serper API to search the web, so this requires either having setup env 
# variable SERPAPI_API_KEY and passing provider="serpapi" or having SERPER_API_KEY and passing provider=serper.
# If you don’t have any Serp API provider setup, you can use DuckDuckGoSearchTool but beware that it has a rate limit.

import os
from PIL import Image
from smolagents import CodeAgent, GoogleSearchTool, InferenceClientModel, VisitWebpageTool, DuckDuckGoSearchTool

# Qwen coder model, with "together" as the inference provider.
model = InferenceClientModel(
    provider="together",
    model_id="Qwen/Qwen2.5-Coder-32B-Instruct",
)

# Baseline task prompt (two lines joined by a newline); reused by later runs.
task = (
    "Find all Batman filming locations in the world, calculate the time to transfer via cargo plane to here (we're in Gotham, 40.7128° N, 74.0060° W), and return them to me as a pandas dataframe.\n"
    "Also give me some supercar factories with the same cargo plane transfer time."
)

# Single baseline agent: web search, page visits and the travel-time tool,
# with pandas authorized for the code it writes.
agent = CodeAgent(
    # Alternative with Google search:
    # tools=[GoogleSearchTool("serper"), VisitWebpageTool(), calculate_cargo_travel_time],
    tools=[
        DuckDuckGoSearchTool(),
        VisitWebpageTool(),
        calculate_cargo_travel_time,
    ],
    model=model,
    max_steps=20,
    additional_authorized_imports=["pandas"],
)

result = agent.run(task)

result



# We could already improve this a bit by throwing in some dedicated planning steps, and adding more prompting.
# Planning steps allow the agent to think ahead and plan its next steps, which can be useful for more complex tasks.
# Set the agent's planning interval to 4.
agent.planning_interval = 4

# Same task as before, prefixed with analyst-style instructions.
detailed_report = agent.run(
    "\n"
    "You're an expert analyst. You make comprehensive reports after visiting many websites.\n"
    "Don't hesitate to search for many queries at once in a for loop.\n"
    "For each data point that you find, visit the source url to confirm numbers.\n"
    "\n"
    f"{task}\n"
)

print(detailed_report)

detailed_report


# Thanks to these quick changes, we obtained a much more concise report by simply providing our agent a detailed prompt,
# and giving it planning capabilities!
# The model’s context window is quickly filling up. So if we ask our agent to combine the results of detailed search with 
# another, it will be slower and quickly ramp up tokens and costs.
# ➡️ We need to improve the structure of our system.

# ✌️ Splitting the task between two agents
# Multi-agent structures allow separating memories between different sub-tasks, with two great benefits:

# Each agent is more focused on its core task, thus more performant
# Separating memories reduces the count of input tokens at each step, thus reducing latency and cost.
# Let’s create a team with a dedicated web search agent, managed by another agent.

# The manager agent should have plotting capabilities to write its final report: so let us give it access to additional 
# imports, including plotly, and geopandas + shapely for spatial plotting.
# Model for the web agent: Qwen coder via "together", with a token cap of 8096.
model = InferenceClientModel(
    "Qwen/Qwen2.5-Coder-32B-Instruct",
    max_tokens=8096,
    provider="together",
)

# Dedicated browsing agent. Its name and description are set because it is
# later handed to another agent as a managed agent.
web_agent = CodeAgent(
    name="web_agent",
    description="Browses the web to find information",
    model=model,
    tools=[
        GoogleSearchTool(provider="serper"),
        VisitWebpageTool(),
        calculate_cargo_travel_time,
    ],
    max_steps=10,
    verbosity_level=0,
)

# The manager agent will need to do some mental heavy lifting.
# So we give it the stronger model DeepSeek-R1, and add a planning_interval to the mix.
from smolagents.utils import encode_image_base64, make_image_url
from smolagents import OpenAIServerModel


def check_reasoning_and_plot(final_answer, agent_memory):
    """Ask a multimodal model to review the agent's reasoning and its saved plot.

    Intended to be passed in an agent's ``final_answer_checks`` list.

    Args:
        final_answer: The answer proposed by the agent. It is not inspected here;
            only the saved plot and the agent's steps are reviewed.
        agent_memory: The agent's memory object. The result of its
            ``get_succinct_steps()`` method is embedded in the review prompt.

    Returns:
        True when the reviewer's feedback does not contain the word "FAIL".

    Raises:
        AssertionError: If ``saved_map.png`` does not exist.
        Exception: Carrying the reviewer's feedback when it contains "FAIL".
    """
    # Imported locally so the check does not rely on another cell having imported PIL.
    from PIL import Image

    multimodal_model = OpenAIServerModel("gpt-4o", max_tokens=8096)
    filepath = "saved_map.png"
    assert os.path.exists(filepath), "Make sure to save the plot under saved_map.png!"
    image = Image.open(filepath)
    # Adjacent string literals are joined with no separator, so every fragment
    # ends with an explicit space to keep the sentences apart.
    prompt = (
        f"Here is a user-given task and the agent steps: {agent_memory.get_succinct_steps()}. Now here is the plot that was made. "
        "Please check that the reasoning process and plot are correct: do they correctly answer the given task? "
        "First list reasons why yes/no, then write your final decision: PASS in caps lock if it is satisfactory, FAIL if it is not. "
        "Don't be harsh: if the plot mostly solves the task, it should pass. "
        "To pass, a plot should be made using px.scatter_map and not any other method (scatter_map looks nicer)."
    )
    messages = [
        {
            "role": "user",
            "content": [
                {
                    "type": "text",
                    "text": prompt,
                },
                {
                    "type": "image_url",
                    "image_url": {"url": make_image_url(encode_image_base64(image))},
                },
            ],
        }
    ]
    output = multimodal_model(messages).content
    print("Feedback: ", output)
    # Any occurrence of "FAIL" in the feedback rejects the answer.
    if "FAIL" in output:
        raise Exception(output)
    return True


# Manager agent: delegates web browsing to `web_agent`, may import the listed
# plotting/data libraries, and has its final answer vetted by
# `check_reasoning_and_plot`.
manager_agent = CodeAgent(
    model=InferenceClientModel(
        model_id="deepseek-ai/DeepSeek-R1",
        provider="together",
        max_tokens=8096,
    ),
    tools=[calculate_cargo_travel_time],
    managed_agents=[web_agent],
    additional_authorized_imports=["geopandas", "plotly", "shapely", "json", "pandas", "numpy"],
    final_answer_checks=[check_reasoning_and_plot],
    planning_interval=5,
    max_steps=15,
    verbosity_level=2,
)


manager_agent.visualize()


# The example in the prompt saves to saved_map.png, the same file name the task
# asks for and that the final-answer check looks for on disk.
manager_agent.run("""
Find all Batman filming locations in the world, calculate the time to transfer via cargo plane to here (we're in Gotham, 40.7128° N, 74.0060° W).
Also give me some supercar factories with the same cargo plane transfer time. You need at least 6 points in total.
Represent this as spatial map of the world, with the locations represented as scatter points with a color that depends on the travel time, and save it to saved_map.png!

Here's an example of how to plot and return a map:
import plotly.express as px
df = px.data.carshare()
fig = px.scatter_map(df, lat="centroid_lat", lon="centroid_lon", text="name", color="peak_hour", size=100,
     color_continuous_scale=px.colors.sequential.Magma, size_max=15, zoom=1)
fig.show()
fig.write_image("saved_map.png")
final_answer(fig)

Never try to process strings using code: when you have a string to read, just print it and you'll see it.
""")

# Display the figure object the agent left in its Python executor state.
manager_agent.python_executor.state["fig"]


In [None]:
# Install helium, selenium and python-dotenv by invoking pip through
# sys.executable, i.e. the Python interpreter running this notebook.
# The trailing `#"smolagents[all]"` is commented out and therefore not installed.
import sys
!{sys.executable} -m pip install helium selenium python-dotenv #"smolagents[all]" 


In [None]:
# Vision Agents with smolagents
# In this example, imagine Alfred, the butler at Wayne Manor, is tasked with verifying the identities of the 
# guests attending the party. As you can imagine, Alfred may not be familiar with everyone arriving. To help 
# him, we can use an agent that verifies their identity by searching for visual information about their appearance 
# using a VLM. This will allow Alfred to make informed decisions about who can enter. 


# Providing Images at the Start of the Agent’s Execution
# In this approach, images are passed to the agent at the start and stored as task_images alongside the task 
# prompt. The agent then processes these images throughout its execution.
# Consider the case where Alfred wants to verify the identities of the superheroes attending the party. He already
# has a dataset of images from previous parties with the names of the guests. Given a new visitor’s image, the agent 
# can compare it with the existing dataset and make a decision about letting them in.
# In this case, a guest is trying to enter, and Alfred suspects that this visitor might be The Joker impersonating 
# Wonder Woman. Alfred needs to verify their identity to prevent anyone unwanted from entering.
# Let’s build the example. First, the images are loaded. In this case, we use images from Wikipedia to keep the 
# example minimal, but imagine the possible use-case!
from PIL import Image
import requests
from io import BytesIO

image_urls = [
    "https://upload.wikimedia.org/wikipedia/commons/e/e8/The_Joker_at_Wax_Museum_Plus.jpg", # Joker image
    "https://upload.wikimedia.org/wikipedia/en/9/98/Joker_%28DC_Comics_character%29.jpg" # Joker image
]

# The same browser-like User-Agent is sent with every request, so build it once.
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36"
}

images = []
for url in image_urls:
    # A timeout keeps the cell from hanging on a stalled connection.
    response = requests.get(url, headers=headers, timeout=30)
    # Fail with a clear HTTP error instead of handing an error page to PIL.
    response.raise_for_status()
    image = Image.open(BytesIO(response.content)).convert("RGB")
    images.append(image)


# Now that we have the images, the agent will tell us whether one guest is actually a superhero (Wonder Woman) or a villain (The Joker).
from smolagents import CodeAgent, OpenAIServerModel

model = OpenAIServerModel(model_id="gpt-4o")

# Agent without tools: it only has the model and the images supplied with the task.
agent = CodeAgent(model=model, tools=[], verbosity_level=2, max_steps=20)

# The task text keeps its original leading/trailing whitespace; the downloaded
# images are handed over through the `images` argument.
response = agent.run(
    "\n"
    "    Describe the costume and makeup that the comic character in these photos is wearing and return the description.\n"
    "    Tell me if the guest is The Joker or Wonder Woman.\n"
    "    ",
    images=images,
)


# Providing Images with Dynamic Retrieval
# The previous approach is valuable and has many potential use cases. However, in situations where the guest 
# is not in the database, we need to explore other ways of identifying them. One possible solution is to dynamically 
# retrieve images and information from external sources, such as browsing the web for details.

# In this approach, images are dynamically added to the agent’s memory during execution. As we know, agents in 
# smolagents are based on the MultiStepAgent class, which is an abstraction of the ReAct framework. This class 
# operates in a structured cycle where various variables and knowledge are logged at different stages:

# SystemPromptStep: Stores the system prompt.
# TaskStep: Logs the user query and any provided input.
# ActionStep: Captures logs from the agent’s actions and results.
# This structured approach allows agents to incorporate visual information dynamically and respond adaptively 
# to evolving tasks. When browsing, the agent can take screenshots and 
# save them as observation_images in the ActionStep.

# We’ll need a set of agent tools specifically designed for browsing, such as search_item_ctrl_f, go_back, and 
# close_popups. These tools allow the agent to act like a person navigating the web.
@tool
def search_item_ctrl_f(text: str, nth_result: int = 1) -> str:
    """
    Searches for text on the current page via Ctrl + F and jumps to the nth occurrence.
    Args:
        text: The text to search for
        nth_result: Which occurrence to jump to (default: 1)
    """
    # Occurrences are 1-based; 0 or a negative value would otherwise silently
    # index from the end of the match list.
    if nth_result < 1:
        raise Exception(f"nth_result must be 1 or greater (got {nth_result})")

    # Build a valid XPath string literal even when `text` itself contains quotes.
    if "'" not in text:
        xpath_literal = f"'{text}'"
    elif '"' not in text:
        xpath_literal = f'"{text}"'
    else:
        # Both quote kinds are present: split on single quotes and glue the
        # pieces back together with XPath concat().
        pieces = ", \"'\", ".join(f"'{piece}'" for piece in text.split("'"))
        xpath_literal = f"concat({pieces})"

    elements = driver.find_elements(By.XPATH, f"//*[contains(text(), {xpath_literal})]")
    if nth_result > len(elements):
        raise Exception(f"Match n°{nth_result} not found (only {len(elements)} matches found)")
    result = f"Found {len(elements)} matches for '{text}'."
    elem = elements[nth_result - 1]
    # Scroll the chosen match into view in the browser.
    driver.execute_script("arguments[0].scrollIntoView(true);", elem)
    result += f" Focused on element {nth_result} of {len(elements)}"
    return result


@tool
def go_back() -> None:
    """Goes back to previous page."""
    # Calls back() on the module-level `driver` object
    # (presumably the Selenium WebDriver used by the other browsing tools — confirm).
    driver.back()


@tool
def close_popups() -> str:
    """
    Closes any visible modal or pop-up on the page. Use this to dismiss pop-up windows! This does not work on cookie consent banners.
    """
    # Send a single ESCAPE key press to the page through the shared driver.
    webdriver.ActionChains(driver).send_keys(Keys.ESCAPE).perform()
    # Return a confirmation so the declared `-> str` return type actually holds
    # and the caller gets feedback instead of None.
    return "Sent ESCAPE to the page to close any visible pop-ups."


# We also need functionality for saving screenshots, as this will be an essential part of what our VLM 
# agent uses to complete the task. This functionality captures the screenshot and saves it in 
# step_log.observations_images = [image.copy()], allowing the agent to store and process the images dynamically as it navigates.

def save_screenshot(step_log: ActionStep, agent: CodeAgent) -> None:
    """Attach a browser screenshot and the current URL to the given step.

    Written to be used as an agent step callback.

    Args:
        step_log: The step to annotate. Its ``observations_images`` is replaced by
            a one-element list holding the screenshot, and the current URL is
            appended to its ``observations``.
        agent: The running agent. Steps in ``agent.logs`` that are at least two
            steps older than ``step_log`` have their screenshots dropped.

    Returns:
        None. When helium reports no active driver the step is left untouched.
    """
    sleep(1.0)  # Let JavaScript animations happen before taking the screenshot
    driver = helium.get_driver()
    if driver is None:
        # No browser session: there is nothing to capture and no URL to report.
        return

    current_step = step_log.step_number
    # Remove previous screenshots from logs for lean processing. The check must
    # look at each *earlier* step, not at the step currently being annotated.
    for previous_step in agent.logs:
        if isinstance(previous_step, ActionStep) and previous_step.step_number <= current_step - 2:
            previous_step.observations_images = None

    png_bytes = driver.get_screenshot_as_png()
    image = Image.open(BytesIO(png_bytes))
    print(f"Captured a browser screenshot: {image.size} pixels")
    step_log.observations_images = [image.copy()]  # Create a copy to ensure it persists, important!

    # Update observations with current URL
    url_info = f"Current url: {driver.current_url}"
    step_log.observations = url_info if step_log.observations is None else step_log.observations + "\n" + url_info


# This function is passed to the agent as step_callback, as it’s triggered at the end of each step during 
# the agent’s execution. This allows the agent to dynamically capture and store screenshots throughout its process.
# Now, we can generate our vision agent for browsing the web, providing it with the tools we created, along with 
# the DuckDuckGoSearchTool to explore the web. This tool will help the agent retrieve necessary information for
# verifying guests’ identities based on visual cues.

from smolagents import CodeAgent, OpenAIServerModel, DuckDuckGoSearchTool

model = OpenAIServerModel(model_id="gpt-4o")

# Browsing agent: web search plus the three navigation tools, with
# `save_screenshot` invoked as a step callback.
agent = CodeAgent(
    model=model,
    tools=[DuckDuckGoSearchTool(), go_back, close_popups, search_item_ctrl_f],
    step_callbacks=[save_screenshot],
    additional_authorized_imports=["helium"],
    verbosity_level=2,
    max_steps=20,
)

# With that, Alfred is ready to check the guests' identities and make informed decisions about whether to let them into the party:
agent.run(
    "\n"
    "I am Alfred, the butler of Wayne Manor, responsible for verifying the identity of guests at party. A superhero has arrived at the entrance claiming to be Wonder Woman, but I need to confirm if she is who she says she is.\n"
    "\n"
    "Please search for images of Wonder Woman and generate a detailed visual description based on those images. Additionally, navigate to Wikipedia to gather key details about her appearance. With this information, I can determine whether to grant her access to the event.\n"
    + helium_instructions
)


In [None]:
# Create a Basic Code Agent with Web Search Capability
# InferenceClientModel is the model class the earlier cells of this notebook
# use; it replaces the legacy HfApiModel name here for consistency.
from smolagents import CodeAgent, DuckDuckGoSearchTool, InferenceClientModel

agent = CodeAgent(
    tools=[DuckDuckGoSearchTool()],
    model=InferenceClientModel("Qwen/Qwen2.5-Coder-32B-Instruct")
)


# Set Up a Multi-Agent System with Manager and Web Search Agents
# Imported here so this snippet does not rely on an import that happens after first use.
from smolagents import CodeAgent, ToolCallingAgent

# Sub-agent that runs web searches and visits pages on behalf of the manager.
web_agent = ToolCallingAgent(
    tools=[DuckDuckGoSearchTool(), visit_webpage],
    model=model,
    max_steps=10,
    name="search",
    description="Runs web searches for you."
)

# Manager agent: has no tools of its own and delegates to `web_agent`.
manager_agent = CodeAgent(
    tools=[],
    model=model,
    managed_agents=[web_agent],
    additional_authorized_imports=["time", "numpy", "pandas"]
)


# Configure Agent Security Settings
from smolagents import CodeAgent

# smolagents selects the remote E2B sandbox through the `executor_type`
# argument of the agent; there is no `E2BSandbox` class or `sandbox=` parameter.
# NOTE(review): running this presumably needs the E2B extra installed and an
# E2B API key configured — confirm against the smolagents documentation.
agent = CodeAgent(
    tools=[],
    model=model,
    executor_type="e2b",
    additional_authorized_imports=["numpy"]
)


# Implement a Tool-Calling Agent
from smolagents import ToolCallingAgent

# Agent limited to five steps that works through `custom_tool`.
agent = ToolCallingAgent(
    name="tool_agent",
    description="Executes specific tools based on input",
    model=model,
    tools=[custom_tool],
    max_steps=5,
)


# Set Up Model Integration
# InferenceClientModel is the model class used by the earlier cells of this
# notebook; it replaces the legacy HfApiModel name here for consistency.
from smolagents import InferenceClientModel, LiteLLMModel

# Hugging Face model
hf_model = InferenceClientModel("Qwen/Qwen2.5-Coder-32B-Instruct")

# Alternative model via LiteLLM
other_model = LiteLLMModel("anthropic/claude-3-sonnet")