# SmolAgents
### Zona de testeo para esta librería como añadido para mejorar la app de scraping

In [3]:
import os
import pandas as pd 
import seaborn as sns 
import matplotlib.pyplot as plt
from smolagents import ToolCallingAgent, tool
import google.generativeai as genai_d
from dotenv import load_dotenv 
from smolagents import LiteLLMModel

In [4]:
from google import genai 


In [5]:
from google.genai.types import Tool, GenerateContentConfig, GoogleSearch 
from smolagents import Tool as SmolAgentToolBase

In [6]:
from pydantic import BaseModel, Field

In [7]:
load_dotenv()

True

In [8]:
twitter_api_key = os.getenv('twitter_api_key')
twitter_api_secret = os.getenv('twitter_api_secret')
twitter_access_token = os.getenv('twitter_access_token')
twitter_access_token_secret = os.getenv('twitter_access_token_secret')
twitter_bearer_token = os.getenv('twitter_bearer_token')
youtube_api_key = os.getenv('YOUTUBE_API_KEY')
google_maps_api_key = os.getenv('MAPS_API_KEY')  
gemini_api_key = os.getenv('GEMINI_API_KEY')
detect_language_api_key = os.getenv("DETECT_LANGUAGE_API_KEY")
open_ai_api_key = os.getenv("OPEN_AI_API_KEY")
deepseek_api_key = os.getenv("DEEP_SEEK_API_KEY")
scrape_graph_api_key = os.getenv("SCRAPE_GRAPH_API_KEY")
open_router_api_key = os.getenv("OPEN_ROUTER_API_KEY")
huggingface_api_key = os.getenv("HUGGING_FACE_API_KEY")

In [9]:
### Test DFs
mainPath = 'E:/Users/1167486/Downloads'
testGenericWPage = pd.read_csv(mainPath + '/' + 'test_generic_webpage.csv')
testMaps = pd.read_csv(mainPath + '/' + 'test_maps.csv')
testPlaystore = pd.read_csv(mainPath + '/' + 'test_playstore.csv')
testReddit = pd.read_csv(mainPath + '/' + 'test_reddit.csv')
testWiki = pd.read_csv(mainPath + '/' + 'test_wiki.csv')
testYoutube = pd.read_csv(mainPath + '/' + 'test_youtube.csv')



In [10]:
client = genai.Client(api_key = gemini_api_key)

In [11]:
model_name = 'gemini-2.0-flash-001'

In [12]:
response = client.models.generate_content(model = model_name, contents='Dime un dato curioso')
response.text

'Aquí tienes un dato curioso:\n\nLa miel nunca se echa a perder. Debido a su bajo contenido de agua y su acidez, la miel puede conservarse indefinidamente. Se han encontrado frascos de miel en tumbas egipcias que tienen miles de años y aún son comestibles.\n'

In [13]:
from typing import ClassVar, Dict, Any

## Probando la búsqueda web basada en agentes

In [14]:
@tool 
def google_search_tool(query: str) -> str:
    """
    Realiza una búsqueda en la web con el parámetro (query) ingresado
    Args:
        query: (str): El término o términos que se buscarán en el navegador
    Returns:
        str: Una cadena que contiene los resultados agregados de la búsqueda
    """
    try:
        search_tool_for_llm = Tool(
            google_search = GoogleSearch()
        )
        response  = client.models.generate_content(
            model = model_name,
            contents  = f"Busca información en la web sobre lo siguiente: {query}",
            config = GenerateContentConfig(
                tools = [search_tool_for_llm],
                response_modalities=["TEXT"]
            )
        )    
        result = ""
        '''
        for part in response.candidates[0].content.parts:
            result  += part.text 
        
        try:
            sources = response.candidates[0].grounding_metadata.search_entry_point.rendered_content
            result += "\n\nSources:\n" + sources
        except (AttributeError, IndexError):
            pass 
        '''
        if response.candidates:
            candidate = response.candidates[0]
            for part in candidate.content.parts:
                if hasattr(part, 'text'): 
                    result += part.text

            try:
                if candidate.grounding_metadata and \
                   candidate.grounding_metadata.search_entry_point and \
                   hasattr(candidate.grounding_metadata.search_entry_point, 'rendered_content'):
                    sources = candidate.grounding_metadata.search_entry_point.rendered_content
                    if sources: 
                        result += "\n\nSources:\n" + sources
            except (AttributeError, IndexError, TypeError): 
                pass         
        return str(result)
    except Exception as e: 
        return f"Error al hacer la búsqueda {str(e)}"
    



In [15]:
#google_search_tool('Pato')

In [16]:
def run_improved_search():
    print("Gemini Improved Search Example")
    print("=============================\n")
    print("This example demonstrates the improved approach for Google Search integration.\n")
    
    # Create a smolagents agent with our search tool
    from smolagents import LiteLLMModel
    
    # Configure the model to use Google's Gemini API via LiteLLM
    model = LiteLLMModel(
        model_id="gemini/gemini-2.0-flash-001",
        api_key=gemini_api_key,
    )
    

    agent = ToolCallingAgent(
        name="ImprovedSearchAgent",
        model=model,
        tools=[google_search_tool],
    )
    
    while True:
        # Get search query from user
        query = input("\nEnter your search query (or 'exit' to quit): ")
        
        if query.lower() == 'exit':
            print("Goodbye!")
            break
        
        try:
            # Prompt for the search task
            prompt = f"""
            I need information about: {query}
            
            Please use the google_search_tool to find accurate and up-to-date information.
            Provide a comprehensive answer based on the search results.
            Include relevant facts, figures, and details.
            """
            
            # Run the agent
            print("\nSearching...")
            response = agent.run(prompt)
            print(f"\nResults:\n{response}\n")
        
        except Exception as e:
            print(f"Error: {str(e)}")
            print("There was an error communicating with the Gemini API.")
            print("Please check your API key and internet connection.")

In [17]:
#run_improved_search()

## Probando la búsqueda web y generando gráficos acorde a la búsqueda

In [18]:
from PIL import Image
from smolagents import CodeAgent
from huggingface_hub import login 

In [19]:
login(huggingface_api_key)

In [20]:
request_testeo = """
Puedes darme una lista del top 3 de museos en el mundo de 2024 basado en sus visitas, también dame en número de visitantes en millones 
para ese año y la temperatura diaria promedio en Julio para cada ciudad de los museos.
Formatealo en formato df en Pandas o Polars
"""

In [21]:
modelGemini = LiteLLMModel(
    model_id="gemini/gemini-2.0-flash-001",
    api_key=gemini_api_key,
)

In [22]:
from smolagents import InferenceClientModel

agenttest = CodeAgent(
    model = modelGemini,
    tools=[]
)
agenttest.run('Calcula el número 100 de Fibonacci')

354224848179261915075

In [23]:
@tool
def _execute_google_search(query: str) -> str:
    """
    Realiza una búsqueda en la web con el parámetro (query) ingresado
    Args:
        query: (str): El término o términos que se buscarán en el navegador
    Returns:
        str: Una cadena que contiene los resultados agregados de la búsqueda
    """
    try:
        search_tool_for_llm = Tool(
            google_search = GoogleSearch()
        )
        response = client.models.generate_content(
            model = model_name, 
            contents = f"Busca información en la web sobre lo siguiente: {query}",
            config = GenerateContentConfig(
                tools = [search_tool_for_llm],
                response_modalities = ["TEXT"]
            )
        )
        result = ""
        if response.candidates:
            candidate = response.candidates[0]
            for part in candidate.content.parts:
                if hasattr(part, 'text'):
                    result += part.text
                try:
                    if candidate.grounding_metadata and \
                        candidate.grounding_metadata.search_entry_point and \
                        hasattr(candidate.grounding_metadata.search_entry_point, 'rendered_content'):
                        sources = candidate.grounding_metadata.search_entry_point.rendered_content
                        if sources: 
                            result += '\n\ Fuentes:\n' + sources
                except (AttributeError, IndexError, TypeError):
                    pass 
        return str(result )
    except Exception as e:
        return f"Error al hacer la búsqueda en san Google: {str(e)}"


class GoogleSearchTool(SmolAgentToolBase):
    name: str = "google_search_tool"
    description: str = (
        "Realiza una búsqueda en la web con el parámetro (query) ingresado. "
        "Args: query (str): El término o términos que se buscarán en el navegador. "
        "Returns: str: Una cadena que contiene los resultados agregados de la búsqueda."
    )
    class ArgsSchema(BaseModel):
        query: str = Field(description="El término o terminos que se buscarán")
    args_schema = ArgsSchema
    def _run(self, query: str) -> str:
        return _execute_google_search(query)


class VisitWebpageTool(SmolAgentToolBase):
    """
        Modernizar la búsqueda
    """
    #name = "visit_webpage"
    name: ClassVar[str] = "visit_webpage"
    description: ClassVar[str] = (
        "Visits a webpage at the given url and reads its content as a markdown string. Use this to browse webpages."
    )
    inputs: ClassVar[Dict[str, Dict[str, str]]] ={
        "url": {
            "type": "string",
            "description": "The url of the webpage to visit.",
        }
    }
    output_type: ClassVar[str] = "string"

    def forward(self, url: str) -> str:
        try:
            import re

            import requests
            from markdownify import markdownify
            from requests.exceptions import RequestException

            from smolagents.utils import truncate_content
        except ImportError as e:
            raise ImportError(
                "You must install packages `markdownify` and `requests` to run this tool: for instance run `pip install markdownify requests`."
            ) from e
        try:
            response = requests.get(url, timeout=20)
            response.raise_for_status()  # Raise an exception for bad status codes
            markdown_content = markdownify(response.text).strip()
            markdown_content = re.sub(r"\n{3,}", "\n\n", markdown_content)
            return truncate_content(markdown_content, 40000)

        except requests.exceptions.Timeout:
            return "The request timed out. Please try again later or check the URL."
        except RequestException as e:
            return f"Error fetching the webpage: {str(e)}"
        except Exception as e:
            return f"An unexpected error occurred: {str(e)}"


  result += '\n\ Fuentes:\n' + sources


In [24]:
agent = CodeAgent(
    model = modelGemini,
    tools = [google_search_tool, VisitWebpageTool()],
    max_steps=2 
)
agent.logger.console.width=66
result = agent.run(request_testeo)

In [25]:
#modified code to handle case where agent doesn't return a python list.
import pandas as pd

try:
    display(pd.DataFrame(result))
except Exception as e:
    print("Could not display as DataFrame:", e)
    print(result)

Could not display as DataFrame: DataFrame constructor not properly called!
```tool_code
import pandas as pd

data = {
    'Museum': ['Louvre Museum', 'Vatican Museums', 'British Museum'],
    'City': ['Paris', 'Vatican City', 'London'],
    'Visitors (millions)': [8.7, 6.8, 6.4]
}

df = pd.DataFrame(data)

paris_temp_search = google_search_tool(query="average daily temperature in Paris in July")
vatican_city_temp_search = google_search_tool(query="average daily temperature in Vatican City in July")
london_temp_search = google_search_tool(query="average daily temperature in London in July")

print(f"Paris Temperature Search: {paris_temp_search}")
print(f"Vatican City Temperature Search: {vatican_city_temp_search}")
print(f"London Temperature Search: {london_temp_search}")
```


## Generar un agente múltiple

In [26]:
#code_model ="gemini-2.0-flash-lite"
code_model = "gemini/gemini-2.0-flash-lite"

In [27]:
web_agent = CodeAgent(
    model = LiteLLMModel(
        model_id=code_model,
        api_key=gemini_api_key
    ),
    tools=[google_search_tool, VisitWebpageTool()],
    max_steps= 2,
    name="web_agent",
    description = "Busca en internet por ti"
)
web_agent.logger.console.width = 66


In [28]:
from smolagents import Tool as SmolAgentToolBase
from typing import Any 
from smolagents.utils import make_image_url, encode_image_base64


In [39]:
def check_reasoning_and_plot(final_answer, agent_memory):
    final_answer
    multimodal_model = LiteLLMModel(
        model_id=code_model,
        api_key=gemini_api_key
    ),
    filepath = "saved_map.png"
    assert os.path.exists(filepath), "Make sure to save the plot under saved_map.png!"
    image = Image.open(filepath)
    prompt = (
        f"Here is a user-given task and the agent steps: {agent_memory.get_succinct_steps()}. Now here is the plot that was made."
        "Please check that the reasoning process and plot are correct: do they correctly answer the given task?"
        "First list reasons why yes/no, then write your final decision: PASS in caps lock if it is satisfactory, FAIL if it is not."
        "Don't be harsh: if the plot mostly solves the task, it should pass."
        "To pass, a plot should be made using px.scatter_map and not any other method (scatter_map looks nicer)."
        "Also, any run that invents numbers should fail."
    )
    messages = [
        {
            "role": "user",
            "content": [
                {
                    "type": "text",
                    "text": prompt,
                },
                {
                    "type": "image_url",
                    "image_url": {"url": make_image_url(encode_image_base64(image))},
                },
            ],
        }
    ]
    output = multimodal_model(messages).content
    print("Feedback: ", output)
    if "FAIL" in output:
        raise Exception(output)
    return True


manager_agent = CodeAgent(
    model = LiteLLMModel(
        model_id=code_model,
        api_key=gemini_api_key
    ),
    tools=[],
    managed_agents=[web_agent],
    additional_authorized_imports=[
        "geopandas",
        "plotly",
        "plotly.express", 
        "plotly.express.colors", 
        "shapely",
        "json",
        "pandas",
        "numpy",
    ],
    planning_interval=5,
    verbosity_level=2,
    final_answer_checks=[check_reasoning_and_plot],
    max_steps=2,
)
manager_agent.logger.console.width=66

In [40]:
manager_agent.visualize()

In [41]:
import os
os.path.exists("saved_map.png") and os.remove("saved_map.png")

False

In [42]:
returned_figure = manager_agent.run(f"""
{request_testeo}

Luego hazme un mapa espacial del mundo usando px.scatter_map, con los museos más grandes 
representados como puntos de dispersión de tamaño dependiente del número de visitantes y color dependiente 
de la temperatura media en julio.
Guarda el mapa en saved_map.png, ¡y devuélvelo!

He aquí un ejemplo de cómo trazar y devolver un mapa:
import plotly.express as px
df = px.data.carshare()
fig = px.scatter_map(df, lat=«centroid_lat», lon=«centroid_lon», text=«name», color=«peak_hour»,
     color_continuous_scale=px.colors.sequential.Magma_r, size_max=15, zoom=1)
fig.show()
respuesta_final(fig)

¡No inventes ningún número! Sólo debes utilizar números sacados de Internet.

""")

In [44]:
if returned_figure is not None and hasattr(returned_figure, 'show'):
    returned_figure.update_layout(width=700, height=700)
    returned_figure.show()
    print("Figure displayed from the agent's return value.")
else:
    print("The agent did not return a plottable figure.")
    print(f"Type of returned object: {type(returned_figure)}")
    print(f"Returned object: {returned_figure}")
    # You could add a fallback to check manager_agent.python_executor.state if needed for debugging:
    # if "fig" in manager_agent.python_executor.state:
    #     print("Attempting to plot from agent.python_executor.state['fig'] as a fallback...")
    #     fig_from_state = manager_agent.python_executor.state["fig"]
    #     if hasattr(fig_from_state, 'show'):
    #         fig_from_state.update_layout(width=700, height=700)
    #         fig_from_state.show()

    
#fig = manager_agent.python_executor.state["fig"]
#fig.update_layout(width=700, height=700)
#fig.show()

The agent did not return a plottable figure.
Type of returned object: <class 'smolagents.agent_types.AgentText'>
Returned object: I apologize for the repeated errors. It seems I am unable to directly search the web. I will, therefore, provide a response based on hypothetical data, as I'm unable to access the web to retrieve actual 2024 visitor data and temperatures.

Here's how I would solve the problem *if* I could access the web:

**1. Hypothetical Data Retrieval and Compilation:**

Let's *assume* the top 3 museums and some associated data are as follows (This data is entirely hypothetical):

*   **Museum 1: Louvre Museum, Paris, France**
    *   Visitors (2024): 9.5 million
    *   Latitude: 48.8606
    *   Longitude: 2.3376
    *   Average July Temperature: 20°C
*   **Museum 2: National Museum of China, Beijing, China**
    *   Visitors (2024): 7.8 million
    *   Latitude: 39.9089
    *   Longitude: 116.3972
    *   Average July Temperature: 26°C
*   **Museum 3: Metropolitan Museu