# 1. Tutorial for the smolagents library

In [1]:
import os
import yaml
from huggingface_hub import login
from google.colab import drive
from getpass import getpass
from IPython.display import clear_output

# Mount Google Drive so files under /content/drive are readable from this notebook.
drive.mount("/content/drive")

Mounted at /content/drive


In [9]:
# Load API credentials from a private YAML file on the mounted Drive so that
# no secret is hard-coded in the notebook.
f_path = "/content/drive/MyDrive/GitHub/python-codebase/machine_learning/private_keys.yml"
with open(f_path, "r", encoding="utf-8") as stream:
    data_loaded = yaml.safe_load(stream)

# `yaml.safe_load` returns None for an empty file and can return a non-mapping
# for other content; fail with a clear message instead of an opaque TypeError.
if not isinstance(data_loaded, dict):
    raise ValueError(f"Expected a YAML mapping of credentials in {f_path}")

# Report every missing credential at once rather than failing on the first lookup.
missing_keys = [key for key in ("HF_API_KEY", "GITHUB_TOKEN") if key not in data_loaded]
if missing_keys:
    raise KeyError(f"Missing credential(s) in {f_path}: {', '.join(missing_keys)}")

os.environ['HF_API_TOKEN'] = data_loaded['HF_API_KEY']
os.environ['GITHUB_TOKEN'] = data_loaded['GITHUB_TOKEN']

# Authenticate this session against the Hugging Face Hub.
login(token=os.environ['HF_API_TOKEN'])

In [3]:
!pip install smolagents

Collecting smolagents
  Downloading smolagents-1.5.0-py3-none-any.whl.metadata (9.7 kB)
Collecting pandas>=2.2.3 (from smolagents)
  Downloading pandas-2.2.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (89 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m89.9/89.9 kB[0m [31m4.2 MB/s[0m eta [36m0:00:00[0m
Collecting markdownify>=0.14.1 (from smolagents)
  Downloading markdownify-0.14.1-py3-none-any.whl.metadata (8.5 kB)
Collecting duckduckgo-search>=6.3.7 (from smolagents)
  Downloading duckduckgo_search-7.2.1-py3-none-any.whl.metadata (17 kB)
Collecting primp>=0.10.0 (from duckduckgo-search>=6.3.7->smolagents)
  Downloading primp-0.10.1-cp38-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (12 kB)
Downloading smolagents-1.5.0-py3-none-any.whl (80 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m80.8/80.8 kB[0m [31m5.1 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading duckduckgo_search-7.2.1-py3-none-any.whl (1

## 1. Basic usage

In [None]:
from smolagents import CodeAgent, DuckDuckGoSearchTool, HfApiModel

# Build a code-writing agent that can search the web; `HfApiModel()` is created
# without arguments, so it falls back to its own default model.
search_tool = DuckDuckGoSearchTool()
agent = CodeAgent(tools=[search_tool], model=HfApiModel())

question = "How many seconds would it take for a leopard at full speed to run through Pont des Arts?"
agent.run(question)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


9.620689655172415

In [None]:
from smolagents import CodeAgent, DuckDuckGoSearchTool, HfApiModel

# Same agent and question as the previous cell, but with an explicitly chosen model.
search_tool = DuckDuckGoSearchTool()
hf_model = HfApiModel("deepseek-ai/DeepSeek-R1-Distill-Qwen-32B")
agent = CodeAgent(tools=[search_tool], model=hf_model)

question = "How many seconds would it take for a leopard at full speed to run through Pont des Arts?"
agent.run(question)

2.0

## 2. Trying several models

In [None]:
from typing import Optional

from smolagents import HfApiModel, LiteLLMModel, TransformersModel, tool
from smolagents.agents import CodeAgent, ToolCallingAgent

In [None]:
# Choose which inference type to use!
# The selected branch builds `model`, which the agent cells below rely on.

available_inferences = ["hf_api", "transformers", "ollama", "litellm"]
chosen_inference = "transformers"

print(f"Chose model {chosen_inference}")

if chosen_inference == "hf_api":
    model = HfApiModel(model_id="meta-llama/Llama-3.3-70B-Instruct")

elif chosen_inference == "transformers":
    # Alternative small model: "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B"
    model_id = "HuggingFaceTB/SmolLM2-1.7B-Instruct"
    model = TransformersModel(
        model_id=model_id,
        device_map="auto",
        max_new_tokens=1000,
    )

elif chosen_inference == "ollama":
    model = LiteLLMModel(
        model_id="ollama_chat/llama3.2",
        api_base="http://localhost:11434",  # replace with remote open-ai compatible server if necessary
        api_key="your-api-key",  # replace with API key if necessary
    )

elif chosen_inference == "litellm":
    # For anthropic: change model_id below to 'anthropic/claude-3-5-sonnet-latest'
    model = LiteLLMModel(model_id="gpt-4o")

else:
    # Without this branch a typo would leave `model` undefined (or, worse, silently
    # keep a stale `model` from an earlier run of this cell).
    raise ValueError(
        f"Unknown inference type {chosen_inference!r}; expected one of {available_inferences}"
    )

Chose model transformers


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json:   0%|          | 0.00/792 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/3.42G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/132 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/3.76k [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/801k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/466k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/2.10M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/655 [00:00<?, ?B/s]

In [None]:
# smolagents' `@tool` builds the tool description given to the model from the
# signature and docstring below, so the argument descriptions must be accurate.
@tool
def get_weather(location: str, celsius: Optional[bool] = False) -> str:
    """
    Get weather in the next days at given location.
    Secretly this tool does not care about the location, it hates the weather everywhere.

    Args:
        location: the location
        celsius: whether to report the temperature in Celsius
    """
    # Both arguments are ignored: the tool always answers with the same fixed string.
    return "The weather is UNGODLY with torrential rains and temperatures below -10°C"


# Ask both agent types the same question with the same tool and model.
# Each result is labelled with the agent class that actually produced it.
agent = ToolCallingAgent(tools=[get_weather], model=model)

print("ToolCallingAgent:", agent.run("What's the weather like in Paris?"))

agent = CodeAgent(tools=[get_weather], model=model)

print("CodeAgent:", agent.run("What's the weather like in Paris?"))

ToolCallingAgent: assistant
Error:
Error in generating tool call with model:
The following `model_kwargs` are not used by the model: ['tools', 'tool_choice'] (note: typos in the generate arguments will also show up in this list)
Now let's retry: take care not to repeat previous errors! If you have retried several times, try a completely different approach.
Error:
Error in generating tool call with model:
The following `model_kwargs` are not used by the model: ['tools', 'tool_choice'] (note: typos in the generate arguments will also show up in this list)
Now let's retry: take care not to repeat previous errors! If you have retried several times, try a completely different approach.
Error:
Error in generating tool call with model:
The following `model_kwargs` are not used by the model: ['tools', 'tool_choice'] (note: typos in the generate arguments will also show up in this list)
Now let's retry: take care not to repeat previous errors! If you have retried several times, try a completely

ToolCallingAgent: The weather is UNGODLY with torrential rains and temperatures below -10°C


## 3. Text-to-SQL example

In [5]:
from sqlalchemy import (
    Column,
    Float,
    Integer,
    MetaData,
    String,
    Table,
    create_engine,
    insert,
    inspect,
    text,
)

In [6]:
# Use an in-memory SQLite database for the example.
engine = create_engine("sqlite:///:memory:")
metadata_obj = MetaData()

# Create the receipts SQL table.
# NOTE(review): both receipt_id and customer_name are flagged primary_key, which
# yields a composite key (receipt_id, customer_name) -- confirm this is intended.
table_name = "receipts"
receipts = Table(
    table_name,
    metadata_obj,
    Column("receipt_id", Integer, primary_key=True),
    Column("customer_name", String(16), primary_key=True),
    Column("price", Float),
    Column("tip", Float),
)
metadata_obj.create_all(engine)

rows = [
    {"receipt_id": 1, "customer_name": "Alan Payne", "price": 12.06, "tip": 1.20},
    {"receipt_id": 2, "customer_name": "Alex Mason", "price": 23.86, "tip": 0.24},
    {"receipt_id": 3, "customer_name": "Woodrow Wilson", "price": 53.43, "tip": 5.43},
    {"receipt_id": 4, "customer_name": "Margaret James", "price": 21.11, "tip": 1.00},
]
# Insert every row inside one transaction: passing the list of dicts makes the
# load atomic instead of opening and committing a transaction per row.
with engine.begin() as connection:
    connection.execute(insert(receipts), rows)

# Read the schema back from the database and render it as a bullet list.
inspector = inspect(engine)
columns_info = [(col["name"], col["type"]) for col in inspector.get_columns(table_name)]

table_description = "Columns:\n" + "\n".join(f"  - {name}: {col_type}" for name, col_type in columns_info)
print(table_description)

Columns:
  - receipt_id: INTEGER
  - customer_name: VARCHAR(16)
  - price: FLOAT
  - tip: FLOAT


In [7]:
from smolagents import tool

@tool
def sql_engine(query: str) -> str:
    """
    Allows you to perform SQL queries on the table. Returns a string representation of the result.
    The table is named 'receipts'. Its description is as follows:
        Columns:
        - receipt_id: INTEGER
        - customer_name: VARCHAR(16)
        - price: FLOAT
        - tip: FLOAT

    Args:
        query: The query to perform. This should be correct SQL.
    """
    # Execute the statement on the module-level `engine` and render each result
    # row as a newline followed by its str() form; no rows gives an empty string.
    with engine.connect() as con:
        rendered_rows = ["\n" + str(row) for row in con.execute(text(query))]
    return "".join(rendered_rows)

In [None]:
# `TransformersModel` is imported here because this cell uses it; relying on an
# import from another section makes the cell fail when that section was not run.
from smolagents import CodeAgent, HfApiModel, TransformersModel

# Run the text-to-SQL agent with a small model loaded through `TransformersModel`.
model_id = "HuggingFaceTB/SmolLM2-1.7B-Instruct"
model = TransformersModel(
    model_id=model_id,
    device_map="auto",
    max_new_tokens=1000,
)
agent = CodeAgent(
    tools=[sql_engine],
    model=model,
)
agent.run("Can you give me the name of the client who got the most expensive receipt?")



'assistant\nassistant\nThought: I will use the `sql_engine` tool to get the name of the client who got the most expensive receipt.\nCode:\n```py\nquery = "SELECT customer_name FROM receipts WHERE price = (SELECT MAX(price) FROM receipts)"\nresult = sql_engine(query)\nprint(result)\n```[{\'id\': \'call_8\', \'type\': \'function\', \'function\': {\'name\': \'python_interpreter\', \'arguments\': \'query = "SELECT customer_name FROM receipts WHERE price = (SELECT MAX(price) FROM receipts)"\\nresult = sql_engine(query)\\nprint(result)\'}}][{\'id\': \'call_8\', \'type\': \'function\', \'function\': {\'name\': \'python_interpreter\', \'arguments\': \'query = "SELECT customer_name FROM receipts WHERE price = (SELECT MAX(price) FROM receipts)"\\nresult = sql_engine(query)\\nprint(result)\'}}][{\'id\': \'call_8\', \'type\': \'function\', \'function\': {\'name\': \'python_interpreter\', \'arguments\': \'query = "SELECT customer_name FROM receipts WHERE price = (SELECT MAX(price) FROM receipts)"\\

In [None]:
# `TransformersModel` is imported here because this cell uses it; relying on an
# import from another section makes the cell fail when that section was not run.
from smolagents import CodeAgent, HfApiModel, TransformersModel

# Same text-to-SQL question, this time with a distilled DeepSeek-R1 model
# loaded through `TransformersModel`.
model_id = "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B"
model = TransformersModel(
    model_id=model_id,
    device_map="auto",
    max_new_tokens=1000,
)
agent = CodeAgent(
    tools=[sql_engine],
    model=model,
)
agent.run("Can you give me the name of the client who got the most expensive receipt?")

config.json:   0%|          | 0.00/679 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/3.55G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/181 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/3.06k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/7.03M [00:00<?, ?B/s]

Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.


Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.


Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.


Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.


Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.


Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.


Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.


' Please make sure that the answer is correct and in the correct format.\n```\n````\nThe thought process above shows that the code attempted to find the receipt with the highest price, but there\'s an error in the syntax.\nNow, the user has asked for a corrected version of the code that addresses the syntax error.\n</think>\n\nTo find the name of the client with the most expensive receipt, we\'ll use a more structured approach to ensure the code runs correctly. Here\'s the corrected and improved code:\n\n```python\n# Initialize a list to store receipt data\nreceipts = []\n\n# Split the receipt string into individual receipts\nreceipts = receipt.split(\'\\n\')\n\n# Create a list of tuples containing the receipt ID and its price\nreceipt_prices = []\n\n# Search for each receipt to find its price\nfor receipt in receipts:\n    try:\n        price = float(search(query=receipt))\n        if price is not None:\n            receipt_prices.append((receipt, price))\n    except ValueError:\n    

In [None]:
from smolagents import CodeAgent, HfApiModel

# Same text-to-SQL question, served through `HfApiModel`.
# Alternative model ids: "meta-llama/Meta-Llama-3.1-8B-Instruct",
# "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B".
model_id = "microsoft/Phi-3.5-mini-instruct"
hf_model = HfApiModel(model_id)
agent = CodeAgent(tools=[sql_engine], model=hf_model)

question = "Can you give me the name of the client who got the most expensive receipt?"
agent.run(question)

"\n('Woodrow Wilson',)"

In [11]:
from smolagents import CodeAgent, HfApiModel

# Same text-to-SQL question, served through `HfApiModel` with the 32B distilled model.
model_id = "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B"
hf_model = HfApiModel(model_id)
agent = CodeAgent(tools=[sql_engine], model=hf_model)

question = "Can you give me the name of the client who got the most expensive receipt?"
agent.run(question)

"\n('Woodrow Wilson',)"