**Building a code agent with Hugging Face's smolagents and exploring its capabilities**

**Installing the packages**

In [2]:
!pip install -q requests gradio smolagents python-dotenv


[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m54.1/54.1 MB[0m [31m17.5 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m322.9/322.9 kB[0m [31m22.0 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m114.5/114.5 kB[0m [31m8.6 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m95.2/95.2 kB[0m [31m7.6 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m11.5/11.5 MB[0m [31m106.5 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m72.0/72.0 kB[0m [31m5.3 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m62.5/62.5 kB[0m [31m4.1 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m3.3/3.3 MB[0m [31m86.4 MB/s[0m eta [36m0:00:00[0m
[?25h

**Importing packages and logging in to Hugging Face**

In [89]:
# Warning control
import warnings

# Silence every Python warning from this point on; this is set before the
# remaining imports run.
warnings.filterwarnings("ignore")

# NOTE(review): os, io, IPython.display, Image, base64 and find_dotenv are not
# referenced in the cells visible here — confirm whether they are still needed.
import os
import io
import IPython.display
from PIL import Image
import base64

from dotenv import load_dotenv, find_dotenv

_ = load_dotenv() # read local .env file; the return value is discarded

from huggingface_hub import login

# Called with no arguments; in the recorded run this rendered a login widget,
# so presumably the token is entered interactively — verify before running
# the notebook unattended.
login()

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

In [90]:
import numpy as np
import pandas as pd

**Defining the model**

In [97]:
from smolagents import HfApiModel, CodeAgent


# LLM backend handed to the agent: Qwen 2.5 72B Instruct, requested through the
# "together" inference provider.
model = HfApiModel(
    model_id="Qwen/Qwen2.5-72B-Instruct",
    provider="together",  # Choose a specific inference provider
    temperature=0.1,
    max_tokens=4096,
)

**Setting up the agent**

In [98]:
# Code agent driven by `model`, given no tools, limited to 10 steps, and with
# "pandas" and "numpy" listed as additional authorized imports.
agent = CodeAgent(
    tools=[],
    model=model,
    additional_authorized_imports=["pandas", "numpy"],
    max_steps=10,
    verbosity_level=2,
)

# Fix the width of the agent logger's console to 66 columns.
agent.logger.console.width = 66

**Creating the sample data consisting of distance, time and speed**

In [99]:
import pandas as pd

# Sample trips: distance covered (km) and time taken (hours)
data = dict(
    distance_km=[10, 25, 50, 100, 5],
    time_hr=[0.5, 1, 1.5, 2, 0.25],
)

# Speed (km/h) = distance / time, computed pairwise for each trip
data.update(
    speed_kmph=[
        km / hours for km, hours in zip(data["distance_km"], data["time_hr"])
    ]
)

# Assemble the three columns into a DataFrame and show it
df = pd.DataFrame(data)

print(df)


   distance_km  time_hr  speed_kmph
0           10     0.50   20.000000
1           25     1.00   25.000000
2           50     1.50   33.333333
3          100     2.00   50.000000
4            5     0.25   20.000000


**Dropping the speed column**

In [100]:
# Remove the pre-computed speed column before the dataframe is handed to the agent.
# errors="ignore" keeps the cell re-runnable: df is reassigned in place, so a second
# run would otherwise raise KeyError because the column is already gone.
df = df.drop(columns="speed_kmph", errors="ignore")
df.head()

Unnamed: 0,distance_km,time_hr
0,10,0.5
1,25,1.0
2,50,1.5
3,100,2.0
4,5,0.25


**Defining the task and running the agent**

In [102]:
# Natural-language instruction for the agent: derive a speed column from the
# distance/time dataframe and return the result as a dataframe.
task = """Here is a dataframe of different distance and time taken.
Could you give me a comparative table (as a dataframe) and calculate speed column for each of them?
"""
agent.logger.level = 1 # Lower verbosity level
# The dataframe is passed to the run under the key used in additional_args, so the
# key should describe the data; the previous "suppliers_data" name did not match
# this distance/time table.
agent.run(
    task,
    additional_args={"trip_data": df},
)

Unnamed: 0,distance_km,time_hr,speed_kmph
0,10,0.5,20.0
1,25,1.0,25.0
2,50,1.5,33.333333
3,100,2.0,50.0
4,5,0.25,20.0


From the above, it can be seen that the agent was able to generate the code to calculate the speed, and the values are correct.

**Creating another dataset**

In [103]:
import pandas as pd

# Five sample records of body measurements and vital signs, one list per column
data = dict(
    age=[25, 32, 45, 29, 60],
    weight_kg=[70, 85, 95, 68, 72],
    height_cm=[175, 180, 165, 160, 170],
    waist_cm=[82, 90, 105, 78, 85],
    hip_cm=[95, 100, 110, 92, 98],
    systolic_bp=[120, 135, 145, 118, 130],
    diastolic_bp=[80, 88, 92, 76, 85],
    resting_heart_rate=[72, 80, 78, 70, 74],
    cholesterol_mg_dl=[180, 220, 250, 170, 200],
    glucose_mg_dl=[90, 105, 130, 85, 110],
)

df = pd.DataFrame(data)

# BMI = weight (kg) divided by the square of height in metres (height_cm / 100)
df["bmi"] = df["weight_kg"].div(df["height_cm"].div(100).pow(2))

df.head()


Unnamed: 0,age,weight_kg,height_cm,waist_cm,hip_cm,systolic_bp,diastolic_bp,resting_heart_rate,cholesterol_mg_dl,glucose_mg_dl,bmi
0,25,70,175,82,95,120,80,72,180,90,22.857143
1,32,85,180,90,100,135,88,80,220,105,26.234568
2,45,95,165,105,110,145,92,78,250,130,34.894399
3,29,68,160,78,92,118,76,70,170,85,26.5625
4,60,72,170,85,98,130,85,74,200,110,24.913495


In [104]:
# Remove the pre-computed BMI column before the dataframe is handed to the agent.
# errors="ignore" keeps the cell re-runnable: df is reassigned in place, so a second
# run would otherwise raise KeyError because the column is already gone.
df = df.drop(columns="bmi", errors="ignore")

**The task is to analyse the dataset and create all important columns based on the existing ones (to test feature engineering)**

In [105]:
# Natural-language instruction for the agent: analyse the dataframe and add
# engineered feature columns to it.
task = """Here is a dataframe, analyse it properly.
Could you give the important feature engineering columns each of them and add it to the existing dataframe?
"""
agent.logger.level = 1 # Lower verbosity level
# The dataframe is passed to the run under the key used in additional_args, so the
# key should describe the data; the previous "suppliers_data" name did not match
# this table of body measurements and vital signs.
agent.run(
    task,
    additional_args={"health_data": df},
)

Unnamed: 0,age,weight_kg,height_cm,waist_cm,hip_cm,systolic_bp,diastolic_bp,resting_heart_rate,cholesterol_mg_dl,glucose_mg_dl,BMI,waist_to_hip_ratio,blood_pressure_ratio
0,25,70,175,82,95,120,80,72,180,90,22.857143,0.863158,1.5
1,32,85,180,90,100,135,88,80,220,105,26.234568,0.9,1.534091
2,45,95,165,105,110,145,92,78,250,130,34.894399,0.954545,1.576087
3,29,68,160,78,92,118,76,70,170,85,26.5625,0.847826,1.552632
4,60,72,170,85,98,130,85,74,200,110,24.913495,0.867347,1.529412


The agent successfully generated code that adds new features (BMI, waist-to-hip ratio and blood-pressure ratio) to the dataframe.