In [None]:
import ollama
import os
import re
import requests
import magic
import datetime
import json

from bs4 import BeautifulSoup
from markdownify import markdownify
from requests.exceptions import RequestException
from smolagents import CodeAgent, LiteLLMModel, DuckDuckGoSearchTool, ToolCallingAgent, tool, VisitWebpageTool

In [None]:
# Client for the local Ollama server.
oc = ollama.Client("http://localhost:11434")

# Directory holding the local dataset; created on first run if missing.
DATA_DIR = "/users/formation/irtn7prtnc/LLM_Valdom/Dataset"
os.makedirs(DATA_DIR, exist_ok = True)

# Directory where download_file stores fetched files. download_file reads
# CACHE_DIR, which was never defined anywhere in the file.
# NOTE(review): path taken from the Cache directory quoted in the Data agent
# description — confirm it is the intended location.
CACHE_DIR = "/users/formation/irtn7prtnc/LLM_Valdom/Cache"
os.makedirs(CACHE_DIR, exist_ok = True)

# Shared LLM handle passed to every agent below.
model = LiteLLMModel(
    model_id = "ollama/qwen2.5-coder:32b", #['deepseek-r1:32b', 'qwen2.5-coder:32b', 'llama3.1:8b', 'mistral-nemo:latest', 'mistral:latest']
    api_base = "http://localhost:11434/api/generate",
    num_ctx = 24000
    )

Définition des outils

Outils Data Agent

In [None]:
@tool
def list_files(directory: str) -> list:
    """Lists the names of all entries found in a given directory.

    Args:
        directory: The path of the directory to inspect.

    Returns:
        The list of entry names in the directory, or an error message string if the listing failed.
    """
    try:
        entries = os.listdir(directory)
    except Exception as error:
        # Errors are reported as text so the calling agent can read them.
        return f"Error listing files: {str(error)}"

    return entries

#####################################################################################################

@tool
def detect_file_type(file_path: str) -> str:
    """Detects the MIME type of a file.

    Args:
        file_path: The path of the file whose type is wanted.

    Returns:
        The MIME type reported for the file, or an error message if the type could not be detected.
    """
    try:
        detector = magic.Magic(mime=True)
        mime_type = detector.from_file(file_path)
    except Exception as error:
        # Errors are reported as text so the calling agent can read them.
        return f"Error detecting file type: {str(error)}"

    return mime_type

#####################################################################################################

@tool
def extract_any_archive(file_path: str, destination: str = None) -> str:
    """Extracts an archive file to a directory, then deletes the archive.

    Args:
        file_path: The path of the archive file to extract.
        destination: The directory to extract into. Defaults to the archive path without its last extension.

    Returns:
        A confirmation message containing the extraction directory, or an error message if the extraction failed.
    """
    try:
        # Imported locally: the file never imports patoolib at module level,
        # so the original call always failed with a NameError.
        import patoolib

        if destination is None:
            destination = os.path.splitext(file_path)[0]

        # Make sure the target directory exists before extracting into it.
        os.makedirs(destination, exist_ok=True)

        patoolib.extract_archive(file_path, outdir=destination)

        # Only reached after a successful extraction. The original referenced
        # an undefined name (zip_path) here instead of the archive path.
        os.remove(file_path)

        return f"Archive extracted successfully to: {destination}"

    except Exception as e:
        return f"Error extracting archive: {str(e)}"

####################################################################################################

# @tool
# def move_file(source: str, destination: str) -> str:
#     """Moves a file or directory to a new location.

#     Args:
#         source: The current path of the file to move.
#         destination: The new path of the file.

#     Returns:
#         The new file path, or an error message if file transfer failed
#     """
#     try:
#         if not os.path.exists(destination):
#             os.makedirs(destination)

#         shutil.move(source, destination)

#         return f"File successfully move to : {destination}"

#     except Exception as e:
#         return f"Fail during file transfert : {str(e)}"

####################################################################################################

import json
from pathlib import Path
from langchain.schema import Document
from langchain.tools import tool

@tool
def parse_json_to_docs(file_path: str) -> list[Document]:
    """
    Parse a JSON file (an object or a list of objects) into LangChain Documents.

    Nested keys and list indexes are joined with dots, and every leaf value
    becomes one "- key: value" line of the document text.

    Args:
        file_path: Path to the JSON file.

    Returns:
        A list of Documents (one for a top-level object, one per item for a
        top-level list), or a single Document carrying an error message.
    """
    def iter_leaves(node, path=''):
        # Depth-first walk; `path` always ends with a dot, stripped at the leaf.
        if isinstance(node, dict):
            for key in node:
                yield from iter_leaves(node[key], f'{path}{key}.')
        elif isinstance(node, list):
            for position, child in enumerate(node):
                yield from iter_leaves(child, f'{path}{position}.')
        else:
            yield path[:-1], node

    def render(node):
        # Going through a dict keeps the last value when two leaves share a key.
        flattened = dict(iter_leaves(node))
        return '\n'.join(f"- {name}: {value}" for name, value in flattened.items())

    try:
        json_path = Path(file_path)
        with open(json_path, 'r', encoding='utf-8') as handle:
            payload = json.load(handle)

        if isinstance(payload, dict):
            return [Document(page_content=render(payload), metadata={"source": str(json_path)})]

        if isinstance(payload, list):
            return [
                Document(
                    page_content=render(entry),
                    metadata={"source": str(json_path), "index": position}
                )
                for position, entry in enumerate(payload)
            ]

        # Top-level scalars (string, number, null, ...) are rejected.
        return [Document(page_content=f"Error: Le JSON n'est ni un objet ni une liste.", metadata={})]

    except Exception as error:
        return [Document(page_content=f"Erreur pendant le parsing JSON : {str(error)}", metadata={})]

####################################################################################################

from langchain.vectorstores import FAISS
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.schema import Document
from langchain.tools import tool

@tool
def index_documents_to_faiss(documents: list[Document], index_path: str = "vectorstore_faiss") -> str:
    """
    Build a FAISS index from a list of documents and save it locally.

    Args:
        documents: List of LangChain Documents to index.
        index_path: Location where the FAISS index is saved.

    Returns:
        A confirmation message, or an error message.
    """
    try:
        if documents:
            embedder = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
            FAISS.from_documents(documents, embedder).save_local(index_path)
            return f"Index FAISS créé avec succès à l'emplacement : {index_path}"

        # Nothing to index: report it instead of building an empty store.
        return "Erreur : Aucun document fourni pour l'indexation."

    except Exception as error:
        return f"Erreur pendant l'indexation FAISS : {str(error)}"

####################################################################################################

import csv
from pathlib import Path
from langchain.schema import Document
from langchain.tools import tool

@tool
def parse_csv_to_docs(file_path: str) -> list[Document]:
    """
    Parse a CSV file into a list of Documents (one row = one document).

    The first row is used as the header; each document lists its row as
    "- column: value" lines.

    Args:
        file_path: Path of the CSV file.

    Returns:
        A list of LangChain Documents, or a single Document carrying an error message.
    """
    try:
        path = Path(file_path)
        documents = []

        # newline='' is required by the csv module so that newlines embedded in
        # quoted fields are parsed correctly. utf-8-sig reads plain UTF-8 as
        # before but also drops a leading BOM, which would otherwise end up
        # inside the first column name.
        with open(path, 'r', encoding='utf-8-sig', newline='') as f:
            reader = csv.DictReader(f)
            for i, row in enumerate(reader):
                content = "\n".join([f"- {k}: {v}" for k, v in row.items()])
                documents.append(Document(
                    page_content=content,
                    metadata={"source": str(path), "index": i}
                ))

        return documents

    except Exception as e:
        return [Document(page_content=f"Erreur pendant le parsing CSV : {str(e)}", metadata={})]

####################################################################################################

import xml.etree.ElementTree as ET
from pathlib import Path
from langchain.schema import Document
from langchain.tools import tool

@tool
def parse_xml_to_docs(file_path: str) -> list[Document]:
    """
    Parse an XML file into a list of LangChain Documents.

    Each direct child of the root yields one document, built from the
    non-empty text of its descendants as "- tag: text" lines. Children with
    no such text produce no document.

    Args:
        file_path: Path of the XML file.

    Returns:
        A list of Documents, or a single Document carrying an error message.
    """
    try:
        xml_path = Path(file_path)
        root = ET.parse(xml_path).getroot()
        documents = []

        for position, record in enumerate(root):
            # Descendants only: the record element itself is skipped.
            fields = (
                (node.tag, (node.text or "").strip())
                for node in record.iter()
                if node is not record
            )
            lines = [f"- {tag}: {text}" for tag, text in fields if text]
            if not lines:
                continue

            documents.append(Document(
                page_content="\n".join(lines),
                metadata={"source": str(xml_path), "index": position}
            ))

        return documents

    except Exception as error:
        return [Document(page_content=f"Erreur pendant le parsing XML : {str(error)}", metadata={})]

####################################################################################################

NameError: name 'tool' is not defined

Outils Web Agent

In [None]:
@tool
def visit_webpage(url: str) -> str:
    """Visits a webpage at the given URL and returns its content as a markdown string.

    Args:
        url: The URL of the webpage to visit.

    Returns:
        The content of the webpage converted to Markdown, or an error message if the request fails.
    """
    try:
        # Without a timeout, requests waits indefinitely on an unresponsive
        # server, which would block the whole agent step.
        response = requests.get(url, timeout=10)
        response.raise_for_status()

        # Convert the HTML content to Markdown
        markdown_content = markdownify(response.text).strip()

        # Collapse runs of three or more line breaks into a single blank line
        markdown_content = re.sub(r"\n{3,}", "\n\n", markdown_content)
        return markdown_content

    except RequestException as e:
        return f"Error fetching the webpage: {str(e)}"

    except Exception as e:
        return f"An unexpected error occurred: {str(e)}"

#####################################################################################################

@tool
def extract_links(url: str) -> list:
    """Extracts all links from a webpage as absolute URLs.

    Args:
        url: The URL of the webpage to visit.

    Returns:
        The list of link targets found on the page, resolved against the page URL, or an error message if the request fails.
    """
    try:
        from urllib.parse import urljoin

        # Without a timeout, requests waits indefinitely on an unresponsive server.
        response = requests.get(url, timeout=10)
        response.raise_for_status()

        soup = BeautifulSoup(response.text, "html.parser")

        # Relative hrefs ("/page", "../doc.pdf") cannot be visited on their
        # own, so resolve them against the final URL (after redirects).
        return [urljoin(response.url, a["href"]) for a in soup.find_all("a", href=True)]

    except Exception as e:
        return f"Error extracting links: {str(e)}"

#####################################################################################################

# Avoids 404 errors when the model generates links itself
@tool
def check_url_validity(url: str) -> bool:
    """Checks whether a URL is reachable before visiting the web page.

    Args:
        url: The URL to reach.

    Returns:
        True if a HEAD request (following redirects) answers with status 200, False otherwise.
    """
    try:
        head_response = requests.head(url, allow_redirects=True, timeout=5)
    except requests.RequestException:
        # Any request-level failure counts as unreachable.
        return False

    return head_response.status_code == 200

#####################################################################################################

@tool
def download_file(url: str) -> str:
    """Downloads a file from a given URL into the local download directory.

    Args:
        url: The URL of the file to download.

    Returns:
        A confirmation message containing the downloaded file path, or an error message if the download failed.
    """
    try:
        from urllib.parse import unquote, urlparse

        # CACHE_DIR may not be defined at module level; fall back to DATA_DIR,
        # then to the current working directory, instead of always failing
        # with a NameError.
        module_globals = globals()
        target_dir = module_globals.get("CACHE_DIR") or module_globals.get("DATA_DIR") or os.getcwd()
        os.makedirs(target_dir, exist_ok=True)

        # Build the file name from the URL path only, so the query string and
        # fragment are not part of it; use a generic name when the path ends
        # with "/" or resolves to a directory reference.
        file_name = os.path.basename(unquote(urlparse(url).path))
        if file_name in ("", ".", ".."):
            file_name = "downloaded_file"
        local_filename = os.path.join(target_dir, file_name)

        # The timeout bounds the connection and each read, so a stalled
        # server cannot block the agent forever.
        with requests.get(url, stream=True, timeout=30) as r:
            r.raise_for_status()

            with open(local_filename, 'wb') as f:
                for chunk in r.iter_content(chunk_size=8192):
                    f.write(chunk)
        return f"File downloaded with succes : {local_filename}"

    except Exception as e:
        return f"Error during file downloading: {str(e)}"

Outils Graph Agent

In [None]:
@tool
def generate_chart_from_csv(file_path: str, x_column: str, y_column: str, chart_type: str = "bar") -> str:
    """Generates a bar or line chart from a CSV file and saves it as a PNG image next to the CSV.

    Args:
        file_path: Path of the CSV file containing the data to plot.
        x_column: Name of the CSV column used for the x axis.
        y_column: Name of the CSV column used for the y axis.
        chart_type: Kind of chart to draw, either "bar" or "line".

    Returns:
        A confirmation message containing the image path, or an error message if generation failed.
    """
    # Reject unsupported kinds before reading the file or creating a figure.
    if chart_type not in ("bar", "line"):
        return "Unsupported chart type. Use 'bar' or 'line'."

    try:
        # Imported locally: the file never imports pandas or matplotlib at
        # module level, so the original body always failed with a NameError.
        import matplotlib.pyplot as plt
        import pandas as pd

        df = pd.read_csv(file_path)

        # DataFrame.plot creates its own figure, so the size is passed here
        # rather than to a separate plt.figure() call that would stay empty
        # and never be closed.
        axes = df.plot(x=x_column, y=y_column, kind=chart_type, figsize=(8, 6))
        figure = axes.get_figure()

        try:
            # Swap only the final extension. str.replace(".csv", ".png")
            # returns the input path unchanged when it has no ".csv" (the
            # image would then overwrite the data file) and also rewrites
            # ".csv" occurring inside directory names.
            image_path = os.path.splitext(file_path)[0] + ".png"
            figure.savefig(image_path)
        finally:
            # Release the figure even if saving fails.
            plt.close(figure)

        return f"Chart saved at: {image_path}"

    except Exception as e:
        return f"Error generating chart: {str(e)}"

In [None]:
@tool
def ask_user_for_input(question: str) -> str:
    """Ask the user a question and wait for their typed answer.

    Args:
        question: The question shown to the user as the input prompt.

    Returns:
        The text entered by the user.
    """

    return input(question)

## Configuration des Agents


In [None]:
# Tool-calling agent for local files; it only receives the list_files and
# detect_file_type tools, and base tools are disabled.
# NOTE(review): the description quotes ".../LLM_Valdom/Cache" (without a
# leading slash) and ".../LLM_Valdom/RAG", while DATA_DIR in this file is
# ".../LLM_Valdom/Dataset" — confirm which directories are intended.
data_agent = ToolCallingAgent(
    tools=[list_files, detect_file_type],
    model=model,
    add_base_tools = False,
    max_steps = 10,
    name = "Data_agent",
    description ='An agent that is able to handle local files to extract pertinent data to answer to the request. It can performed severals actions on local files that are stored in "users/formation/irtn7prtnc/LLM_Valdom/Cache" or "/users/formation/irtn7prtnc/LLM_Valdom/RAG". You must work with this agent in priority to find data relative to the request.'
    )

#####################################################################################################

# Tool-calling agent for web search, page visiting, link extraction and file
# download; base tools are enabled in addition to the explicit list.
# NOTE(review): both VisitWebpageTool() and the local visit_webpage tool are
# passed; if they register under the same tool name they may conflict or
# shadow each other — confirm against the smolagents version in use.
# NOTE(review): check_url_validity is defined in this file but not registered here.
web_agent = ToolCallingAgent(
    tools = [DuckDuckGoSearchTool(), VisitWebpageTool(), visit_webpage, extract_links, download_file],
    model = model,
    add_base_tools = True,
    max_steps = 10,
    name="Web_agent",
    description ='An agent that is able to navigate inside a given webpage with the aim to access pertinent data, extract pertinent links or download interesting files regarding the request. First, it needs to check if data can be found in “https://data.assemblee-nationale.fr” or “https://www.assemblee-nationale.fr”. Then if it can’t find pertinent data, or if there is still missing data to answer precisely to the request, it can perform web searches to collect pertinent data.'
    )

#####################################################################################################

# Placeholder agent with no tools; its own description states it is not
# useful yet.
# NOTE(review): generate_chart_from_csv is defined in this file but not
# registered here — presumably intended for this agent; confirm.
infograpic_agent = ToolCallingAgent(
    tools=[],
    model=model,
    add_base_tools = False,
    max_steps = 10,
    name = "Info_agent",
    description = "An useless agent for now, but you can discuss with him about weather if you want"
    )

#####################################################################################################

# Top-level code agent that delegates work to its managed agents.
# NOTE(review): only data_agent is managed here, although the description
# also tells the manager to use the Web Agent and the Infographic Agent —
# confirm whether web_agent and infograpic_agent should be listed too.
manager_agent = CodeAgent(
    tools = [],
    model = model,
    managed_agents = [data_agent],
    # Modules the generated code is allowed to import, beyond the defaults.
    additional_authorized_imports = ["time", "numpy", "pandas"],
    planning_interval = 3,
    verbosity_level = 2,
    #add_base_tools = True
    max_steps = 5,
    description  = "Your objective is to answer the query efficiently. "
        "1. First, check if you can find data or information inside local files using Data Agent. "
        "2. If the data are not found locally, use Web Agent to find and download the relevant information. "
        "3. If visual representation is needed, use Infographic Agent to generate charts or graphs."
    )

## Utilisation de l'agent


In [None]:
# Run the manager agent on a test prompt and print whatever it returns.
agent_output = manager_agent.run('discuss with others agents')

print("Final output:")
print(agent_output)