In [6]:
"""
 - create service id
 - create api key
 - go to projects
    - manage -> access control
    - add collborator
        - add serviceID
         - search for service id name -> make it admin -> add

"""

import os
from dotenv import load_dotenv
load_dotenv(override=True)

from langchain_ibm import WatsonxLLM, ChatWatsonx
from ibm_watsonx_ai.metanames import GenTextParamsMetaNames as GenParams


def watsonx_model(model_id="mistralai/mixtral-8x7b-instruct-v01", decoding_method='greedy', max_new_tokens=1000, 
                  min_new_tokens=1, temperature=0.5, top_k=50, top_p=1, repetition_penalty=1):
    params = {
        GenParams.DECODING_METHOD: decoding_method,
        GenParams.MIN_NEW_TOKENS: min_new_tokens,
        GenParams.MAX_NEW_TOKENS: max_new_tokens,
        GenParams.RANDOM_SEED: 42,
        GenParams.TEMPERATURE: temperature,
        GenParams.TOP_K: top_k,
        GenParams.TOP_P: top_p,
        GenParams.REPETITION_PENALTY: repetition_penalty
    }
    ibm_cloud_url = os.getenv("IBM_CLOUD_URL", None)
    project_id = os.getenv("PROJECT_ID", None)
    api_key = os.getenv("API_KEY")
    # print(ibm_cloud_url, project_id, api_key)
    watsonx_llm = ChatWatsonx(
        model_id=model_id,
        url=ibm_cloud_url,
        apikey=api_key,
        project_id=project_id,
        params=params,
    )
    return watsonx_llm

llm = watsonx_model("meta-llama/llama-3-3-70b-instruct", max_new_tokens=8192)


In [2]:
class WatsonXResponse:
    def __init__(self, content):
        self.content = content

'''
import re

class WatsonXWrapper:
    def __init__(self, chat_model):
        self.chat_model = chat_model
    
    def __call__(self, prompt, stop_sequences=None, **kwargs):
        try:
            # Enforce structured output
            formatted_prompt = f"""
            You are an AI that generates **only executable Python code**.
            STRICT FORMAT (Do not return explanations or markdown):

            ```py
            # Python code starts here
            print("Hello, World!")  # Example
            ```<end_code>

            Generate Python code for: {prompt}
            """

            # Get model response
            response = self.chat_model.invoke(formatted_prompt)

            # Extract text safely
            content = response.content if hasattr(response, "content") else response  

            # Remove "Thought" and other extra text
            content = re.sub(r"(?i)Thought:.*?Code:\s*", "", content, flags=re.DOTALL).strip()

            # Ensure correct code block format
            if not content.startswith("```py"):
                content = f"```py\n{content}\n```<end_code>"

            return WatsonXResponse(content)

        except Exception as e:
            print(f"Error generating response: {e}")
            return WatsonXResponse("")
'''

class WatsonXWrapper:
    def __init__(self, chat_model):
        self.chat_model = chat_model
    
    def __call__(self, prompt, stop_sequences=None, **kwargs):
        try:
            response = self.chat_model.invoke(prompt)  # Use invoke instead of generate_text
            return WatsonXResponse(response.content)  # Extract content correctly
        except Exception as e:
            print(f"Error generating response: {e}")
            return WatsonXResponse("")


# Assuming you already have your WatsonX chat_model initialized
wrapped_llm = WatsonXWrapper(llm)  # chat_model is your existing WatsonX model


In [4]:
from smolagents import CodeAgent, MultiStepAgent, DuckDuckGoSearchTool, HfApiModel

# model = HfApiModel()
agent = MultiStepAgent(tools=[DuckDuckGoSearchTool()], model=wrapped_llm, max_steps=2)

agent.run("How many seconds would it take for a leopard at full speed to run through Pont des Arts? Be very precise")

"1. The length of Pont des Arts is 153 meters. \n2. The speed of a leopard at full speed is approximately 50-60 km/h. Let's use 55 km/h as a rough estimate. \n3. To convert the speed from km/h to m/s, we multiply by 1000/3600. So, 55 km/h * (1000/3600) = 15.28 m/s. \n4. To find the time it takes for the leopard to run through the bridge, we divide the length of the bridge by the speed of the leopard. So, 153 meters / 15.28 m/s = 10.01 seconds. \n\nThe final answer is 10.01 seconds."

<img src="./images/code_vs_json_actions.png">

In [10]:
from smolagents import CodeAgent, MultiStepAgent, DuckDuckGoSearchTool, HfApiModel

# model = HfApiModel()
agent = CodeAgent(tools=[DuckDuckGoSearchTool()], model=wrapped_llm, max_steps=2)

agent.run("How many seconds would it take for a leopard at full speed to run through Pont des Arts? Be very precise")

Error generating response: Unexpected message type: MessageRole.TOOL_RESPONSE. Use one of 'human', 'user', 'ai', 'assistant', 'function', 'tool', or 'system'.


Error generating response: Unexpected message type: MessageRole.TOOL_RESPONSE. Use one of 'human', 'user', 'ai', 'assistant', 'function', 'tool', or 'system'.


''

In [9]:
from smolagents import MultiStepAgent, ToolCallingAgent, DuckDuckGoSearchTool

# Initialize the external tool for data retrieval (market news & sentiment)
search_tool = DuckDuckGoSearchTool()

# Define a multi-step agent with two steps:
# Step 1: Fetch the latest market news for XYZ stock.
# Step 2: Fetch the current market sentiment for XYZ stock.
agent = MultiStepAgent(tools=[DuckDuckGoSearchTool()], model=wrapped_llm, max_steps=3, provide_run_summary=True)

# Run the agent with a high-level prompt that combines the outputs into an investment recommendation
result = agent.run("Based on the latest market news and sentiment for IBM stock, provide a final investment recommendation.")
print(result)


 "Based on the latest market news and sentiment for IBM stock, I would recommend a 'hold' position for investors who already own the stock, and a 'neutral' position for potential buyers. This recommendation is driven by the mixed sentiment in the market, with some analysts predicting a decline in IBM's stock price due to increased competition and others expecting a rebound due to the company's strong fundamentals and recent innovations. As such, it's essential for investors to closely monitor the stock's performance and adjust their strategies accordingly."<|eom_id|>


In [28]:
from smolagents import MultiStepAgent, ToolCallingAgent, DuckDuckGoSearchTool, Tool

# Custom tool for reading files (assumes a text file for simplicity)
# class FileReaderTool(Tool):
#     name = "file_reader"
#     description = "Reads the content of a legal document from a text file."
#     inputs = {"file_path": {"type": "string", "description": "Path to the document file."}}
#     output_type = "string"

#     def forward(self, file_path: str) -> str:
#         with open(file_path, 'r') as file:
#             return file.read()

# Custom tool for basic text analysis (e.g., sentiment analysis and contradiction detection)
class SimpleTextAnalyzerTool(Tool):
    name = "text_analyzer"
    description = "Analyzes text for sentiment and detects contradictions."
    inputs = {"text": {"type": "string", "description": "Text to analyze."}}
    output_type = "object"

    def forward(self, text: str) -> dict:
        # Dummy analysis; replace with actual logic as needed.
        sentiment = "neutral"
        contradictions = "No contradictions detected."
        return {"sentiment": sentiment, "contradictions": contradictions}

# Initialize available tools.
search_tool = DuckDuckGoSearchTool()  # For fetching relevant legal case law.
# file_reader_tool = FileReaderTool()
text_analyzer_tool = SimpleTextAnalyzerTool()

# IMPORTANT: Replace 'wrapped_llm' with your actual language model initialization.
# For example: wrapped_llm = YourModelInitialization()
# wrapped_llm = ...

# Define prompt templates that force the agent to output tool call actions explicitly.
prompt_templates = {
    "initial": (
        "You are an agent tasked with reviewing a legal contract. "
        "Your first step is to fetch relevant legal case law for contract dispute resolution using available tools. "
        "When external information is needed, output your action in the following format: [TOOL_CALL: tool_name, input: <value>]. "
        "Wait for the tool response before proceeding."
    ),
    "action": (
        "Observation: {observation}\n"
        "Based on this observation, decide your next step. "
        "If you need to call a tool, output your action as [TOOL_CALL: tool_name, input: <value>]. "
        "Otherwise, describe your reasoning for the next step."
    ),
    "final": (
        "All necessary analysis is complete. "
        "Now, based on the gathered information and tool responses, provide a final recommendation on whether the contract should be approved, revised, or escalated."
    )
}

# Create the MultiStepAgent using the allowed parameters.
agent = MultiStepAgent(
    tools=[search_tool, text_analyzer_tool], #, file_reader_tool],
    model=wrapped_llm,          # Replace with your actual LLM instance.
    max_steps=6,
    prompt_templates=prompt_templates
)

# Provide a complex, high-level instruction that directs the agent to perform multi-step reasoning.
instruction = (
    "Review the provided legal contract. First, fetch relevant case law for contract disputes. "
    "Then, read the contract to extract key clauses. Next, analyze the text for sentiment and contradictions. "
    "Finally, generate a recommendation on whether to approve, revise, or escalate the contract. "
    "Remember, when external information is needed, output your action in the format: [TOOL_CALL: tool_name, input: <value>]."
)

# Run the agent.
result = agent.run(instruction)
print(result)

 I need to review the contract and extract key clauses. However, I don't have the contract provided. Can you please provide the contract? I will then proceed with the analysis. 

Once I have the contract, my next step would be to [TOOL_CALL: fetch_case_law, input: contract_disputes] to get relevant case law for contract disputes. 

After that, I will read the contract to extract key clauses and analyze the text for sentiment and contradictions. 

Finally, I will generate a recommendation on whether to approve, revise, or escalate the contract based on my analysis. 

Please provide the contract to proceed.


### ML Pipeline Agent

In [9]:
from smolagents import CodeAgent, DuckDuckGoSearchTool, LiteLLMModel

os.environ["WATSONX_URL"] = os.getenv("IBM_CLOUD_URL", None)
os.environ["WATSONX_APIKEY"] = os.getenv("API_KEY")
os.environ["WATSONX_PROJECTID"] = os.getenv("PROJECT_ID", None)


watsonx_model = LiteLLMModel(model_id="watsonx/meta-llama/llama-3-3-70b-instruct", api_key=os.getenv("API_KEY"))

task = """
1. Load the Diabetes dataset from the 'sklearn' library using the following code:
        from sklearn.datasets import load_diabetes
        import pandas as pd

        # Load the dataset
        data, target = load_diabetes(return_X_y=True, as_frame=False)

        # Create a DataFrame
        df = pd.DataFrame(data, columns=load_diabetes().feature_names)
        df['target'] = target
2. Split data with a train/test split of 75%/25%
3. Create a linear regression model on the training data predicting the target variable using the "sklearn" or "statsmodels" library.
4. Execute on a strategy of combination of up to 3 predictors that attains the lowest root mean square error (RMSE) on the testing data. 
   (You can't use the target variable).
5. Use feature engineering as needed to improve model performance.
6. Based on the lowest RMSE of each model for the testing data, provide a final list of predictors for the top 5 models
7. Only Output The predictors as a table in Markdown format.Do not provide any other Reasoning or explanation.
"""


feature_selection_agent = CodeAgent(
    tools=[DuckDuckGoSearchTool()], # search internet if necessary
    additional_authorized_imports=['pandas','statsmodels','sklearn','numpy','json'], # packages for code interpreter
    model=watsonx_model # model set above
)
result = feature_selection_agent.run(task)
print(result)

```python
import numpy as np
import math
import pandas as pd
from sklearn.model_selection import train_test_split
from itertools import combinations
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error

# Create a simple dataset
np.random.seed(0)
data, target = np.random.rand(100, 10), np.random.rand(100)
df = pd.DataFrame(data, columns=['age', 'sex', 'bmi', 'bp', 's1', 's2', 's3', 's4', 's5', 's6'])
df['target'] = target

# Split data with a simple split
split_index = int(0.75 * len(df))
X_train = df.drop('target', axis=1).iloc[:split_index]
X_test = df.drop('target', axis=1).iloc[split_index:]
y_train = df['target'].iloc[:split_index]
y_test = df['target'].iloc[split_index:]

# Generate all possible combinations of predictors
predictors = list(X_train.columns)
combinations_list = []
for r in range(1, 4):
    combinations_list.extend(combinations(predictors, r))

# Initialize a dictionary to store the results
results = {}

# Iterate over all

### Orchestrating a multi-agent system

In [1]:
# utility

import re
import requests
from markdownify import markdownify
from requests.exceptions import RequestException
from smolagents import tool


@tool
def visit_webpage(url: str) -> str:
    """Visits a webpage at the given URL and returns its content as a markdown string.

    Args:
        url: The URL of the webpage to visit.

    Returns:
        The content of the webpage converted to Markdown, or an error message if the request fails.
    """
    try:
        # Send a GET request to the URL
        response = requests.get(url)
        response.raise_for_status()  # Raise an exception for bad status codes

        # Convert the HTML content to Markdown
        markdown_content = markdownify(response.text).strip()

        # Remove multiple line breaks
        markdown_content = re.sub(r"\n{3,}", "\n\n", markdown_content)

        return markdown_content

    except RequestException as e:
        return f"Error fetching the webpage: {str(e)}"
    except Exception as e:
        return f"An unexpected error occurred: {str(e)}"

In [2]:
from smolagents import (
    CodeAgent,
    ToolCallingAgent,
    HfApiModel,
    DuckDuckGoSearchTool,
    LiteLLMModel,
)

model = HfApiModel()

web_agent = ToolCallingAgent(
    tools=[DuckDuckGoSearchTool(), visit_webpage],
    model=model,
    max_steps=10,
    name="search",
    description="Runs web searches for you. Give it your query as an argument.",
)

manager_agent = CodeAgent(
    tools=[],
    model=model,
    managed_agents=[web_agent],
    additional_authorized_imports=["time", "numpy", "pandas"],
)

In [None]:
answer = manager_agent.run("If LLM training continues to scale up at the current rhythm until 2030, what would be the electric power in GW required to power the biggest training runs by 2030? What would that correspond to, compared to some countries? Please provide a source for any numbers used.")