57 changes: 57 additions & 0 deletions nemotron/LLM/bash_computer_use_agent/README.md
@@ -0,0 +1,57 @@
# Bash Computer Use Agent with Nemotron

This directory contains a simple Bash shell agent that can operate the computer. The agent
is implemented in two different ways:

1. **From-scratch implementation**: shows how to build the agent in pure Python, with the `openai` package as the only dependency.
2. **LangGraph implementation**: shows how LangGraph can simplify the implementation. This version requires the `langchain-openai` and `langgraph` packages.

# How to run?

> ⚠️ **DISCLAIMER**: This software can execute arbitrary Bash commands on your system. Use at your own risk. The authors and NVIDIA assume no responsibility for any damage, data loss, or security breaches resulting from its use. By using this software, you acknowledge and accept these risks.

## Step 1: LLM setup

Set up your LLM endpoint in `config.py`:

- `llm_base_url` should point at your NVIDIA Nemotron Nano 9B v2 provider's base URL (or your hosted endpoint, if self-hosting).
- `llm_model_name` should be your NVIDIA Nemotron Nano 9B v2 provider's name for the model (or your hosted endpoint model name, if self-hosting).
- `llm_api_key` should be the API key for your provider (not needed if self-hosting).
- `llm_temperature` and `llm_top_p` are the sampling settings for your model. They are set to reasonable defaults for Nemotron in reasoning-on mode.

Here is an example with [`build.nvidia.com`](https://build.nvidia.com/nvidia/nvidia-nemotron-nano-9b-v2) as the provider:

```python
class Config:

llm_base_url: str = "https://integrate.api.nvidia.com/v1"
llm_model_name: str = "nvidia/nvidia-nemotron-nano-9b-v2"
llm_api_key: str = "nvapi-XYZ"
...
```

> NOTE: If you are not hosting the model locally, you will need an API key. For `build.nvidia.com`, instructions are available on [this page](https://build.nvidia.com/nvidia/nvidia-nemotron-nano-9b-v2) behind the `View Code` button.
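
If you prefer not to hardcode the key in `config.py`, one option is to read it from an environment variable. A minimal sketch (the `NVIDIA_API_KEY` variable name is our choice here, not something the repo defines):

```python
import os

class Config:
    ...
    # Assumes you have run `export NVIDIA_API_KEY=nvapi-XYZ` in your shell first.
    llm_api_key: str = os.environ.get("NVIDIA_API_KEY", "")
```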

Next, install the dependencies and run the code.

## Step 2: Install the dependencies

Use your favorite package manager to install the dependencies. For example:

```bash
pip install -r requirements.txt
```

## Step 3: Execute!

Choose one of the two entry points to run your Bash agent. In both cases, the agent asks for your confirmation before executing any command the model proposes:

```bash
python main_from_scratch.py # From-scratch implementation
```

or

```bash
python main_langgraph.py # LangGraph implementation
```
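
Both entry points drive the same `Bash` tool defined in `bash.py`. If you want to exercise the tool on its own, here is a minimal sketch (using only classes from this repo; run it from this directory):

```python
from config import Config
from bash import Bash

# The tool checks the allowlist, runs the command, and tracks the working directory.
bash = Bash(Config())
result = bash.exec_bash_command("ls")
print(result["stdout"], result["stderr"], result["cwd"])
```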
114 changes: 114 additions & 0 deletions nemotron/LLM/bash_computer_use_agent/bash.py
@@ -0,0 +1,114 @@
from typing import Any, Dict, List
import re
import shlex
import subprocess

from config import Config

class Bash:
"""
An implementation of a tool that executes bash commands and keeps track of the working directory.
"""

def __init__(self, config: Config):
self.config = config
# The current working directory (this is tracked and updated throughout the session)
self.cwd = config.root_dir
# Set the initial working directory
self.exec_bash_command(f"cd {self.cwd}")

def exec_bash_command(self, cmd: str) -> Dict[str, str]:
"""
Execute the bash command after checking the allowlist.
"""
if cmd:
# Prevent command injection via backticks or $. This blocks variables too.
if re.search(r"[`$]", cmd):
return {"error": "Command injection patterns are not allowed."}

            # Check the allowlist; bail out early if the command string cannot be parsed.
            try:
                command_names = self._split_commands(cmd)
            except ValueError as e:
                return {"error": f"Could not parse the command: {e}"}

            for cmd_part in command_names:
                if cmd_part not in self.config.allowed_commands:
                    return {"error": "Parts of this command were not in the allowlist."}

return self._run_bash_command(cmd)

return {"error": "No command was provided"}

def to_json_schema(self) -> Dict[str, Any]:
"""
Convert the function signature to a JSON schema for LLM tool calling.
"""
return {
"type": "function",
"function": {
"name": "exec_bash_command",
"description": "Execute a bash command and return stdout/stderr and the working directory",
"parameters": {
"type": "object",
"properties": {
"cmd": {
"type": "string",
"description": "The bash command to execute"
}
},
"required": ["cmd"],
},
},
}

    def _split_commands(self, cmd_str: str) -> List[str]:
"""
Split a command string into individual commands, without the parameters.
"""
        # Split on command separators (;, &, |, and newlines) so that every
        # command in a compound invocation is checked against the allowlist.
        parts = re.split(r'[;&|\n]+', cmd_str)
commands = []

for part in parts:
tokens = shlex.split(part.strip())

if tokens:
commands.append(tokens[0])

return commands

def _run_bash_command(self, cmd: str) -> Dict[str, str]:
"""
Runs the bash command and catches exceptions (if any).
"""
stdout = ""
stderr = ""
new_cwd = self.cwd

try:
# Wrap the command so we can keep track of the working directory.
wrapped = f"{cmd};echo __END__;pwd"
result = subprocess.run(
wrapped,
shell=True,
cwd=self.cwd,
capture_output=True,
text=True,
executable="/bin/bash"
)
stderr = result.stderr
# Find the separator marker
split = result.stdout.split("__END__")
stdout = split[0].strip()

# If no output/error at all, inform that the call was successful.
if not stdout and not stderr:
stdout = "Command executed successfully, without any output."

# Get the new working directory, and change it
new_cwd = split[-1].strip()
self.cwd = new_cwd
except Exception as e:
stdout = ""
stderr = str(e)

return {
"stdout": stdout,
"stderr": stderr,
"cwd": new_cwd,
}
62 changes: 62 additions & 0 deletions nemotron/LLM/bash_computer_use_agent/config.py
@@ -0,0 +1,62 @@
import os
from dataclasses import dataclass, field

@dataclass
class Config:
"""
Configuration class for the application.
"""

# -------------------------------------
# LLM configuration
    # -------------------------------------

llm_base_url: str = "https://integrate.api.nvidia.com/v1"
llm_model_name: str = "nvidia/nvidia-nemotron-nano-9b-v2"
llm_api_key: str = "(replace with your key, not needed for local models)"
# Sampling parameters (we've reduced the temperature to make the model more deterministic)
llm_temperature: float = 0.1
llm_top_p: float = 0.95

# -------------------------------------
# Agent configuration
    # -------------------------------------

# The directory path that the agent can access and operate in.
root_dir: str = os.path.dirname(os.path.abspath(__file__))

# The list of commands that the agent can execute.
#
# WARNING: Be very careful about which commands you allow here.
# By running this code you assume all responsibility for
# unintended consequences of command execution.
allowed_commands: list = field(default_factory=lambda: [
"cd", "cp", "ls", "cat", "find", "touch", "echo", "grep", "pwd", "mkdir", "wget", "sort", "head", "tail", "du",
])

@property
def system_prompt(self) -> str:
"""Generate the system prompt for the LLM based on allowed commands."""
return f"""/think

You are a helpful and very concise Bash assistant with the ability to execute commands in the shell.
You engage with users to help answer questions about bash commands, or execute their intent.
If the user's intent is unclear, keep engaging with them to figure out what they need and how best to help
them. If they ask questions that are not relevant to bash or computer use, decline to answer.

When a command is executed, you will be given the output from that command and any errors. Based on
that, either take further actions or yield control to the user.

The bash interpreter's output and current working directory will be given to you every time a
command is executed. Take that into account in the next turn.
If there was an error during execution, tell the user what that error was exactly.

You are only allowed to execute the following commands. Break complex tasks into shorter commands from this list:

```
{self.allowed_commands}
```

**Never** attempt to execute a command not in this list. **Never** attempt to execute dangerous commands
like `rm`, `mv`, `rmdir`, `sudo`, etc. If the user asks you to do so, politely refuse.
"""
64 changes: 64 additions & 0 deletions nemotron/LLM/bash_computer_use_agent/helpers.py
@@ -0,0 +1,64 @@
from typing import Any, Dict, List, Tuple
from openai import OpenAI

from config import Config

class Messages:
"""
An abstraction for a list of system/user/assistant/tool messages.
"""

def __init__(self, system_message: str = ""):
self.system_message = None
self.messages = []
self.set_system_message(system_message)

def set_system_message(self, message):
self.system_message = {"role": "system", "content": message}

def add_user_message(self, message):
self.messages.append({"role": "user", "content": message})

def add_assistant_message(self, message):
self.messages.append({"role": "assistant", "content": message})

def add_tool_message(self, message, id):
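        """Append a tool-result message, linked to its originating call via tool_call_id."""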
self.messages.append({"role": "tool", "content": str(message), "tool_call_id": id})

def to_list(self) -> List[Dict[str, str]]:
"""
Convert to a list of messages.
"""
return [self.system_message] + self.messages

class LLM:
"""
An abstraction to prompt an LLM with OpenAI compatible endpoint.
"""

    def __init__(self, config: Config):
        self.client = OpenAI(base_url=config.llm_base_url, api_key=config.llm_api_key)
self.config = config
print(f"Using model '{config.llm_model_name}' from '{config.llm_base_url}'")

def query(
self,
messages: Messages,
tools: List[Dict[str, Any]],
max_tokens=None,
) -> Tuple[str, List[Dict[str, Any]]]:
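        """
        Send the conversation and tool schemas to the chat completions endpoint.
        Returns the assistant's text and its tool calls (an empty list if none).
        """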
completion = self.client.chat.completions.create(
model=self.config.llm_model_name,
messages=messages.to_list(),
tools=tools,
temperature=self.config.llm_temperature,
top_p=self.config.llm_top_p,
max_tokens=max_tokens,
stream=False
)

return (
completion.choices[0].message.content,
completion.choices[0].message.tool_calls or [],
)
75 changes: 75 additions & 0 deletions nemotron/LLM/bash_computer_use_agent/main_from_scratch.py
@@ -0,0 +1,75 @@
import json

from config import Config
from bash import Bash
from helpers import Messages, LLM

def confirm_execution(cmd: str) -> bool:
"""Ask the user whether the suggested command should be executed."""
return input(f" ▶️ Execute '{cmd}'? [y/N]: ").strip().lower() == "y"

def main(config: Config):
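    """Run the interactive agent loop: read input, query the LLM, and execute confirmed tool calls."""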
bash = Bash(config)
# The model
llm = LLM(config)
# The conversation history, with the system prompt
messages = Messages(config.system_prompt)
print("[INFO] Type 'quit' at any time to exit the agent loop.\n")

# The main agent loop
while True:
# Get user message.
user = input(f"['{bash.cwd}' 🙂] ").strip()
if user.lower() == "quit":
print("\n[🤖] Shutting down. Bye!\n")
break
if not user:
continue
        # Always tell the agent where the current working directory is to avoid confusion.
user += f"\n Current working directory: `{bash.cwd}`"
messages.add_user_message(user)

# The tool-call/response loop
while True:
print("\n[🤖] Thinking...")
response, tool_calls = llm.query(messages, [bash.to_json_schema()])

if response:
response = response.strip()
# Do not store the thinking part to save context space
if "</think>" in response:
response = response.split("</think>")[-1].strip()

# Add the (non-empty) response to the context
if response:
messages.add_assistant_message(response)

# Process tool calls
if tool_calls:
for tc in tool_calls:
function_name = tc.function.name
function_args = json.loads(tc.function.arguments)

# Ensure it's calling the right tool
if function_name != "exec_bash_command" or "cmd" not in function_args:
                    tool_call_result = {"error": "Incorrect tool or function argument"}
else:
command = function_args["cmd"]
# Confirm execution with the user
if confirm_execution(command):
tool_call_result = bash.exec_bash_command(command)
else:
tool_call_result = {"error": "The user declined the execution of this command."}

messages.add_tool_message(tool_call_result, tc.id)
else:
# Display the assistant's message to the user.
if response:
print(response)
print("-" * 80 + "\n")
break

if __name__ == "__main__":
# Load the configuration
config = Config()
main(config)