In [1]:
%pip install --upgrade --quiet pip python-dotenv openai openai-agents

Note: you may need to restart the kernel to use updated packages.


In [2]:
from dotenv import load_dotenv
import asyncio
from typing import Any
from pydantic import BaseModel
from agents import Agent, Runner, trace, WebSearchTool, FunctionTool, function_tool, RunContextWrapper, ModelSettings
from IPython.display import Markdown

In [3]:
# Load settings from a local .env file; override=True lets values from the
# file replace variables that are already set in the process environment.
load_dotenv(override=True)

True

In [4]:
# making your own tools the easy way

# define a pydantic model for the function arguments
class User(BaseModel):
    # Argument model for `make_user_dance`. Documented with comments rather than
    # a docstring so the generated JSON schema stays exactly as it is today.
    # Name interpolated into the tool's result message.
    name: str
    # Age reported as "<age> years old" in the tool's result message.
    age: int


@function_tool
# The tool schema is generated from this signature and docstring, so the
# docstring text is deliberately left exactly as written.
def make_user_dance(user: User) -> str:
    """
    Agent can make a user dance

    Args:
        user: User to make dance
    Returns:
        Success message
    """
    dancer_name, dancer_age = user.name, user.age
    return f"User {dancer_name} is {dancer_age} years old and is now dancing"


In [5]:
# making your own tools the hard way

def do_some_simple_tool_work(data: str) -> str:
    """Placeholder for real work: ignores ``data`` and always reports "done".

    Args:
        data: Text describing the work item; currently unused.
    Returns:
        The fixed string "done".
    """
    outcome = "done"
    return outcome

class FunctionArgs(BaseModel):
    # Argument model for the hand-built `process_user` tool: its JSON schema is
    # passed to FunctionTool and it parses the raw JSON arguments at call time.
    # Comments (not a docstring) are used so the emitted schema is unchanged.
    # User name as supplied by the model.
    username: str
    # User age as supplied by the model.
    age: int

async def run_simple_tool_work(ctx: RunContextWrapper[Any], args: str) -> str:
    """Invocation handler for the hand-built `process_user` tool.

    Args:
        ctx: Run context supplied by the SDK; not used here.
        args: Raw JSON string holding the tool arguments.
    Returns:
        Whatever `do_some_simple_tool_work` returns for the described user.
    """
    payload = FunctionArgs.model_validate_json(args)
    description = f"{payload.username} is {payload.age} years old"
    return do_some_simple_tool_work(data=description)

# Hand-assembled tool: the argument schema comes from the pydantic model and
# every invocation is routed to the async handler.
do_work_tool = FunctionTool(
    on_invoke_tool=run_simple_tool_work,
    params_json_schema=FunctionArgs.model_json_schema(),
    description="Process a user",
    name="process_user",
)

In [6]:
simple_agent = Agent(
    # Hosted web search plus the two custom tools built in the cells above.
    tools=[WebSearchTool(), do_work_tool, make_user_dance],
    instructions="""
    You are a helpful assistant that can answer questions and help with tasks.
    """,
    model="gpt-4o-mini",
    name="simple_agent",
)

In [7]:
run_name = "Run name example"

with trace(run_name):
    result = await Runner.run(simple_agent, "What is the current stock price of Apple? After you do that, process the user 'John' who is 30 years old. If the user is over 20, make them dance. Whats the stock price of Apple?")
    print(result.final_output)
    

The current stock price of Apple (AAPL) is $268.47. 

I've also processed the user 'John' who is 30 years old, and he is now dancing! 🎉 If you need anything else, feel free to ask!


# Now let's model what it would look like to flesh out a more involved agent - A job hunter assistant

## This agent will have access to:
- A Notion table for logging company info
- Sub-agents:
    - Post finder agent
        - Find posts on LinkedIn
        - Find posts on Glassdoor
        - Find posts on Lever
        - Find posts on Indeed
    - TODO: additional sub-agents to be defined

In [11]:
class Thought(BaseModel):
    # Argument model for `save_thought`; comments are used instead of a
    # docstring so the generated tool schema is unchanged.
    # Free-text thought echoed back in the tool's confirmation message.
    thought: str

@function_tool
def save_thought(thought: Thought) -> str:
    """
    Agent can save thoughts to a note pad

    Args:
        thought: Thought to save
    Returns:
        Success message
    """
    # Stub: nothing is persisted yet, the text is only echoed back.
    saved_text = thought.thought
    return f"Thought saved: {saved_text}"


class Memo(BaseModel):
    # Argument model for `write_memo`; comments are used instead of a
    # docstring so the generated tool schema is unchanged.
    # Memo body echoed back in the tool's confirmation message.
    memo: str

@function_tool
def write_memo(memo: Memo) -> str:
    """
    Agent can write memos

    Args:
        memo: main bullet points of the memo
    Returns:
        Success message
    """
    # Stub: the memo is not stored anywhere, only echoed back.
    memo_text = memo.memo
    return f"Memo written: {memo_text}"

class CompanyInfo(BaseModel):
    # NOTE(review): not referenced by any code visible in this notebook;
    # `get_company_info` takes a plain `company_name: str` instead. Confirm
    # whether this model is still needed.
    # Name of the company.
    company_name: str

@function_tool
def get_company_info(company_name: str) -> dict[str, Any]:
    """
    Agent can get company info

    Args:
        company_name: name of the company
    Returns:
        Company info as a dict
    """
    # Stub data: the stock price is a hard-coded placeholder, not a lookup.
    info: dict[str, Any] = {"company_name": company_name}
    info["stock_price"] = 100
    return info
    
    

In [12]:

user_query = "What has recently happened in the options and futures markets?"
with trace(f"research_agent_{i}"):
    result = await Runner.run(research_agent, user_query)
    i += 1
    display(Markdown(result.final_output))


NameError: name 'i' is not defined

In [None]:
# Same query as the previous cell but without a custom trace name.
# NOTE(review): relies on `research_agent` and `user_query` already existing in
# the kernel; `research_agent` is not defined in any visible cell.
result = await Runner.run(research_agent, user_query)
# Render the agent's answer as Markdown rather than plain text.
display(Markdown(result.final_output))

Recent developments in the options and futures markets have been marked by significant trading activity, product innovations, and strategic expansions by major exchanges.

**Record Trading Volumes and Open Interest**

- **Cboe Global Markets** reported an all-time high in U.S. options trading in February 2025, with an average daily trading volume (ADV) of 18.0 million contracts across its four exchanges. This surge was driven by a record 13.6 million multiply-listed options contracts and a historic 3.5 million contracts for S&P 500 Index (SPX) options. Notably, February 21 saw several records shattered, including a total U.S. options volume of 25.7 million contracts and an SPX options volume of 4.7 million contracts. ([prnewswire.com](https://www.prnewswire.com/news-releases/cboe-global-markets-reports-trading-volume-for-february-2025-302393737.html?utm_source=openai))

- **Intercontinental Exchange (ICE)** achieved a record open interest of over 100 million contracts across its global futures and options markets on February 20, 2025, marking an 11% year-over-year increase. ([s2.q4cdn.com](https://s2.q4cdn.com/154085107/files/doc_news/Open-Interest-across-ICEs-Global-Futures-and-Options-Markets-Reaches-a-Record-100-Million-Contracts-2025.pdf?utm_source=openai))

**Product Innovations and Expansions**

- **CME Group** announced plans to launch options on six E-mini Select Sector futures and Dow Jones U.S. Real Estate Index futures on October 28, 2024. These new options aim to help clients manage volatility in individual equity sectors, providing added flexibility and capital efficiencies. ([prnewswire.com](https://www.prnewswire.com/news-releases/cme-group-to-launch-options-on-e-mini-select-sector-futures-and-dow-jones-us-real-estate-index-futures-on-october-28-302265244.html?utm_source=openai))

- **CME Group** also revealed plans to offer 24/7 trading for cryptocurrency futures and options starting in early 2026, responding to growing client demand for continuous cryptocurrency risk management capabilities. ([investing.com](https://www.investing.com/news/company-news/cme-group-to-offer-247-trading-for-cryptocurrency-futures-and-options-93CH-4268931?utm_source=openai))

**Market Performance and Valuation Concerns**

- On November 5, 2025, U.S. stock index futures experienced slight declines as investor concerns over high valuations, particularly in AI-linked stocks, persisted. The Nasdaq was down 0.45%, the S&P 500 slipped 0.28%, and the Dow saw a marginal dip of 0.06%. This followed a steep 2% drop in the Nasdaq on Tuesday, its worst in nearly a month. ([reuters.com](https://www.reuters.com/business/sp-500-nasdaq-futures-slip-valuation-concerns-linger-2025-11-05/?utm_source=openai))

These developments underscore a dynamic landscape in the options and futures markets, characterized by record trading volumes, strategic product expansions, and ongoing market volatility. 

We will isolate command execution in a dedicated helper that filters inputs and streams output. The helper runs inside a sandbox folder so the rest of the project stays untouched. We keep the implementation small and well documented so future edits stay simple.

Next we will build the whitelist, argument sanitizer, and streaming runner. We will test each supported tool manually before exposing the final `run_command` helper to the agent. The tests confirm that stdout, stderr, and exit codes flow through the wrapper cleanly.


In [None]:
import subprocess
import threading
import time
from dataclasses import dataclass, asdict
from pathlib import Path
from typing import Any, Dict, List, Sequence


# Directory, under the current working directory, in which every sandboxed
# command runs and every agent-written file is placed.
SANDBOX_ROOT = Path.cwd() / "command_sandbox"
# Keep the sandbox isolated from the repo root.
# Created when this cell runs; exist_ok makes re-running the cell harmless.
SANDBOX_ROOT.mkdir(parents=True, exist_ok=True)

# Maps the command key a caller may request to the argv prefix that is
# actually executed. Note that the "python3" key runs the `python` executable.
ALLOWED_COMMANDS: Dict[str, Sequence[str]] = {
    "python": ["python"],
    "git": ["git"],
    "pip": ["pip"],
    "python3": ["python"],
}
# Keep the list small so we limit the attack surface.

# Substrings that cause an argument to be rejected by `_ensure_allowed`.
BLOCKED_TOKENS: Sequence[str] = (";", "&&", "||", "|", ">", "<", "`", "\n")
# Block shell operators that could chain extra work.

# Default value of the `timeout` parameter of `run_command` / `run_command_tool`, in seconds.
DEFAULT_TIMEOUT_SECONDS = 60.0
# A timeout keeps long running commands under control.


class CommandNotAllowedError(ValueError):
    """Raised when the requested command is not on the whitelist.

    `_ensure_allowed` raises this when the stripped command name is not a key
    of `ALLOWED_COMMANDS`. Being a `ValueError`, it is also caught by generic
    `except ValueError` handlers.
    """


class UnsafeArgumentError(ValueError):
    """Raised when we detect unsafe characters in the arguments.

    `_ensure_allowed` raises this when an argument is not a string or contains
    any entry of `BLOCKED_TOKENS`.
    """


@dataclass(slots=True)
class CommandResult:
    """Structured result for the agent.

    We keep stdout and stderr lines so the agent can inspect them later.
    The summary captures the final state in one short sentence.

    Instances are produced by `run_command`; `run_command_tool` converts them
    to plain dictionaries with `dataclasses.asdict`.
    """

    # Command name exactly as the caller passed it.
    command: str
    # Arguments after validation by `_ensure_allowed` (whitespace-stripped).
    arguments: List[str]
    # Process return code, or -1 when no return code is available.
    exit_code: int
    # Elapsed time measured with time.monotonic().
    duration_seconds: float
    # Captured output, one entry per line with the trailing newline removed.
    stdout: List[str]
    stderr: List[str]
    # One-sentence description built by `_summarize`.
    summary: str
    # True when the timeout expired and the process was killed.
    timed_out: bool


def _ensure_allowed(command: str, arguments: Sequence[str] | None) -> tuple[List[str], List[str]]:
    """Check a command against the whitelist and vet each argument.

    Args:
        command: Requested command key; surrounding whitespace is ignored.
        arguments: Optional arguments; ``None`` is treated as no arguments.
    Returns:
        A pair ``(argv_prefix, cleaned_arguments)``: a copy of the whitelist
        entry for the command and the whitespace-stripped arguments.
    Raises:
        CommandNotAllowedError: The command is not a key of ALLOWED_COMMANDS.
        UnsafeArgumentError: An argument is not a string or contains a blocked token.
    """

    name = command.strip()
    if name not in ALLOWED_COMMANDS:
        raise CommandNotAllowedError(f"Command '{name}' is not whitelisted.")

    cleaned: List[str] = []
    for item in arguments or []:
        if not isinstance(item, str):
            raise UnsafeArgumentError("Arguments must be strings.")
        trimmed = item.strip()
        # Report the first blocked token (in BLOCKED_TOKENS order) found in the argument.
        offending = next((tok for tok in BLOCKED_TOKENS if tok and tok in trimmed), None)
        if offending is not None:
            raise UnsafeArgumentError(f"Unsafe token '{offending}' detected in argument '{trimmed}'.")
        cleaned.append(trimmed)

    argv_prefix = list(ALLOWED_COMMANDS[name])
    return argv_prefix, cleaned


def _drain_pipe(pipe, buffer: List[str], label: str, stream: bool) -> None:
    """Read lines from a pipe and optionally echo them live."""

    try:
        for line in iter(pipe.readline, ""):
            text = line.rstrip("\n")
            buffer.append(text)
            if stream:
                print(f"[{label}] {text}")
    finally:
        pipe.close()


def _summarize(command: str, exit_code: int, stdout: List[str], stderr: List[str], timed_out: bool) -> str:
    """Craft a short plain-English summary for the agent."""

    status = "timed out" if timed_out else f"exit {exit_code}"
    tail = stdout[-1] if stdout else stderr[-1] if stderr else "no output"
    return f"Command '{command}' finished with {status}. Last line: {tail}."


def run_command(command: str, arguments: Sequence[str] | None = None, *, timeout: float | None = DEFAULT_TIMEOUT_SECONDS, stream: bool = True) -> CommandResult:
    """Run a whitelisted command inside the sandbox with live streaming.

    We sanitize every argument, mirror stdout and stderr, and return structured data.
    A timeout stops runaway commands and marks the result as timed out.

    Args:
        command: Key of the command to run; must be in ALLOWED_COMMANDS.
        arguments: Optional argument strings, validated by `_ensure_allowed`.
        timeout: Seconds to wait before killing the process; ``None`` waits indefinitely.
        stream: When true, output lines are printed as they arrive.
    Returns:
        A CommandResult. When the timeout expires, ``timed_out`` is true and
        ``exit_code`` is -1.
    Raises:
        CommandNotAllowedError: The command is not whitelisted.
        UnsafeArgumentError: An argument is not a string or contains a blocked token.
    """

    safe_command, safe_args = _ensure_allowed(command, arguments)
    stdout_buffer: List[str] = []
    stderr_buffer: List[str] = []

    start = time.monotonic()
    process = subprocess.Popen(
        [*safe_command, *safe_args],
        cwd=SANDBOX_ROOT,
        # Give the child an empty stdin so a command that tries to read input
        # (for example a bare interpreter) sees EOF instead of blocking until
        # the timeout.
        stdin=subprocess.DEVNULL,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        text=True,
        bufsize=1,
    )

    # Drain both pipes concurrently so a chatty stream cannot fill its pipe
    # buffer and stall the child while we wait on it.
    threads = [
        threading.Thread(target=_drain_pipe, args=(process.stdout, stdout_buffer, "stdout", stream), daemon=True),
        threading.Thread(target=_drain_pipe, args=(process.stderr, stderr_buffer, "stderr", stream), daemon=True),
    ]
    for thread in threads:
        thread.start()

    timed_out = False
    try:
        process.wait(timeout=timeout)
    except subprocess.TimeoutExpired:
        timed_out = True
        process.kill()
        # Reap the killed child; without this wait it is never collected.
        process.wait()
    finally:
        for thread in threads:
            thread.join()

    # Keep reporting -1 for a timed-out command, as callers saw before the
    # killed process was reaped; otherwise use the real return code.
    if timed_out or process.returncode is None:
        exit_code = -1
    else:
        exit_code = process.returncode
    duration = time.monotonic() - start
    summary = _summarize(command, exit_code, stdout_buffer, stderr_buffer, timed_out)

    return CommandResult(
        command=command,
        arguments=list(safe_args),
        exit_code=exit_code,
        duration_seconds=duration,
        stdout=stdout_buffer,
        stderr=stderr_buffer,
        summary=summary,
        timed_out=timed_out,
    )


def run_command_tool(command: str, arguments: Sequence[str] | None = None, *, timeout: float | None = DEFAULT_TIMEOUT_SECONDS, stream: bool = True) -> Dict[str, Any]:
    """Dictionary-returning front end to `run_command` for agent integrations.

    Accepts the same parameters as `run_command`, forwards them unchanged, and
    converts the resulting dataclass into a plain dict with `asdict`.
    """

    return asdict(run_command(command, arguments, timeout=timeout, stream=stream))



In [None]:
# Smoke test the whitelisted commands with `--version` before wiring the helper
# into an agent. The "python3" alias is not exercised here.
test_matrix = {name: ["--version"] for name in ("python", "git", "pip")}

# Results are kept by command name so they can be inspected after the loop.
manual_test_results: Dict[str, CommandResult] = {}
for cmd, args in test_matrix.items():
    print(f"\nRunning manual test for '{cmd} { ' '.join(args) }'")
    manual_test_results[cmd] = result = run_command(cmd, args)
    print(manual_test_results[cmd].summary)




Running manual test for 'python --version'
[stdout] Python 3.13.1
Command 'python' finished with exit 0. Last line: Python 3.13.1.

Running manual test for 'git --version'
[stdout] git version 2.49.0.windows.1
Command 'git' finished with exit 0. Last line: git version 2.49.0.windows.1.

Running manual test for 'pip --version'
[stdout] pip 25.3 from C:\Users\merri\Documents\Coding\github\simple-agent\venv\Lib\site-packages\pip (python 3.13)
[stdout] 
Command 'pip' finished with exit 0. Last line: .


In [13]:
# Example of the tool interface returning structured data without streaming.
# stream=False suppresses the live "[stdout] ..." echo; output is still captured.
example_result = run_command_tool("python", ["-c", "print('sandbox ok')"], stream=False)

# Bare expression so the notebook renders the returned dictionary.
example_result


{'command': 'python',
 'arguments': ['-c', "print('sandbox ok')"],
 'exit_code': 0,
 'duration_seconds': 0.01714050001464784,
 'stdout': ['sandbox ok'],
 'stderr': [],
 'summary': "Command 'python' finished with exit 0. Last line: sandbox ok.",
 'timed_out': False}

In [14]:
class SandboxCommandRequest(BaseModel):
    """Payload for the `sandbox_run_command` tool."""

    # Command key; forwarded to `run_command_tool` as `command`.
    command: str
    # Arguments passed after the command; defaults to none.
    arguments: List[str] = []
    # Whether output is echoed live while the command runs; off by default.
    stream: bool = False
    # Forwarded as `timeout`; None is passed through unchanged.
    timeout_seconds: float | None = DEFAULT_TIMEOUT_SECONDS


@function_tool
def sandbox_run_command(request: SandboxCommandRequest) -> Dict[str, Any]:
    """Run a sandboxed command from the whitelist defined in `ALLOWED_COMMANDS`.

    The agent must supply the command key and optional arguments. Unsafe tokens raise errors.
    Returns stdout, stderr, and exit data in a JSON-friendly structure.
    """

    # Unpack the payload and delegate; all validation happens downstream.
    outcome = run_command_tool(
        command=request.command,
        arguments=request.arguments,
        timeout=request.timeout_seconds,
        stream=request.stream,
    )
    return outcome


Next we give the agent a write-only tool for Python modules. The helper keeps writes inside the sandbox and enforces `.py` suffixes so command execution and file authoring stay compartmentalized.


In [18]:
class PythonFileRequest(BaseModel):
    """Payload for the `write_python_file` tool."""

    # Target path, joined onto SANDBOX_ROOT and validated by `_validate_python_path`.
    path: str
    # Full text written to the file, encoded as UTF-8.
    content: str


def _validate_python_path(path_str: str) -> Path:
    """Resolve a requested path and confirm it is a `.py` file inside the sandbox.

    Args:
        path_str: Path joined onto SANDBOX_ROOT before resolving.
    Returns:
        The resolved path; its parent directory is created if missing.
    Raises:
        ValueError: The resolved path is outside SANDBOX_ROOT or its suffix is not `.py`.
    """

    target = SANDBOX_ROOT.joinpath(path_str).resolve()
    try:
        # relative_to raises ValueError when target is not under the sandbox root.
        target.relative_to(SANDBOX_ROOT)
    except ValueError as escape_error:
        raise ValueError("Python files must remain inside the sandbox root.") from escape_error

    is_python_source = target.suffix == ".py"
    if not is_python_source:
        raise ValueError("Only .py files are allowed.")

    target.parent.mkdir(parents=True, exist_ok=True)
    return target


@function_tool
def write_python_file(request: PythonFileRequest) -> Dict[str, Any]:
    """Write a Python module into the sandbox.

    Refuses to leave `SANDBOX_ROOT` and rejects non-`.py` suffixes. Returns file stats for logging.
    """

    destination = _validate_python_path(request.path)
    source_text = request.content
    destination.write_text(source_text, encoding="utf-8")
    # Report the path relative to the sandbox and the UTF-8 size of the content.
    return {
        "path": str(destination.relative_to(SANDBOX_ROOT)),
        "bytes_written": len(source_text.encode("utf-8")),
    }


In [19]:
harder_agent = Agent(
    name="Command Runner",
    model="gpt-4o-mini",
    # The model only sees this text, not the Python constant, so the whitelist
    # keys are spelled out here instead of referring to ALLOWED_COMMANDS by name.
    # The recorded run reported a whitelisted command as "not whitelisted",
    # presumably because flags were packed into `command`; the second sentence
    # tells the model how to split the call.
    instructions=f"""
    You can execute only these commands: {', '.join(sorted(ALLOWED_COMMANDS))}.
    Pass the bare command name as `command` and put every flag or value in `arguments` as a separate item.
    Use `sandbox_run_command` for shell work and `write_python_file` for module edits.
    Inspect the summary and avoid rerunning successful commands unless you have to.
    """,
    tools=[
        sandbox_run_command,
        write_python_file,
    ],
    model_settings=ModelSettings(
        tool_choice="required"
    )
)

In [20]:
# Run the command-runner agent on a task that needs both tools (a file write
# and version checks), recorded under a single named trace.
with trace("command_runner_write_test"):
    result = await Runner.run(
        harder_agent,
        "Create a Python file called demo/util.py that prints 'hello from sandbox' and confirm python and pip versions.",
    )

In [21]:
# Show the agent's final answer from the run in the previous cell.
print(result.final_output)

I successfully created the Python file `demo/util.py` with the following content:

```python
print('hello from sandbox')
```

I also checked the versions of `pip`, and here are the results:

- **pip version:** 25.3 (Python 3.13)

However, I encountered an issue when trying to check the Python version, as the command is not whitelisted.

Let me know if you need anything else!
