diff --git a/.github/workflows/sync-docs-code-blocks.yml b/.github/workflows/sync-docs-code-blocks.yml index 5146d0af..0d345528 100644 --- a/.github/workflows/sync-docs-code-blocks.yml +++ b/.github/workflows/sync-docs-code-blocks.yml @@ -38,14 +38,30 @@ jobs: with: python-version: '3.11' + - name: Install dependencies for API docs + run: | + python -m pip install --upgrade pip + pip install sphinx sphinx-markdown-builder myst-parser + - name: Sync code blocks - id: detect_changes env: AGENT_SDK_PATH: ${{ github.workspace }}/agent-sdk shell: bash run: | set -euo pipefail python .github/scripts/sync_code_blocks.py + + - name: Generate API documentation + shell: bash + run: | + set -euo pipefail + python scripts/generate-api-docs.py + + - name: Check for changes + id: detect_changes + shell: bash + run: | + set -euo pipefail if [[ -n "$(git status --porcelain)" ]]; then echo "changes=true" >> "$GITHUB_OUTPUT" else @@ -57,26 +73,25 @@ jobs: uses: peter-evans/create-pull-request@v7 with: commit-message: | - docs: sync code blocks from agent-sdk examples - + docs: sync code blocks and generate API reference + Synced from agent-sdk ref: ${{ github.event.inputs.agent_sdk_ref || 'main' }} - branch: sync-docs-code-blocks + branch: sync-docs-and-api branch-suffix: timestamp delete-branch: true - title: "docs: sync code blocks from agent-sdk examples" + title: "docs: sync code blocks and generate API reference" body: | ## Summary of changes - This PR automatically syncs code blocks in documentation with their corresponding source files from the agent-sdk repository. + This PR automatically syncs code blocks in documentation with their corresponding source files from the agent-sdk repository, and generates API reference documentation. 
**Agent SDK Reference**: `${{ github.event.inputs.agent_sdk_ref || 'main' }}` ### Changes Made - Updated code blocks in MDX files to match the current state of example files in agent-sdk + - Generated API reference markdown files - This is an automated sync performed by the `sync-docs-code-blocks` workflow ### Checklist - [x] I have read and reviewed the documentation changes to the best of my ability. - [x] If the change is significant, I have run the documentation site locally and confirmed it renders as expected. - - **Note**: This is an automated pull request. Please review the changes to ensure they are correct before merging. diff --git a/.gitignore b/.gitignore index 7eb46d79..e6dba0d8 100644 --- a/.gitignore +++ b/.gitignore @@ -1,6 +1,34 @@ # Local checkout of agent-sdk for docs workflows and local testing agent-sdk/ +# Sphinx build artifacts +scripts/sphinx/build/ +scripts/sphinx/source/*.rst +!scripts/sphinx/source/index.rst +!scripts/sphinx/source/conf.py + +# Python cache +__pycache__/ +*.pyc +*.pyo +*.pyd +.Python +*.so + +# Virtual environments +venv/ +env/ +.env + +# IDE files +.vscode/ +.idea/ +*.swp +*.swo + # OS junk .DS_Store Thumbs.db + +# Keep generated API reference docs (these should be committed) +# api-reference/ - This directory should be committed diff --git a/docs.json b/docs.json index 810cfe10..0f3bd601 100644 --- a/docs.json +++ b/docs.json @@ -140,7 +140,9 @@ }, { "group": "Tips and Tricks", - "pages": ["openhands/usage/tips/prompting-best-practices"] + "pages": [ + "openhands/usage/tips/prompting-best-practices" + ] }, { "group": "Troubleshooting & Feedback", @@ -258,6 +260,19 @@ ] } ] + }, + { + "group": "API Reference", + "pages": [ + "sdk/api-reference/openhands.sdk.agent", + "sdk/api-reference/openhands.sdk.conversation", + "sdk/api-reference/openhands.sdk.event", + "sdk/api-reference/openhands.sdk.llm", + "sdk/api-reference/openhands.sdk.security", + "sdk/api-reference/openhands.sdk.tool", + 
"sdk/api-reference/openhands.sdk.utils", + "sdk/api-reference/openhands.sdk.workspace" + ] } ] }, @@ -267,7 +282,9 @@ }, { "tab": "Success Stories", - "pages": ["success-stories/index"] + "pages": [ + "success-stories/index" + ] } ], "global": { @@ -308,7 +325,7 @@ } }, "banner": { - "content": "📢 **GitHub Org Rename:** All-Hands-AI to OpenHands on Monday Oct 20th at 18:00 UTC. [Migration details →](https://github.com/OpenHands/OpenHands/issues/11376)", + "content": "\ud83d\udce2 **GitHub Org Rename:** All-Hands-AI to OpenHands on Monday Oct 20th at 18:00 UTC. [Migration details \u2192](https://github.com/OpenHands/OpenHands/issues/11376)", "dismissible": true }, "head": [ @@ -320,11 +337,22 @@ } ], "contextual": { - "options": ["copy", "view", "chatgpt", "claude"] + "options": [ + "copy", + "view", + "chatgpt", + "claude" + ] }, "redirects": [ - { "source": "/modules/:slug*", "destination": "/:slug*" }, - { "source": "/usage/:slug*", "destination": "/openhands/usage/:slug*" }, + { + "source": "/modules/:slug*", + "destination": "/:slug*" + }, + { + "source": "/usage/:slug*", + "destination": "/openhands/usage/:slug*" + }, { "source": "/openhands/usage/configuration-options", "destination": "/openhands/usage/advanced/configuration-options" @@ -414,4 +442,4 @@ "destination": "/openhands/usage/troubleshooting/feedback" } ] -} +} \ No newline at end of file diff --git a/scripts/README.md b/scripts/README.md new file mode 100644 index 00000000..0094bf38 --- /dev/null +++ b/scripts/README.md @@ -0,0 +1,326 @@ +# API Documentation Generation Pipeline + +This directory contains the automated pipeline for generating API reference documentation from the [OpenHands software-agent-sdk](https://github.com/OpenHands/software-agent-sdk) repository. + +## Overview + +The pipeline uses Sphinx with the `sphinx-markdown-builder` extension to generate clean, parser-friendly Markdown files from Python docstrings. 
The output is specifically designed to avoid JavaScript parsing errors in Mintlify by using simple headers and clean formatting. + +### Key Features + +- **Simple headers**: Just class names (`### ClassName`) and method names (`#### method_name`) +- **No complex signatures**: Parameters documented as readable text, not in headers +- **Parser-friendly**: Eliminates asterisks, emphasis, and patterns that cause acorn parsing errors +- **Organized structure**: 9 module-level pages instead of 100+ individual files + +## Files Structure + +``` +scripts/ +├── README.md # This file +├── generate-api-docs.py # Main generation script +├── mint-config-snippet.json # Generated Mintlify config snippet +└── sphinx/ + └── source/ + ├── conf.py # Sphinx configuration + └── index.rst # Main documentation index +``` + +## Prerequisites + +### Required Python Packages + +Install the required dependencies: + +```bash +pip install sphinx sphinx-markdown-builder myst-parser +``` + +### System Requirements + +- Python 3.8+ + +## Usage + +### Basic Usage + +Generate API documentation with default settings: + +```bash +cd docs +python scripts/generate-api-docs.py +``` + +### Advanced Usage + +```bash +# Clean previous build and regenerate everything +python scripts/generate-api-docs.py --clean + +# Enable verbose output for debugging +python scripts/generate-api-docs.py --verbose + +# Combine options +python scripts/generate-api-docs.py --clean --verbose +``` + +### Command Line Options + +- `--clean`: Remove all previous build artifacts and generated documentation before starting +- `--verbose`, `-v`: Enable detailed logging output for debugging + +## How It Works + +The generation pipeline follows these steps: + +1. **Dependency Check**: Verifies that required Python packages are installed +2. **Repository Management**: Clones or updates the `software-agent-sdk` repository +3. **Sphinx Setup**: Creates necessary Sphinx directories and configuration +4. 
**RST Generation**: Uses `sphinx-apidoc` to generate RST files from Python source +5. **Markdown Build**: Runs Sphinx with the markdown builder to generate clean Markdown +6. **Content Organization**: Processes and organizes the generated Markdown files +7. **Mintlify Integration**: Creates configuration snippets for easy integration +8. **Cleanup**: Removes build artifacts while preserving generated documentation + +## Output + +The script generates the following: + +### Generated Documentation + +- **`sdk/api-reference/`**: Directory containing all generated API documentation + - `openhands.sdk.mdx`: Main SDK module documentation + - `openhands.sdk.agent.mdx`: Agent system documentation + - `openhands.sdk.conversation.mdx`: Conversation management documentation + - `openhands.sdk.event.mdx`: Event system documentation + - `openhands.sdk.llm.mdx`: LLM integration documentation + - `openhands.sdk.security.mdx`: Security features documentation + - `openhands.sdk.tool.mdx`: Tool system documentation + - `openhands.sdk.utils.mdx`: Utilities documentation + - `openhands.sdk.workspace.mdx`: Workspace management documentation + +### Configuration Files + +- **`scripts/mint-config-snippet.json`**: Ready-to-use configuration snippet for `docs.json` + +## Integration with Mintlify + +### Automatic Integration + +The generated `mint-config-snippet.json` contains the navigation structure for the API reference: + +```json +{ + "group": "API Reference", + "pages": [ + "sdk/api-reference/index", + "sdk/api-reference/sdk.agent", + "sdk/api-reference/sdk.conversation", + ... + ] +} +``` + +### Manual Integration + +To integrate the API reference into your `docs.json`: + +1. Run the generation script +2. Copy the contents of `scripts/mint-config-snippet.json` +3. 
Add it to the appropriate section in your `docs.json` navigation + +Example integration in `docs.json`: + +```json +{ + "navigation": { + "tabs": [ + { + "tab": "SDK", + "pages": [ + "sdk/index", + "sdk/getting-started", + { + "group": "Guides", + "pages": ["..."] + }, + { + "group": "API Reference", + "pages": [ + "api-reference/index", + "api-reference/openhands.agent", + "api-reference/openhands.conversation" + ] + } + ] + } + ] + } +} +``` + +## Customization + +### Sphinx Configuration + +Modify `scripts/sphinx/source/conf.py` to customize: + +- **Extensions**: Add or remove Sphinx extensions +- **Autodoc Options**: Control what gets documented +- **Napoleon Settings**: Configure docstring parsing +- **Markdown Output**: Adjust markdown generation settings + +### Content Processing + +The script includes content processing functions that can be customized: + +- `clean_markdown_file()`: Modify how individual files are processed +- `create_api_index()`: Customize the main index page +- `organize_output_docs()`: Change how files are organized + +### Module Selection + +To document specific modules only, modify the `generate_rst_files()` method in the script to include/exclude specific paths. + +## Troubleshooting + +### Common Issues + +1. **Missing Dependencies** + ``` + Error: Missing required packages: sphinx, sphinx_markdown_builder, myst_parser + ``` + **Solution**: Install the required packages with pip + +2. **SDK Repository Not Found** + ``` + Error: openhands-sdk directory not found + ``` + **Solution**: Ensure the SDK repository is properly cloned and contains the expected structure + +3. 
**Permission Errors** + ``` + Error: Permission denied when writing files + ``` + **Solution**: Check file permissions and ensure the script has write access to the docs directory + +### Debug Mode + +Use the `--verbose` flag to get detailed logging: + +```bash +python scripts/generate-api-docs.py --verbose +``` + +This will show: +- Command execution details +- File processing steps +- Sphinx build output +- Error stack traces + +### Manual Cleanup + +If the script fails partway through, you can manually clean up: + +```bash +# Remove build artifacts +rm -rf scripts/sphinx/build/ +rm -rf scripts/sphinx/source/openhands*.rst + +# Remove generated docs (if needed) +rm -rf api-reference/ + +# Remove cloned repository (if needed) +rm -rf agent-sdk/ +``` + +## Automation + +### GitHub Actions Workflow + +The repository includes an automated workflow (`.github/workflows/sync-docs-and-api.yml`) that: + +- **Runs daily at 2 AM UTC** to keep documentation current +- **Can be triggered manually** with custom options +- **Syncs both code blocks and API documentation** from the agent-sdk repository + +#### Manual Trigger Options + +You can manually trigger the workflow with these options: + +- **`agent_sdk_ref`**: Specify which branch/tag/commit to sync from (default: `main`) +- **`sync_code_blocks`**: Enable/disable code block synchronization (default: `true`) +- **`generate_api_docs`**: Enable/disable API documentation generation (default: `true`) + +#### Workflow Features + +- **Automatic dependency installation**: Installs Sphinx and required packages +- **Conditional execution**: Skip code sync or API generation as needed +- **Smart commit messages**: Describes exactly what was updated +- **Error handling**: Fails gracefully with detailed error messages + +### CI/CD Integration + +For custom CI/CD setups, the script is designed to be idempotent and safe: + +```yaml +# Example GitHub Actions step +- name: Generate API Documentation + run: | + cd docs + pip install sphinx 
sphinx-markdown-builder myst-parser + python scripts/generate-api-docs.py --clean +``` + +### Manual Scheduled Updates + +If you prefer custom scheduling, you can set up your own workflow: + +```yaml +# Example custom workflow +name: Update API Docs +on: + schedule: + - cron: '0 6 * * 1' # Weekly on Monday at 6 AM + workflow_dispatch: + +jobs: + update-docs: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - name: Setup Python + uses: actions/setup-python@v5 + with: + python-version: '3.11' + - name: Install dependencies + run: pip install sphinx sphinx-markdown-builder myst-parser + - name: Generate documentation + run: | + cd docs + python scripts/generate-api-docs.py --clean + - name: Commit changes + run: | + git config --local user.email "action@github.com" + git config --local user.name "GitHub Action" + git add sdk/api-reference/ + git diff --staged --quiet || git commit -m "Update API documentation" + git push +``` + +## Contributing + +When modifying the generation pipeline: + +1. Test changes locally with `--verbose` flag +2. Verify generated Markdown renders correctly in Mintlify +3. Check that all module documentation is complete +4. Update this README if adding new features or changing behavior + +## Support + +For issues with the documentation generation pipeline: + +1. Check the troubleshooting section above +2. Run with `--verbose` to get detailed error information +3. Open an issue in the OpenHands/docs repository with the full error output \ No newline at end of file diff --git a/scripts/generate-api-docs.py b/scripts/generate-api-docs.py new file mode 100755 index 00000000..a913b453 --- /dev/null +++ b/scripts/generate-api-docs.py @@ -0,0 +1,785 @@ +#!/usr/bin/env python3 +""" +Simple API documentation generator for OpenHands SDK. + +This script generates clean, parser-friendly markdown documentation +by extracting docstrings and presenting them in a simple format. 
+""" + +import os +import re +import json +import shutil +import logging +import subprocess +from pathlib import Path +from typing import Dict, List, Any + +# Configure logging +logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s') +logger = logging.getLogger(__name__) + +class SimpleAPIDocGenerator: + def __init__(self, docs_dir: Path): + self.docs_dir = docs_dir + self.agent_sdk_dir = docs_dir / "agent-sdk" + self.output_dir = docs_dir / "sdk" / "api-reference" + self.sphinx_dir = docs_dir / "scripts" / "sphinx" + + def run(self): + """Main execution method.""" + logger.info("Starting simple API documentation generation...") + + # Step 1: Setup agent-sdk repository + self.setup_agent_sdk() + + # Step 2: Fix MDX syntax issues in agent-sdk files + self.fix_agent_sdk_mdx_syntax() + + # Step 3: Install the SDK + self.install_sdk() + + # Step 4: Generate documentation using Sphinx + self.generate_sphinx_docs() + + # Step 5: Clean and simplify the generated markdown + self.clean_generated_docs() + + # Step 6: Update navigation + self.update_navigation() + + logger.info("API documentation generation completed successfully!") + + def setup_agent_sdk(self): + """Clone or update the agent-sdk repository.""" + if self.agent_sdk_dir.exists(): + logger.info("Updating existing agent-sdk repository...") + self.run_command(["git", "fetch", "origin"], cwd=self.agent_sdk_dir) + self.run_command(["git", "reset", "--hard", "origin/main"], cwd=self.agent_sdk_dir) + else: + logger.info("Cloning agent-sdk repository...") + self.run_command([ + "git", "clone", + "https://github.com/OpenHands/software-agent-sdk.git", + str(self.agent_sdk_dir) + ]) + + def install_sdk(self): + """Install the SDK package.""" + logger.info("Installing openhands-sdk package...") + sdk_path = self.agent_sdk_dir / "openhands-sdk" + self.run_command([ + "python", "-m", "pip", "install", "-e", str(sdk_path) + ]) + + def fix_agent_sdk_mdx_syntax(self): + """Fix MDX syntax 
issues in agent-sdk files to prevent Mintlify parsing errors.""" + logger.info("Fixing MDX syntax issues in agent-sdk files...") + + # Fix email addresses in repo.md + repo_md = self.agent_sdk_dir / ".openhands" / "microagents" / "repo.md" + if repo_md.exists(): + content = repo_md.read_text() + # Fix unescaped @ symbols in email addresses + content = re.sub(r'<([^<>]*@[^<>]*)>', r'&lt;\1&gt;', content) + repo_md.write_text(content) + + # Fix README.md + readme_md = self.agent_sdk_dir / "README.md" + if readme_md.exists(): + content = readme_md.read_text() + # Convert HTML comments to JSX format + content = re.sub(r'<!--(.*?)-->', r'{/* \1 */}', content, flags=re.DOTALL) + # Fix self-closing tags + content = re.sub(r'<(img|br|hr)([^>]*?)(?<!/)>', r'<\1\2 />', content) + readme_md.write_text(content) + + def generate_sphinx_docs(self): + """Generate documentation using Sphinx.""" + logger.info("Generating documentation with Sphinx...") + + # Create Sphinx configuration + self.create_sphinx_config() + + # Generate RST files + self.create_rst_files() + + # Build documentation + self.build_sphinx_docs() + + def create_sphinx_config(self): + """Create a simple Sphinx configuration.""" + sphinx_source = self.sphinx_dir / "source" + sphinx_source.mkdir(parents=True, exist_ok=True) + + conf_py = sphinx_source / "conf.py" + conf_py.write_text(''' +import os +import sys +sys.path.insert(0, os.path.abspath('../../../agent-sdk/openhands-sdk')) + +project = 'OpenHands SDK' +copyright = '2024, OpenHands' +author = 'OpenHands' + +extensions = [ + 'sphinx.ext.autodoc', + 'sphinx.ext.napoleon', + 'sphinx_markdown_builder', +] + +autodoc_default_options = { + 'members': True, + 'undoc-members': True, + 'show-inheritance': True, + 'special-members': '__init__', +} + +napoleon_google_docstring = True +napoleon_numpy_docstring = True +napoleon_include_init_with_doc = False +napoleon_include_private_with_doc = False + +html_theme = 'sphinx_rtd_theme' +''') + + def create_rst_files(self): + """Create RST files 
for the main SDK modules.""" + sphinx_source = self.sphinx_dir / "source" + + # Main index file + index_rst = sphinx_source / "index.rst" + index_rst.write_text(''' +OpenHands SDK API Reference +=========================== + +.. toctree:: + :maxdepth: 2 + :caption: Contents: + + openhands.sdk + +Indices and tables +================== + +* :ref:`genindex` +* :ref:`modindex` +* :ref:`search` +''') + + # Main SDK module + sdk_rst = sphinx_source / "openhands.sdk.rst" + sdk_rst.write_text(''' +openhands.sdk package +===================== + +.. automodule:: openhands.sdk + :members: + :undoc-members: + :show-inheritance: + +Submodules +---------- + +.. toctree:: + :maxdepth: 1 + + openhands.sdk.agent + openhands.sdk.conversation + openhands.sdk.event + openhands.sdk.llm + openhands.sdk.tool + openhands.sdk.workspace + openhands.sdk.security + openhands.sdk.utils +''') + + # Generate RST files for each major module + modules = [ + 'agent', 'conversation', 'event', 'llm', + 'tool', 'workspace', 'security', 'utils' + ] + + for module in modules: + module_rst = sphinx_source / f"openhands.sdk.{module}.rst" + module_rst.write_text(f''' +openhands.sdk.{module} module +{'=' * (len(f'openhands.sdk.{module} module'))} + +.. 
automodule:: openhands.sdk.{module} + :members: + :undoc-members: + :show-inheritance: +''') + + def build_sphinx_docs(self): + """Build the Sphinx documentation.""" + build_dir = self.sphinx_dir / "build" + source_dir = self.sphinx_dir / "source" + + # Clean previous build + if build_dir.exists(): + shutil.rmtree(build_dir) + + # Build markdown documentation + self.run_command([ + "sphinx-build", "-b", "markdown", + str(source_dir), str(build_dir) + ]) + + def clean_generated_docs(self): + """Clean and simplify the generated markdown files.""" + logger.info("Cleaning generated documentation...") + + build_dir = self.sphinx_dir / "build" + + # Remove old output directory + if self.output_dir.exists(): + shutil.rmtree(self.output_dir) + self.output_dir.mkdir(parents=True, exist_ok=True) + + # Process each markdown file + for md_file in build_dir.glob("*.md"): + if md_file.name == "index.md": + continue + + # Skip the top-level openhands.sdk.md file as it duplicates content + if md_file.name == "openhands.sdk.md": + logger.info(f"Skipping {md_file.name} (top-level duplicate)") + continue + + logger.info(f"Processing {md_file.name}") + content = md_file.read_text() + + # Clean the content + cleaned_content = self.clean_markdown_content(content, md_file.name) + + # Write to output directory with .mdx extension + output_filename = md_file.name.replace('.md', '.mdx') + output_file = self.output_dir / output_filename + output_file.write_text(cleaned_content) + + def clean_multiline_dictionaries(self, content: str) -> str: + """Clean multi-line dictionary patterns that cause parsing issues.""" + import re + + # Handle the specific problematic pattern that keeps appearing + # Pattern: For example: {"Reasoning:": "bold blue",\n "Thought:": "bold green"} + pattern1 = r'For example: \{"[^"]*":\s*"[^"]*",\s*\n\s*"[^"]*":\s*"[^"]*"\}' + content = re.sub(pattern1, 'For example: (configuration dictionary)', content, flags=re.DOTALL) + + # More general multi-line dictionary 
patterns + pattern2 = r'\{"[^"]*":\s*"[^"]*",\s*\n\s*"[^"]*":\s*"[^"]*"\}' + content = re.sub(pattern2, '(configuration dictionary)', content, flags=re.DOTALL) + + # Handle any remaining multi-line patterns with curly braces + pattern3 = r'\{[^{}]*\n[^{}]*\}' + content = re.sub(pattern3, '(configuration object)', content, flags=re.DOTALL) + + return content + + def fix_header_hierarchy(self, content: str) -> str: + """Fix header hierarchy to ensure proper nesting under class headers.""" + import re + + lines = content.split('\n') + result_lines = [] + in_class_section = False + + for line in lines: + # Check if we're entering a class section + if re.match(r'^### class ', line): + in_class_section = True + result_lines.append(line) + # Check if we're leaving a class section (another class or module header) + elif line.startswith('### ') and not line.startswith('### class '): + # This is a non-class h3 header within a class section - convert to h4 + if in_class_section: + line = '#' + line # Convert ### to #### + result_lines.append(line) + # Check if we hit another class or end of content + elif re.match(r'^### class ', line) or line.startswith('# '): + in_class_section = line.startswith('### class ') + result_lines.append(line) + else: + result_lines.append(line) + + return '\n'.join(result_lines) + + def reorganize_class_content(self, content: str) -> str: + """Reorganize class content to separate properties from methods.""" + import re + + lines = content.split('\n') + result_lines = [] + i = 0 + + while i < len(lines): + line = lines[i] + + # Check if this is a class header + if re.match(r'^### \*class\*', line): + # Process this class + class_lines, i = self.process_class_section(lines, i) + result_lines.extend(class_lines) + else: + result_lines.append(line) + i += 1 + + return '\n'.join(result_lines) + + def process_class_section(self, lines: list[str], start_idx: int) -> tuple[list[str], int]: + """Process a single class section, separating properties from 
methods.""" + import re + + result = [] + i = start_idx + + # Add the class header and description (including any ### Example sections) + while i < len(lines): + line = lines[i] + # Stop when we hit the first #### (class member) or another class + if line.startswith('####') or (line.startswith('### *class*') and i > start_idx): + break + # Fix Example headers to be h4 instead of h3 + if line.startswith('### ') and not line.startswith('### *class*'): + line = '#' + line # Convert ### to #### + result.append(line) + i += 1 + + # Collect all class members + properties = [] + methods = [] + + while i < len(lines): + line = lines[i] + + # Stop if we hit another class or module (but not ### Example sections) + if line.startswith('### *class*'): + break + + if line.startswith('####'): + # Determine if this is a property or method + member_lines, i = self.extract_member_section(lines, i) + + if self.is_property(member_lines[0]): + properties.extend(member_lines) + else: + methods.extend(member_lines) + else: + i += 1 + + # Add properties section if we have any + if properties: + result.append('') + result.append('#### Properties') + result.append('') + + # Convert property headers to list items + for prop_line in properties: + if prop_line.startswith('####'): + # Extract property name and type + prop_match = re.match(r'^####\s*([^*:]+)\s*\*?:?\s*(.*)$', prop_line) + if prop_match: + prop_name = prop_match.group(1).strip() + prop_type = prop_match.group(2).strip() + # Clean up the type annotation + prop_type = re.sub(r'^\*\s*', '', prop_type) # Remove leading * + prop_type = re.sub(r'\s*\*$', '', prop_type) # Remove trailing * + if prop_type: + result.append(f'- `{prop_name}`: {prop_type}') + else: + result.append(f'- `{prop_name}`') + elif prop_line.strip() and not prop_line.startswith('####'): + # Add description lines indented + result.append(f' {prop_line}') + + # Add methods section if we have any + if methods: + if properties: # Add spacing if we had properties + 
result.append('') + result.append('#### Methods') + result.append('') + result.extend(methods) + + return result, i + + def extract_member_section(self, lines: list[str], start_idx: int) -> tuple[list[str], int]: + """Extract all lines belonging to a single class member.""" + result = [] + i = start_idx + + # Add the header line + result.append(lines[i]) + i += 1 + + # Add all following lines until we hit another header or class + while i < len(lines): + line = lines[i] + if line.startswith('####') or line.startswith('###'): + break + result.append(line) + i += 1 + + return result, i + + def is_property(self, header_line: str) -> bool: + """Determine if a class member is a property or method.""" + import re + + # Properties typically have type annotations with *: type* pattern + if re.search(r'\*:\s*[^*]+\*', header_line): + return True + + # Methods have parentheses + if '(' in header_line and ')' in header_line: + return False + + # Properties often have : followed by type info + if ':' in header_line and not '(' in header_line: + return True + + # Default to method if unclear + return False + + def clean_markdown_content(self, content: str, filename: str) -> str: + """Clean markdown content to be parser-friendly.""" + # First handle multi-line dictionary patterns + content = self.clean_multiline_dictionaries(content) + + # Reorganize class content to separate properties from methods + content = self.reorganize_class_content(content) + + # Fix header hierarchy (Example sections should be h4 under class headers) + content = self.fix_header_hierarchy(content) + + lines = content.split('\n') + cleaned_lines = [] + + for line in lines: + # Skip empty lines and sphinx-specific content + if not line.strip(): + cleaned_lines.append(line) + continue + + # Clean headers - remove complex signatures, keep just names + if line.startswith('#'): + line = self.clean_header(line) + + # Skip module headers that duplicate the title + if line.startswith('# ') and ' module' in line: 
+ continue + + # Remove problematic patterns + line = self.remove_problematic_patterns(line) + + cleaned_lines.append(line) + + # Add frontmatter + module_name = filename.replace('.md', '') + frontmatter = f'''--- +title: {module_name} +description: API reference for {module_name} module +--- + +''' + + return frontmatter + '\n'.join(cleaned_lines) + + def clean_header(self, line: str) -> str: + """Clean header lines to contain only class/method names.""" + # Extract just the class or method name from complex signatures + + # Pattern for class headers: "### *class* ClassName(...)" or "### class ClassName(...)" + class_match = re.match(r'^(#+)\s*\*?class\*?\s+([^(]+)', line) + if class_match: + level, class_name = class_match.groups() + # Extract just the class name (last part after the last dot) for readability + simple_class_name = class_name.strip().split('.')[-1] + return f"{level} class {simple_class_name}" + + # Pattern for method headers: "#### method_name(...)" + method_match = re.match(r'^(#+)\s*([^(]+)\(', line) + if method_match: + level, method_name = method_match.groups() + # Clean up the method name + method_name = method_name.strip().split('.')[-1] # Get just the method name + # Remove any decorators or prefixes + method_name = re.sub(r'^(static|class|abstract|property)\s+', '', method_name) + return f"{level} {method_name}()" + + # Pattern for property headers: "#### property property_name" + prop_match = re.match(r'^(#+)\s*property\s+([^:]+)', line) + if prop_match: + level, prop_name = prop_match.groups() + prop_name = prop_name.strip() + return f"{level} {prop_name}" + + # For other headers, just clean up basic formatting + line = re.sub(r'\*([^*]+)\*', r'\1', line) # Remove emphasis + return line + + def remove_problematic_patterns(self, line: str) -> str: + """Remove patterns that cause parsing issues.""" + # Remove all emphasis and bold formatting + line = re.sub(r'\*\*([^*]+)\*\*', r'\1', line) # Remove bold + line = re.sub(r'\*([^*]+)\*', 
r'\1', line) # Remove emphasis + + # Fix HTML-like tags (only actual HTML tags, not all < > characters) + # Only replace if it looks like an HTML tag: or + line = re.sub(r'<(/?\w+[^>]*)>', r'`<\1>`', line) + + # Fix Sphinx-generated blockquote markers that should be list continuations + if line.startswith('> ') and not line.startswith('> **'): + # This is likely a continuation of a bullet point, not a blockquote + line = ' ' + line[2:] # Replace '> ' with proper indentation + + # Remove escaped characters that cause issues + line = line.replace('\\*', '*') + line = line.replace('\\', '') + + # Fix dictionary/object literals that cause parsing issues + # Pattern: = {'key': 'value', 'key2': 'value2'} or = {} + if ' = {' in line and '}' in line: + # Replace with a simple description + line = re.sub(r' = \{[^}]*\}', ' = (configuration object)', line) + + # Fix JSON-like patterns that cause parsing issues + # Pattern: { "type": "function", "name": …, "description": …, "parameters": … } + if line.strip().startswith('{') and line.strip().endswith('}'): + # Replace with a simple description + line = '(JSON configuration object)' + + # Fix specific problematic dictionary patterns + if '{"Reasoning:": "bold blue",' in line or '"Thought:": "bold green"}' in line: + # Replace the entire line with a simple description + line = re.sub(r'.*\{"[^"]*":[^}]*\}.*', ' For example: (configuration dictionary)', line) + + # Fix ClassVar patterns + line = re.sub(r'ClassVar\[([^\]]+)\]', r'ClassVar[\1]', line) + + # Fix template string patterns like ${variable} + line = re.sub(r'\$\{[^}]+\}', '(variable)', line) + + # Fix asterisk in type annotations like "property name *: Type" + line = re.sub(r' \*:', ':', line) + + # Fix any remaining curly braces that cause parsing issues + if '{' in line and '}' in line: + line = re.sub(r'\{[^}]*\}', '(configuration object)', line) + + # Note: All cross-reference link conversion logic removed - we now just strip links entirely + class_to_module = { + 
'Agent': 'agent', + 'AgentBase': 'agent', + 'AgentContext': 'agent', + 'Conversation': 'conversation', + 'BaseConversation': 'conversation', + 'LocalConversation': 'conversation', + 'RemoteConversation': 'conversation', + 'ConversationState': 'conversation', + 'ConversationStats': 'conversation', + 'Event': 'event', + 'LLMConvertibleEvent': 'event', + 'MessageEvent': 'event', + 'LLM': 'llm', + 'LLMRegistry': 'llm', + 'LLMResponse': 'llm', + 'Message': 'llm', + 'ImageContent': 'llm', + 'TextContent': 'llm', + 'ThinkingBlock': 'llm', + 'RedactedThinkingBlock': 'llm', + 'Metrics': 'llm', + 'RegistryEvent': 'llm', + 'SecurityManager': 'security', + 'Tool': 'tool', + 'ToolDefinition': 'tool', + 'Action': 'tool', + 'Observation': 'tool', + 'Workspace': 'workspace', + 'BaseWorkspace': 'workspace', + 'LocalWorkspace': 'workspace', + 'RemoteWorkspace': 'workspace', + 'WorkspaceFile': 'workspace', + 'WorkspaceFileEdit': 'workspace', + 'WorkspaceFileEditResult': 'workspace', + 'WorkspaceFileReadResult': 'workspace', + 'WorkspaceFileWriteResult': 'workspace', + 'WorkspaceListResult': 'workspace', + 'WorkspaceSearchResult': 'workspace', + 'WorkspaceSearchResultItem': 'workspace', + 'WorkspaceUploadResult': 'workspace', + 'WorkspaceWriteResult': 'workspace', + } + + # Fix anchor links - convert full module path anchors to simple class format + # Pattern: openhands.sdk.module.mdx#openhands.sdk.module.ClassName -> openhands.sdk.module#class-classname + def convert_anchor(match): + module_path = match.group(1) + full_class_path = match.group(2) + class_name = full_class_path.split('.')[-1].lower() + return f'openhands.sdk.{module_path}#class-{class_name}' + + line = re.sub(r'openhands\.sdk\.([^)#]+)\.mdx#openhands\.sdk\.\1\.([^)]+)', convert_anchor, line) + + # Also handle the .md# pattern before converting to .mdx + line = re.sub(r'openhands\.sdk\.([^)#]+)\.md#openhands\.sdk\.\1\.([^)]+)', convert_anchor, line) + + # Fix links pointing to the removed top-level openhands.sdk.md 
page + # Pattern: openhands.sdk.md#openhands.sdk.ClassName -> openhands.sdk.module#class-classname + def convert_toplevel_anchor(match): + full_class_path = match.group(1) + class_name = full_class_path.split('.')[-1] + + # Find the correct module for this class + if class_name in class_to_module: + module = class_to_module[class_name] + class_name_lower = class_name.lower() + return f'openhands.sdk.{module}#class-{class_name_lower}' + else: + # Fallback: try to guess module from class name + class_name_lower = class_name.lower() + return f'openhands.sdk.{class_name_lower}#class-{class_name_lower}' + + line = re.sub(r'openhands\.sdk\.md#openhands\.sdk\.([^)]+)', convert_toplevel_anchor, line) + + # Fix same-file anchor references (e.g., #openhands.sdk.llm.LLM -> #class-llm) + def convert_same_file_anchor(match): + full_class_path = match.group(1) + class_name = full_class_path.split('.')[-1].lower() + return f'#class-{class_name}' + + line = re.sub(r'#openhands\.sdk\.[^.]+\.([^)]+)', convert_same_file_anchor, line) + + # Fix invalid http:// links + line = re.sub(r'\[http://\]\(http://\)', 'http://', line) + + # Remove Python console prompt prefixes from examples + line = re.sub(r'^>`>`>` ', '', line) + + # Remove all cross-reference links - just keep the class names as plain text + # Pattern: [ClassName](openhands.sdk.module#class-classname) -> ClassName + line = re.sub(r'\[([^\]]+)\]\(openhands\.sdk\.[^)]+\)', r'\1', line) + + # Clean up malformed property entries with empty names + if '- ``:' in line and 'property ' in line: + # Extract the property name and type from malformed entries like: + # - ``: property service_to_llm : dict[str, [LLM](#openhands.sdk.llm.LLM)] + # - ``: abstract property conversation_stats : ConversationStats + match = re.search(r'- ``: (?:abstract )?property (\w+) : (.+)', line) + if match: + prop_name = match.group(1) + prop_type = match.group(2) + line = f'- `{prop_name}`: {prop_type}' + + # Format parameter names in backticks for 
parameter lists + # Pattern: " parameter_name – Description" -> " `parameter_name` – Description" + if line.strip().startswith('* ') or (line.startswith(' ') and ' – ' in line): + # This looks like a parameter line in a parameter list + # Match pattern: " * parameter_name – description" or " parameter_name – description" + param_match = re.match(r'^(\s*\*?\s*)([a-zA-Z_][a-zA-Z0-9_]*)\s*–\s*(.+)$', line) + if param_match: + indent = param_match.group(1) + param_name = param_match.group(2) + description = param_match.group(3) + line = f'{indent}`{param_name}` – {description}' + + return line + + def update_navigation(self): + """Update the navigation configuration.""" + logger.info("Updating navigation configuration...") + + # Generate navigation entries for all API files + api_files = list(self.output_dir.glob("*.mdx")) + nav_entries = [] + + for api_file in sorted(api_files): + module_name = api_file.stem + nav_entries.append(f'"sdk/api-reference/{module_name}"') + + # Create navigation snippet + nav_config = { + "navigation": [ + { + "group": "API Reference", + "pages": [entry.strip('"') for entry in nav_entries] + } + ] + } + + # Save navigation snippet + nav_file = self.docs_dir / "scripts" / "mint-config-snippet.json" + nav_file.write_text(json.dumps(nav_config, indent=2)) + + # Also update the main docs.json file + self.update_main_docs_json([entry.strip('"') for entry in nav_entries]) + + logger.info(f"Generated navigation for {len(nav_entries)} API reference files") + + def update_main_docs_json(self, nav_entries): + """Update the main docs.json file with the new API reference navigation.""" + docs_json_path = self.docs_dir / "docs.json" + + if not docs_json_path.exists(): + logger.warning("docs.json not found, skipping main navigation update") + return + + try: + with open(docs_json_path, 'r') as f: + docs_config = json.load(f) + + # Find and update the API Reference section + updated = False + for tab in docs_config.get("navigation", {}).get("tabs", []): + 
if tab.get("tab") == "SDK": + for page in tab.get("pages", []): + if isinstance(page, dict) and page.get("group") == "API Reference": + page["pages"] = nav_entries + updated = True + logger.info("Updated API Reference navigation in docs.json") + break + if updated: + break + + if updated: + with open(docs_json_path, 'w') as f: + json.dump(docs_config, f, indent=2) + else: + logger.warning("Could not find API Reference section in docs.json to update") + + except Exception as e: + logger.error(f"Error updating docs.json: {e}") + + def run_command(self, cmd: List[str], cwd: Path = None): + """Run a shell command with error handling.""" + try: + result = subprocess.run( + cmd, + cwd=cwd or self.docs_dir, + capture_output=True, + text=True, + check=True + ) + if result.stdout: + logger.debug(f"STDOUT: {result.stdout}") + if result.stderr: + logger.warning(f"STDERR: {result.stderr}") + except subprocess.CalledProcessError as e: + logger.error(f"Command failed: {' '.join(cmd)}") + logger.error(f"Exit code: {e.returncode}") + logger.error(f"STDOUT: {e.stdout}") + logger.error(f"STDERR: {e.stderr}") + raise + + +def main(): + """Main entry point.""" + docs_dir = Path(__file__).parent.parent + generator = SimpleAPIDocGenerator(docs_dir) + generator.run() + + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/scripts/generate-api-docs.sh b/scripts/generate-api-docs.sh new file mode 100755 index 00000000..c9d10f95 --- /dev/null +++ b/scripts/generate-api-docs.sh @@ -0,0 +1,77 @@ +#!/bin/bash + +# API Documentation Generation Script (Shell Version) +# +# This is a simple shell wrapper around the Python script for convenience. +# For full functionality and error handling, use the Python version. + +set -e + +# Get the directory of this script +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +DOCS_ROOT="$(dirname "$SCRIPT_DIR")" + +# Change to docs directory +cd "$DOCS_ROOT" + +# Check if Python script exists +if [ ! 
-f "scripts/generate-api-docs.py" ]; then + echo "Error: Python script not found at scripts/generate-api-docs.py" + exit 1 +fi + +# Check if required packages are installed +echo "Checking dependencies..." +python3 -c "import sphinx, sphinx_markdown_builder, myst_parser" 2>/dev/null || { + echo "Error: Required packages not installed." + echo "Please install them with: pip install sphinx sphinx-markdown-builder myst-parser" + exit 1 +} + +# Parse command line arguments +CLEAN="" +VERBOSE="" + +while [[ $# -gt 0 ]]; do + case $1 in + --clean) + CLEAN="--clean" + shift + ;; + --verbose|-v) + VERBOSE="--verbose" + shift + ;; + -h|--help) + echo "Usage: $0 [--clean] [--verbose]" + echo "" + echo "Options:" + echo " --clean Clean previous build artifacts before generating" + echo " --verbose Enable verbose output" + echo " --help Show this help message" + echo "" + echo "This script generates API reference documentation from the OpenHands SDK." + echo "Generated files will be placed in the sdk/api-reference/ directory." + exit 0 + ;; + *) + echo "Unknown option: $1" + echo "Use --help for usage information." + exit 1 + ;; + esac +done + +# Run the Python script +echo "Generating API documentation..." +python3 scripts/generate-api-docs.py $CLEAN $VERBOSE + +echo "" +echo "✅ API documentation generation completed!" +echo "📁 Generated files are in: sdk/api-reference/" +echo "⚙️ Mint.json config snippet: scripts/mint-config-snippet.json" +echo "" +echo "Next steps:" +echo "1. Review the generated documentation in sdk/api-reference/" +echo "2. Copy the configuration from scripts/mint-config-snippet.json" +echo "3. 
Add it to your docs.json navigation structure" \ No newline at end of file diff --git a/scripts/mint-config-snippet.json b/scripts/mint-config-snippet.json new file mode 100644 index 00000000..74571d27 --- /dev/null +++ b/scripts/mint-config-snippet.json @@ -0,0 +1,17 @@ +{ + "navigation": [ + { + "group": "API Reference", + "pages": [ + "sdk/api-reference/openhands.sdk.agent", + "sdk/api-reference/openhands.sdk.conversation", + "sdk/api-reference/openhands.sdk.event", + "sdk/api-reference/openhands.sdk.llm", + "sdk/api-reference/openhands.sdk.security", + "sdk/api-reference/openhands.sdk.tool", + "sdk/api-reference/openhands.sdk.utils", + "sdk/api-reference/openhands.sdk.workspace" + ] + } + ] +} \ No newline at end of file diff --git a/scripts/sphinx/source/conf.py b/scripts/sphinx/source/conf.py new file mode 100644 index 00000000..1c941190 --- /dev/null +++ b/scripts/sphinx/source/conf.py @@ -0,0 +1,28 @@ + +import os +import sys +sys.path.insert(0, os.path.abspath('../../../agent-sdk/openhands-sdk')) + +project = 'OpenHands SDK' +copyright = '2024, OpenHands' +author = 'OpenHands' + +extensions = [ + 'sphinx.ext.autodoc', + 'sphinx.ext.napoleon', + 'sphinx_markdown_builder', +] + +autodoc_default_options = { + 'members': True, + 'undoc-members': True, + 'show-inheritance': True, + 'special-members': '__init__', +} + +napoleon_google_docstring = True +napoleon_numpy_docstring = True +napoleon_include_init_with_doc = False +napoleon_include_private_with_doc = False + +html_theme = 'sphinx_rtd_theme' diff --git a/scripts/sphinx/source/index.rst b/scripts/sphinx/source/index.rst new file mode 100644 index 00000000..b63bba30 --- /dev/null +++ b/scripts/sphinx/source/index.rst @@ -0,0 +1,16 @@ + +OpenHands SDK API Reference +=========================== + +.. 
toctree:: + :maxdepth: 2 + :caption: Contents: + + openhands.sdk + +Indices and tables +================== + +* :ref:`genindex` +* :ref:`modindex` +* :ref:`search` diff --git a/sdk/api-reference/openhands.sdk.agent.mdx b/sdk/api-reference/openhands.sdk.agent.mdx new file mode 100644 index 00000000..6a6912ae --- /dev/null +++ b/sdk/api-reference/openhands.sdk.agent.mdx @@ -0,0 +1,159 @@ +--- +title: openhands.sdk.agent +description: API reference for openhands.sdk.agent module +--- + + +### class Agent + +Bases: [`AgentBase`](#class-agentbase) + +Main agent implementation for OpenHands. + +The Agent class provides the core functionality for running AI agents that can +interact with tools, process messages, and execute actions. It inherits from +AgentBase and implements the agent execution logic. + +#### Example + +```pycon +>>> from openhands.sdk import LLM, Agent, Tool +>>> llm = LLM(model="claude-sonnet-4-20250514", api_key=SecretStr("key")) +>>> tools = [Tool(name="BashTool"), Tool(name="FileEditorTool")] +>>> agent = Agent(llm=llm, tools=tools) +``` + + +#### Properties + +- `agent_context`: AgentContext | None +- `condenser`: CondenserBase | None +- `filter_tools_regex`: str | None +- `kind`: Literal['Agent'] +- `llm`: LLM +- `mcp_config`: dict[str, Any] +- `model_config`: ClassVar[ConfigDict] = (configuration object) + Configuration for the model, should be a dictionary conforming to [ConfigDict][pydantic.config.ConfigDict]. +- `security_analyzer`: analyzer.SecurityAnalyzerBase | None +- `system_prompt_filename`: str +- `system_prompt_kwargs`: dict[str, object] +- `tools`: list[Tool] + +#### Methods + +#### init_state() + +Initialize the empty conversation state to prepare the agent for user +messages. + +Typically this involves adding system message + +NOTE: state will be mutated in-place. + +#### model_post_init() + +Override this method to perform additional initialization after __init__ and model_construct. 
+This is useful if you want to do some validation that requires the entire model to be initialized. + +#### step() + +Taking a step in the conversation. + +Typically this involves: +1. Making a LLM call +2. Executing the tool +3. Updating the conversation state with + + LLM calls (role=”assistant”) and tool results (role=”tool”) + +4.1 If conversation is finished, set state.execution_status to FINISHED +4.2 Otherwise, just return, Conversation will kick off the next step + +NOTE: state will be mutated in-place. + +### class AgentBase + +Bases: `DiscriminatedUnionMixin`, `ABC` + +Abstract base class for OpenHands agents. + +Agents are stateless and should be fully defined by their configuration. +This base class provides the common interface and functionality that all +agent implementations must follow. + + +#### Properties + +- `agent_context`: AgentContext | None +- `condenser`: CondenserBase | None +- `filter_tools_regex`: str | None +- `kind`: str +- `llm`: LLM +- `mcp_config`: dict[str, Any] +- `model_config`: ClassVar[ConfigDict] = (configuration object) + Configuration for the model, should be a dictionary conforming to [ConfigDict][pydantic.config.ConfigDict]. +- `name`: str + Returns the name of the Agent. +- `prompt_dir`: str + Returns the directory where this class’s module file is located. +- `security_analyzer`: SecurityAnalyzerBase | None +- `system_message`: str + Compute system message on-demand to maintain statelessness. +- `system_prompt_filename`: str +- `system_prompt_kwargs`: dict[str, object] +- `tools`: list[Tool] +- `tools_map`: dict[str, ToolDefinition] + Get the initialized tools map. + Raises RuntimeError if the agent has not been initialized. + +#### Methods + +#### get_all_llms() + +Recursively yield unique base-class LLM objects reachable from self. + +- Returns actual object references (not copies). +- De-dupes by id(LLM). +- Cycle-safe via a visited set for all traversed objects.
+- Only yields objects whose type is exactly LLM (no subclasses). +- Does not handle dataclasses. + +#### init_state() + +Initialize the empty conversation state to prepare the agent for user +messages. + +Typically this involves adding system message + +NOTE: state will be mutated in-place. + +#### model_dump_succint() + +Like model_dump, but excludes None fields by default. + +#### model_post_init() + +Override this method to perform additional initialization after __init__ and model_construct. +This is useful if you want to do some validation that requires the entire model to be initialized. + +#### resolve_diff_from_deserialized() + +Return a new AgentBase instance equivalent to persisted but with +explicitly whitelisted fields (e.g. api_key, security_analyzer) taken from +self. + +#### abstractmethod step() + +Taking a step in the conversation. + +Typically this involves: +1. Making a LLM call +2. Executing the tool +3. Updating the conversation state with + + LLM calls (role=”assistant”) and tool results (role=”tool”) + +4.1 If conversation is finished, set state.execution_status to FINISHED +4.2 Otherwise, just return, Conversation will kick off the next step + +NOTE: state will be mutated in-place. diff --git a/sdk/api-reference/openhands.sdk.conversation.mdx b/sdk/api-reference/openhands.sdk.conversation.mdx new file mode 100644 index 00000000..5656fdd3 --- /dev/null +++ b/sdk/api-reference/openhands.sdk.conversation.mdx @@ -0,0 +1,544 @@ +--- +title: openhands.sdk.conversation +description: API reference for openhands.sdk.conversation module +--- + + +### class BaseConversation + +Bases: `ABC` + +Abstract base class for conversation implementations. + +This class defines the interface that all conversation implementations must follow. +Conversations manage the interaction between users and agents, handling message +exchange, execution control, and state management. 
+ + +#### Properties + +- `confirmation_policy_active`: bool +- `conversation_stats`: ConversationStats +- `id`: UUID +- `is_confirmation_mode_active`: bool + Check if confirmation mode is active. + Returns True if BOTH conditions are met: + 1. The agent has a security analyzer set (not None) + 2. The confirmation policy is active +- `state`: ConversationStateProtocol + +#### Methods + +#### __init__() + +Initialize the base conversation with span tracking. + +#### abstractmethod close() + +#### static compose_callbacks() + +Compose multiple callbacks into a single callback function. + +* Parameters: + `callbacks` – An iterable of callback functions +* Returns: + A single callback function that calls all provided callbacks + +#### abstractmethod generate_title() + +Generate a title for the conversation based on the first user message. + +* Parameters: + * `llm` – Optional LLM to use for title generation. If not provided, + uses the agent’s LLM. + * `max_length` – Maximum length of the generated title. +* Returns: + A generated title for the conversation. +* Raises: + `ValueError` – If no user messages are found in the conversation. + +#### static get_persistence_dir() + +Get the persistence directory for the conversation. + +#### abstractmethod pause() + +#### abstractmethod reject_pending_actions() + +#### abstractmethod run() + +Execute the agent to process messages and perform actions. + +This method runs the agent until it finishes processing the current +message or reaches the maximum iteration limit. + +#### abstractmethod send_message() + +Send a message to the agent. + +#### abstractmethod set_confirmation_policy() + +Set the confirmation policy for the conversation. + +#### abstractmethod update_secrets() + +### class Conversation + +Bases: `object` + +Factory class for creating conversation instances with OpenHands agents.
+ +This factory automatically creates either a LocalConversation or RemoteConversation +based on the workspace type provided. LocalConversation runs the agent locally, +while RemoteConversation connects to a remote agent server. + +* Returns: + LocalConversation if workspace is local, RemoteConversation if workspace + is remote. + +#### Example + +```pycon +>>> from openhands.sdk import LLM, Agent, Conversation +>>> llm = LLM(model="claude-sonnet-4-20250514", api_key=SecretStr("key")) +>>> agent = Agent(llm=llm, tools=[]) +>>> conversation = Conversation(agent=agent, workspace="./workspace") +>>> conversation.send_message("Hello!") +>>> conversation.run() +``` + +### class ConversationState + +Bases: `OpenHandsModel` + + +#### Properties + +- `activated_knowledge_skills`: list[str] +- `agent`: AgentBase +- `confirmation_policy`: ConfirmationPolicyBase +- `events`: [EventLog](#class-eventlog) +- `execution_status`: ConversationExecutionStatus +- `id`: UUID +- `max_iterations`: int +- `model_config`: ClassVar[ConfigDict] = (configuration object) + Configuration for the model, should be a dictionary conforming to [ConfigDict][pydantic.config.ConfigDict]. +- `persistence_dir`: str | None +- `secret_registry`: [SecretRegistry](#class-secretregistry) +- `stats`: ConversationStats +- `stuck_detection`: bool +- `workspace`: BaseWorkspace + +#### Methods + +#### acquire() + +Acquire the lock. + +* Parameters: + * `blocking` – If True, block until lock is acquired. If False, return + immediately. + * `timeout` – Maximum time to wait for lock (ignored if blocking=False). + -1 means wait indefinitely. +* Returns: + True if lock was acquired, False otherwise. + +#### classmethod create() + +If base_state.json exists: resume (attach EventLog, +: reconcile agent, enforce id). + +Else: create fresh (agent required), persist base, and return. + +#### static get_unmatched_actions() + +Find actions in the event history that don’t have matching observations. 
+ +This method identifies ActionEvents that don’t have corresponding +ObservationEvents or UserRejectObservations, which typically indicates +actions that are pending confirmation or execution. + +* Parameters: + `events` – List of events to search through +* Returns: + List of ActionEvent objects that don’t have corresponding observations, + in chronological order + +#### locked() + +Return True if the lock is currently held by any thread. + +#### model_post_init() + +Override this method to perform additional initialization after __init__ and model_construct. +This is useful if you want to do some validation that requires the entire model to be initialized. + +#### owned() + +Return True if the lock is currently held by the calling thread. + +#### release() + +Release the lock. + +* Raises: + `RuntimeError` – If the current thread doesn’t own the lock. + +#### set_on_state_change() + +Set a callback to be called when state changes. + +* Parameters: + `callback` – A function that takes an Event (ConversationStateUpdateEvent) + or None to remove the callback + +### class ConversationVisualizer + +Bases: `object` + +Handles visualization of conversation events with Rich formatting. + +Provides Rich-formatted output with panels and complete content display. + +#### Methods + +#### __init__() + +Initialize the visualizer. + +* Parameters: + * `highlight_regex` – Dictionary mapping regex patterns to Rich color styles + for highlighting keywords in the visualizer. + For example: (configuration object) + * `skip_user_messages` – If True, skip displaying user messages. Useful for + scenarios where user input is not relevant to show. + * `conversation_stats` – ConversationStats object to display metrics information. + * `name_for_visualization` – Optional name to prefix in panel titles to identify + which agent/conversation is speaking. + +#### on_event() + +Main event handler that displays events with Rich formatting. 
+ +### class EventLog + +Bases: [`EventsListBase`](#class-eventslistbase) + +#### Methods + +#### __init__() + +#### append() + +Add a new event to the list. + +#### get_id() + +Return the event_id for a given index. + +#### get_index() + +Return the integer index for a given event_id. + +### class EventsListBase + +Bases: `Sequence`[`Event`], `ABC` + +Abstract base class for event lists that can be appended to. + +This provides a common interface for both local EventLog and remote +RemoteEventsList implementations, avoiding circular imports in protocols. + +#### Methods + +#### abstractmethod append() + +Add a new event to the list. + +### class LocalConversation + +Bases: [`BaseConversation`](#class-baseconversation) + + +#### Properties + +- `agent`: AgentBase +- `id`: UUID + Get the unique ID of the conversation. +- `llm_registry`: LLMRegistry +- `max_iteration_per_run`: int +- `state`: [ConversationState](#class-conversationstate) + Get the conversation state. + It returns a protocol that has a subset of ConversationState methods + and properties. We will have the ability to access the same properties + of ConversationState on a remote conversation object. + But we won’t be able to access methods that mutate the state. +- `stuck_detector`: [StuckDetector](#class-stuckdetector) | None + Get the stuck detector instance if enabled. +- `workspace`: LocalWorkspace + +#### Methods + +#### __init__() + +Initialize the conversation. + +* Parameters: + * `agent` – The agent to use for the conversation + * `workspace` – Working directory for agent operations and tool execution + * `persistence_dir` – Directory for persisting conversation state and events + * `conversation_id` – Optional ID for the conversation. If provided, will + be used to identify the conversation. The user might want to + suffix their persistent filestore with this ID. 
+ * `callbacks` – Optional list of callback functions to handle events + * `max_iteration_per_run` – Maximum number of iterations per run + * `visualize` – Whether to enable default visualization. If True, adds + a default visualizer callback. If False, relies on + application to provide visualization through callbacks. + * `name_for_visualization` – Optional name to prefix in panel titles to identify + which agent/conversation is speaking. + * `stuck_detection` – Whether to enable stuck detection + +#### close() + +Close the conversation and clean up all tool executors. + +#### property conversation_stats + +#### generate_title() + +Generate a title for the conversation based on the first user message. + +* Parameters: + * `llm` – Optional LLM to use for title generation. If not provided, + uses self.agent.llm. + * `max_length` – Maximum length of the generated title. +* Returns: + A generated title for the conversation. +* Raises: + `ValueError` – If no user messages are found in the conversation. + +#### pause() + +Pause agent execution. + +This method can be called from any thread to request that the agent +pause execution. The pause will take effect at the next iteration +of the run loop (between agent steps). + +Note: If called during an LLM completion, the pause will not take +effect until the current LLM call completes. + +#### reject_pending_actions() + +Reject all pending actions from the agent. + +This is a non-invasive method to reject actions between run() calls. +Also clears the agent_waiting_for_confirmation flag. + +#### run() + +Runs the conversation until the agent finishes. + +In confirmation mode: +- First call: creates actions but doesn’t execute them, stops and waits +- Second call: executes pending actions (implicit confirmation) + +In normal mode: +- Creates and executes actions immediately + +Can be paused between steps + +#### send_message() + +Send a message to the agent. 
+ +* Parameters: + `message` – Either a string (which will be converted to a user message) + or a Message object + +#### set_confirmation_policy() + +Set the confirmation policy and store it in conversation state. + +#### update_secrets() + +Add secrets to the conversation. + +* Parameters: + `secrets` – Dictionary mapping secret keys to values or no-arg callables. + SecretValue = str | Callable[[], str]. Callables are invoked lazily + when a command references the secret key. + +### class RemoteConversation + +Bases: [`BaseConversation`](#class-baseconversation) + + +#### Properties + +- `agent`: AgentBase +- `conversation_stats`: ConversationStats + Get conversation stats from remote server. +- `id`: UUID +- `max_iteration_per_run`: int +- `state`: RemoteState + Access to remote conversation state. +- `workspace`: RemoteWorkspace + +#### Methods + +#### __init__() + +Remote conversation proxy that talks to an agent server. + +* Parameters: + * `agent` – Agent configuration (will be sent to the server) + * `workspace` – The working directory for agent operations and tool execution. + * `conversation_id` – Optional existing conversation id to attach to + * `callbacks` – Optional callbacks to receive events (not yet streamed) + * `max_iteration_per_run` – Max iterations configured on server + * `stuck_detection` – Whether to enable stuck detection on server + * `visualize` – Whether to enable the default visualizer callback + * `name_for_visualization` – Optional name to prefix in panel titles to identify + which agent/conversation is speaking. + * `secrets` – Optional secrets to initialize the conversation with + +#### close() + +#### generate_title() + +Generate a title for the conversation based on the first user message. + +* Parameters: + * `llm` – Optional LLM to use for title generation. If provided, its usage_id + will be sent to the server. If not provided, uses the agent’s LLM. + * `max_length` – Maximum length of the generated title. 
+* Returns: + A generated title for the conversation. + +#### pause() + +#### reject_pending_actions() + +#### run() + +Execute the agent to process messages and perform actions. + +This method runs the agent until it finishes processing the current +message or reaches the maximum iteration limit. + +#### send_message() + +Send a message to the agent. + +#### set_confirmation_policy() + +Set the confirmation policy for the conversation. + +#### property stuck_detector + +Stuck detector for compatibility. +Not implemented for remote conversations. + +#### update_secrets() + +### class SecretRegistry + +Bases: `OpenHandsModel` + +Manages secrets and injects them into bash commands when needed. + +The secret registry stores a mapping of secret keys to SecretSources +that retrieve the actual secret values. When a bash command is about to be +executed, it scans the command for any secret keys and injects the corresponding +environment variables. + +Secret sources will redact / encrypt their sensitive values as appropriate when +serializing, depending on the content of the context. If a context is present +and contains a ‘cipher’ object, this is used for encryption. If it contains a +boolean ‘expose_secrets’ flag set to True, secrets are dumped in plain text. +Otherwise secrets are redacted. + +Additionally, it tracks the latest exported values to enable consistent masking +even when callable secrets fail on subsequent calls. + + +#### Properties + +- `model_config`: ClassVar[ConfigDict] = (configuration object) + Configuration for the model, should be a dictionary conforming to [ConfigDict][pydantic.config.ConfigDict]. +- `secret_sources`: dict[str, SecretSource] + +#### Methods + +#### find_secrets_in_text() + +Find all secret keys mentioned in the given text.
+ +* Parameters: + `text` – The text to search for secret keys +* Returns: + Set of secret keys found in the text + +#### get_secrets_as_env_vars() + +Get secrets that should be exported as environment variables for a command. + +* Parameters: + `command` – The bash command to check for secret references +* Returns: + Dictionary of environment variables to export (key -> value) + +#### mask_secrets_in_output() + +Mask secret values in the given text. + +This method uses both the current exported values and attempts to get +fresh values from callables to ensure comprehensive masking. + +* Parameters: + `text` – The text to mask secrets in +* Returns: + Text with secret values replaced by a masked placeholder + +#### model_post_init() + +Override this method to perform additional initialization after __init__ and model_construct. +This is useful if you want to do some validation that requires the entire model to be initialized. + +#### update_secrets() + +Add or update secrets in the manager. + +* Parameters: + `secrets` – Dictionary mapping secret keys to either string values + or callable functions that return string values + +### class StuckDetector + +Bases: `object` + +Detects when an agent is stuck in repetitive or unproductive patterns. + +This detector analyzes the conversation history to identify various stuck patterns: +1. Repeating action-observation cycles +2. Repeating action-error cycles +3. Agent monologue (repeated messages without user input) +4. Repeating alternating action-observation patterns +5. Context window errors indicating memory issues + + +#### Properties + +- `state`: [ConversationState](#class-conversationstate) + +#### Methods + +#### __init__() + +#### is_stuck() + +Check if the agent is currently stuck.
diff --git a/sdk/api-reference/openhands.sdk.event.mdx b/sdk/api-reference/openhands.sdk.event.mdx new file mode 100644 index 00000000..3e141f99 --- /dev/null +++ b/sdk/api-reference/openhands.sdk.event.mdx @@ -0,0 +1,330 @@ +--- +title: openhands.sdk.event +description: API reference for openhands.sdk.event module +--- + + +### class ActionEvent + +Bases: [`LLMConvertibleEvent`](#class-llmconvertibleevent) + + +#### Properties + +- `action`: Action | None +- `kind`: Literal['ActionEvent'] +- `llm_response_id`: str +- `model_config`: ClassVar[ConfigDict] = (configuration object) + Configuration for the model, should be a dictionary conforming to [ConfigDict][pydantic.config.ConfigDict]. +- `reasoning_content`: str | None +- `responses_reasoning_item`: ReasoningItemModel | None +- `security_risk`: SecurityRisk +- `source`: Literal['agent', 'user', 'environment'] +- `thinking_blocks`: list[ThinkingBlock | RedactedThinkingBlock] +- `thought`: Sequence[TextContent] +- `tool_call`: MessageToolCall +- `tool_call_id`: str +- `tool_name`: str +- `visualize`: Text + Return Rich Text representation of this action event. + +#### Methods + +#### to_llm_message() + +Individual message - may be incomplete for multi-action batches + +### class AgentErrorEvent + +Bases: [`ObservationBaseEvent`](#class-observationbaseevent) + +Error triggered by the agent. + +Note: This event should not contain model “thought” or “reasoning_content”. It +represents an error produced by the agent/scaffold, not model output. + + +#### Properties + +- `error`: str +- `kind`: Literal['AgentErrorEvent'] +- `model_config`: ClassVar[ConfigDict] = (configuration object) + Configuration for the model, should be a dictionary conforming to [ConfigDict][pydantic.config.ConfigDict]. +- `source`: Literal['agent', 'user', 'environment'] +- `visualize`: Text + Return Rich Text representation of this agent error event. 
+ +#### Methods + +#### to_llm_message() + +### class Condensation + +Bases: [`Event`](#class-event) + +This action indicates a condensation of the conversation history is happening. + + +#### Properties + +- `forgotten_event_ids`: list[str] +- `kind`: Literal['Condensation'] +- `llm_response_id`: str +- `model_config`: ClassVar[ConfigDict] = (configuration object) + Configuration for the model, should be a dictionary conforming to [ConfigDict][pydantic.config.ConfigDict]. +- `source`: Literal['agent', 'user', 'environment'] +- `summary`: str | None +- `summary_offset`: int | None +- `visualize`: Text + Return Rich Text representation of this event. + This is a fallback implementation for unknown event types. + Subclasses should override this method to provide specific visualization. +### class CondensationRequest + +Bases: [`Event`](#class-event) + +This action is used to request a condensation of the conversation history. + + +#### Properties + +- `kind`: Literal['CondensationRequest'] +- `model_config`: ClassVar[ConfigDict] = (configuration object) + Configuration for the model, should be a dictionary conforming to [ConfigDict][pydantic.config.ConfigDict]. +- `source`: Literal['agent', 'user', 'environment'] + +#### Methods + +#### action + +The action type, namely ActionType.CONDENSATION_REQUEST. + +* Type: + str + +### class CondensationSummaryEvent + +Bases: [`LLMConvertibleEvent`](#class-llmconvertibleevent) + +This event represents a summary generated by a condenser. + + +#### Properties + +- `kind`: Literal['CondensationSummaryEvent'] +- `model_config`: ClassVar[ConfigDict] = (configuration object) + Configuration for the model, should be a dictionary conforming to [ConfigDict][pydantic.config.ConfigDict]. +- `source`: Literal['agent', 'user', 'environment'] +- `summary`: str + The summary text. 
+ +#### Methods + +#### to_llm_message() + +### class ConversationStateUpdateEvent + +Bases: [`Event`](#class-event) + +Event that contains conversation state updates. + +This event is sent via websocket whenever the conversation state changes, +allowing remote clients to stay in sync without making REST API calls. + +All fields are serialized versions of the corresponding ConversationState fields +to ensure compatibility with websocket transmission. + + +#### Properties + +- `key`: str +- `kind`: Literal['ConversationStateUpdateEvent'] +- `model_config`: ClassVar[ConfigDict] = (configuration object) + Configuration for the model, should be a dictionary conforming to [ConfigDict][pydantic.config.ConfigDict]. +- `source`: Literal['agent', 'user', 'environment'] +- `value`: Any + +#### Methods + +#### classmethod from_conversation_state() + +Create a state update event from a ConversationState object. + +This creates an event containing a snapshot of important state fields. + +* Parameters: + * `state` – The ConversationState to serialize + * `conversation_id` – The conversation ID for the event +* Returns: + A ConversationStateUpdateEvent with serialized state data + +#### classmethod validate_key() + +#### classmethod validate_value() + +### class Event + +Bases: `DiscriminatedUnionMixin`, `ABC` + +Base class for all events. + + +#### Properties + +- `id`: str +- `kind`: str +- `model_config`: ClassVar[ConfigDict] = (configuration object) + Configuration for the model, should be a dictionary conforming to [ConfigDict][pydantic.config.ConfigDict]. +- `source`: Literal['agent', 'user', 'environment'] +- `timestamp`: str +- `visualize`: Text + Return Rich Text representation of this event. + This is a fallback implementation for unknown event types. + Subclasses should override this method to provide specific visualization. +### class LLMConvertibleEvent + +Bases: [`Event`](#class-event), `ABC` + +Base class for events that can be converted to LLM messages. 
+
+
+#### Properties
+
+- `id`: EventID
+- `kind`: str
+- `model_config`: ClassVar[ConfigDict] = (configuration object)
+  Configuration for the model, should be a dictionary conforming to [ConfigDict][pydantic.config.ConfigDict].
+- `source`: SourceType
+- `timestamp`: str
+
+#### Methods
+
+#### static events_to_messages()
+
+Convert event stream to LLM message stream, handling multi-action batches
+
+#### abstractmethod to_llm_message()
+
+### class MessageEvent
+
+Bases: [`LLMConvertibleEvent`](#class-llmconvertibleevent)
+
+Message from either agent or user.
+
+This is originally the “MessageAction”, but it is not supposed to be a tool call.
+
+
+#### Properties
+
+- `activated_skills`: list[str]
+- `extended_content`: list[TextContent]
+- `id`: EventID
+- `kind`: Literal['MessageEvent']
+- `llm_message`: Message
+- `llm_response_id`: str | None
+- `model_config`: ClassVar[ConfigDict] = (configuration object)
+  Configuration for the model, should be a dictionary conforming to [ConfigDict][pydantic.config.ConfigDict].
+- `reasoning_content`: str
+- `source`: Literal['agent', 'user', 'environment']
+- `thinking_blocks`: Sequence[ThinkingBlock | RedactedThinkingBlock]
+  Return the Anthropic thinking blocks from the LLM message.
+- `timestamp`: str
+- `visualize`: Text
+  Return Rich Text representation of this message event.
+
+#### Methods
+
+#### to_llm_message()
+
+### class ObservationBaseEvent
+
+Bases: [`LLMConvertibleEvent`](#class-llmconvertibleevent)
+
+Base class for anything as a response to a tool call.
+
+Examples include tool execution, error, user reject.
+
+
+#### Properties
+
+- `model_config`: ClassVar[ConfigDict] = (configuration object)
+  Configuration for the model, should be a dictionary conforming to [ConfigDict][pydantic.config.ConfigDict].
+- `source`: Literal['agent', 'user', 'environment'] +- `tool_call_id`: str +- `tool_name`: str +### class ObservationEvent + +Bases: [`ObservationBaseEvent`](#class-observationbaseevent) + + +#### Properties + +- `action_id`: str +- `kind`: Literal['ObservationEvent'] +- `model_config`: ClassVar[ConfigDict] = (configuration object) + Configuration for the model, should be a dictionary conforming to [ConfigDict][pydantic.config.ConfigDict]. +- `observation`: Observation +- `visualize`: Text + Return Rich Text representation of this observation event. + +#### Methods + +#### to_llm_message() + +### class PauseEvent + +Bases: [`Event`](#class-event) + +Event indicating that the agent execution was paused by user request. + + +#### Properties + +- `kind`: Literal['PauseEvent'] +- `model_config`: ClassVar[ConfigDict] = (configuration object) + Configuration for the model, should be a dictionary conforming to [ConfigDict][pydantic.config.ConfigDict]. +- `source`: Literal['agent', 'user', 'environment'] +- `visualize`: Text + Return Rich Text representation of this pause event. +### class SystemPromptEvent + +Bases: [`LLMConvertibleEvent`](#class-llmconvertibleevent) + +System prompt added by the agent. + + +#### Properties + +- `kind`: Literal['SystemPromptEvent'] +- `model_config`: ClassVar[ConfigDict] = (configuration object) + Configuration for the model, should be a dictionary conforming to [ConfigDict][pydantic.config.ConfigDict]. +- `source`: Literal['agent', 'user', 'environment'] +- `system_prompt`: TextContent +- `tools`: list[ChatCompletionToolParam] +- `visualize`: Text + Return Rich Text representation of this system prompt event. + +#### Methods + +#### to_llm_message() + +### class UserRejectObservation + +Bases: [`ObservationBaseEvent`](#class-observationbaseevent) + +Observation when user rejects an action in confirmation mode. 
+ + +#### Properties + +- `action_id`: str +- `kind`: Literal['UserRejectObservation'] +- `model_config`: ClassVar[ConfigDict] = (configuration object) + Configuration for the model, should be a dictionary conforming to [ConfigDict][pydantic.config.ConfigDict]. +- `rejection_reason`: str +- `visualize`: Text + Return Rich Text representation of this user rejection event. + +#### Methods + +#### to_llm_message() diff --git a/sdk/api-reference/openhands.sdk.llm.mdx b/sdk/api-reference/openhands.sdk.llm.mdx new file mode 100644 index 00000000..d3dfada0 --- /dev/null +++ b/sdk/api-reference/openhands.sdk.llm.mdx @@ -0,0 +1,646 @@ +--- +title: openhands.sdk.llm +description: API reference for openhands.sdk.llm module +--- + + +### class ImageContent + +Bases: `BaseContent` + + +#### Properties + +- `cache_prompt`: bool +- `image_urls`: list[str] +- `model_config`: ClassVar[ConfigDict] = (configuration object) + Configuration for the model, should be a dictionary conforming to [ConfigDict][pydantic.config.ConfigDict]. +- `type`: Literal['image'] + +#### Methods + +#### to_llm_dict() + +Convert to LLM API format. + +### class LLM + +Bases: `BaseModel`, `RetryMixin`, `NonNativeToolCallingMixin` + +Language model interface for OpenHands agents. + +The LLM class provides a unified interface for interacting with various +language models through the litellm library. It handles model configuration, +API authentication, +retry logic, and tool calling capabilities. + +#### Example + +```pycon +>>> from openhands.sdk import LLM +>>> from pydantic import SecretStr +>>> llm = LLM( +... model="claude-sonnet-4-20250514", +... api_key=SecretStr("your-api-key"), +... usage_id="my-agent" +... ) +>>> # Use with agent or conversation +``` + + +#### Properties + +- `OVERRIDE_ON_SERIALIZE`: tuple[str, ...] 
+- `api_key`: str | SecretStr | None +- `api_version`: str | None +- `aws_access_key_id`: str | SecretStr | None +- `aws_region_name`: str | None +- `aws_secret_access_key`: str | SecretStr | None +- `base_url`: str | None +- `caching_prompt`: bool +- `custom_llm_provider`: str | None +- `custom_tokenizer`: str | None +- `disable_stop_word`: bool | None +- `disable_vision`: bool | None +- `drop_params`: bool +- `enable_encrypted_reasoning`: bool +- `extended_thinking_budget`: int | None +- `extra_headers`: dict[str, str] | None +- `input_cost_per_token`: float | None +- `litellm_extra_body`: dict[str, Any] +- `log_completions`: bool +- `log_completions_folder`: str +- `max_input_tokens`: int | None +- `max_message_chars`: int +- `max_output_tokens`: int | None +- `metrics`: [Metrics](#class-metrics) + Get usage metrics for this LLM instance. + * Returns: + Metrics object containing token usage, costs, and other statistics. +- `model`: str +- `model_config`: ClassVar[ConfigDict] = (configuration object) + Configuration for the model, should be a dictionary conforming to [ConfigDict][pydantic.config.ConfigDict]. +- `model_info`: dict | None + Returns the model info dictionary. 
+- `modify_params`: bool +- `native_tool_calling`: bool +- `num_retries`: int +- `ollama_base_url`: str | None +- `openrouter_app_name`: str +- `openrouter_site_url`: str +- `output_cost_per_token`: float | None +- `reasoning_effort`: Literal['low', 'medium', 'high', 'none'] | None +- `reasoning_summary`: Literal['auto', 'concise', 'detailed'] | None +- `retry_listener`: SkipJsonSchema[Callable[[int, int], None] | None] +- `retry_max_wait`: int +- `retry_min_wait`: int +- `retry_multiplier`: float +- `safety_settings`: list[dict[str, str]] | None +- `seed`: int | None +- `service_id`: str +- `temperature`: float | None +- `timeout`: int | None +- `top_k`: float | None +- `top_p`: float | None +- `usage_id`: str + +#### Methods + +#### completion() + +Generate a completion from the language model. + +This is the method for getting responses from the model via Completion API. +It handles message formatting, tool calling, and response processing. + +* Returns: + LLMResponse containing the model’s response and metadata. +* Raises: + `ValueError` – If streaming is requested (not supported). + +#### format_messages_for_llm() + +Formats Message objects for LLM consumption. + +#### format_messages_for_responses() + +Prepare (instructions, input[]) for the OpenAI Responses API. + +- Skips prompt caching flags and string serializer concerns +- Uses Message.to_responses_value to get either instructions (system) + + or input items (others) +- Concatenates system instructions into a single instructions string + +#### get_token_count() + +#### is_caching_prompt_active() + +Check if prompt caching is supported and enabled for current model. + +* Returns: + True if prompt caching is supported and enabled for the given + : model. +* Return type: + boolean + +#### classmethod load_from_env() + +#### classmethod load_from_json() + +#### model_post_init() + +This function is meant to behave like a BaseModel method to initialise private attributes. 
+
+It takes context as an argument since that’s what pydantic-core passes when calling it.
+
+* Parameters:
+  * `self` – The BaseModel instance.
+  * `context` – The context.
+
+#### resolve_diff_from_deserialized()
+
+Resolve differences between a deserialized LLM and the current instance.
+
+This is due to fields like api_key being serialized to `**********` in dumps,
+and we want to ensure that when loading from a file, we still use the
+runtime-provided api_key in the self instance.
+
+Return a new LLM instance equivalent to persisted but with
+explicitly whitelisted fields (e.g. api_key) taken from self.
+
+#### responses()
+
+Alternative invocation path using OpenAI Responses API via LiteLLM.
+
+Maps Message[] -> (instructions, input[]) and returns LLMResponse.
+Non-stream only for v1.
+
+#### restore_metrics()
+
+#### uses_responses_api()
+
+Whether this model uses the OpenAI Responses API path.
+
+#### vision_is_active()
+
+### class LLMRegistry
+
+Bases: `object`
+
+A minimal LLM registry for managing LLM instances by usage ID.
+
+This registry provides a simple way to manage multiple LLM instances,
+avoiding the need to recreate LLMs with the same configuration.
+
+
+#### Properties
+
+- `registry_id`: str
+- `retry_listener`: Callable[[int, int], None] | None
+- `service_to_llm`: dict[str, [LLM](#class-llm)]
+- `subscriber`: Callable[[[RegistryEvent](#class-registryevent)], None] | None
+- `usage_to_llm`: dict[str, [LLM](#class-llm)]
+  Access the internal usage-ID-to-LLM mapping.
+
+#### Methods
+
+#### __init__()
+
+Initialize the LLM registry.
+
+* Parameters:
+  `retry_listener` – Optional callback for retry events.
+
+#### add()
+
+Add an LLM instance to the registry.
+
+* Parameters:
+  `llm` – The LLM instance to register.
+* Raises:
+  `ValueError` – If llm.usage_id already exists in the registry.
+
+#### get()
+
+Get an LLM instance from the registry.
+
+* Parameters:
+  `usage_id` – Unique identifier for the LLM usage slot.
+* Returns: + The LLM instance. +* Raises: + `KeyError` – If usage_id is not found in the registry. + +#### list_services() + +Deprecated alias for [`list_usage_ids()`](#class-list_usage_ids). + +#### list_usage_ids() + +List all registered usage IDs. + +#### notify() + +Notify subscribers of registry events. + +* Parameters: + `event` – The registry event to notify about. + +#### subscribe() + +Subscribe to registry events. + +* Parameters: + `callback` – Function to call when LLMs are created or updated. + +### class LLMResponse + +Bases: `BaseModel` + +Result of an LLM completion request. + +This type provides a clean interface for LLM completion results, exposing +only OpenHands-native types to consumers while preserving access to the +raw LiteLLM response for internal use. + + +#### Properties + +- `id`: str + Get the response ID from the underlying LLM response. + This property provides a clean interface to access the response ID, + supporting both completion mode (ModelResponse) and response API modes + (ResponsesAPIResponse). + * Returns: + The response ID from the LLM response +- `message`: [Message](#class-message) +- `metrics`: [MetricsSnapshot](#class-metricssnapshot) +- `model_config`: ClassVar[ConfigDict] = (configuration object) + Configuration for the model, should be a dictionary conforming to [ConfigDict][pydantic.config.ConfigDict]. 
+- `raw_response`: ModelResponse | ResponsesAPIResponse + +#### Methods + +#### message + +The completion message converted to OpenHands Message type + +* Type: + [openhands.sdk.llm.message.Message](#class-message) + +#### metrics + +Snapshot of metrics from the completion request + +* Type: + [openhands.sdk.llm.utils.metrics.MetricsSnapshot](#class-metricssnapshot) + +#### raw_response + +The original LiteLLM response (ModelResponse or +ResponsesAPIResponse) for internal use + +* Type: + litellm.types.utils.ModelResponse | litellm.types.llms.openai.ResponsesAPIResponse + +### class Message + +Bases: `BaseModel` + + +#### Properties + +- `cache_enabled`: bool +- `contains_image`: bool +- `content`: Sequence[[TextContent](#class-textcontent) | [ImageContent](#class-imagecontent)] +- `force_string_serializer`: bool +- `function_calling_enabled`: bool +- `model_config`: ClassVar[ConfigDict] = (configuration object) + Configuration for the model, should be a dictionary conforming to [ConfigDict][pydantic.config.ConfigDict]. +- `name`: str | None +- `reasoning_content`: str | None +- `responses_reasoning_item`: [ReasoningItemModel](#class-reasoningitemmodel) | None +- `role`: Literal['user', 'system', 'assistant', 'tool'] +- `thinking_blocks`: Sequence[[ThinkingBlock](#class-thinkingblock) | [RedactedThinkingBlock](#class-redactedthinkingblock)] +- `tool_call_id`: str | None +- `tool_calls`: list[[MessageToolCall](#class-messagetoolcall)] | None +- `vision_enabled`: bool + +#### Methods + +#### classmethod from_llm_chat_message() + +Convert a LiteLLMMessage (Chat Completions) to our Message class. + +Provider-agnostic mapping for reasoning: +- Prefer message.reasoning_content if present (LiteLLM normalized field) +- Extract thinking_blocks from content array (Anthropic-specific) + +#### classmethod from_llm_responses_output() + +Convert OpenAI Responses API output items into a single assistant Message. 
+ +Policy (non-stream): +- Collect assistant text by concatenating output_text parts from message items +- Normalize function_call items to MessageToolCall list + +#### to_chat_dict() + +Serialize message for OpenAI Chat Completions. + +Chooses the appropriate content serializer and then injects threading keys: +- Assistant tool call turn: role == “assistant” and self.tool_calls +- Tool result turn: role == “tool” and self.tool_call_id (with name) + +#### to_responses_dict() + +Serialize message for OpenAI Responses (input parameter). + +Produces a list of “input” items for the Responses API: +- system: returns [], system content is expected in ‘instructions’ +- user: one ‘message’ item with content parts -> input_text / input_image +(when vision enabled) +- assistant: emits prior assistant content as input_text, +and function_call items for tool_calls +- tool: emits function_call_output items (one per TextContent) +with matching call_id + +#### to_responses_value() + +Return serialized form. + +Either an instructions string (for system) or input items (for other roles). + +### class MessageToolCall + +Bases: `BaseModel` + +Transport-agnostic tool call representation. + +One canonical id is used for linking across actions/observations and +for Responses function_call_output call_id. + + +#### Properties + +- `arguments`: str +- `id`: str +- `model_config`: ClassVar[ConfigDict] = (configuration object) + Configuration for the model, should be a dictionary conforming to [ConfigDict][pydantic.config.ConfigDict]. +- `name`: str +- `origin`: Literal['completion', 'responses'] + +#### Methods + +#### classmethod from_chat_tool_call() + +Create a MessageToolCall from a Chat Completions tool call. + +#### classmethod from_responses_function_call() + +Create a MessageToolCall from a typed OpenAI Responses function_call item. + +Note: OpenAI Responses function_call.arguments is already a JSON string. 
+ +#### to_chat_dict() + +Serialize to OpenAI Chat Completions tool_calls format. + +#### to_responses_dict() + +Serialize to OpenAI Responses ‘function_call’ input item format. + +### class Metrics + +Bases: [`MetricsSnapshot`](#class-metricssnapshot) + +Metrics class can record various metrics during running and evaluation. +We track: + + - accumulated_cost and costs + - max_budget_per_task (budget limit) + - A list of ResponseLatency + - A list of TokenUsage (one per call). + + +#### Properties + +- `costs`: list[Cost] +- `model_config`: ClassVar[ConfigDict] = (configuration object) + Configuration for the model, should be a dictionary conforming to [ConfigDict][pydantic.config.ConfigDict]. +- `response_latencies`: list[ResponseLatency] +- `token_usages`: list[TokenUsage] + +#### Methods + +#### add_cost() + +#### add_response_latency() + +#### add_token_usage() + +Add a single usage record. + +#### deep_copy() + +Create a deep copy of the Metrics object. + +#### diff() + +Calculate the difference between current metrics and a baseline. + +This is useful for tracking metrics for specific operations like delegates. + +* Parameters: + `baseline` – A metrics object representing the baseline state +* Returns: + A new Metrics object containing only the differences since the baseline + +#### get() + +Return the metrics in a dictionary. + +#### get_snapshot() + +Get a snapshot of the current metrics without the detailed lists. + +#### initialize_accumulated_token_usage() + +#### log() + +Log the metrics. + +#### merge() + +Merge ‘other’ metrics into this one. + +#### classmethod validate_accumulated_cost() + +### class MetricsSnapshot + +Bases: `BaseModel` + +A snapshot of metrics at a point in time. + +Does not include lists of individual costs, latencies, or token usages. 
+ + +#### Properties + +- `accumulated_cost`: float +- `accumulated_token_usage`: TokenUsage | None +- `max_budget_per_task`: float | None +- `model_config`: ClassVar[ConfigDict] = (configuration object) + Configuration for the model, should be a dictionary conforming to [ConfigDict][pydantic.config.ConfigDict]. +- `model_name`: str +### class ReasoningItemModel + +Bases: `BaseModel` + +OpenAI Responses reasoning item (non-stream, subset we consume). + +Do not log or render encrypted_content. + + +#### Properties + +- `content`: list[str] | None +- `encrypted_content`: str | None +- `id`: str | None +- `model_config`: ClassVar[ConfigDict] = (configuration object) + Configuration for the model, should be a dictionary conforming to [ConfigDict][pydantic.config.ConfigDict]. +- `status`: str | None +- `summary`: list[str] +### class RedactedThinkingBlock + +Bases: `BaseModel` + +Redacted thinking block for previous responses without extended thinking. + +This is used as a placeholder for assistant messages that were generated +before extended thinking was enabled. + + +#### Properties + +- `data`: str +- `model_config`: ClassVar[ConfigDict] = (configuration object) + Configuration for the model, should be a dictionary conforming to [ConfigDict][pydantic.config.ConfigDict]. +- `type`: Literal['redacted_thinking'] +### class RegistryEvent + +Bases: `BaseModel` + + +#### Properties + +- `llm`: [LLM](#class-llm) +- `model_config`: ClassVar[ConfigDict] = (configuration object) + Configuration for the model, should be a dictionary conforming to [ConfigDict][pydantic.config.ConfigDict]. +### class RouterLLM + +Bases: [`LLM`](#class-llm) + +Base class for multiple LLM acting as a unified LLM. +This class provides a foundation for implementing model routing by +inheriting from LLM, allowing routers to work with multiple underlying +LLM models while presenting a unified LLM interface to consumers. 
+Key features: +- Works with multiple LLMs configured via llms_for_routing +- Delegates all other operations/properties to the selected LLM +- Provides routing interface through select_llm() method + + +#### Properties + +- `active_llm`: [LLM](#class-llm) | None +- `llms_for_routing`: dict[str, [LLM](#class-llm)] +- `model_config`: ClassVar[ConfigDict] = (configuration object) + Configuration for the model, should be a dictionary conforming to [ConfigDict][pydantic.config.ConfigDict]. +- `router_name`: str + +#### Methods + +#### completion() + +This method intercepts completion calls and routes them to the appropriate +underlying LLM based on the routing logic implemented in select_llm(). + +#### model_post_init() + +This function is meant to behave like a BaseModel method to initialise private attributes. + +It takes context as an argument since that’s what pydantic-core passes when calling it. + +* Parameters: + * `self` – The BaseModel instance. + * `context` – The context. + +#### abstractmethod select_llm() + +Select which LLM to use based on messages and events. + +This method implements the core routing logic for the RouterLLM. +Subclasses should analyze the provided messages to determine which +LLM from llms_for_routing is most appropriate for handling the request. + +* Parameters: + `messages` – List of messages in the conversation that can be used + to inform the routing decision. +* Returns: + The key/name of the LLM to use from llms_for_routing dictionary. + +#### classmethod set_placeholder_model() + +Guarantee model exists before LLM base validation runs. + +#### classmethod validate_llms_not_empty() + +### class TextContent + +Bases: `BaseContent` + + +#### Properties + +- `cache_prompt`: bool +- `model_config`: ClassVar[ConfigDict] = (configuration object) + Configuration for the model, should be a dictionary conforming to [ConfigDict][pydantic.config.ConfigDict]. 
+- `text`: str +- `type`: Literal['text'] + +#### Methods + +#### to_llm_dict() + +Convert to LLM API format. + +### class ThinkingBlock + +Bases: `BaseModel` + +Anthropic thinking block for extended thinking feature. + +This represents the raw thinking blocks returned by Anthropic models +when extended thinking is enabled. These blocks must be preserved +and passed back to the API for tool use scenarios. + + +#### Properties + +- `model_config`: ClassVar[ConfigDict] = (configuration object) + Configuration for the model, should be a dictionary conforming to [ConfigDict][pydantic.config.ConfigDict]. +- `signature`: str +- `thinking`: str +- `type`: Literal['thinking'] \ No newline at end of file diff --git a/sdk/api-reference/openhands.sdk.security.mdx b/sdk/api-reference/openhands.sdk.security.mdx new file mode 100644 index 00000000..41fdd321 --- /dev/null +++ b/sdk/api-reference/openhands.sdk.security.mdx @@ -0,0 +1,62 @@ +--- +title: openhands.sdk.security +description: API reference for openhands.sdk.security module +--- + + +### class SecurityRisk + +Bases: `str`, `Enum` + +Security risk levels for actions. + +Based on OpenHands security risk levels but adapted for agent-sdk. +Integer values allow for easy comparison and ordering. + + +#### Properties + +- `description`: str + Get a human-readable description of the risk level. +- `visualize`: Text + Return Rich Text representation of this risk level. + +#### Methods + +#### HIGH = 'HIGH' + +#### LOW = 'LOW' + +#### MEDIUM = 'MEDIUM' + +#### UNKNOWN = 'UNKNOWN' + +#### get_color() + +Get the color for displaying this risk level in Rich text. + +#### is_riskier() + +Check if this risk level is riskier than another. + +Risk levels follow the natural ordering: LOW is less risky than MEDIUM, which is +less risky than HIGH. UNKNOWN is not comparable to any other level. + +To make this act like a standard well-ordered domain, we reflexively consider +risk levels to be riskier than themselves. 
That is: + + for risk_level in list(SecurityRisk): + : assert risk_level.is_riskier(risk_level) + + # More concretely: + assert SecurityRisk.HIGH.is_riskier(SecurityRisk.HIGH) + assert SecurityRisk.MEDIUM.is_riskier(SecurityRisk.MEDIUM) + assert SecurityRisk.LOW.is_riskier(SecurityRisk.LOW) + +This can be disabled by setting the reflexive parameter to False. + +* Parameters: + other ([SecurityRisk*](#class-securityrisk)) – The other risk level to compare against. + reflexive (bool*) – Whether the relationship is reflexive. +* Raises: + `ValueError` – If either risk level is UNKNOWN. diff --git a/sdk/api-reference/openhands.sdk.tool.mdx b/sdk/api-reference/openhands.sdk.tool.mdx new file mode 100644 index 00000000..f8196235 --- /dev/null +++ b/sdk/api-reference/openhands.sdk.tool.mdx @@ -0,0 +1,352 @@ +--- +title: openhands.sdk.tool +description: API reference for openhands.sdk.tool module +--- + + +### class Action + +Bases: `Schema`, `ABC` + +Base schema for input action. + + +#### Properties + +- `kind`: str +- `model_config`: ClassVar[ConfigDict] = (configuration object) + Configuration for the model, should be a dictionary conforming to [ConfigDict][pydantic.config.ConfigDict]. +- `visualize`: Text + Return Rich Text representation of this action. + This method can be overridden by subclasses to customize visualization. + The base implementation displays all action fields systematically. +### class ExecutableTool + +Bases: `Protocol` + +Protocol for tools that are guaranteed to have a non-None executor. + +This eliminates the need for runtime None checks and type narrowing +when working with tools that are known to be executable. + + +#### Properties + +- `executor`: [ToolExecutor](#class-toolexecutor)[Any, Any] +- `name`: str + +#### Methods + +#### __init__() + +### class FinishTool + +Bases: `ToolDefinition[FinishAction, FinishObservation]` + +Tool for signaling the completion of a task or conversation. 
+ + +#### Properties + +- `kind`: Literal['FinishTool'] +- `model_config`: ClassVar[ConfigDict] = (configuration object) + Configuration for the model, should be a dictionary conforming to [ConfigDict][pydantic.config.ConfigDict]. +- `name`: ClassVar[str] = 'finish' + +#### Methods + +#### classmethod create() + +Create FinishTool instance. + +* Parameters: + * `conv_state` – Optional conversation state (not used by FinishTool). + params* – Additional parameters (none supported). +* Returns: + A sequence containing a single FinishTool instance. +* Raises: + `ValueError` – If any parameters are provided. + +### class Observation + +Bases: `Schema`, `ABC` + +Base schema for output observation. + + +#### Properties + +- `ERROR_MESSAGE_HEADER`: ClassVar[str] = '[An error occurred during execution.]n' +- `content`: list[TextContent | ImageContent] +- `is_error`: bool +- `kind`: str +- `model_config`: ClassVar[ConfigDict] = (configuration object) + Configuration for the model, should be a dictionary conforming to [ConfigDict][pydantic.config.ConfigDict]. +- `text`: str + Extract all text content from the observation. + * Returns: + Concatenated text from all TextContent items in content. +- `to_llm_content`: Sequence[TextContent | ImageContent] + Default content formatting for converting observation to LLM readable content. + Subclasses can override to provide richer content (e.g., images, diffs). +- `visualize`: Text + Return Rich Text representation of this observation. + Subclasses can override for custom visualization; by default we show the + same text that would be sent to the LLM. + +#### Methods + +#### classmethod from_text() + +Utility to create an Observation from a simple text string. + +* Parameters: + * `text` – The text content to include in the observation. + * `is_error` – Whether this observation represents an error. + kwargs* – Additional fields for the observation subclass. +* Returns: + An Observation instance with the text wrapped in a TextContent. 
+
+### class ThinkTool
+
+Bases: `ToolDefinition[ThinkAction, ThinkObservation]`
+
+Tool for logging thoughts without making changes.
+
+
+#### Properties
+
+- `kind`: Literal['ThinkTool']
+- `model_config`: ClassVar[ConfigDict] = (configuration object)
+  Configuration for the model, should be a dictionary conforming to [ConfigDict][pydantic.config.ConfigDict].
+- `name`: ClassVar[str] = 'think'
+
+#### Methods
+
+#### classmethod create()
+
+Create ThinkTool instance.
+
+* Parameters:
+  * `conv_state` – Optional conversation state (not used by ThinkTool).
+  * `**params` – Additional parameters (none supported).
+* Returns:
+  A sequence containing a single ThinkTool instance.
+* Raises:
+  `ValueError` – If any parameters are provided.
+
+### class Tool
+
+Bases: `BaseModel`
+
+Defines a tool to be initialized for the agent.
+
+This is only used in agent-sdk for type schema for server use.
+
+
+#### Properties
+
+- `model_config`: ClassVar[ConfigDict] = (configuration object)
+  Configuration for the model, should be a dictionary conforming to [ConfigDict][pydantic.config.ConfigDict].
+- `name`: str
+- `params`: dict[str, Any]
+
+#### Methods
+
+#### classmethod validate_name()
+
+Validate that name is not empty.
+
+#### classmethod validate_params()
+
+Convert None params to empty dict.
+
+### class ToolAnnotations
+
+Bases: `BaseModel`
+
+Annotations to provide hints about the tool’s behavior.
+
+Based on Model Context Protocol (MCP) spec:
+[https://github.com/modelcontextprotocol/modelcontextprotocol/blob/caf3424488b10b4a7b1f8cb634244a450a1f4400/schema/2025-06-18/schema.ts#L838](https://github.com/modelcontextprotocol/modelcontextprotocol/blob/caf3424488b10b4a7b1f8cb634244a450a1f4400/schema/2025-06-18/schema.ts#L838)
+
+
+#### Properties
+
+- `destructiveHint`: bool
+- `idempotentHint`: bool
+- `model_config`: ClassVar[ConfigDict] = (configuration object)
+  Configuration for the model, should be a dictionary conforming to [ConfigDict][pydantic.config.ConfigDict].
+- `openWorldHint`: bool
+- `readOnlyHint`: bool
+- `title`: str | None
+
+### class ToolDefinition
+
+Bases: `DiscriminatedUnionMixin`, `ABC`, `Generic`
+
+Base class for all tool implementations.
+
+This class serves as a base for the discriminated union of all tool types.
+All tools must inherit from this class and implement the .create() method for
+proper initialization with executors and parameters.
+
+Features:
+- Normalize input/output schemas (class or dict) into both model+schema.
+- Validate inputs before execute.
+- Coerce outputs only if an output model is defined; else return vanilla JSON.
+- Export MCP tool description.
+
+#### Examples
+
+Simple tool with no parameters:
+
+```python
+class FinishTool(ToolDefinition[FinishAction, FinishObservation]):
+    @classmethod
+    def create(cls, conv_state=None, **params):
+        return [cls(name="finish", ..., executor=FinishExecutor())]
+```
+
+Complex tool with initialization parameters:
+
+```python
+class BashTool(ToolDefinition[ExecuteBashAction, ExecuteBashObservation]):
+    @classmethod
+    def create(cls, conv_state, **params):
+        executor = BashExecutor(
+            working_dir=conv_state.workspace.working_dir,
+            **params,
+        )
+        return [cls(name="bash", ..., executor=executor)]
+```
+
+
+#### Properties
+
+- `action_type`: type[[Action](#class-action)]
+- `annotations`: [ToolAnnotations](#class-toolannotations) | None
+- `description`: str
+- `executor`: Annotated[[ToolExecutor](#class-toolexecutor) | None, SkipJsonSchema()]
+- `kind`: str
+- `meta`: dict[str, Any] | None
+- `model_config`: ClassVar[ConfigDict] = (configuration object)
+  Configuration for the model, should be a dictionary conforming to [ConfigDict][pydantic.config.ConfigDict].
+- `name`: ClassVar[str] = ''
+- `observation_type`: type[[Observation](#class-observation)] | None
+- `title`: str
+
+#### Methods
+
+#### action_from_arguments()
+
+Create an action from parsed arguments.
+
+This method can be overridden by subclasses to provide custom logic
+for creating actions from arguments (e.g., for MCP tools).
+
+* Parameters:
+  `arguments` – The parsed arguments from the tool call.
+* Returns:
+  The action instance created from the arguments.
+
+#### as_executable()
+
+Return this tool as an ExecutableTool, ensuring it has an executor.
+
+This method eliminates the need for runtime None checks by guaranteeing
+that the returned tool has a non-None executor.
+
+* Returns:
+  This tool instance, typed as ExecutableTool.
+* Raises:
+  `NotImplementedError` – If the tool has no executor.
+
+#### abstractmethod classmethod create()
+
+Create a sequence of Tool instances.
+
+This method must be implemented by all subclasses to provide custom
+initialization logic, typically initializing the executor with parameters
+from conv_state and other optional parameters.
+
+* Parameters:
+  * `*args` – Variable positional arguments (typically conv_state as first arg).
+  * `**kwargs` – Optional parameters for tool initialization.
+* Returns:
+  A sequence of Tool instances. Even single tools are returned as a sequence
+  to provide a consistent interface and eliminate union return types.
+ +#### classmethod resolve_kind() + +Resolve a kind string to its corresponding tool class. + +* Parameters: + `kind` – The name of the tool class to resolve +* Returns: + The tool class corresponding to the kind +* Raises: + `ValueError` – If the kind is unknown + +#### set_executor() + +Create a new Tool instance with the given executor. + +#### to_mcp_tool() + +Convert a Tool to an MCP tool definition. + +Allow overriding input/output schemas (usually by subclasses). + +* Parameters: + * `input_schema` – Optionally override the input schema. + * `output_schema` – Optionally override the output schema. + +#### to_openai_tool() + +Convert a Tool to an OpenAI tool. + +* Parameters: + * `add_security_risk_prediction` – Whether to add a security_risk field + to the action schema for LLM to predict. This is useful for + tools that may have safety risks, so the LLM can reason about + the risk level before calling the tool. + * `action_type` – Optionally override the action_type to use for the schema. + This is useful for MCPTool to use a dynamically created action type + based on the tool’s input schema. + +#### to_responses_tool() + +Convert a Tool to a Responses API function tool (LiteLLM typed). + +For Responses API, function tools expect top-level keys: +(JSON configuration object) + +### class ToolExecutor + +Bases: `ABC`, `Generic` + +Executor function type for a Tool. + +#### Methods + +#### close() + +Close the executor and clean up resources. + +Default implementation does nothing. Subclasses should override +this method to perform cleanup (e.g., closing connections, +terminating processes, etc.). diff --git a/sdk/api-reference/openhands.sdk.utils.mdx b/sdk/api-reference/openhands.sdk.utils.mdx new file mode 100644 index 00000000..1a9c9b0f --- /dev/null +++ b/sdk/api-reference/openhands.sdk.utils.mdx @@ -0,0 +1,21 @@ +--- +title: openhands.sdk.utils +description: API reference for openhands.sdk.utils module +--- + + +Utility functions for the OpenHands SDK. 
+ +### maybe_truncate() + +Truncate the middle of content if it exceeds the specified length. + +Keeps the head and tail of the content to preserve context at both ends. + +* Parameters: + * `content` – The text content to potentially truncate + * `truncate_after` – Maximum length before truncation. If None, no truncation occurs + * `truncate_notice` – Notice to insert in the middle when content is truncated +* Returns: + Original content if under limit, or truncated content with head and tail + preserved diff --git a/sdk/api-reference/openhands.sdk.workspace.mdx b/sdk/api-reference/openhands.sdk.workspace.mdx new file mode 100644 index 00000000..a339a717 --- /dev/null +++ b/sdk/api-reference/openhands.sdk.workspace.mdx @@ -0,0 +1,358 @@ +--- +title: openhands.sdk.workspace +description: API reference for openhands.sdk.workspace module +--- + + +### class BaseWorkspace + +Bases: `DiscriminatedUnionMixin`, `ABC` + +Abstract base class for workspace implementations. + +Workspaces provide a sandboxed environment where agents can execute commands, +read/write files, and perform other operations. All workspace implementations +support the context manager protocol for safe resource management. + +#### Example + +```pycon +>>> with workspace: +... result = workspace.execute_command("echo 'hello'") +... content = workspace.read_file("example.txt") +``` + + +#### Properties + +- `model_config`: ClassVar[ConfigDict] = (configuration object) + Configuration for the model, should be a dictionary conforming to [ConfigDict][pydantic.config.ConfigDict]. +- `working_dir`: str + +#### Methods + +#### abstractmethod execute_command() + +Execute a bash command on the system. 
+ +* Parameters: + * `command` – The bash command to execute + * `cwd` – Working directory for the command (optional) + * `timeout` – Timeout in seconds (defaults to 30.0) +* Returns: + Result containing stdout, stderr, exit_code, and other + : metadata +* Return type: + [CommandResult](#class-commandresult) +* Raises: + `Exception` – If command execution fails + +#### abstractmethod file_download() + +Download a file from the system. + +* Parameters: + * `source_path` – Path to the source file on the system + * `destination_path` – Path where the file should be downloaded +* Returns: + Result containing success status and metadata +* Return type: + [FileOperationResult](#class-fileoperationresult) +* Raises: + `Exception` – If file download fails + +#### abstractmethod file_upload() + +Upload a file to the system. + +* Parameters: + * `source_path` – Path to the source file + * `destination_path` – Path where the file should be uploaded +* Returns: + Result containing success status and metadata +* Return type: + [FileOperationResult](#class-fileoperationresult) +* Raises: + `Exception` – If file upload fails + +#### abstractmethod git_changes() + +Get the git changes for the repository at the path given. + +* Parameters: + `path` – Path to the git repository +* Returns: + List of changes +* Return type: + list[GitChange] +* Raises: + `Exception` – If path is not a git repository or getting changes failed + +#### abstractmethod git_diff() + +Get the git diff for the file at the path given. + +* Parameters: + `path` – Path to the file +* Returns: + Git diff +* Return type: + GitDiff +* Raises: + `Exception` – If path is not a git repository or getting diff failed + +### class CommandResult + +Bases: `BaseModel` + +Result of executing a command in the workspace. 
+ + +#### Properties + +- `command`: str +- `exit_code`: int +- `model_config`: ClassVar[ConfigDict] = (configuration object) + Configuration for the model, should be a dictionary conforming to [ConfigDict][pydantic.config.ConfigDict]. +- `stderr`: str +- `stdout`: str +- `timeout_occurred`: bool +### class FileOperationResult + +Bases: `BaseModel` + +Result of a file upload or download operation. + + +#### Properties + +- `destination_path`: str +- `error`: str | None +- `file_size`: int | None +- `model_config`: ClassVar[ConfigDict] = (configuration object) + Configuration for the model, should be a dictionary conforming to [ConfigDict][pydantic.config.ConfigDict]. +- `source_path`: str +- `success`: bool +### class LocalWorkspace + +Bases: [`BaseWorkspace`](#class-baseworkspace) + +Local workspace implementation that operates on the host filesystem. + +LocalWorkspace provides direct access to the local filesystem and command execution +environment. It’s suitable for development and testing scenarios where the agent +should operate directly on the host system. + +#### Example + +```pycon +>>> workspace = LocalWorkspace(working_dir="/path/to/project") +>>> with workspace: +... result = workspace.execute_command("ls -la") +... content = workspace.read_file("README.md") +``` + + +#### Properties + +- `kind`: Literal['LocalWorkspace'] +- `model_config`: ClassVar[ConfigDict] = (configuration object) + Configuration for the model, should be a dictionary conforming to [ConfigDict][pydantic.config.ConfigDict]. +- `working_dir`: str + +#### Methods + +#### execute_command() + +Execute a bash command locally. + +Uses the shared shell execution utility to run commands with proper +timeout handling, output streaming, and error management. 
+ +* Parameters: + * `command` – The bash command to execute + * `cwd` – Working directory (optional) + * `timeout` – Timeout in seconds +* Returns: + Result with stdout, stderr, exit_code, command, and + : timeout_occurred +* Return type: + [CommandResult](#class-commandresult) + +#### file_download() + +Download (copy) a file locally. + +For local systems, file download is implemented as a file copy operation +using shutil.copy2 to preserve metadata. + +* Parameters: + * `source_path` – Path to the source file + * `destination_path` – Path where the file should be copied +* Returns: + Result with success status and file information +* Return type: + [FileOperationResult](#class-fileoperationresult) + +#### file_upload() + +Upload (copy) a file locally. + +For local systems, file upload is implemented as a file copy operation +using shutil.copy2 to preserve metadata. + +* Parameters: + * `source_path` – Path to the source file + * `destination_path` – Path where the file should be copied +* Returns: + Result with success status and file information +* Return type: + [FileOperationResult](#class-fileoperationresult) + +#### git_changes() + +Get the git changes for the repository at the path given. + +* Parameters: + `path` – Path to the git repository +* Returns: + List of changes +* Return type: + list[GitChange] +* Raises: + `Exception` – If path is not a git repository or getting changes failed + +#### git_diff() + +Get the git diff for the file at the path given. + +* Parameters: + `path` – Path to the file +* Returns: + Git diff +* Return type: + GitDiff +* Raises: + `Exception` – If path is not a git repository or getting diff failed + +### class RemoteWorkspace + +Bases: `RemoteWorkspaceMixin`, [`BaseWorkspace`](#class-baseworkspace) + +Remote workspace implementation that connects to an OpenHands agent server. + +RemoteWorkspace provides access to a sandboxed environment running on a remote +OpenHands agent server. 
This is the recommended approach for production deployments +as it provides better isolation and security. + +#### Example + +```pycon +>>> workspace = RemoteWorkspace( +... host="https://agent-server.example.com", +... working_dir="/workspace" +... ) +>>> with workspace: +... result = workspace.execute_command("ls -la") +... content = workspace.read_file("README.md") +``` + + +#### Properties + +- `api_key`: str | None +- `client`: Client +- `host`: str +- `kind`: Literal['RemoteWorkspace'] +- `model_config`: ClassVar[ConfigDict] = (configuration object) + Configuration for the model, should be a dictionary conforming to [ConfigDict][pydantic.config.ConfigDict]. +- `working_dir`: str + +#### Methods + +#### execute_command() + +Execute a bash command on the remote system. + +This method starts a bash command via the remote agent server API, +then polls for the output until the command completes. + +* Parameters: + * `command` – The bash command to execute + * `cwd` – Working directory (optional) + * `timeout` – Timeout in seconds +* Returns: + Result with stdout, stderr, exit_code, and other metadata +* Return type: + [CommandResult](#class-commandresult) + +#### file_download() + +Download a file from the remote system. + +Requests the file from the remote system via HTTP API and saves it locally. + +* Parameters: + * `source_path` – Path to the source file on remote system + * `destination_path` – Path where the file should be saved locally +* Returns: + Result with success status and metadata +* Return type: + [FileOperationResult](#class-fileoperationresult) + +#### file_upload() + +Upload a file to the remote system. + +Reads the local file and sends it to the remote system via HTTP API. 
+ +* Parameters: + * `source_path` – Path to the local source file + * `destination_path` – Path where the file should be uploaded on remote system +* Returns: + Result with success status and metadata +* Return type: + [FileOperationResult](#class-fileoperationresult) + +#### git_changes() + +Get the git changes for the repository at the path given. + +* Parameters: + `path` – Path to the git repository +* Returns: + List of changes +* Return type: + list[GitChange] +* Raises: + `Exception` – If path is not a git repository or getting changes failed + +#### git_diff() + +Get the git diff for the file at the path given. + +* Parameters: + `path` – Path to the file +* Returns: + Git diff +* Return type: + GitDiff +* Raises: + `Exception` – If path is not a git repository or getting diff failed + +#### model_post_init() + +Override this method to perform additional initialization after __init__ and model_construct. +This is useful if you want to do some validation that requires the entire model to be initialized. + +### class Workspace + +### class Workspace + +Bases: `object` + +Factory entrypoint that returns a LocalWorkspace or RemoteWorkspace. + +Usage: +: - Workspace(working_dir=…) -> LocalWorkspace + - Workspace(working_dir=…, host=”http://…”) -> RemoteWorkspace