From 3cf072bfd0d2115fd61293a5ec52b9eaa2251e40 Mon Sep 17 00:00:00 2001 From: openhands Date: Mon, 4 May 2026 15:01:31 +0000 Subject: [PATCH] docs: add QA Changes use case and SDK guide MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add documentation for the new QA Changes plugin that validates PR changes by actually running the code — setting up the environment, exercising changed behavior, and posting structured QA reports. New pages: - Use Case: openhands/usage/use-cases/qa-changes.mdx - SDK Guide: sdk/guides/github-workflows/qa-changes.mdx Updated pages: - docs.json: Added navigation entries for both new pages - overview.mdx: Added QA Changes card to use cases overview Related: OpenHands/software-agent-sdk#2924 Related: OpenHands/extensions#135 Co-authored-by: openhands --- docs.json | 2 + openhands/usage/use-cases/overview.mdx | 7 + openhands/usage/use-cases/qa-changes.mdx | 339 +++++++++++++++++++++ sdk/guides/github-workflows/qa-changes.mdx | 207 +++++++++++++ 4 files changed, 555 insertions(+) create mode 100644 openhands/usage/use-cases/qa-changes.mdx create mode 100644 sdk/guides/github-workflows/qa-changes.mdx diff --git a/docs.json b/docs.json index f18fd5a6..b91f17bf 100644 --- a/docs.json +++ b/docs.json @@ -186,6 +186,7 @@ "pages": [ "openhands/usage/use-cases/vulnerability-remediation", "openhands/usage/use-cases/code-review", + "openhands/usage/use-cases/qa-changes", "openhands/usage/use-cases/incident-triage", "openhands/usage/use-cases/cobol-modernization", "openhands/usage/use-cases/dependency-upgrades", @@ -283,6 +284,7 @@ "pages": [ "sdk/guides/github-workflows/assign-reviews", "sdk/guides/github-workflows/pr-review", + "sdk/guides/github-workflows/qa-changes", "sdk/guides/github-workflows/todo-management" ] } diff --git a/openhands/usage/use-cases/overview.mdx b/openhands/usage/use-cases/overview.mdx index 8229b463..21edb8e1 100644 --- a/openhands/usage/use-cases/overview.mdx +++ 
b/openhands/usage/use-cases/overview.mdx @@ -22,6 +22,13 @@ Each use case can be implemented in different ways—as a one-off conversation, > Set up automated PR reviews to maintain code quality and catch bugs early. + + Validate PR changes by running the code — exercise behavior as a real user would. + - + Validate PR changes by actually running the code — not just reading it. +--- + + + Check out the complete QA Changes plugin with ready-to-use code and configuration. + + +Automated QA validation goes beyond code review by **actually running the code** to verify PR changes work as described. While [code review](/openhands/usage/use-cases/code-review) reads diffs and posts inline comments, QA validation sets up the environment, exercises changed behavior as a real user would, and posts a structured QA report. + +## Overview + +The OpenHands QA Changes workflow is a GitHub Actions workflow that: + +- **Triggers automatically** when PRs are opened, marked ready for review, or on demand +- **Sets up the environment** — installs dependencies, builds the project +- **Exercises changed behavior** — runs CLI commands, makes HTTP requests, opens browsers +- **Posts a structured QA report** with evidence and a clear verdict + +## How It Differs from Code Review + +| Aspect | Code Review | QA Changes | +|--------|-------------|------------| +| Method | Reads the diff | Runs the code | +| Speed | 2-3 minutes | 5-15 minutes | +| Catches | Style, security, logic issues | Regressions, broken features, build failures | +| Output | Inline code comments | Structured QA report with evidence | + +Use both together for comprehensive PR validation: code review catches issues in the code itself, while QA validation catches issues in how the code behaves. + +## How It Works + +The QA agent follows a four-phase methodology: + +1. **Understand** — Reads the PR diff, title, and description. Classifies changes and identifies entry points (CLI commands, API endpoints, UI pages). +2. 
**Setup** — Bootstraps the repo: installs dependencies, builds the project. Notes CI status but does not re-run tests. +3. **Exercise** — The core phase. Actually uses the software the way a human would: spins up servers, opens browsers, runs CLI commands, makes HTTP requests. Focuses on functional verification that CI and code review cannot do. +4. **Report** — Posts a structured QA report as a PR comment with evidence (commands, outputs, screenshots) and a verdict. + +The agent sets a high bar: if the PR changes a web UI, it spins up the server and verifies it in a real browser. If it changes a CLI, it runs the CLI with real inputs. It does not settle for "the tests pass" — it actually uses the software. + +## Quick Start + + + + Create `.github/workflows/qa-changes-by-openhands.yml` in your repository: + + ```yaml + name: QA Changes by OpenHands + + on: + pull_request: + types: [opened, ready_for_review, labeled, review_requested] + + permissions: + contents: read + pull-requests: write + issues: write + + jobs: + qa-changes: + if: | + (github.event.action == 'opened' + && github.event.pull_request.draft == false + && github.event.pull_request.author_association != 'FIRST_TIME_CONTRIBUTOR' + && github.event.pull_request.author_association != 'NONE') + || (github.event.action == 'ready_for_review' + && github.event.pull_request.author_association != 'FIRST_TIME_CONTRIBUTOR' + && github.event.pull_request.author_association != 'NONE') + || github.event.label.name == 'qa-this' + || github.event.requested_reviewer.login == 'openhands-agent' + concurrency: + group: qa-changes-${{ github.event.pull_request.number }} + cancel-in-progress: true + runs-on: ubuntu-24.04 + timeout-minutes: 30 + steps: + - name: Run QA Changes + uses: OpenHands/extensions/plugins/qa-changes@main + with: + llm-model: anthropic/claude-sonnet-4-5-20250929 + max-budget: '10.0' + timeout-minutes: '30' + max-iterations: '500' + llm-api-key: ${{ secrets.LLM_API_KEY }} + github-token: ${{ 
secrets.GITHUB_TOKEN }} + ``` + + + + Go to your repository's **Settings → Secrets and variables → Actions** and add: + - **`LLM_API_KEY`**: Your LLM API key (get one from [OpenHands LLM Provider](/openhands/usage/llms/openhands-llms)) + + + + Create a `qa-this` label in your repository: + 1. Go to **Issues → Labels** + 2. Click **New label** + 3. Name: `qa-this` + 4. Description: `Trigger OpenHands QA validation` + + + + Open a PR and either: + - Add the `qa-this` label, OR + - Request `openhands-agent` as a reviewer + + + +## Composite Action + +The workflow uses a reusable composite action from the [extensions repository](https://github.com/OpenHands/extensions/tree/main/plugins/qa-changes) that handles: + +- Checking out the extensions repository and PR code +- Setting up Python and dependencies +- Running the QA agent inside the PR repository +- Uploading logs and trace artifacts + +### Action Inputs + +| Input | Required | Default | Description | +|-------|----------|---------|-------------| +| `llm-model` | No | `anthropic/claude-sonnet-4-5-20250929` | LLM model to use | +| `llm-base-url` | No | `''` | Custom LLM endpoint URL | +| `extensions-repo` | No | `OpenHands/extensions` | Extensions repository | +| `extensions-version` | No | `main` | Git ref (tag, branch, or SHA) | +| `max-budget` | No | `10.0` | Maximum LLM cost in dollars — agent stops when exceeded | +| `timeout-minutes` | No | `30` | Wall-clock timeout for the QA step | +| `max-iterations` | No | `500` | Maximum agent iterations (each is one LLM call + action) | +| `llm-api-key` | Yes | - | LLM API key | +| `github-token` | Yes | - | GitHub token for API access | +| `lmnr-api-key` | No | `''` | Laminar API key for observability | + + +Use `extensions-version` to pin to a specific version tag (e.g., `v1.0.0`) for production stability, or use `main` to always get the latest features. + + +## QA Report Format + +The agent posts a structured QA report as a PR comment. 
Reports are designed to be **scannable** — a reviewer can grasp the verdict in under 10 seconds, with detailed evidence available in collapsible sections. + +```markdown +## ✅ QA Report: PASS + +All changed behavior verified successfully. + +### Does this PR achieve its stated goal? + +Yes. The new CLI flag `--format json` produces valid JSON output +for all tested commands. + +| Phase | Result | +|-------|--------| +| Environment Setup | ✅ Dependencies installed, project built | +| CI Status | ✅ All checks passing | +| Functional Verification | ✅ 3/3 verifications passed | + +
<details><summary>Functional Verification</summary> +[Detailed evidence with commands, outputs, and interpretation] +</details>
+ +### Issues Found + +None. +``` + +### Verdict Values + +- ✅ **PASS**: Change works as described, no regressions. +- ⚠️ **PASS WITH ISSUES**: Change mostly works, but issues were found. +- ❌ **FAIL**: Change does not work as described, or introduces regressions. +- 🟡 **PARTIAL**: Some behavior verified, some could not be verified. + +## Customization + +### Repository-Specific QA Guidelines + +Add project-specific QA guidelines by creating a skill file at `.agents/skills/qa-guide.md`: + +```markdown +--- +name: qa-guide +description: Project-specific QA guidelines +triggers: +- /qa-changes +--- + +# Project QA Guidelines + +## Setup Commands +- `make install` to install dependencies +- `make build` to build the project + +## How to Run the App +- `make serve` to start the dev server on port 8080 +- `python -m myapp --help` for CLI usage + +## Key Behaviors to Verify +- User authentication flow works end-to-end +- API responses include correct pagination headers +- Dashboard loads within 3 seconds +``` + + +The skill file must use `/qa-changes` as the trigger so it activates alongside the default QA behavior. + + +### Using AGENTS.md + +You can also add setup and verification guidance to `AGENTS.md` at your repository root. The QA agent reads this file automatically and uses it to understand how to build, run, and test your project. 
+ +### Workflow Configuration + +Customize the workflow by modifying the action inputs: + +```yaml +- name: Run QA Changes + uses: OpenHands/extensions/plugins/qa-changes@main + with: + # Change the LLM model + llm-model: anthropic/claude-sonnet-4-5-20250929 + # Use a custom LLM endpoint + llm-base-url: https://your-llm-proxy.example.com + # Increase budget for complex projects + max-budget: '20.0' + # Allow more time for large repos (also raise the job-level timeout-minutes to match) + timeout-minutes: '45' + # Extensions ref to use (replace `main` with a version tag or commit SHA to pin) + extensions-version: main + # Secrets + llm-api-key: ${{ secrets.LLM_API_KEY }} + github-token: ${{ secrets.GITHUB_TOKEN }} +``` + +### Trigger Customization + +Modify when QA runs by editing the workflow conditions: + +```yaml +# Only trigger on label (disable auto-QA on PR open) +if: github.event.label.name == 'qa-this' + +# Only trigger when specific reviewer is requested +if: github.event.requested_reviewer.login == 'openhands-agent' + +# Trigger on all PRs (including drafts); requires adding `synchronize` to `on.pull_request.types` +if: | + github.event.action == 'opened' || + github.event.action == 'synchronize' +``` + +## Security Considerations + +The workflow uses `pull_request` (not `pull_request_target`) so that fork PRs do **not** get access to the base repository's secrets. Since the QA agent *executes code* from the PR, using `pull_request_target` would allow untrusted fork code to run with the repo's `GITHUB_TOKEN` and `LLM_API_KEY`. + + +**Important**: Unlike code review, which only reads diffs, QA validation **executes code** from the PR. The `FIRST_TIME_CONTRIBUTOR` and `NONE` author associations are excluded from automatic triggers as an additional safety layer. Only trusted contributors' PRs are automatically validated. + + +The trade-off is that fork PRs won't have access to repository secrets. The action detects this case and exits successfully with a clear skip notice instead of failing. Maintainers can run QA locally for fork PRs. 
+ +## QA Evaluation (Optional) + +The plugin includes an optional evaluation workflow that assesses QA effectiveness when PRs are closed. This helps you understand how well the QA agent is performing over time. + +To enable evaluation, add a second workflow file (`.github/workflows/qa-changes-evaluation.yml`) that runs on `pull_request_target: [closed]` and uses the evaluation script from the extensions repository. See the [plugin documentation](https://github.com/OpenHands/extensions/tree/main/plugins/qa-changes) for the complete evaluation workflow. + +## Troubleshooting + + + + - Ensure the `LLM_API_KEY` secret is set correctly + - Check that the label name matches exactly (`qa-this`) + - Verify the workflow file is in `.github/workflows/` + - Check the Actions tab for workflow run errors + - For fork PRs, QA is intentionally skipped (see Security section) + + + + - Ensure `GITHUB_TOKEN` has `pull-requests: write` permission + - Check the workflow logs for API errors + - The agent may still be running — check the Actions tab for in-progress workflows + + + + - Add setup instructions to your `AGENTS.md` file + - Create a custom QA skill with specific build commands (see Customization section) + - Check that your project's dependencies are compatible with Ubuntu 24.04 + + + + - Increase `timeout-minutes` and `max-budget` for complex projects + - Add specific verification guidance in AGENTS.md to help the agent focus + - Consider which PRs truly need QA — use the `qa-this` label for selective triggering instead of auto-triggering on all PRs + + + + - This is expected for features requiring external services, credentials, or special hardware + - The agent will report what it could not verify and suggest AGENTS.md improvements + - Add guidance to your QA skill or AGENTS.md to help future runs succeed + + + +## Automate This + +You can schedule periodic QA runs using [OpenHands Automations](/openhands/usage/automations/overview). 
+Copy this prompt into a new conversation to set one up: + +``` +Create an automation called "Weekly QA Validation" that runs every Monday at 10 AM. + +It should: +1. Find all open PRs that have been updated in the last week +2. For each PR, check if it has a QA report already +3. For PRs without QA reports, add the "qa-this" label to trigger validation + +Learn more at https://docs.openhands.dev/openhands/usage/use-cases/qa-changes +``` + +For automated QA on every PR, use the +[qa-changes plugin](https://github.com/OpenHands/extensions/tree/main/plugins/qa-changes) +as a GitHub Action instead. + +## Related Resources + +- [QA Changes Plugin](https://github.com/OpenHands/extensions/tree/main/plugins/qa-changes) - Full plugin with workflow, action, and scripts +- [QA Changes SDK Guide](/sdk/guides/github-workflows/qa-changes) - SDK-level documentation and configuration reference +- [Automated Code Review](/openhands/usage/use-cases/code-review) - Complement QA with automated code review +- [Software Agent SDK](/sdk/index) - Build your own AI-powered workflows +- [Skills Documentation](/overview/skills) - Learn more about OpenHands skills diff --git a/sdk/guides/github-workflows/qa-changes.mdx b/sdk/guides/github-workflows/qa-changes.mdx new file mode 100644 index 00000000..ec93a613 --- /dev/null +++ b/sdk/guides/github-workflows/qa-changes.mdx @@ -0,0 +1,207 @@ +--- +title: QA Changes +description: Automated QA validation of PR changes using OpenHands Agent +--- + +> The reference workflow is available [here](#reference-workflow)! + +Automatically validate pull request changes by running the code — setting up the environment, exercising changed behavior, and posting a structured QA report. Validations can be triggered in two ways: +- Adding the `qa-this` label to the PR +- Requesting `openhands-agent` as a reviewer + + +The reference workflow also triggers automatically on PR open and ready-for-review events for trusted contributors. 
`FIRST_TIME_CONTRIBUTOR` and `NONE` author associations are excluded for security, since the QA agent executes code from the PR. + + +## Quick Start + +```bash +# 1. Copy workflow to your repository +cp examples/03_github_workflows/05_qa_changes/workflow.yml \ + .github/workflows/qa-changes-by-openhands.yml + +# 2. Configure secrets in GitHub Settings → Secrets +# Add: LLM_API_KEY + +# 3. (Optional) Create a "qa-this" label in your repository +# Go to Issues → Labels → New label +# You can also trigger QA by requesting "openhands-agent" as a reviewer +``` + +## Features + +- **Runs the Code** — Goes beyond reading diffs to actually execute the software +- **Four-Phase Methodology** — Understand → Setup → Exercise → Report +- **Structured Reports** — Posts QA reports with evidence, commands, outputs, and a clear verdict +- **Smart Retries** — Tries multiple approaches before giving up, then reports honestly +- **Customizable** — Add project-specific QA guidelines via skills or AGENTS.md + +## How It Differs from PR Review + +| Aspect | PR Review | QA Changes | +|--------|-----------|------------| +| Method | Reads the diff | Runs the code | +| Speed | 2-3 minutes | 5-15 minutes | +| Catches | Style, security, logic issues | Regressions, broken features, build failures | +| Output | Inline code comments | Structured QA report with evidence | + +## Security + +- The workflow uses `pull_request` (not `pull_request_target`) since the QA agent executes code +- Fork PRs are automatically skipped with a clear notice (no access to repository secrets) +- `FIRST_TIME_CONTRIBUTOR` and `NONE` author associations are excluded from automatic triggers +- Maintainers can trigger QA for any PR using the `qa-this` label + +## Customizing QA Behavior + +Instead of forking the agent script, you can customize QA behavior by adding a skill file to your repository. This is the **recommended approach** for customization. 
+ +### How It Works + +The QA agent uses skills from the [OpenHands/extensions](https://github.com/OpenHands/extensions) repository by default. You can add project-specific guidelines alongside the default skill by creating a custom skill file. + + +**Skill paths**: Place skills in `.agents/skills/` (recommended). The legacy path `.openhands/skills/` is also supported. See [Skill Loading Precedence](/overview/skills#skill-loading-precedence) for details. + + +### Example: Custom QA Skill + +Create `.agents/skills/qa-guide.md` in your repository: + +```markdown +--- +name: qa-guide +description: Project-specific QA guidelines for MyProject +triggers: +- /qa-changes +--- + +# MyProject QA Guidelines + +In addition to general QA methodology, use these project-specific instructions: + +## Setup Commands +- `make install` to install dependencies +- `make build` to build the project + +## How to Run the App +- `make serve` to start the dev server on port 8080 +- The API is available at http://localhost:8080/api/v1 +- `python -m myapp --help` for CLI usage + +## Key Behaviors to Verify +- User login/signup flow works end-to-end +- API responses include correct pagination headers +- Dashboard renders within 3 seconds + +## Known Limitations +- OAuth login requires external service — skip if unavailable +- Email sending is mocked in dev — verify the mock is called +``` + + +**Note**: These rules supplement the default `qa-changes` skill, not replace it. + + + +**How skill merging works**: Using a unique name like `qa-guide` allows BOTH your custom skill AND the default `qa-changes` skill to be triggered by `/qa-changes`. When triggered, skill content is concatenated into the agent's context. There is no smart merging — if guidelines conflict, the agent sees both and must reconcile them. + +If your skill has `name: qa-changes` (matching the default skill's name), it will completely **override** the default skill instead of supplementing it. 
+ + +### Benefits of Custom Skills + +1. **No forking required**: Keep using the official plugin while customizing behavior +2. **Version controlled**: Your QA guidelines live in your repository +3. **Easy updates**: Plugin updates don't overwrite your customizations +4. **Team alignment**: Everyone uses the same QA standards +5. **Composable**: Add project-specific rules alongside default methodology + +## Reference Workflow + + +The QA Changes plugin is available in the extensions repository: [OpenHands/extensions/plugins/qa-changes](https://github.com/OpenHands/extensions/tree/main/plugins/qa-changes) + + +```yaml +--- +# OpenHands QA Changes Workflow +# +# To set this up: +# 1. Copy this file to .github/workflows/qa-changes-by-openhands.yml +# 2. Add LLM_API_KEY to repository secrets +# 3. Customize the inputs below as needed +# 4. Commit this file to your repository +# 5. Trigger QA by either: +# - Adding the "qa-this" label to any PR, OR +# - Requesting openhands-agent as a reviewer +# +# For more information, see: +# https://github.com/OpenHands/extensions/tree/main/plugins/qa-changes +name: QA Changes by OpenHands + +on: + pull_request: + types: [opened, ready_for_review, labeled, review_requested] + +permissions: + contents: read + pull-requests: write + issues: write + +jobs: + qa-changes: + if: | + (github.event.action == 'opened' + && github.event.pull_request.draft == false + && github.event.pull_request.author_association != 'FIRST_TIME_CONTRIBUTOR' + && github.event.pull_request.author_association != 'NONE') + || (github.event.action == 'ready_for_review' + && github.event.pull_request.author_association != 'FIRST_TIME_CONTRIBUTOR' + && github.event.pull_request.author_association != 'NONE') + || github.event.label.name == 'qa-this' + || github.event.requested_reviewer.login == 'openhands-agent' + concurrency: + group: qa-changes-${{ github.event.pull_request.number }} + cancel-in-progress: true + runs-on: ubuntu-24.04 + timeout-minutes: 30 + steps: 
+ - name: Run QA Changes + uses: OpenHands/extensions/plugins/qa-changes@main + with: + llm-model: anthropic/claude-sonnet-4-5-20250929 + max-budget: '10.0' + timeout-minutes: '30' + max-iterations: '500' + llm-api-key: ${{ secrets.LLM_API_KEY }} + github-token: ${{ secrets.GITHUB_TOKEN }} +``` + +### Action Inputs + +| Input | Description | Required | Default | +|-------|-------------|----------|---------| +| `llm-model` | LLM model to use for QA validation | No | `anthropic/claude-sonnet-4-5-20250929` | +| `llm-base-url` | LLM base URL (for custom endpoints) | No | `''` | +| `extensions-repo` | Extensions repository (owner/repo) | No | `OpenHands/extensions` | +| `extensions-version` | Git ref for extensions (tag, branch, or commit SHA) | No | `main` | +| `max-budget` | Maximum LLM cost in dollars — agent stops when exceeded | No | `10.0` | +| `timeout-minutes` | Wall-clock timeout for the QA step | No | `30` | +| `max-iterations` | Maximum agent iterations (each is one LLM call + action) | No | `500` | +| `llm-api-key` | LLM API key | Yes | - | +| `github-token` | GitHub token for API access | Yes | - | +| `lmnr-api-key` | Laminar API key for observability | No | `''` | + + +Use `extensions-version` to pin to a specific version tag (e.g., `v1.0.0`) for production stability, or use `main` to always get the latest features. 
+ + +## Related Files + +- [QA Changes Plugin](https://github.com/OpenHands/extensions/tree/main/plugins/qa-changes) - Complete plugin with scripts and skills (in extensions repo) +- [Agent Script](https://github.com/OpenHands/extensions/blob/main/plugins/qa-changes/scripts/agent_script.py) - Main QA agent script +- [Prompt Template](https://github.com/OpenHands/extensions/blob/main/plugins/qa-changes/scripts/prompt.py) - QA prompt template +- [QA Skill](https://github.com/OpenHands/extensions/blob/main/skills/qa-changes/SKILL.md) - QA methodology skill +- [Example Workflow](https://github.com/OpenHands/extensions/blob/main/plugins/qa-changes/workflows/qa-changes-by-openhands.yml) - Example workflow +- [Composite Action](https://github.com/OpenHands/extensions/blob/main/plugins/qa-changes/action.yml) - Reusable GitHub Action