diff --git a/.github/copilot-instructions.md b/.github/copilot-instructions.md
index 5a9b26b..0f04006 100644
--- a/.github/copilot-instructions.md
+++ b/.github/copilot-instructions.md
@@ -36,7 +36,7 @@ Contribution guidelines live in `CONTRIBUTING.md` at the repo root.
   - Local evaluation via `azure-ai-evaluation` SDK (fallback)
 - **Secondary backend**: subprocess-based (generic)
 - **Azure SDK dependencies** (runtime, for Foundry backend):
-  - `azure-ai-projects>=2.0.0b1` — Foundry project client, `get_openai_client()`
+  - `azure-ai-projects>=2.0.1` — Foundry project client, `get_openai_client()`
   - `azure-ai-evaluation` — Local evaluator classes (SimilarityEvaluator, etc.)
   - `azure-identity` — `DefaultAzureCredential` authentication
   - `openai` — Evals API types (`DataSourceConfigCustom`, etc.)
@@ -233,7 +233,7 @@ Do not implement the following unless explicitly discussed:
 
 This repository also defines workflow-oriented Copilot skills under `.github/skills/`.
 
-- Use these skills for operational guidance on running evaluations, investigating regressions, and observability triage workflows.
+- Use these skills for operational guidance on running evaluations, investigating regressions, observability triage, and release management workflows.
 - Treat the CLI as the source of truth and keep planned/stubbed commands clearly marked as not yet implemented.
 - Do not duplicate architecture or code-structure guidance from this file inside workflow skills.
 
@@ -241,6 +241,7 @@ When generating or modifying code:
 
 - **Read `docs/how-it-works.md` first** — it is the single source of truth for architecture
 - **Read `CONTRIBUTING.md`** for contribution rules and workflow
+- Treat the CLI as the source of truth and keep planned/stubbed commands clearly marked as not yet implemented.
 - Do not invent new concepts or commands
 - Prefer clarity and determinism over cleverness
 - Optimize for maintainability and CI usage
diff --git a/.github/extensions/agentops-skills/extension.mjs b/.github/extensions/agentops-skills/extension.mjs
new file mode 100644
index 0000000..5901e92
--- /dev/null
+++ b/.github/extensions/agentops-skills/extension.mjs
@@ -0,0 +1,149 @@
+// Extension: agentops-skills
+// Injects AgentOps workflow skills as context when relevant prompts are detected.
+
+import { joinSession } from "@github/copilot-sdk/extension";
+
+const SKILLS = { // skill id -> { keywords, context }; read by matchSkills() below
+    "run-evals": {
+        keywords: [ // lowercase substrings; matchSkills() lowercases the prompt before testing them
+            "run eval", "start agentops", "run.yaml", "regenerate report",
+            "evaluation results", "agentops init", "agentops eval", "agentops report",
+            "run an evaluation", "initialize agentops", "results.json", "report.md",
+            "eval run", "run config", "evaluation output",
+        ],
+        context: `## Skill: Run Evaluations
+
+### Purpose
+Guide through the implemented AgentOps evaluation workflow from workspace setup to report interpretation.
+
+### Available Commands
+- agentops init [--path <dir>] — Initialize workspace
+- agentops eval run — Execute evaluation
+- agentops report — Regenerate report from results.json
+
+### Typical Workflow
+1. Initialize workspace: agentops init
+2. Confirm run config exists (.agentops/run.yaml)
+3. Execute evaluation: agentops eval run
+4. Regenerate markdown report: agentops report
+5. Inspect outputs under .agentops/results/latest/
+
+### Outputs
+- results.json (machine-readable normalized results)
+- report.md (human-readable summary)
+- cloud_evaluation.json (cloud evaluation flows only)
+- Latest pointers: .agentops/results/latest/
+
+### Interpretation
+- Start with report.md for quick pass/fail narrative and threshold view.
+- Use results.json for metric-level details, row-level checks, and automation.
+- Distinguish: thresholds passing, threshold failures, runtime/config errors.
+
+### Guardrails
+- Do not invent commands or flags beyond documented CLI behavior.
+- Planned commands (compare, run-history) are stubbed — pivot to artifact inspection.`,
+    },
+
+    "investigate-regression": {
+        keywords: [
+            "regression", "score dropped", "threshold started failing",
+            "compare runs", "eval got worse", "debug evaluation",
+            "evaluation drift", "quality drop", "pass rate dropped",
+            "ci failing", "scores lower", "metrics degraded",
+        ],
+        context: `## Skill: Investigate Regression
+
+### Purpose
+Guide through regression investigation using currently available AgentOps outputs.
+
+### Available Commands
+- agentops eval run — Generate fresh artifacts
+- agentops report — Regenerate report
+
+### Planned (not implemented)
+- agentops eval compare --runs ID1,ID2
+
+### Investigation Steps
+1. Run fresh evaluation: agentops eval run
+2. Regenerate report: agentops report
+3. Compare current artifacts to baseline manually
+4. Report factual deltas, then propose controlled next steps
+
+### Required Inputs
+- At least one recent artifact set (results.json + report.md)
+- Preferably a baseline for side-by-side comparison
+- Context about what changed (prompt, model, dataset, bundle, backend, environment)
+
+### Interpretation
+- Separate observations (artifact-backed) from hypotheses (plausible causes).
+- Prioritize impact: which thresholds flipped, which metrics degraded most, broad vs concentrated failures.
+- End with actionable next checks (rerun with controlled changes, validate dataset, verify config).
+
+### Guardrails
+- agentops eval compare is NOT implemented — use manual artifact comparison.
+- Do not infer causality from correlation alone.
+- Keep remediation tied to reproducible checks.`,
+    },
+
+    "observability-triage": {
+        keywords: [
+            "tracing", "monitoring", "dashboard", "alerts", "triage",
+            "observability", "run health", "production triage",
+            "monitor evals", "set up tracing", "failed evaluation",
+            "quality monitoring",
+        ],
+        context: `## Skill: Observability Triage
+
+### Purpose
+Provide honest observability guidance: use current reporting artifacts today, frame tracing/monitoring as planned future work.
+
+### Available Commands (for triage today)
+- agentops eval run
+- agentops report
+
+### Planned/Stubbed (NOT implemented)
+- agentops trace init
+- agentops monitor setup
+- agentops monitor dashboard
+- agentops monitor alert
+
+### Current Triage Approach
+- Use report.md for quick operational triage (what failed, severity).
+- Use results.json for detailed metric and threshold inspection.
+- Keep run artifacts organized for future compare/monitor automation.
+
+### When Users Ask for Unimplemented Features
+1. State explicitly: planned/stubbed, not available yet.
+2. Provide immediate fallback: artifact-based troubleshooting.
+3. Suggest preparation: organize artifacts for future tooling.
+
+### Guardrails
+- Do not present tracing or monitoring commands as available.
+- Do not imply real-time dashboards/alerts exist in CLI.
+- Always pivot to concrete available outputs (results.json, report.md).`,
+    },
+};
+
+function matchSkills(prompt) {
+    const lower = prompt.toLowerCase();
+    const matched = [];
+    for (const [name, skill] of Object.entries(SKILLS)) {
+        if (skill.keywords.some((kw) => lower.includes(kw))) {
+            matched.push(skill.context);
+        }
+    }
+    return matched;
+}
+
+const session = await joinSession({
+    hooks: {
+        onUserPromptSubmitted: async (input) => {
+            const matched = matchSkills(input.prompt);
+            if (matched.length > 0) {
+                return {
+                    additionalContext: `<agentops_skills>\n${matched.join("\n\n---\n\n")}\n</agentops_skills>`,
+                };
+            }
+        },
+    },
+});
diff --git a/.github/skills/investigate-regression/SKILL.md b/.github/skills/investigate-regression/SKILL.md
deleted file mode 100644
index 93144f2..0000000
--- a/.github/skills/investigate-regression/SKILL.md
+++ /dev/null
@@ -1,70 +0,0 @@
----
-name: investigate-regression
-description: Help users investigate potential evaluation regressions in AgentOps outputs using implemented tooling and artifact review. Trigger when users say "regression", "score dropped", "threshold started failing", "compare runs", "why did this eval get worse", or "debug evaluation drift". Relevant current commands: `agentops eval run`, `agentops report`. Planned but stubbed command: `agentops eval compare --runs ID1,ID2`.
----
-
-# Investigate Regression
-
-## Purpose
-Help Copilot guide users through regression investigation using currently available AgentOps outputs while clearly marking compare automation as planned/stubbed.
-
-## When to Use
-- User reports lower scores versus previous runs.
-- User reports new threshold failures.
-- User asks to compare current and prior evaluation outcomes.
-- CI gating changed from pass to fail and root cause is unclear.
-
-## Required Inputs
-- At least one recent run artifact set:
-  - `results.json`
-  - `report.md`
-- Preferably a baseline run artifact set for side-by-side checks.
-- Context about what changed (prompt, model/deployment, dataset, bundle, backend mode, environment).
-
-## Recommended Command Patterns
-Use available commands to generate fresh artifacts:
-
-```bash
-agentops eval run
-agentops report
-```
-
-For historical comparison, use manual artifact analysis until compare tooling is implemented.
-
-Planned/stubbed (not available yet):
-
-```bash
-agentops eval compare --runs ID1,ID2
-```
-
-## Expected Outputs
-- Fresh run outputs in the results directory, including `results.json` and `report.md`.
-- Manual comparison notes covering:
-  - Metric deltas
-  - Threshold pass/fail changes
-  - Any row-level concentration of failures
-
-## Interpretation Guidance
-- Separate statements into two buckets:
-  - Observations: directly supported by artifacts.
-  - Hypotheses: plausible causes requiring validation.
-- Compare baseline vs current using concrete fields from `results.json` and summary sections from `report.md`.
-- Prioritize impact-first interpretation:
-  - Which thresholds flipped
-  - Which metrics degraded most
-  - Whether degradation is broad or concentrated in specific rows/use-cases
-- End with actionable next checks (rerun with controlled changes, validate dataset consistency, verify configuration inputs).
-
-## Guardrails
-- Do not claim `agentops eval compare` is implemented.
-- Do not infer causality from correlation alone.
-- Do not mix speculation into factual summary sections.
-- Keep remediation advice tied to reproducible checks using generated artifacts.
-
-## Examples
-- "My pass rate dropped after changing model deployment."
-  - Re-run with `agentops eval run`, regenerate with `agentops report`, compare current artifacts to baseline, report factual deltas, then propose controlled rollback/retest.
-- "Can you compare run 42 and 43?"
-  - Explain `agentops eval compare --runs ...` is planned/stubbed; perform manual comparison of each run's `results.json` and `report.md`.
-- "Why is CI failing now?"
-  - Identify which thresholds now fail in current artifacts, then list likely causes as hypotheses and propose targeted confirmation steps.
diff --git a/.github/skills/observability-triage/SKILL.md b/.github/skills/observability-triage/SKILL.md
deleted file mode 100644
index 1806abe..0000000
--- a/.github/skills/observability-triage/SKILL.md
+++ /dev/null
@@ -1,68 +0,0 @@
----
-name: observability-triage
-description: Guide users on observability and triage workflows for AgentOps while accurately reflecting current CLI maturity. Trigger when users ask about tracing, monitoring, dashboards, alerts, run health, or production triage for evaluations. Common phrases: "set up tracing", "monitor evals", "create alerts", "triage failed evaluations", "observability for agentops". Current commands to anchor troubleshooting: `agentops eval run`, `agentops report`. Planned/stubbed commands: `agentops trace init`, `agentops monitor setup|dashboard|alert`.
----
-
-# Observability Triage
-
-## Purpose
-Help Copilot provide honest, practical observability guidance: use current reporting artifacts today and frame tracing/monitoring commands as planned future workflow.
-
-## When to Use
-- User asks how to monitor ongoing evaluation quality.
-- User asks for tracing setup in AgentOps.
-- User asks for dashboards/alerts around regressions.
-- User needs triage steps after an unexpected evaluation outcome.
-
-## Required Inputs
-- Current run artifacts (`results.json`, `report.md`) from the run under investigation.
-- Optional prior run artifacts for trend context.
-- Deployment/runtime context (backend target, environment, notable recent changes).
-
-## Recommended Command Patterns
-Use currently implemented commands for triage artifact generation:
-
-```bash
-agentops eval run
-agentops report
-```
-
-Planned/stubbed observability commands (not implemented yet):
-
-```bash
-agentops trace init
-agentops monitor setup
-agentops monitor dashboard
-agentops monitor alert
-```
-
-## Expected Outputs
-Current state:
-- `results.json` and `report.md` as primary triage artifacts.
-- Optional `cloud_evaluation.json` in cloud evaluation mode.
-
-Future direction (planned):
-- Tracing initialization workflow.
-- Monitoring setup, dashboard views, and alert configuration.
-
-## Interpretation Guidance
-- Use `report.md` for quick operational triage (what failed, how severe).
-- Use `results.json` for detailed metric and threshold inspection.
-- When users ask for observability features not yet implemented, provide:
-  - Explicit status: planned/stubbed.
-  - Immediate fallback: artifact-based troubleshooting.
-  - Suggested preparation: keep run artifacts organized for future compare/monitor automation.
-
-## Guardrails
-- Do not present tracing or monitoring commands as available today.
-- Do not imply real-time dashboards/alerts currently exist in CLI.
-- Always pivot unsupported requests to concrete, available outputs (`results.json`, `report.md`).
-- Keep language explicit about current capability vs roadmap.
-
-## Examples
-- "How do I set up tracing for AgentOps?"
-  - Explain `agentops trace init` is planned/stubbed; use current eval/report outputs for troubleshooting today.
-- "Can AgentOps create monitoring alerts right now?"
-  - State `agentops monitor setup|dashboard|alert` are planned/stubbed; recommend run/report artifact checks for current triage.
-- "What should I do after a sudden quality drop?"
-  - Run `agentops eval run`, regenerate with `agentops report`, inspect threshold and metric changes, then define follow-up checks.
diff --git a/.github/skills/release-management/SKILL.md b/.github/skills/release-management/SKILL.md
new file mode 100644
index 0000000..91838be
--- /dev/null
+++ b/.github/skills/release-management/SKILL.md
@@ -0,0 +1,266 @@
+---
+name: release-management
+description: Guide maintainers and contributors through branching, versioning, changelog updates, and publishing agentops-toolkit. Trigger when users ask about branching strategy, creating a release, version tagging, publishing to PyPI, updating the changelog, cutting a release, opening a PR, or syncing a fork. Common phrases: "cut a release", "how do I publish", "create release branch", "tag a version", "update changelog", "release process", "bump version", "what branch should I use", "feature branch", "prepare release".
+---
+
+# Release Management
+
+## Purpose
+Guide contributors and maintainers through the AgentOps branching strategy, versioning conventions, changelog lifecycle, and PyPI release process.
+
+## When to Use
+- User asks what branch to base work on or where to raise a PR.
+- User asks how to create a feature or release branch.
+- User asks how to prepare a release or cut a version.
+- User asks how to update the changelog.
+- User asks how to tag a version or publish to PyPI.
+- User asks how to sync their fork after a release.
+- Instructions about branching or versioning are ambiguous.
+
+---
+
+## Branching Model
+
+| Branch | Purpose |
+|---|---|
+| `main` | Always stable and deployment-ready. Only receives merges from `release/vx.y.z` branches. |
+| `develop` | Integration branch. All feature PRs target here. |
+| `release/vx.y.z` | Created by maintainers from `develop` when a release is ready to ship. |
+| `feature/<name>` | Created by contributors from `develop` for all new work. |
+
+**Default rule:** unless explicitly told otherwise, all work starts from `develop`.
+
+---
+
+## Feature Development Workflow
+
+### Branch naming
+```
+feature/<short-description>
+```
+Examples: `feature/conversation-metadata`, `feature/add-evaluation-logging`
+
+### Flow
+1. Start from `develop`
+2. Create `feature/<name>`
+3. Implement changes
+4. Commit with [conventional commit messages](#commit-guidelines)
+5. Open PR → `develop`
+
+### PR contract
+- Source: `feature/*`
+- Target: `develop`
+- Never open a feature PR directly to `main`
+
+---
+
+## Release Workflow (Maintainers)
+
+### Release branch naming
+```
+release/vx.y.z
+```
+Examples: `release/v2.4.2`, `release/v0.2.0`
+
+### Flow
+
+**Preferred: One-click via Cut Release workflow**
+1. Confirm `develop` is green (CI passes) and all intended changes are merged.
+2. Go to **Actions** tab → **Cut Release** → **Run workflow** → enter version (e.g. `0.2.0`, no `v` prefix).
+3. The workflow automatically:
+   - Creates `release/v0.2.0` from `develop`
+   - Updates `CHANGELOG.md` (`[Unreleased]` → `[0.2.0] - YYYY-MM-DD`)
+   - Pushes the branch (triggers staging pipeline automatically)
+   - Opens a PR: `release/v0.2.0` → `main`
+4. Wait for staging pipeline to pass (build → TestPyPI → verify).
+5. Get the PR reviewed and merge into `main`.
+6. Tag the release on `main` — this triggers the production release pipeline:
+   ```bash
+   git checkout main
+   git pull origin main
+   git tag v0.2.0
+   git push origin v0.2.0
+   ```
+7. Approve the PyPI publish in the GitHub Actions UI when prompted.
+8. Sync `develop` after release:
+   ```bash
+   git checkout develop
+   git pull origin develop
+   git merge main
+   git push origin develop
+   ```
+9. Delete the release branch:
+   ```bash
+   git push origin --delete release/v0.2.0
+   git branch -d release/v0.2.0
+   ```
+
+**Alternative: Manual release branch creation**
+1. Confirm `develop` is green.
+2. Create release branch from `develop`:
+   ```bash
+   git checkout develop
+   git pull origin develop
+   git checkout -b release/v0.2.0
+   ```
+3. Update `CHANGELOG.md` — see [Changelog Lifecycle](#changelog-lifecycle) below.
+4. Commit and push:
+   ```bash
+   git add CHANGELOG.md
+   git commit -m "chore: prepare release 0.2.0"
+   git push origin release/v0.2.0
+   ```
+   This triggers the staging pipeline automatically.
+5. Open PR: `release/v0.2.0` → `main`.
+6. After staging passes and review is complete, merge to `main`.
+7. Tag and push (triggers production release pipeline):
+   ```bash
+   git checkout main
+   git pull origin main
+   git tag v0.2.0
+   git push origin v0.2.0
+   ```
+8. Approve the PyPI publish, sync `develop`, and delete the release branch (same as steps 7–9 of the Cut Release flow above).
+
+### Release PR contract
+- Source: `release/vx.y.z`
+- Target: `main`
+- Do NOT introduce new feature work in a release branch — only changelog updates.
+
+---
+
+## Versioning Rules
+
+Follow [Semantic Versioning](https://semver.org/): `MAJOR.MINOR.PATCH`
+
+| Type | When to use |
+|---|---|
+| `PATCH` | Bug fixes and minor backward-compatible improvements |
+| `MINOR` | New backward-compatible features |
+| `MAJOR` | Breaking changes to the CLI contract or output schema |
+
+Version numbers follow a consistent pattern across artifacts. The git tag, GitHub Release, and release branch use a `v` prefix; the changelog heading and the Cut Release workflow input use the bare `x.y.z`. Versioning is fully automatic via **setuptools-scm** — there is no `version` field in `pyproject.toml`.
+
+| Artifact | Format | Example |
+|---|---|---|
+| Release branch | `release/vx.y.z` | `release/v2.4.2` |
+| `pyproject.toml` | `dynamic = ["version"]` | Version derived from git tags via setuptools-scm |
+| Git tag / GitHub Release | `vx.y.z` | `v2.4.2` |
+| Changelog heading | `## [x.y.z] - YYYY-MM-DD` | `## [2.4.2] - 2026-03-22` |
+
+**Never add `version = "..."` to `pyproject.toml`** — this will conflict with setuptools-scm.
+
+### Version on `develop`
+- The version on `develop` is derived automatically by setuptools-scm (e.g., `0.1.3.dev12`).
+- Do NOT preemptively bump any version on `develop` for an upcoming release.
+- Feature branches must not add or edit a version in `pyproject.toml`; it stays `dynamic = ["version"]`.
+
+---
+
+## Changelog Lifecycle
+
+The changelog follows a two-phase lifecycle: development on `develop`, finalization on `release/vx.y.z`.
+
+### Development phase (`develop`)
+- Always maintain an `[Unreleased]` section at the top.
+- Add all user-visible changes under `[Unreleased]`.
+- Do NOT assign a version number on `develop`.
+- Do NOT create future version sections on `develop`.
+
+```markdown
+## [Unreleased]
+
+### Added
+- New orchestration strategy for multi-turn evaluations.
+
+### Fixed
+- Corrected resource cleanup order in Foundry backend shutdown.
+```
+
+### Release phase (`release/vx.y.z`)
+When creating the release branch, convert `[Unreleased]` into the versioned release entry, then add a fresh empty `[Unreleased]` section above it.
+
+**Before:**
+```markdown
+## [Unreleased]
+
+### Added
+- New orchestration strategy...
+```
+
+**After:**
+```markdown
+## [Unreleased]
+
+## [2.4.2] - 2026-03-22
+
+### Added
+- New orchestration strategy...
+```
+
+All release artifacts must be in sync:
+
+| Artifact | Value |
+|---|---|
+| Release branch | `release/v2.4.2` |
+| Changelog heading | `## [2.4.2] - YYYY-MM-DD` |
+| Git tag / GitHub Release | `v2.4.2` |
+
+### Changelog sections
+Use when applicable: `Added`, `Changed`, `Fixed`, `Removed`, `Deprecated`, `Security`.
+
+### Writing style
+- Start each entry with a **bold title**, followed by a brief technical explanation.
+- Explain what changed and why it matters — include relevant technical context.
+- Avoid vague wording: no "minor updates", "improvements", or "fixes" as standalone entries.
+
+### Safety rules
+- Never remove the `[Unreleased]` section.
+- Never create more than one `[Unreleased]` section.
+- Never assign a release version on `develop`.
+- Never leave a release branch without converting `[Unreleased]` to the versioned entry.
+- Never mismatch version numbers across branch name, changelog, and tag.
+
+---
+
+## Commit Guidelines
+
+Use conventional commit format:
+
+```
+feat: add conversation metadata support
+fix: correct chat history persistence issue
+docs: update changelog for 2.4.2
+chore: prepare release 2.4.2
+```
+
+---
+
+## Required Secrets
+
+Set in GitHub repo Settings → Secrets and variables → Actions:
+
+| Secret | Purpose |
+|---|---|
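+| `PIPY_TOKEN` | PyPI API token scoped to `agentops-toolkit` — used by the production release pipeline on `v*` tag push |
+| `TESTPYPI_API_TOKEN` | TestPyPI API token — used for pre-release validation (staging pipeline on `release/*` branch push, and the TestPyPI verification step on tag push) |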
+
+---
+
+## Default Decision Logic
+
+| Situation | Action |
+|---|---|
+| Feature or code change | Base on `develop`, create `feature/*`, PR to `develop` |
+| Release preparation | Base on `develop`, create `release/vx.y.z`, update `CHANGELOG.md` only (the version comes from the git tag via setuptools-scm), PR to `main` |
+| Ambiguous instructions | Default to feature workflow on `develop`; do not assume a release unless explicitly requested |
+
+---
+
+## Guardrails
+- Never create feature branches from `main`.
+- Never open feature PRs to `main`.
+- Never mix new feature work into a release branch.
+- Never assign a release version on `develop`.
+- Never tag without a green CI run.
+- Never publish without running `python -m pytest tests/ -x -q` first.
diff --git a/.github/skills/run-evals/SKILL.md b/.github/skills/run-evals/SKILL.md
deleted file mode 100644
index 2bf5e0e..0000000
--- a/.github/skills/run-evals/SKILL.md
+++ /dev/null
@@ -1,72 +0,0 @@
----
-name: run-evals
-description: Guide users through running AgentOps evaluations end to end using implemented CLI commands. Trigger when users ask to initialize AgentOps, run an evaluation, regenerate a report, locate run.yaml, or summarize results.json/report.md. Common phrases: "run eval", "start agentops", "how do I use run.yaml", "regenerate report", "explain evaluation results". Relevant commands: `agentops init [--path DIR]`, `agentops eval run`, `agentops report`.
----
-
-# Run Evaluations
-
-## Purpose
-Help Copilot guide users through the currently implemented AgentOps evaluation workflow from workspace setup to report interpretation.
-
-## When to Use
-- User wants to start using AgentOps in a repo.
-- User asks how to run an evaluation with `run.yaml`.
-- User asks how to regenerate `report.md` from `results.json`.
-- User asks where evaluation outputs are written.
-- User asks for a quick summary of the latest run outcome.
-
-## Required Inputs
-- Project root path where `agentops` commands should run.
-- Evaluation run config path when not using default (commonly `.agentops/run.yaml`).
-- Optional output directory if the user wants non-default output placement.
-- Access to generated artifacts for interpretation:
-  - `results.json`
-  - `report.md`
-
-## Recommended Command Patterns
-Use only implemented commands.
-
-```bash
-agentops init [--path <dir>]
-agentops eval run
-agentops report
-```
-
-Common operational sequence:
-1. Initialize workspace: `agentops init`
-2. Confirm run config exists (typically `.agentops/run.yaml`).
-3. Execute evaluation: `agentops eval run`
-4. Regenerate markdown report when needed: `agentops report`
-5. Inspect outputs under `.agentops/results/latest/`.
-
-## Expected Outputs
-- `results.json` (machine-readable normalized results)
-- `report.md` (human-readable summary)
-- In cloud evaluation flows, `cloud_evaluation.json` may also be present.
-
-Typical latest pointers:
-- `.agentops/results/latest/results.json`
-- `.agentops/results/latest/report.md`
-
-## Interpretation Guidance
-- Start with `report.md` for a quick pass/fail narrative and threshold view.
-- Use `results.json` for metric-level details, row-level checks, and automation.
-- Distinguish execution states clearly:
-  - Run completed with thresholds passing.
-  - Run completed with threshold failures.
-  - Run/config/runtime error.
-- When summarizing, report concrete facts first (metrics and threshold outcomes), then brief interpretation.
-
-## Guardrails
-- Do not invent commands or flags beyond documented CLI behavior.
-- Do not present planned commands as available.
-- If a user asks for compare or run-history commands, state they are planned/stubbed and pivot to artifact inspection using `results.json` and `report.md`.
-- Keep guidance operational; avoid architecture duplication from `.github/copilot-instructions.md`.
-
-## Examples
-- "Initialize and run an eval in this repo."
-  - Use: `agentops init`, then `agentops eval run`, then `agentops report`.
-- "Where is my run config?"
-  - Check `.agentops/run.yaml` first; if custom path is used, run with that config.
-- "Summarize my last evaluation."
-  - Read `.agentops/results/latest/report.md` and confirm details with `.agentops/results/latest/results.json`.
diff --git a/.github/workflows/_build.yml b/.github/workflows/_build.yml
new file mode 100644
index 0000000..479673c
--- /dev/null
+++ b/.github/workflows/_build.yml
@@ -0,0 +1,61 @@
+# AgentOps Toolkit — Reusable Build Workflow
+#
+# Workflows:
+#   1. ci.yml          — Lint + test on every push/PR; publish dev builds to TestPyPI on develop
+#   2. _build.yml      — Reusable build (lint + test + package), called by staging and release
+#   3. staging.yml     — Staging: release/* branch → TestPyPI → verify
+#   4. release.yml     — Production: v* tag → TestPyPI → verify → PyPI → GitHub Release
+#   5. cut-release.yml — Manual dispatch: create release branch + PR from develop
+#
+# Called by staging.yml and release.yml via workflow_call.
+# Runs ruff lint and tests, builds the package (version via setuptools-scm),
+# and uploads the dist/ artifacts for downstream jobs.
+#
+# Usage in caller workflows:
+#   jobs:
+#     build:
+#       uses: ./.github/workflows/_build.yml
+
+name: _build
+
+on:
+  workflow_call:
+
+jobs:
+  build:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+        with:
+          fetch-depth: 0 # Full history required for setuptools-scm
+
+      - name: Install uv
+        uses: astral-sh/setup-uv@v6
+        with:
+          version: ">=0.9.0"
+
+      - name: Set up Python
+        run: uv python install 3.12
+
+      - name: Install dependencies
+        run: uv sync --group dev
+
+      - name: Lint with ruff
+        run: uv run ruff check src/ tests/
+
+      - name: Run tests
+        run: uv run pytest tests/ -v --tb=short
+
+      - name: Build package
+        run: uv build
+
+      - name: Show build info
+        run: |
+          ls -la dist/
+          uv run python -c "from importlib.metadata import version; print(f'Version: {version(\"agentops-toolkit\")}')"
+
+      - name: Upload build artifacts
+        uses: actions/upload-artifact@v4
+        with:
+          name: dist
+          path: dist/
diff --git a/.github/workflows/agentops-eval.yml b/.github/workflows/agentops-eval.yml
deleted file mode 100644
index 9392710..0000000
--- a/.github/workflows/agentops-eval.yml
+++ /dev/null
@@ -1,202 +0,0 @@
-# AgentOps Evaluation — GitHub Actions Workflow
-#
-# Runs `agentops eval run` after CI passes, or on manual dispatch.
-# Uploads evaluation artifacts (results.json, report.md, logs).
-# Fails the job when thresholds are not met (exit code 2) or on errors (exit code 1).
-#
-# Dependency: This workflow runs AFTER the CI workflow completes successfully.
-# On manual dispatch it runs independently (no CI gate).
-#
-# Authentication:
-#   Uses Workload Identity Federation (OIDC) — no secrets to rotate.
-#   Set AZURE_CLIENT_ID, AZURE_TENANT_ID, and AZURE_SUBSCRIPTION_ID as GitHub
-#   repository variables (not secrets). See docs/ci-github-actions.md for setup.
-#
-# Prerequisites:
-#   1. An initialised .agentops/ workspace in your repo (run `agentops init`)
-#   2. A valid .agentops/run.yaml pointing to your bundle and dataset
-#   3. Azure federated credential configured for your GitHub repo (see docs/ci-github-actions.md)
-#
-# Copy this file into your consumer repo at .github/workflows/agentops-eval.yml
-
-name: AgentOps Evaluation
-
-on:
-  workflow_run:
-    workflows: ["CI"]
-    types: [completed]
-  workflow_dispatch:
-    inputs:
-      config:
-        description: "Path to run.yaml (default: .agentops/run.yaml)"
-        required: false
-        default: ".agentops/run.yaml"
-      output:
-        description: "Output directory for results"
-        required: false
-        default: ""
-
-permissions:
-  contents: read
-  id-token: write # Required for OIDC / Workload Identity Federation
-  pull-requests: write # Required for optional PR comment step
-
-env:
-  PYTHON_VERSION: "3.11"
-
-jobs:
-  evaluate:
-    name: Run AgentOps Evaluation
-    runs-on: ubuntu-latest
-    # Skip if CI failed (workflow_run triggers on all completions)
-    if: >-
-      github.event_name == 'workflow_dispatch' ||
-      github.event.workflow_run.conclusion == 'success'
-
-    env:
-      # Foundry project endpoint — set as a GitHub repository secret
-      AZURE_AI_FOUNDRY_PROJECT_ENDPOINT: ${{ secrets.AZURE_AI_FOUNDRY_PROJECT_ENDPOINT }}
-
-    steps:
-      - name: Checkout repository
-        uses: actions/checkout@v4
-
-      # ----------------------------------------------------------------
-      # Azure login via Workload Identity Federation (OIDC) — recommended
-      # Uses a federated credential; no client secret to manage.
-      # Set AZURE_CLIENT_ID, AZURE_TENANT_ID, AZURE_SUBSCRIPTION_ID as
-      # GitHub repository variables (Settings → Secrets → Variables).
-      # ----------------------------------------------------------------
-      - name: Azure login (OIDC)
-        uses: azure/login@v2
-        with:
-          client-id: ${{ vars.AZURE_CLIENT_ID }}
-          tenant-id: ${{ vars.AZURE_TENANT_ID }}
-          subscription-id: ${{ vars.AZURE_SUBSCRIPTION_ID }}
-
-      - name: Set up Python ${{ env.PYTHON_VERSION }}
-        uses: actions/setup-python@v5
-        with:
-          python-version: ${{ env.PYTHON_VERSION }}
-          cache: "pip"
-
-      - name: Install agentops-toolkit
-        run: pip install agentops-toolkit
-
-      - name: Resolve config path
-        id: config
-        run: |
-          CONFIG="${{ github.event.inputs.config || '.agentops/run.yaml' }}"
-          echo "path=$CONFIG" >> "$GITHUB_OUTPUT"
-
-      - name: Resolve output directory
-        id: output
-        run: |
-          OUTPUT="${{ github.event.inputs.output }}"
-          if [ -n "$OUTPUT" ]; then
-            echo "flag=--output $OUTPUT" >> "$GITHUB_OUTPUT"
-          else
-            echo "flag=" >> "$GITHUB_OUTPUT"
-          fi
-
-      - name: Run evaluation
-        id: eval
-        run: |
-          set +e
-          agentops eval run --config "${{ steps.config.outputs.path }}" ${{ steps.output.outputs.flag }}
-          EXIT_CODE=$?
-          echo "exit_code=$EXIT_CODE" >> "$GITHUB_OUTPUT"
-
-          # Surface the exit code meaning
-          if [ $EXIT_CODE -eq 0 ]; then
-            echo "## ✅ Evaluation Passed" >> "$GITHUB_STEP_SUMMARY"
-            echo "All thresholds met." >> "$GITHUB_STEP_SUMMARY"
-          elif [ $EXIT_CODE -eq 2 ]; then
-            echo "## ❌ Evaluation Failed — Threshold(s) Not Met" >> "$GITHUB_STEP_SUMMARY"
-            echo "One or more evaluation thresholds were not satisfied." >> "$GITHUB_STEP_SUMMARY"
-          else
-            echo "## ⚠️ Evaluation Error" >> "$GITHUB_STEP_SUMMARY"
-            echo "A runtime or configuration error occurred (exit code $EXIT_CODE)." >> "$GITHUB_STEP_SUMMARY"
-          fi
-
-          # Append report.md to job summary if it exists
-          REPORT=".agentops/results/latest/report.md"
-          if [ -f "$REPORT" ]; then
-            echo "" >> "$GITHUB_STEP_SUMMARY"
-            cat "$REPORT" >> "$GITHUB_STEP_SUMMARY"
-          fi
-
-          exit $EXIT_CODE
-
-      - name: Upload evaluation artifacts
-        if: always()
-        uses: actions/upload-artifact@v4
-        with:
-          name: agentops-eval-results
-          path: |
-            .agentops/results/latest/results.json
-            .agentops/results/latest/report.md
-            .agentops/results/latest/backend_metrics.json
-            .agentops/results/latest/cloud_evaluation.json
-            .agentops/results/latest/backend.stdout.log
-            .agentops/results/latest/backend.stderr.log
-          if-no-files-found: warn
-
-      - name: Post report as PR comment
-        if: always() && github.event.workflow_run.event == 'pull_request'
-        uses: actions/github-script@v7
-        with:
-          script: |
-            const fs = require('fs');
-            const reportPath = '.agentops/results/latest/report.md';
-
-            if (!fs.existsSync(reportPath)) {
-              console.log('No report.md found — skipping PR comment.');
-              return;
-            }
-
-            const body = fs.readFileSync(reportPath, 'utf8');
-            const marker = '<!-- agentops-eval-report -->';
-            const commentBody = `${marker}\n${body}`;
-
-            // Find the PR associated with the workflow_run head branch
-            const { data: pulls } = await github.rest.pulls.list({
-              owner: context.repo.owner,
-              repo: context.repo.repo,
-              head: `${context.repo.owner}:${context.payload.workflow_run.head_branch}`,
-              state: 'open',
-            });
-
-            if (pulls.length === 0) {
-              console.log('No open PR found for this branch — skipping comment.');
-              return;
-            }
-
-            const prNumber = pulls[0].number;
-
-            // Find existing comment to update (avoid duplicates)
-            const { data: comments } = await github.rest.issues.listComments({
-              owner: context.repo.owner,
-              repo: context.repo.repo,
-              issue_number: prNumber,
-            });
-
-            const existing = comments.find(c => c.body.includes(marker));
-
-            if (existing) {
-              await github.rest.issues.updateComment({
-                owner: context.repo.owner,
-                repo: context.repo.repo,
-                comment_id: existing.id,
-                body: commentBody,
-              });
-              console.log(`Updated existing PR comment #${existing.id}`);
-            } else {
-              await github.rest.issues.createComment({
-                owner: context.repo.owner,
-                repo: context.repo.repo,
-                issue_number: prNumber,
-                body: commentBody,
-              });
-              console.log('Created new PR comment with evaluation report.');
-            }
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index cf43a52..fad45d2 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -1,16 +1,19 @@
 # AgentOps Toolkit — CI/CD Pipelines
 #
 # Workflows:
-#   1. ci.yml          — Lint + test on every push/PR
-#   2. release.yml     — Build + publish to PyPI on tag push
+#   1. ci.yml          — Lint + test on every push/PR; publish dev builds to TestPyPI on develop
+#   2. _build.yml      — Reusable build (test + package), called by staging and release
+#   3. staging.yml     — Staging: release/* branch → TestPyPI → verify
+#   4. release.yml     — Production: v* tag → TestPyPI → verify → PyPI → GitHub Release
+#   5. cut-release.yml — Manual dispatch: create release branch + PR from develop
 
 name: CI
 
 on:
   push:
-    branches: [main, develop]
+    branches: [develop]
   pull_request:
-    branches: [main, develop]
+    branches: [develop]
 
 permissions:
   contents: read
@@ -44,11 +47,8 @@ jobs:
     strategy:
       fail-fast: false
       matrix:
-        os: [ubuntu-latest, windows-latest, macos-latest]
+        os: [ubuntu-latest, windows-latest]
         python-version: ["3.11", "3.12", "3.13"]
-        exclude:
-          - os: macos-latest
-            python-version: "3.11"
     steps:
       - uses: actions/checkout@v4
 
@@ -98,3 +98,91 @@ jobs:
         with:
           name: coverage-report
           path: coverage.xml
+
+  # Publish dev build to TestPyPI on every push to develop (not PRs)
+  publish-dev:
+    if: github.event_name == 'push' && github.ref == 'refs/heads/develop'
+    needs: [lint, test]
+    runs-on: ubuntu-latest
+    environment: staging
+    steps:
+      - uses: actions/checkout@v4
+        with:
+          fetch-depth: 0  # Full history for setuptools-scm
+
+      - name: Install uv
+        uses: astral-sh/setup-uv@v6
+        with:
+          version: ">=0.9.0"
+
+      - name: Set up Python
+        run: uv python install 3.12
+
+      - name: Install dependencies
+        run: uv sync --group dev
+
+      - name: Build package
+        run: uv build
+
+      - name: Show version
+        run: |
+          ls -la dist/
+          uv run python -c "from importlib.metadata import version; print(f'Dev version: {version(\"agentops-toolkit\")}')" 
+
+      - name: Publish to TestPyPI
+        uses: pypa/gh-action-pypi-publish@release/v1
+        with:
+          repository-url: https://test.pypi.org/legacy/
+          password: ${{ secrets.TEST_PYPI_TOKEN }}
+          verbose: true
+          skip-existing: true
+
+  # Verify the dev build installs correctly
+  verify-dev:
+    if: github.event_name == 'push' && github.ref == 'refs/heads/develop'
+    needs: publish-dev
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+        with:
+          fetch-depth: 0
+
+      - name: Set up Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: "3.12"
+
+      - name: Determine expected version
+        id: version
+        run: |
+          pip install setuptools-scm
+          VERSION=$(python -m setuptools_scm)
+          echo "version=$VERSION" >> "$GITHUB_OUTPUT"
+          echo "Expected dev version: $VERSION"
+
+      - name: Install from TestPyPI
+        run: |
+          for i in 1 2 3 4 5; do  # TestPyPI may need a moment to index the upload
+            echo "Attempt $i: installing agentops-toolkit==${{ steps.version.outputs.version }}"
+            pip install \
+              "agentops-toolkit==${{ steps.version.outputs.version }}" \
+              --index-url https://test.pypi.org/simple/ \
+              --extra-index-url https://pypi.org/simple/ \
+              && exit 0
+            if [ "$i" -lt 5 ]; then echo "Not available yet, waiting 30s..."; sleep 30; fi
+          done
+          echo "::error::Could not install agentops-toolkit==${{ steps.version.outputs.version }} from TestPyPI after 5 attempts"; exit 1
+
+      - name: Smoke test
+        run: |
+          agentops --version
+          agentops --help
+
+      - name: Summary
+        run: |
+          echo "## ✅ Dev build published and verified" >> "$GITHUB_STEP_SUMMARY"
+          echo "" >> "$GITHUB_STEP_SUMMARY"
+          echo "- Version: \`${{ steps.version.outputs.version }}\`" >> "$GITHUB_STEP_SUMMARY"
+          echo "- TestPyPI: https://test.pypi.org/project/agentops-toolkit/${{ steps.version.outputs.version }}/" >> "$GITHUB_STEP_SUMMARY"
+          echo "" >> "$GITHUB_STEP_SUMMARY"
+          echo "Install: \`pip install agentops-toolkit==${{ steps.version.outputs.version }} --index-url https://test.pypi.org/simple/ --extra-index-url https://pypi.org/simple/\`" >> "$GITHUB_STEP_SUMMARY"
diff --git a/.github/workflows/cut-release.yml b/.github/workflows/cut-release.yml
new file mode 100644
index 0000000..9d2cbc4
--- /dev/null
+++ b/.github/workflows/cut-release.yml
@@ -0,0 +1,141 @@
+# AgentOps Toolkit — Cut Release
+#
+# Workflows:
+#   1. ci.yml          — Lint + test on every push/PR; publish dev builds to TestPyPI on develop
+#   2. _build.yml      — Reusable build (test + package), called by staging and release
+#   3. staging.yml     — Staging: release/* branch → TestPyPI → verify
+#   4. release.yml     — Production: v* tag → TestPyPI → verify → PyPI → GitHub Release
+#   5. cut-release.yml — Manual dispatch: create release branch + PR from develop
+#
+# One-click release branch creation. Triggered manually from the Actions tab.
+# Creates a release branch from develop, updates CHANGELOG.md, and opens a PR to main.
+# Pushes made with GITHUB_TOKEN do not start push-triggered workflows, so this
+# workflow dispatches staging.yml on the new release branch explicitly.
+#
+# Usage:
+#   Actions tab → "Cut Release" → "Run workflow" → enter version (e.g. 0.2.0)
+
+name: Cut Release
+
+on:
+  workflow_dispatch:
+    inputs:
+      version:
+        description: "Release version (e.g. 0.2.0) — no 'v' prefix"
+        required: true
+        type: string
+
+permissions:
+  contents: write
+  pull-requests: write
+  actions: write  # Required to dispatch staging.yml on the release branch
+
+jobs:
+  cut-release:
+    runs-on: ubuntu-latest
+    steps:
+      - name: Validate version format
+        env:
+          # Pass the input through the environment instead of interpolating it
+          # into the script, so a crafted value cannot inject shell commands.
+          VERSION: ${{ inputs.version }}
+        run: |
+          if [[ ! "$VERSION" =~ ^[0-9]+\.[0-9]+\.[0-9]+$ ]]; then
+            echo "::error::Version must be in semver format (e.g. 0.2.0), got: $VERSION"
+            exit 1
+          fi
+          echo "version=$VERSION" >> "$GITHUB_ENV"
+
+      - name: Checkout develop
+        uses: actions/checkout@v4
+        with:
+          ref: develop
+          fetch-depth: 0
+          token: ${{ secrets.GITHUB_TOKEN }}
+
+      - name: Check release branch does not exist
+        run: |
+          if git ls-remote --exit-code origin "refs/heads/release/v${{ env.version }}" >/dev/null 2>&1; then
+            echo "::error::Branch release/v${{ env.version }} already exists. Delete it first or use a different version."
+            exit 1
+          fi
+
+      - name: Check CHANGELOG has Unreleased section
+        run: |
+          if ! grep -q '## \[Unreleased\]' CHANGELOG.md; then
+            echo "::error::CHANGELOG.md is missing the [Unreleased] section."
+            exit 1
+          fi
+
+      - name: Create release branch
+        run: |
+          git checkout -b "release/v${{ env.version }}"
+
+      - name: Update CHANGELOG
+        run: |
+          DATE=$(date +%Y-%m-%d)
+          # Replace [Unreleased] with versioned section, add fresh Unreleased above
+          sed -i "s/## \[Unreleased\]/## [Unreleased]\n\n## [${{ env.version }}] - $DATE/" CHANGELOG.md
+
+      - name: Configure git
+        run: |
+          git config user.name "github-actions[bot]"
+          git config user.email "41898282+github-actions[bot]@users.noreply.github.com"
+
+      - name: Commit and push
+        run: |
+          git add CHANGELOG.md
+          git commit -m "chore: prepare release ${{ env.version }}"
+          git push origin "release/v${{ env.version }}"
+
+      - name: Trigger staging pipeline
+        env:
+          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+        run: |
+          # The push above used GITHUB_TOKEN, which does not start push-triggered
+          # workflows; dispatch staging.yml (it declares workflow_dispatch) instead.
+          gh workflow run staging.yml --ref "release/v${{ env.version }}"
+
+      - name: Create PR to main
+        env:
+          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+        run: |
+          gh pr create \
+            --base main \
+            --head "release/v${{ env.version }}" \
+            --title "Release v${{ env.version }}" \
+            --body "## Release v${{ env.version }}
+
+          Automated release branch created from \`develop\`.
+
+          ### What happened
+          - Branch \`release/v${{ env.version }}\` created from \`develop\`
+          - \`CHANGELOG.md\` updated: \`[Unreleased]\` → \`[${{ env.version }}]\`
+          - Staging pipeline triggered automatically (build → TestPyPI → verify)
+
+          ### Next steps
+          1. Wait for the **Staging** pipeline to pass
+          2. Review and approve this PR
+          3. Merge to \`main\`
+          4. Tag and push: \`git tag v${{ env.version }} && git push origin v${{ env.version }}\`
+          5. Approve the PyPI publish in the **Release** workflow
+          6. Sync develop: \`git checkout develop && git merge main && git push origin develop\`
+
+          ### Checklist
+          - [ ] Staging pipeline passes (build + TestPyPI + verify)
+          - [ ] CHANGELOG entries reviewed
+          - [ ] PR approved and merged to main
+          - [ ] Tag \`v${{ env.version }}\` pushed
+          - [ ] PyPI publish approved
+          - [ ] develop synced from main"
+
+      - name: Summary
+        run: |
+          echo "## ✅ Release branch created" >> "$GITHUB_STEP_SUMMARY"
+          echo "" >> "$GITHUB_STEP_SUMMARY"
+          echo "- Branch: \`release/v${{ env.version }}\`" >> "$GITHUB_STEP_SUMMARY"
+          echo "- CHANGELOG updated with version **${{ env.version }}**" >> "$GITHUB_STEP_SUMMARY"
+          echo "- PR opened: \`release/v${{ env.version }}\` → \`main\`" >> "$GITHUB_STEP_SUMMARY"
+          echo "- Staging pipeline triggered automatically" >> "$GITHUB_STEP_SUMMARY"
+          echo "" >> "$GITHUB_STEP_SUMMARY"
+          echo "### Next: wait for staging, then tag \`v${{ env.version }}\` to publish" >> "$GITHUB_STEP_SUMMARY"
diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml
index 1120a55..402ac96 100644
--- a/.github/workflows/release.yml
+++ b/.github/workflows/release.yml
@@ -1,64 +1,123 @@
-# Publish to PyPI on version tag push
+# AgentOps Toolkit — Production Release
 #
-# Trigger: push a tag like v0.1.0
-#   git tag v0.1.0
-#   git push origin v0.1.0
+# Workflows:
+#   1. ci.yml          — Lint + test on every push/PR; publish dev builds to TestPyPI on develop
+#   2. _build.yml      — Reusable build (test + package), called by staging and release
+#   3. staging.yml     — Staging: release/* branch → TestPyPI → verify
+#   4. release.yml     — Production: v* tag → TestPyPI → verify → PyPI → GitHub Release
 #
-# Uses PyPI API tokens stored as GitHub repository secrets.
+# Triggered by v* tag pushes (e.g. v0.2.0).
+# Calls the reusable _build.yml, then publishes to TestPyPI for final
+# verification, then to PyPI (requires 'release' environment approval),
+# and finally creates a GitHub Release.
+#
+# Versioning:
+#   Uses setuptools-scm — version is derived from the git tag automatically.
+#   Tagged commit v0.2.0 → version 0.2.0. No manual version in pyproject.toml.
+#
+# Required GitHub secrets (in respective environments):
+#   TEST_PYPI_TOKEN  — TestPyPI API token (environment: staging)
+#   PYPI_TOKEN       — PyPI API token     (environment: release)
+#
+# Required GitHub environments:
+#   staging  — for TestPyPI publish (optional approval)
+#   release  — for PyPI publish (requires approval from designated reviewers)
 #
 # Setup:
-#   1. Go to https://pypi.org/manage/account/token/ → Create token (scope: project "agentops-toolkit")
-#   2. Go to https://test.pypi.org/manage/account/token/ → Create token
-#   3. In GitHub repo → Settings → Secrets and variables → Actions:
-#      - Add secret PYPI_API_TOKEN      (value: pypi-xxxx...)
-#      - Add secret TESTPYPI_API_TOKEN  (value: pypi-xxxx...)
+#   1. https://test.pypi.org/manage/account/token/  → Create TEST_PYPI_TOKEN
+#   2. https://pypi.org/manage/account/token/        → Create PYPI_TOKEN
+#   3. GitHub repo → Settings → Secrets → Actions    → Add secrets to environments
+#   4. GitHub repo → Settings → Environments         → Create "release" with required reviewers
 
-name: Release to PyPI
+name: Release
 
 on:
   push:
     tags:
       - "v*"
+  workflow_dispatch:
 
 permissions:
-  contents: write # For GitHub Release
+  contents: write  # For GitHub Release
 
 jobs:
+
+  # Reusable build: test + package
   build:
+    uses: ./.github/workflows/_build.yml
+
+  # Publish to TestPyPI for final pre-release verification
+  publish-testpypi:
+    needs: build
     runs-on: ubuntu-latest
+    environment: staging
     steps:
-      - uses: actions/checkout@v4
-
-      - name: Install uv
-        uses: astral-sh/setup-uv@v6
+      - name: Download build artifacts
+        uses: actions/download-artifact@v4
         with:
-          version: ">=0.9.0"
-
-      - name: Set up Python
-        run: uv python install 3.12
+          name: dist
+          path: dist/
 
-      - name: Install dependencies
-        run: uv sync --group dev
+      - name: Publish to TestPyPI
+        uses: pypa/gh-action-pypi-publish@release/v1
+        with:
+          repository-url: https://test.pypi.org/legacy/
+          password: ${{ secrets.TEST_PYPI_TOKEN }}
+          verbose: true
+          skip-existing: true
 
-      - name: Run tests
-        run: uv run pytest tests/ -v --tb=short
+  # Install from TestPyPI and smoke-test the CLI
+  verify-testpypi:
+    needs: publish-testpypi
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+        with:
+          fetch-depth: 0
 
-      - name: Build package
-        run: uv build
+      - name: Set up Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: "3.12"
 
-      - name: Verify built artifacts
+      - name: Determine expected version
+        id: version
         run: |
-          ls -la dist/
-          uv run python -m zipfile -l dist/*.whl | head -20
+          pip install setuptools-scm
+          VERSION=$(python -m setuptools_scm)
+          echo "version=$VERSION" >> "$GITHUB_OUTPUT"
+          echo "Expected version: $VERSION"
 
-      - name: Upload build artifacts
-        uses: actions/upload-artifact@v4
-        with:
-          name: dist
-          path: dist/
+      - name: Install from TestPyPI
+        run: |
+          for i in 1 2 3 4 5; do  # TestPyPI may need a moment to index the upload
+            echo "Attempt $i: installing agentops-toolkit==${{ steps.version.outputs.version }}"
+            pip install \
+              "agentops-toolkit==${{ steps.version.outputs.version }}" \
+              --index-url https://test.pypi.org/simple/ \
+              --extra-index-url https://pypi.org/simple/ \
+              && exit 0
+            if [ "$i" -lt 5 ]; then echo "Not available yet, waiting 30s..."; sleep 30; fi
+          done
+          echo "::error::Could not install agentops-toolkit==${{ steps.version.outputs.version }} from TestPyPI after 5 attempts"; exit 1
+
+      - name: Smoke test — version and help
+        run: |
+          agentops --version
+          agentops --help
 
+      - name: Smoke test — init in temp directory
+        run: |
+          TMPDIR=$(mktemp -d)
+          cd "$TMPDIR"
+          agentops init
+          test -f .agentops/config.yaml
+          test -f .agentops/run.yaml
+          echo "✅ agentops init succeeded"
+
+  # Publish to PyPI — requires 'release' environment approval
   publish-pypi:
-    needs: build
+    needs: verify-testpypi
     runs-on: ubuntu-latest
     environment: release
     steps:
@@ -71,11 +130,12 @@ jobs:
       - name: Publish to PyPI
         uses: pypa/gh-action-pypi-publish@release/v1
         with:
-          password: ${{ secrets.PIPY_TOKEN }}
+          password: ${{ secrets.PYPI_TOKEN }}
           verbose: true
 
+  # Create GitHub Release with built artifacts
   github-release:
-    needs: [publish-pypi]
+    needs: publish-pypi
     runs-on: ubuntu-latest
     permissions:
       contents: write
diff --git a/.github/workflows/staging.yml b/.github/workflows/staging.yml
new file mode 100644
index 0000000..2a9987a
--- /dev/null
+++ b/.github/workflows/staging.yml
@@ -0,0 +1,112 @@
+# AgentOps Toolkit — Staging (TestPyPI)
+#
+# Workflows:
+#   1. ci.yml          — Lint + test on every push/PR; publish dev builds to TestPyPI on develop
+#   2. _build.yml      — Reusable build (test + package), called by staging and release
+#   3. staging.yml     — Staging: release/* branch → TestPyPI → verify
+#   4. release.yml     — Production: v* tag → TestPyPI → verify → PyPI → GitHub Release
+#
+# Triggered by pushes to release/* branches.
+# Calls the reusable _build.yml, publishes to TestPyPI, and verifies the
+# package installs correctly with a CLI smoke test.
+#
+# This workflow lets you iterate on a release branch and validate the
+# built package before tagging for production.
+#
+# Branch flow:
+#   develop → release/v0.2.0 → push → this workflow
+#     → build → TestPyPI → verify install → ✅ ready to merge and tag
+#
+# Versioning:
+#   Uses setuptools-scm — on a release branch 5 commits after the last tag,
+#   the version will be something like 0.2.0.dev5 (PEP 440 pre-release).
+#
+# Required GitHub secrets (environment: staging):
+#   TEST_PYPI_TOKEN  — TestPyPI API token
+#
+# Setup:
+#   1. https://test.pypi.org/manage/account/token/  → Create TEST_PYPI_TOKEN
+#   2. GitHub repo → Settings → Secrets → Actions    → Add to staging environment
+
+name: Staging
+
+on:
+  push:
+    branches:
+      - "release/**"
+  workflow_dispatch:
+
+jobs:
+
+  # Reusable build: test + package
+  build:
+    uses: ./.github/workflows/_build.yml
+
+  # Publish to TestPyPI
+  publish-testpypi:
+    needs: build
+    runs-on: ubuntu-latest
+    environment: staging
+    steps:
+      - name: Download build artifacts
+        uses: actions/download-artifact@v4
+        with:
+          name: dist
+          path: dist/
+
+      - name: Publish to TestPyPI
+        uses: pypa/gh-action-pypi-publish@release/v1
+        with:
+          repository-url: https://test.pypi.org/legacy/
+          password: ${{ secrets.TEST_PYPI_TOKEN }}
+          verbose: true
+          skip-existing: true  # Allow re-pushes without failure
+
+  # Install from TestPyPI and smoke-test the CLI
+  verify-testpypi:
+    needs: publish-testpypi
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+        with:
+          fetch-depth: 0
+
+      - name: Set up Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: "3.12"
+
+      - name: Determine expected version
+        id: version
+        run: |
+          pip install setuptools-scm
+          VERSION=$(python -m setuptools_scm)
+          echo "version=$VERSION" >> "$GITHUB_OUTPUT"
+          echo "Expected version: $VERSION"
+
+      - name: Install from TestPyPI
+        run: |
+          for i in 1 2 3 4 5; do  # TestPyPI may need a moment to index the upload
+            echo "Attempt $i: installing agentops-toolkit==${{ steps.version.outputs.version }}"
+            pip install \
+              "agentops-toolkit==${{ steps.version.outputs.version }}" \
+              --index-url https://test.pypi.org/simple/ \
+              --extra-index-url https://pypi.org/simple/ \
+              && exit 0
+            if [ "$i" -lt 5 ]; then echo "Not available yet, waiting 30s..."; sleep 30; fi
+          done
+          echo "::error::Could not install agentops-toolkit==${{ steps.version.outputs.version }} from TestPyPI after 5 attempts"; exit 1
+
+      - name: Smoke test — version and help
+        run: |
+          agentops --version
+          agentops --help
+
+      - name: Smoke test — init in temp directory
+        run: |
+          TMPDIR=$(mktemp -d)
+          cd "$TMPDIR"
+          agentops init
+          test -f .agentops/config.yaml
+          test -f .agentops/run.yaml
+          echo "✅ agentops init succeeded"
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
new file mode 100644
index 0000000..d7a5b56
--- /dev/null
+++ b/.pre-commit-config.yaml
@@ -0,0 +1,7 @@
+repos:
+  - repo: https://github.com/astral-sh/ruff-pre-commit
+    rev: v0.9.10
+    hooks:
+      - id: ruff
+        args: [--fix]
+      - id: ruff-format
diff --git a/AGENTS.md b/AGENTS.md
index 17f6f4d..6b6bbdf 100644
--- a/AGENTS.md
+++ b/AGENTS.md
@@ -18,10 +18,10 @@ Primary capabilities:
 Public CLI contract:
 - `agentops init`
 - `agentops eval run --config <run.yaml> [--output <dir>]`
+- `agentops eval compare --runs <baseline>,<current>`
 - `agentops report --in <results.json> [--out <report.md>]`
 
 Planned CLI stubs (not implemented in this release):
-- `agentops eval compare --runs ID1,ID2`
 - `agentops run list|show`
 - `agentops run view <id> [--entry N]`
 - `agentops report show|export`
diff --git a/CHANGELOG.md b/CHANGELOG.md
index a635dc8..1a26980 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -3,8 +3,46 @@
 All notable changes to this project will be documented in this file.
 This format follows [Keep a Changelog](https://keepachangelog.com/) and adheres to [Semantic Versioning](https://semver.org/).
 
+## [Unreleased]
+
+### Added
+- Implement `agentops eval compare --runs <baseline>,<current>` for baseline comparison of evaluation runs.
+  - Produces `comparison.json` (structured metric deltas, threshold flips, item-level changes) and `comparison.md` (human-readable report).
+  - Exits with code `0` (no regressions), `2` (regressions detected), or `1` (error).
+  - Supports run IDs by timestamped folder name, `latest` keyword, or absolute/relative paths.
+- Add Pydantic models for comparison output: `ComparisonResult`, `MetricDelta`, `ThresholdDelta`, `ItemDelta`, `ComparisonSummary`.
+- Add comparison service (`services/comparison.py`) with run discovery and structured diff logic.
+- Update `investigate-regression` and `run-evals` Copilot skills to reference the new compare command.
+- Add distributable Copilot skills under `.github/plugins/agentops/skills/` for GitHub-based installation (`agentops-run-evals`, `agentops-investigate-regression`, `agentops-observability-triage`).
+- Fix cloud evaluation to use the Foundry Project Evals API (`api-version=2025-11-15-preview`) with `azure_ai_evaluator` testing criteria, replacing the OpenAI SDK-based path that was incompatible.
+- Fix metric polarity in comparison: lower-is-better metrics (e.g. `avg_latency_seconds` with `<=` threshold) now correctly show "improved" when they decrease.
+- Align `azure-ai-projects` version references across all files to `>=2.0.1`.
+
+### Changed
+- Migrate versioning from static `pyproject.toml` field to `setuptools-scm` — version is now derived automatically from git tags.
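+- Redesign release pipeline into three workflow files (plus `cut-release.yml`, a manual dispatch that creates the release branch and opens the PR to `main`):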
+  - `_build.yml` — reusable build workflow (test + package via setuptools-scm)
+  - `staging.yml` — `release/*` branch pushes publish to TestPyPI and verify install
+  - `release.yml` — `v*` tag pushes publish to TestPyPI, then PyPI (with approval gate), then create GitHub Release
+- Add CLI smoke test in staging/release verify step (`agentops --version`, `agentops --help`, `agentops init`).
+- Fix secret reference from `PIPY_TOKEN` to `PYPI_TOKEN`; add `TEST_PYPI_TOKEN` for TestPyPI.
+- Add consistent workflow index header across all CI/CD workflow files.
+
 ## [0.1.0] - 2026-__-__
 
+### Added
+- `DatasetFormat.context_field` — optional field to declare the JSONL column holding retrieved context documents; used by `GroundednessEvaluator` in both cloud and local evaluation modes.
+- `TaskCompletionEvaluator` support in the Foundry backend: default `input_mapping` and cloud `data_mapping` for both cloud and local modes.
+- `ToolCallAccuracyEvaluator` support in the Foundry backend: `_EVALUATORS_NEEDING_TOOL_CALLS` set, cloud `data_mapping` (maps `tool_calls` from `{{sample.tool_calls}}` and `tool_definitions` from `{{item.tool_definitions}}`), and local `input_mapping`.
+- `agent_tools_baseline` bundle upgraded from `SimilarityEvaluator` placeholder to `TaskCompletionEvaluator` + `ToolCallAccuracyEvaluator` with matching thresholds.
+- `smoke-agent-tools.jsonl` enriched with `tool_definitions` and `tool_calls` fields for all 5 rows.
+- Unit tests covering `_cloud_evaluator_data_mapping` (context_field, task_completion, tool_call_accuracy) and `_default_foundry_input_mapping` (GroundednessEvaluator, TaskCompletionEvaluator, ToolCallAccuracyEvaluator).
+
+### Fixed
+- `GroundednessEvaluator` in cloud mode now maps `context` to `{{item.<context_field>}}` when `context_field` is set in the dataset format, instead of incorrectly using the `expected_field` column.
+- `GroundednessEvaluator` in local mode now maps `context` to `$row.context` (the retrieved documents column) instead of `$expected` (the ground truth answer).
+- `smoke-rag.yaml` dataset config now declares `context_field: context` to correctly wire the `context` JSONL column to groundedness evaluation.
+
 ### Changed
 - Split `agentops init` dataset seeds into `.agentops/datasets/` for YAML definitions and `.agentops/data/` for JSONL rows, and updated docs/examples to use the new layout.
 - Expanded `agentops init` run-config seeds to include scenario-specific examples: `.agentops/run-rag.yaml` and `.agentops/run-agent.yaml` in addition to the default `.agentops/run.yaml`.
diff --git a/README.md b/README.md
index fb03e05..26ef35a 100644
--- a/README.md
+++ b/README.md
@@ -170,7 +170,7 @@ Starter bundles created by `agentops init`:
 | `agentops --version` | Show installed version | ✅ |
 | `agentops init [--path DIR]` | Scaffold project workspace and starter files | ✅ |
 | `agentops eval run` | Evaluate a dataset against a bundle | ✅ |
-| `agentops eval compare --runs ID1,ID2` | Compare two past runs | 🚧 |
+| `agentops eval compare --runs ID1,ID2` | Compare two past runs | ✅ |
 | `agentops run list\|show` | List or inspect past runs | 🚧 |
 | `agentops run view <id> [--entry N]` | Deep run inspection | 🚧 |
 | `agentops report` | Regenerate `report.md` from `results.json` | ✅ |
@@ -213,9 +213,43 @@ High-level code layout:
 - Foundry agent tutorial: [docs/tutorial-basic-foundry-agent.md](docs/tutorial-basic-foundry-agent.md)
 - Model-direct tutorial: [docs/tutorial-model-direct.md](docs/tutorial-model-direct.md)
 - RAG tutorial: [docs/tutorial-rag.md](docs/tutorial-rag.md)
+- Baseline comparison tutorial: [docs/tutorial-baseline-comparison.md](docs/tutorial-baseline-comparison.md)
+- Copilot skills installation: [docs/tutorial-copilot-skills.md](docs/tutorial-copilot-skills.md)
 - Built-in evaluator notes: [docs/foundry-evaluation-sdk-built-in-evaluators.md](docs/foundry-evaluation-sdk-built-in-evaluators.md)
 - CI/CD setup guide: [docs/ci-github-actions.md](docs/ci-github-actions.md)
 
+## GitHub Copilot Skills
+
+AgentOps publishes Copilot skills that teach GitHub Copilot how to use the evaluation CLI correctly. Install them from this repository to get AI-assisted guidance for running evaluations, investigating regressions, and triage workflows.
+
+### Available Skills
+
+| Skill | Description |
+|---|---|
+| `agentops-run-evals` | Guides evaluation workflow — init, run, report, compare |
+| `agentops-investigate-regression` | Regression investigation — metric deltas, threshold flips, actionable checks |
+| `agentops-observability-triage` | Observability and triage — current capabilities vs planned features |
+
+### Installation
+
+Skills are distributed from this GitHub repository. Install them in VS Code:
+
+1. Open **VS Code** with **GitHub Copilot Chat** enabled.
+2. Use the Copilot skill install command and point to this repository:
+   - Source: `Azure/agentops`
+   - Skills are located under `.github/plugins/agentops/skills/`
+3. Once installed, Copilot will automatically use the skills when you ask about AgentOps evaluation, regressions, or observability.
+
+Alternatively, you can copy the skill files manually:
+```bash
+# Copy skills to your user-level skills directory
+cp -r .github/plugins/agentops/skills/* ~/.agents/skills/
+```
+
+### For Repository Contributors
+
+If you're working inside this repo, the skills under `.github/skills/` are automatically available to Copilot when the repository is your active workspace.
+
 ## Contributing
 
 See [CONTRIBUTING.md](CONTRIBUTING.md) for architecture rules, testing expectations, and contribution workflow.
diff --git a/docs/ci-github-actions.md b/docs/ci-github-actions.md
index d20bf9a..48e5fc6 100644
--- a/docs/ci-github-actions.md
+++ b/docs/ci-github-actions.md
@@ -32,12 +32,12 @@ This guide explains how to add AgentOps evaluation to your CI pipeline using Git
 
 Your repository must contain these files for the workflow to succeed:
 
-| File | Purpose |
-| --- | --- |
-| `.agentops/run.yaml` | Run specification — references the bundle, dataset, and backend |
-| `.agentops/bundles/<name>.yaml` | Evaluation bundle — evaluators + thresholds |
-| `.agentops/datasets/<name>.yaml` | Dataset metadata |
-| `.agentops/datasets/<name>.jsonl` | Dataset rows (JSONL format) |
+| File                              | Purpose                                                         |
+| --------------------------------- | --------------------------------------------------------------- |
+| `.agentops/run.yaml`              | Run specification — references the bundle, dataset, and backend |
+| `.agentops/bundles/<name>.yaml`   | Evaluation bundle — evaluators + thresholds                     |
+| `.agentops/datasets/<name>.yaml`  | Dataset metadata                                                |
+| `.agentops/datasets/<name>.jsonl` | Dataset rows (JSONL format)                                     |
 
 All paths in `run.yaml` are relative to the `.agentops/` directory.
 
@@ -81,16 +81,16 @@ The workflow uses **Workload Identity Federation (OIDC)** — no client secrets
 
 Set these as **repository variables** (not secrets — they are not confidential):
 
-| Variable | Value |
-| --- | --- |
-| `AZURE_CLIENT_ID` | Application (client) ID |
-| `AZURE_TENANT_ID` | Directory (tenant) ID |
-| `AZURE_SUBSCRIPTION_ID` | Azure subscription ID |
+| Variable                | Value                   |
+| ----------------------- | ----------------------- |
+| `AZURE_CLIENT_ID`       | Application (client) ID |
+| `AZURE_TENANT_ID`       | Directory (tenant) ID   |
+| `AZURE_SUBSCRIPTION_ID` | Azure subscription ID   |
 
 Set this as a **repository secret**:
 
-| Secret | Value |
-| --- | --- |
+| Secret                              | Value                        |
+| ----------------------------------- | ---------------------------- |
 | `AZURE_AI_FOUNDRY_PROJECT_ENDPOINT` | Foundry project endpoint URL |
 
 Go to **Settings** → **Secrets and variables** → **Actions** → **Variables** tab (for variables) or **Secrets** tab (for the endpoint).
@@ -101,9 +101,9 @@ Go to **Settings** → **Secrets and variables** → **Actions** → **Variables
 
 The template workflow triggers on:
 
-| Trigger | When |
-| --- | --- |
-| `pull_request` | Any PR targeting `main` or `develop` |
+| Trigger             | When                                                                               |
+| ------------------- | ---------------------------------------------------------------------------------- |
+| `pull_request`      | Any PR targeting `main` or `develop`                                               |
 | `workflow_dispatch` | Manual run from the Actions tab (supports custom config path and output directory) |
 
 To change which branches trigger evaluations, edit the `on.pull_request.branches` array in the workflow file.
@@ -114,11 +114,11 @@ To change which branches trigger evaluations, edit the `on.pull_request.branches
 
 AgentOps returns CI-friendly exit codes that GitHub Actions interprets directly:
 
-| Exit Code | Meaning | CI Result |
-| --- | --- | --- |
-| `0` | Evaluation succeeded, all thresholds passed | ✅ Job passes |
-| `2` | Evaluation succeeded, one or more thresholds failed | ❌ Job fails |
-| `1` | Runtime or configuration error | ❌ Job fails |
+| Exit Code | Meaning                                             | CI Result    |
+| --------- | --------------------------------------------------- | ------------ |
+| `0`       | Evaluation succeeded, all thresholds passed         | ✅ Job passes |
+| `2`       | Evaluation succeeded, one or more thresholds failed | ❌ Job fails  |
+| `1`       | Runtime or configuration error                      | ❌ Job fails  |
 
 No special handling is needed — GitHub Actions fails the job on any non-zero exit code.
 
@@ -128,14 +128,14 @@ No special handling is needed — GitHub Actions fails the job on any non-zero e
 
 The workflow uploads the following files as a GitHub Actions artifact named `agentops-eval-results`:
 
-| File | Description |
-| --- | --- |
-| `results.json` | Machine-readable evaluation results (versioned schema) |
-| `report.md` | Human-readable Markdown summary |
-| `backend_metrics.json` | Raw backend scores per row |
+| File                    | Description                                                    |
+| ----------------------- | -------------------------------------------------------------- |
+| `results.json`          | Machine-readable evaluation results (versioned schema)         |
+| `report.md`             | Human-readable Markdown summary                                |
+| `backend_metrics.json`  | Raw backend scores per row                                     |
 | `cloud_evaluation.json` | Cloud eval metadata with Foundry portal link (cloud mode only) |
-| `backend.stdout.log` | Backend stdout capture |
-| `backend.stderr.log` | Backend stderr capture |
+| `backend.stdout.log`    | Backend stdout capture                                         |
+| `backend.stderr.log`    | Backend stderr capture                                         |
 
 Artifacts are uploaded even when the evaluation fails (`if: always()`), so you can always inspect results.
 
@@ -174,10 +174,10 @@ agentops config cicd
 
 Options:
 
-| Flag | Description | Default |
-| --- | --- | --- |
+| Flag         | Description                      | Default                 |
+| ------------ | -------------------------------- | ----------------------- |
 | `--dir PATH` | Target repository root directory | `.` (current directory) |
-| `--force` | Overwrite existing workflow file | `false` |
+| `--force`    | Overwrite existing workflow file | `false`                 |
 
 ### Regenerate (overwrite)
 
@@ -243,10 +243,28 @@ Remove or comment out the "Post report as PR comment" step in the workflow.
 
 ## Troubleshooting
 
-| Problem | Solution |
-| --- | --- |
-| `Error: evaluation failed: ...` (exit 1) | Check that `.agentops/run.yaml` exists, config is valid YAML, and secrets are set |
-| `Threshold status: FAILED` (exit 2) | Review `report.md` — thresholds are too strict or model quality regressed |
-| Missing artifacts | Ensure `.agentops/results/latest/` is not in `.gitignore` — the workflow reads this path |
-| Authentication errors | Verify the federated credential entity matches your repo/branch; check that `AZURE_CLIENT_ID`, `AZURE_TENANT_ID`, `AZURE_SUBSCRIPTION_ID` are set as repository variables; confirm the app registration has access to the Foundry project |
-| `agentops: command not found` | Ensure `pip install agentops-toolkit` runs before the eval step |
+| Problem                                  | Solution                                                                                                                                                                                                                                  |
+| ---------------------------------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
+| `Error: evaluation failed: ...` (exit 1) | Check that `.agentops/run.yaml` exists, config is valid YAML, and secrets are set                                                                                                                                                         |
+| `Threshold status: FAILED` (exit 2)      | Review `report.md` — thresholds are too strict or model quality regressed                                                                                                                                                                 |
+| Missing artifacts                        | Ensure `.agentops/results/latest/` is not in `.gitignore` — the workflow reads this path                                                                                                                                                  |
+| Authentication errors                    | Verify the federated credential entity matches your repo/branch; check that `AZURE_CLIENT_ID`, `AZURE_TENANT_ID`, `AZURE_SUBSCRIPTION_ID` are set as repository variables; confirm the app registration has access to the Foundry project |
+| `agentops: command not found`            | Ensure `pip install agentops-toolkit` runs before the eval step                                                                                                                                                                           |
+
+---
+
+## Internal CI/CD Workflows (Contributors)
+
+If you are contributing to the agentops-toolkit repository itself, the project has separate CI/CD workflows for building and releasing the package:
+
+| Workflow          | Trigger                                    | Purpose                                                                   |
+| ----------------- | ------------------------------------------ | ------------------------------------------------------------------------- |
+| `ci.yml`          | Push to `develop`, PRs to `main`/`develop` | Lint (ruff) + test (matrix) + coverage                                    |
+| `_build.yml`      | Called by staging/release                  | Reusable lint + test + build package                                      |
+| `staging.yml`     | Push to `release/**`                       | Build → TestPyPI → verify install                                         |
+| `release.yml`     | Push `v*` tag                              | TestPyPI → PyPI (with approval) → GitHub Release                          |
+| `cut-release.yml` | Manual dispatch (Actions tab button)       | Create release branch from `develop`, update CHANGELOG, open PR to `main` |
+
+The **Cut Release** workflow provides a one-click way to start a release: enter a version number in the Actions UI, and it creates the release branch, updates the changelog, and opens the PR automatically.
+
+For full details, see [release-process.md](release-process.md).
diff --git a/docs/how-it-works.md b/docs/how-it-works.md
index 8dadbab..5c6a4e9 100644
--- a/docs/how-it-works.md
+++ b/docs/how-it-works.md
@@ -109,7 +109,7 @@ When you run `agentops eval run`, the following happens step by step:
 |---|---|---|
 | `agentops init [--path DIR]` | Scaffold `.agentops/` workspace with starter config, bundles, datasets, and data | Available |
 | `agentops eval run` | Execute an evaluation (main command) | Available |
-| `agentops eval compare --runs ID1,ID2` | Compare two past evaluation runs | Planned (stub) |
+| `agentops eval compare --runs ID1,ID2` | Compare two past evaluation runs | Available |
 | `agentops run list\|show` | List or inspect past runs | Planned (stub) |
 | `agentops run view <id> [--entry N]` | Deep-inspect a run | Planned (stub) |
 | `agentops report [--in <path>] [--out <path>]` | Regenerate `report.md` from `results.json` | Available |
diff --git a/docs/release-process.md b/docs/release-process.md
new file mode 100644
index 0000000..2c91b7a
--- /dev/null
+++ b/docs/release-process.md
@@ -0,0 +1,939 @@
+# GitOps Guide: Building and Releasing AgentOps Toolkit
+
+This guide is a comprehensive instruction manual for engineers working on the **agentops-toolkit** project. It covers the full GitOps lifecycle — from setting up your development environment, through the branching model and CI pipeline, to staging and production releases.
+
+---
+
+## Table of Contents
+
+- [1. GitOps Principles](#1-gitops-principles)
+- [2. Branching Model](#2-branching-model)
+- [3. Development Environment Setup](#3-development-environment-setup)
+- [4. Development Workflow](#4-development-workflow)
+- [5. CI Pipeline (Continuous Integration)](#5-ci-pipeline-continuous-integration)
+- [6. Versioning with setuptools-scm](#6-versioning-with-setuptools-scm)
+- [7. Staging Pipeline (TestPyPI)](#7-staging-pipeline-testpypi)
+- [8. End-to-End Pipeline Testing](#8-end-to-end-pipeline-testing)
+- [9. Production Release Pipeline (PyPI)](#9-production-release-pipeline-pypi)
+- [10. Infrastructure Setup](#10-infrastructure-setup)
+- [11. Workflow File Reference](#11-workflow-file-reference)
+- [12. Release Checklist](#12-release-checklist)
+- [13. Troubleshooting](#13-troubleshooting)
+
+---
+
+## 1. GitOps Principles
+
+AgentOps follows GitOps practices where **git is the single source of truth** for both code and operational state:
+
+- **Declarative configuration** — All pipeline behavior is defined in YAML workflow files checked into the repository.
+- **Version-controlled releases** — Every release is traceable to a git tag. No manual version edits.
+- **Automated pipelines** — Pushing branches or tags triggers the corresponding workflow automatically.
+- **Environment gates** — Production deployment requires explicit human approval via GitHub Environments.
+- **Immutable artifacts** — Built packages are uploaded once and reused across pipeline stages (no rebuilds between TestPyPI and PyPI).
+
+---
+
+## 2. Branching Model
+
+AgentOps uses a modified [Git Flow](https://nvie.com/posts/a-successful-git-branching-model/) strategy:
+
+```
+main              ← always production-ready, receives merges from release/* branches
+  │
+develop           ← integration branch, all feature PRs target here
+  │
+  ├── feature/*   ← individual features branched from develop
+  │
+  └── release/*   ← release preparation, branched from develop when ready to ship
+```
+
+### Branch Purposes
+
+| Branch           | Purpose                                                              | Who creates      | Merges into                   |
+| ---------------- | -------------------------------------------------------------------- | ---------------- | ----------------------------- |
+| `main`           | Production-ready code. Every commit here should be a tagged release. | Maintainers only | —                             |
+| `develop`        | Integration branch. All feature work flows through here.             | —                | `main` (via release branches) |
+| `feature/*`      | Individual features, bug fixes, or improvements.                     | Any contributor  | `develop`                     |
+| `release/v0.X.Y` | Release stabilization and staging. Triggers TestPyPI pipeline.       | Maintainers      | `main`                        |
+
+### Branch Lifecycle
+
+```
+1. feature/my-change ──PR──→ develop       (contributor)
+2. develop ──branch──→ release/v0.2.0      (maintainer, when ready to release)
+3. release/v0.2.0 ──PR──→ main            (maintainer, after staging validates)
+4. main ──tag──→ v0.2.0                    (maintainer, triggers production release)
+5. main ──merge──→ develop                 (maintainer, sync the tag back)
+6. release/v0.2.0 ──delete──               (maintainer, cleanup)
+```
+
+### Branch Protection Rules (Recommended)
+
+Configure these in **Settings → Branches → Branch protection rules**:
+
+| Branch      | Rules                                                                    |
+| ----------- | ------------------------------------------------------------------------ |
+| `main`      | Require PR, require status checks (CI), require approvals, no force push |
+| `develop`   | Require PR, require status checks (CI), no force push                    |
+| `release/*` | Require status checks (Staging pipeline), no force push                  |
+
+---
+
+## 3. Development Environment Setup
+
+### Prerequisites
+
+- Python 3.11 or later
+- [uv](https://docs.astral.sh/uv/) (recommended) or pip
+- Git with access to the repository
+
+### First-Time Setup
+
+```bash
+# 1. Clone the repository
+git clone https://github.com/Azure/agentops.git
+cd agentops
+
+# 2. Install uv (if not already installed)
+# macOS/Linux:
+curl -LsSf https://astral.sh/uv/install.sh | sh
+# Windows:
+powershell -ExecutionPolicy ByPass -c "irm https://astral.sh/uv/install.ps1 | iex"
+
+# 3. Install the project and dev dependencies
+uv sync --group dev
+
+# 4. Verify the installation
+uv run agentops --version
+uv run pytest tests/ -x -q
+```
+
+### Alternative Setup (pip)
+
+```bash
+python -m venv .venv
+# Windows:
+.venv\Scripts\Activate.ps1
+# macOS/Linux:
+source .venv/bin/activate
+
+pip install -e .
+pip install pytest
+agentops --version
+python -m pytest tests/ -x -q
+```
+
+### Verify Your Setup
+
+After installation, these commands should all succeed:
+
+```bash
+# CLI works
+agentops --version          # Shows version like 0.1.3.dev6
+agentops --help             # Shows available commands
+
+# Tests pass
+uv run pytest tests/ -x -q  # All tests should pass
+
+# Version from git
+python -m setuptools_scm    # Shows version derived from git tags
+```
+
+---
+
+## 4. Development Workflow
+
+### Creating a Feature
+
+```bash
+# 1. Start from the latest develop
+git checkout develop
+git pull origin develop
+
+# 2. Create your feature branch
+git checkout -b feature/my-new-feature
+
+# 3. Make changes, commit, push
+# ... edit files ...
+uv run pytest tests/ -x -q          # Run tests before committing
+git add .
+git commit -m "feat: add my new feature"
+git push origin feature/my-new-feature
+
+# 4. Open a PR targeting develop
+#    GitHub will run the CI pipeline automatically
+```
+
+### PR Requirements
+
+Before your PR can be merged to `develop`:
+
+1. **CI pipeline passes** — lint + tests across OS/Python matrix
+2. **Code review approved** — at least one reviewer
+3. **Architecture rules followed** — see [CONTRIBUTING.md](../CONTRIBUTING.md)
+4. **Tests included** — unit tests in `tests/unit/`, integration tests if needed
+5. **CHANGELOG updated** — add entry under `[Unreleased]` for user-visible changes
+
+### After Your PR is Merged
+
+```bash
+# Sync your local develop
+git checkout develop
+git pull origin develop
+
+# Delete your feature branch
+git branch -d feature/my-new-feature
+```
+
+---
+
+## 5. CI Pipeline (Continuous Integration)
+
+The CI pipeline runs on **every push and PR** to `main` or `develop`.
+
+**Workflow file**: `.github/workflows/ci.yml`
+
+### Jobs
+
+| Job | What it does | Runs on |
+| --- | --- | --- |
+| **lint** | `ruff check` (linting) + `mypy` (type checking, soft-fail) | Ubuntu, Python 3.11 |
+| **test** | `pytest tests/` with JUnit XML output | Matrix: 2 OS × 3 Python versions |
+| **coverage** | `pytest --cov` with XML coverage report | Ubuntu, Python 3.13 (after tests pass) |
+| **publish-dev** | Build package + publish to TestPyPI (develop pushes only) | Ubuntu, Python 3.12 (after lint + test pass) |
+| **verify-dev** | Install from TestPyPI + smoke test (develop pushes only) | Ubuntu, Python 3.12 (after publish-dev) |
+
+The `publish-dev` and `verify-dev` jobs only run on pushes to `develop` (not on PRs). Every PR merged into `develop` therefore automatically produces an installable dev build on TestPyPI with a version like `0.1.3.dev12`.
+
+### Test Matrix
+
+| OS      | Python 3.11 | Python 3.12 | Python 3.13 |
+| ------- | ----------- | ----------- | ----------- |
+| Ubuntu  | ✅           | ✅           | ✅           |
+| Windows | ✅           | ✅           | ✅           |
+
+### What CI Catches
+
+- Syntax and style issues (ruff)
+- Type errors (mypy, non-blocking)
+- Test failures across platforms
+- Import errors or missing dependencies
+- Regression in exit code behavior
+
+### Viewing CI Results
+
+1. Go to the **Actions** tab → find the CI run for your PR
+2. Click into a failing job to see the error
+3. Download test result artifacts if needed
+
+---
+
+## 6. Versioning with setuptools-scm
+
+AgentOps uses [setuptools-scm](https://github.com/pypa/setuptools-scm) for **fully automatic versioning**. There is **no `version` field in `pyproject.toml`** — the version is derived from git tags at build time.
+
+### How It Works
+
+setuptools-scm reads your git history and computes the version:
+
+| Git state                                     | Example version | Explanation                   |
+| --------------------------------------------- | --------------- | ----------------------------- |
+| Exactly on tag `v0.2.0`                       | `0.2.0`         | Clean release version         |
+| 3 commits after `v0.2.0`                      | `0.2.1.dev3`    | Dev version, 3 commits ahead  |
+| 10 commits after `v0.1.2` on `release/v0.2.0` | `0.1.3.dev10`   | Dev version on release branch |
+
+### Configuration
+
+In `pyproject.toml`:
+
+```toml
+[build-system]
+requires = ["setuptools>=68", "wheel", "setuptools-scm>=8"]
+
+[project]
+dynamic = ["version"]    # Version comes from setuptools-scm, not a static field
+
+[tool.setuptools_scm]
+local_scheme = "no-local-version"   # Strips +hash suffix (PyPI rejects local versions)
+```
+
+### Checking the Version
+
+```bash
+# From the installed CLI
+agentops --version
+
+# From setuptools-scm directly
+python -m setuptools_scm
+
+# From Python code
+python -c "from agentops import __version__; print(__version__)"
+```
+
+### Rules
+
+- **Never add `version = "..."` to `pyproject.toml`** — this will conflict with setuptools-scm.
+- **Tags must be a `v`-prefixed PEP 440 version** — use `v0.2.0`, not `release-0.2.0` or a bare `0.2.0` (the release workflow triggers only on `v*` tags).
+- **`fetch-depth: 0`** is required in CI checkout steps — setuptools-scm needs the full git history.
+- **`pip install -e .` requires `.git`** — editable installs need the git directory present (standard for development).
+
+---
+
+## 7. Staging Pipeline (TestPyPI)
+
+The staging pipeline validates a release candidate by publishing to TestPyPI and verifying the installed package works.
+
+**Workflow file**: `.github/workflows/staging.yml`
+
+**Trigger**: Push to any `release/*` branch
+
+### Pipeline Flow
+
+```
+push to release/v0.2.0
+        │
+   ┌────▼────────┐
+   │   _build     │  ← Reusable workflow: test + build package
+   │  (tests +    │     Version: e.g. 0.1.3.dev3 (from setuptools-scm)
+   │   package)   │
+   └────┬────────┘
+        │
+   ┌────▼───────────┐
+   │ publish-testpypi │  ← Upload to TestPyPI (staging environment)
+   │                   │     Uses TEST_PYPI_TOKEN secret
+   └────┬───────────┘
+        │
+   ┌────▼───────────┐
+   │ verify-testpypi  │  ← Install from TestPyPI in a fresh environment
+   │                   │     Run: agentops --version
+   │                   │     Run: agentops --help
+   │                   │     Run: agentops init (in temp directory)
+   └─────────────────┘
+```
+
+### What Gets Validated
+
+1. **Tests pass** — the full test suite runs before building
+2. **Package builds** — setuptools-scm generates the correct version, wheel and sdist are created
+3. **Package uploads** — the built artifacts successfully upload to TestPyPI
+4. **Package installs** — `pip install` from TestPyPI resolves all dependencies
+5. **CLI works** — `agentops --version` and `--help` run without errors
+6. **Init works** — `agentops init` creates the expected workspace files
+
+### Iterating on a Release Branch
+
+If staging fails, fix the issue and push again:
+
+```bash
+# On your release/v0.2.0 branch
+# ... fix the issue ...
+git add .
+git commit -m "fix: correct packaging issue"
+git push origin release/v0.2.0
+# Staging pipeline re-runs automatically
+```
+
+Each push generates a new dev version derived from the most recent tag (e.g. `0.1.3.dev4`, `0.1.3.dev5` when the last tag is `v0.1.2`), so there are no version conflicts on TestPyPI. The `skip-existing: true` flag also prevents failures if the same version is re-uploaded.
+
+### Manual Verification (Optional)
+
+After the staging pipeline passes, you can manually test the package:
+
+```bash
+# Install the specific dev version from TestPyPI
+pip install "agentops-toolkit==0.1.3.dev3" \
+  --index-url https://test.pypi.org/simple/ \
+  --extra-index-url https://pypi.org/simple/
+
+agentops --version
+agentops --help
+
+# Test init in a temp directory
+cd $(mktemp -d)
+agentops init
+ls .agentops/
+```
+
+> **Note**: `--extra-index-url https://pypi.org/simple/` is required so that dependencies (typer, pydantic, ruamel.yaml) resolve from the real PyPI.
+
+---
+
+## 8. End-to-End Pipeline Testing
+
+Before cutting a real release, you can validate the entire pipeline end-to-end using a disposable test branch and tag. This is especially useful when:
+
+- You've modified any workflow file (`_build.yml`, `staging.yml`, `release.yml`)
+- You've changed `pyproject.toml` build configuration
+- You've updated setuptools-scm settings
+- A new engineer wants to understand the release process hands-on
+
+### 8.1 Test the Staging Pipeline
+
+#### Step 1: Create a Test Release Branch
+
+From the branch that contains your workflow changes (or from `develop`):
+
+```bash
+git checkout develop          # or your feature branch with workflow changes
+git pull origin develop
+git checkout -b release/v0.0.0-test
+git push origin release/v0.0.0-test
+```
+
+This triggers the `staging.yml` workflow automatically.
+
+#### Step 2: Monitor the Pipeline
+
+1. Go to **Actions** tab → find the **Staging** workflow run for `release/v0.0.0-test`
+2. Watch all 3 jobs:
+
+```
+Job 1: build / build        → Do the tests pass? Does the package build?
+Job 2: publish-testpypi     → Does TestPyPI upload succeed?
+Job 3: verify-testpypi      → Can the package install and run?
+```
+
+3. Click into each job to inspect step-level output
+4. If a job fails, read the logs, fix the issue, push again:
+
+```bash
+# Fix and re-push
+git add .
+git commit -m "fix: correct workflow issue"
+git push origin release/v0.0.0-test
+# Pipeline re-runs automatically
+```
+
+#### Step 3: Verify on TestPyPI (Optional)
+
+Confirm the test package appeared on TestPyPI:
+
+```bash
+# Check the version that was published
+python -m setuptools_scm
+
+# Install and test manually
+pip install "agentops-toolkit==$(python -m setuptools_scm)" \
+  --index-url https://test.pypi.org/simple/ \
+  --extra-index-url https://pypi.org/simple/
+
+agentops --version
+agentops --help
+
+# Test init
+cd $(mktemp -d)
+agentops init
+ls .agentops/
+```
+
+#### Step 4: Clean Up the Test Branch
+
+```bash
+# Delete remote branch
+git push origin --delete release/v0.0.0-test
+
+# Switch back and delete local branch
+git checkout develop
+git branch -d release/v0.0.0-test
+```
+
+### 8.2 Test the Full Release Pipeline (Including PyPI Approval Gate)
+
+> **Warning**: This will publish a test version to PyPI if you approve it. Only do this if you want to validate the full production flow. You can cancel at the approval gate to skip the actual PyPI publish.
+
+#### Step 1: Create a Test Tag
+
+From `develop` or your feature branch:
+
+```bash
+git tag v0.0.0-test.1
+git push origin v0.0.0-test.1
+```
+
+This triggers the `release.yml` workflow.
+
+#### Step 2: Monitor the Pipeline
+
+1. Go to **Actions** tab → find the **Release** workflow run for `v0.0.0-test.1`
+2. Watch the jobs execute in sequence:
+
+```
+Job 1: build / build        ✅ Tests + build
+Job 2: publish-testpypi     ✅ Upload to TestPyPI
+Job 3: verify-testpypi      ✅ Install + smoke test
+Job 4: publish-pypi         ⏸️  PAUSES — waiting for approval
+Job 5: github-release       ⏳ Waiting for Job 4
+```
+
+3. At the `publish-pypi` step, you have two choices:
+   - **Approve** — publishes to real PyPI (use only if you want to test the full flow)
+   - **Reject** — cancels the remaining jobs without publishing to PyPI
+
+#### Step 3: Inspect the Approval Gate
+
+1. Click on the **Release** workflow run
+2. The `publish-pypi` job shows a yellow "Waiting" badge
+3. Click **Review deployments**
+4. Select the **release** environment
+5. Choose **Reject** to cancel without publishing, or **Approve and deploy** to continue
+
+This validates that the environment protection rules and reviewer requirements work correctly.
+
+#### Step 4: Clean Up
+
+```bash
+# Delete the test tag (remote and local)
+git push origin --delete v0.0.0-test.1
+git tag -d v0.0.0-test.1
+
+# If a GitHub Release was created, delete it manually:
+# Go to Releases → find v0.0.0-test.1 → Delete
+```
+
+If you approved the PyPI publish, the test version (`0.0.0.test1`) will exist on PyPI permanently (PyPI versions cannot be deleted, only yanked). This is harmless but visible.
+
+### 8.3 Quick E2E Test Summary
+
+| What to test        | Command                                                              | What to watch                     |
+| ------------------- | -------------------------------------------------------------------- | --------------------------------- |
+| Staging only        | `git push origin release/v0.0.0-test`                                | 3 jobs: build → TestPyPI → verify |
+| Full release (safe) | `git push origin v0.0.0-test.1` then **reject** at approval          | 4 jobs run, approval gate works   |
+| Full release (real) | `git push origin v0.0.0-test.1` then **approve**                     | All 5 jobs, package on PyPI       |
+| Cleanup (branch)    | `git push origin --delete release/v0.0.0-test`                       | Branch removed                    |
+| Cleanup (tag)       | `git push origin --delete v0.0.0-test.1 && git tag -d v0.0.0-test.1` | Tag removed                       |
+
+### 8.4 Testing Workflow Changes on a Feature Branch
+
+If you're modifying the workflow files on a feature branch (not yet merged to `develop`), you can still test them:
+
+```bash
+# Your workflow changes are on feature/my-ci-changes
+git checkout feature/my-ci-changes
+
+# Create a test release branch directly from your feature branch
+git checkout -b release/v0.0.0-test
+git push origin release/v0.0.0-test
+
+# GitHub Actions uses the workflow files from the pushed branch,
+# so your modifications are what actually runs
+```
+
+This is useful because GitHub Actions reads workflow files from the branch being pushed, not from `main` or `develop`. Your modified workflows execute immediately without needing to merge first.
+
+After testing:
+
+```bash
+# Clean up
+git push origin --delete release/v0.0.0-test
+git checkout feature/my-ci-changes
+git branch -d release/v0.0.0-test
+```
+
+---
+
+## 9. Production Release Pipeline (PyPI)
+
+The production pipeline publishes a final release to PyPI and creates a GitHub Release.
+
+**Workflow file**: `.github/workflows/release.yml`
+
+**Trigger**: Push a `v*` tag (e.g. `v0.2.0`)
+
+### Pipeline Flow
+
+```
+push tag v0.2.0
+        │
+   ┌────▼────────┐
+   │   _build     │  ← Same reusable build as staging
+   │  (tests +    │     Version: 0.2.0 (clean, from tag)
+   │   package)   │
+   └────┬────────┘
+        │
+   ┌────▼───────────┐
+   │ publish-testpypi │  ← Final TestPyPI upload (clean version)
+   └────┬───────────┘
+        │
+   ┌────▼───────────┐
+   │ verify-testpypi  │  ← Smoke test from TestPyPI
+   └────┬───────────┘
+        │
+   ┌────▼───────────┐
+   │  publish-pypi    │  ← ⏸️ PAUSES HERE — requires approval
+   │                   │     Uses PYPI_TOKEN secret
+   │  (environment:   │     Designated reviewers must approve
+   │   release)       │
+   └────┬───────────┘
+        │
+   ┌────▼───────────┐
+   │ github-release   │  ← Creates GitHub Release with artifacts
+   │                   │     Generates release notes automatically
+   └─────────────────┘
+```
+
+### Step-by-Step: Cutting a Release
+
+#### Step 1: Cut the Release (One-Click)
+
+1. Go to the **Actions** tab → select **Cut Release** workflow
+2. Click **Run workflow**
+3. Enter the version (e.g. `0.2.0`) — no `v` prefix
+4. Click **Run workflow**
+
+The workflow automatically:
+- Creates `release/v0.2.0` from `develop`
+- Updates `CHANGELOG.md` (`[Unreleased]` → `[0.2.0] - YYYY-MM-DD`)
+- Pushes the branch (triggers [staging pipeline](#7-staging-pipeline-testpypi))
+- Opens a PR: `release/v0.2.0` → `main`
+
+> **Alternative (manual)**: If you prefer to create the release branch locally:
+> ```bash
+> git checkout develop && git pull origin develop
+> git checkout -b release/v0.2.0
+> # Edit CHANGELOG.md manually
+> git commit -m "chore: prepare release 0.2.0"
+> git push origin release/v0.2.0
+> ```
+
+#### Step 2: Wait for Staging
+
+The branch push triggers the staging pipeline automatically. Wait for it to pass.
+
+#### Step 3: Monitor Staging
+
+1. Go to **Actions** tab → find the **Staging** workflow run
+2. Verify all 3 jobs pass:
+   - ✅ `build / build` — tests pass, package builds
+   - ✅ `publish-testpypi` — uploaded to TestPyPI
+   - ✅ `verify-testpypi` — installed and smoke-tested
+
+If any job fails, fix the issue on the release branch and push. The pipeline re-runs automatically.
+
+#### Step 4: Merge to Main
+
+Create a PR from `release/v0.2.0` → `main` (or use the one already opened by Cut Release):
+
+1. Go to GitHub → **Pull Requests** → **New Pull Request**
+2. Base: `main` ← Compare: `release/v0.2.0`
+3. Title: `Release v0.2.0`
+4. Get the required reviews and merge
+
+#### Step 5: Tag the Release
+
+```bash
+git checkout main
+git pull origin main
+git tag v0.2.0
+git push origin v0.2.0
+```
+
+This triggers the [production release pipeline](#9-production-release-pipeline-pypi).
+
+#### Step 6: Approve the PyPI Publish
+
+1. Go to **Actions** tab → find the **Release** workflow run for `v0.2.0`
+2. The pipeline will run through build → TestPyPI → verify
+3. At the `publish-pypi` job, it pauses with **"Waiting for review"**
+4. Click **Review deployments** → select the **release** environment → **Approve and deploy**
+5. The package publishes to PyPI
+6. The `github-release` job creates a GitHub Release with the built artifacts and auto-generated release notes
+
+#### Step 7: Post-Release Cleanup
+
+```bash
+# Merge main (which now contains the release) back into develop
+git checkout develop
+git pull origin develop
+git merge main
+git push origin develop
+
+# Delete the release branch (remote and local)
+git push origin --delete release/v0.2.0
+git branch -d release/v0.2.0
+```
+
+#### Step 8: Verify the Published Package
+
+```bash
+# Install from PyPI
+pip install agentops-toolkit==0.2.0
+
+# Verify
+agentops --version    # Should show 0.2.0
+agentops --help
+```
+
+Check the published package:
+- PyPI: https://pypi.org/project/agentops-toolkit/0.2.0/
+- GitHub Release: https://github.com/Azure/agentops/releases/tag/v0.2.0
+
+---
+
+## 10. Infrastructure Setup
+
+This section covers one-time setup required before the pipelines can run.
+
+### 10.1 GitHub Environments
+
+Create two environments in **Settings → Environments → New environment**:
+
+#### `staging` Environment
+
+- **Purpose**: Controls access to TestPyPI publishing
+- **Protection rules**: None required (auto-deploys), or add reviewers for extra safety
+- **Secrets**:
+
+  | Secret            | Value              | How to get it                                                                     |
+  | ----------------- | ------------------ | --------------------------------------------------------------------------------- |
+  | `TEST_PYPI_TOKEN` | TestPyPI API token | [test.pypi.org/manage/account/token](https://test.pypi.org/manage/account/token/) |
+
+#### `release` Environment
+
+- **Purpose**: Controls access to production PyPI publishing
+- **Protection rules**: **Required reviewers** — add at least one team member who must approve
+- **Deployment branches**: Optionally restrict to `main` branch and `v*` tags
+- **Secrets**:
+
+  | Secret       | Value                                         | How to get it                                                           |
+  | ------------ | --------------------------------------------- | ----------------------------------------------------------------------- |
+  | `PYPI_TOKEN` | PyPI API token (scoped to `agentops-toolkit`) | [pypi.org/manage/account/token](https://pypi.org/manage/account/token/) |
+
+### 10.2 PyPI and TestPyPI Accounts
+
+#### TestPyPI (Staging)
+
+1. Go to [test.pypi.org/account/register](https://test.pypi.org/account/register/)
+2. Create an account (separate from PyPI — different databases)
+3. Go to [test.pypi.org/manage/account/token](https://test.pypi.org/manage/account/token/)
+4. Create an API token (scope: entire account for first upload, then project-scoped after)
+5. Add the token as `TEST_PYPI_TOKEN` secret in the GitHub `staging` environment
+
+> **Note**: TestPyPI and PyPI are completely separate systems with separate accounts, tokens, and namespaces. An account on one does not grant access to the other.
+
+#### PyPI (Production)
+
+1. Go to [pypi.org/account/register](https://pypi.org/account/register/) or log in
+2. Go to [pypi.org/manage/account/token](https://pypi.org/manage/account/token/)
+3. Create an API token scoped to the `agentops-toolkit` project
+4. Add the token as `PYPI_TOKEN` secret in the GitHub `release` environment
+
+### 10.3 First-Time Package Registration
+
+The first time you publish to TestPyPI or PyPI, the project name (`agentops-toolkit`) is registered automatically. After the first upload:
+
+- Scope your API tokens to the specific project for better security
+- Add collaborators/maintainers on the PyPI/TestPyPI project page if needed
+
+---
+
+## 11. Workflow File Reference
+
+All workflow files are in `.github/workflows/`:
+
+### `ci.yml` — Continuous Integration
+
+```
+Trigger: push to develop, PR to develop
+Flow:    lint → test (matrix) → coverage
+         + on develop push: publish-dev → verify-dev (TestPyPI)
+Purpose: Quality gate for all code changes; auto-publish dev builds
+```
+
+Key detail: `publish-dev` and `verify-dev` only run on pushes to `develop` (not PRs). Every merge to develop produces a dev version on TestPyPI (e.g. `0.1.3.dev12`) via setuptools-scm. PRs to `main` are not covered by CI because they come from `release/*` branches which are already validated by the staging pipeline.
+
+### `_build.yml` — Reusable Build
+
+```
+Trigger: workflow_call (called by staging.yml and release.yml)
+Flow:    checkout (full history) → uv sync → pytest → uv build → upload artifact
+Purpose: Single source of truth for the build process
+```
+
+Key detail: Uses `fetch-depth: 0` to ensure setuptools-scm has full git history for version derivation.
+
+### `staging.yml` — Staging Pipeline
+
+```
+Trigger: push to release/* branches, or workflow_dispatch
+Flow:    _build → publish-testpypi → verify-testpypi
+Purpose: Validate release candidates before production
+```
+
+Key details:
+- `skip-existing: true` allows re-pushes without upload failures
+- Verify step uses a retry loop (5 attempts, 30s apart) for TestPyPI index propagation
+- Smoke tests cover `--version`, `--help`, and `agentops init`
+
+### `release.yml` — Production Release
+
+```
+Trigger: push v* tags, or workflow_dispatch
+Flow:    _build → publish-testpypi → verify-testpypi → publish-pypi (approval) → github-release
+Purpose: Publish to PyPI and create GitHub Release
+```
+
+Key details:
+- `publish-pypi` uses `environment: release` which requires reviewer approval
+- `github-release` uses `gh release create` with `--generate-notes` for automatic release notes
+- Built artifacts (.whl, .tar.gz) are attached to the GitHub Release
+
+### `cut-release.yml` — Cut Release (Manual Dispatch)
+
+```
+Trigger: workflow_dispatch (manual button in Actions tab)
+Input:   version — semver string (e.g. 0.2.0)
+Flow:    validate → create release branch → update CHANGELOG → push → open PR
+Purpose: One-click release branch creation from develop
+```
+
+Key details:
+- Creates `release/v<version>` branch from `develop`
+- Automatically updates `CHANGELOG.md` — renames `[Unreleased]` to `[<version>] - <date>` and adds a fresh `[Unreleased]` section
+- Opens a PR from `release/v<version>` → `main` with a checklist
+- The branch push triggers `staging.yml` automatically
+- Fails safely if the branch already exists or CHANGELOG is missing `[Unreleased]`
+- Does NOT auto-tag or auto-publish — tagging remains a manual, intentional step
+
+---
+
+## 12. Release Checklist
+
+Use this checklist when cutting a release:
+
+**Preparation**
+- [ ] All intended features/fixes are merged to `develop`
+- [ ] `CHANGELOG.md` has entries under `[Unreleased]` for all user-visible changes
+- [ ] Tests pass locally: `uv run pytest tests/ -x -q`
+- [ ] Version from setuptools-scm looks correct: `python -m setuptools_scm`
+
+**Staging**
+- [ ] Release branch created via **Cut Release** workflow (or manually)
+- [ ] CHANGELOG automatically updated with version and date
+- [ ] Staging pipeline passes: build + TestPyPI + verify (all 3 green)
+- [ ] PR opened: `release/v0.X.Y` → `main`
+
+**Production**
+- [ ] PR from `release/v0.X.Y` → `main` created and approved
+- [ ] PR merged to `main`
+- [ ] Version tag created and pushed: `v0.X.Y`
+- [ ] Release pipeline runs: build + TestPyPI + verify pass
+- [ ] PyPI publish approved in GitHub Actions
+- [ ] GitHub Release created with artifacts
+- [ ] Published package verified: `pip install agentops-toolkit==0.X.Y`
+
+**Cleanup**
+- [ ] `main` merged back to `develop`
+- [ ] Release branch deleted (remote and local)
+- [ ] `[Unreleased]` section in CHANGELOG ready for new entries
+
+---
+
+## 13. Troubleshooting
+
+### Build Failures
+
+| Problem                                  | Cause                               | Solution                                      |
+| ---------------------------------------- | ----------------------------------- | --------------------------------------------- |
+| `setuptools_scm` can't determine version | Shallow clone (missing git history) | Ensure `fetch-depth: 0` in checkout step      |
+| Version shows `0.0.0` locally            | Not in a git repo or no tags exist  | Run `git tag v0.0.1` to create an initial tag |
+| `ModuleNotFoundError` in tests           | Dependencies not installed          | Run `uv sync --group dev`                     |
+| Tests fail on Windows but pass on Linux  | Path separator issues               | Use `pathlib.Path`, not string concatenation  |
+
+### TestPyPI Issues
+
+| Problem                                       | Cause                            | Solution                                                                                                                |
+| --------------------------------------------- | -------------------------------- | ----------------------------------------------------------------------------------------------------------------------- |
+| Upload fails with 403                         | Invalid or expired token         | Regenerate `TEST_PYPI_TOKEN` and update the GitHub secret                                                               |
+| Upload fails with "already exists"            | Same version previously uploaded | Normal — `skip-existing: true` handles this. If you need a new upload, push another commit to increment the dev version |
+| Install fails with "no matching distribution" | Package not yet indexed          | The verify job retries automatically (5 attempts, 30s apart). If persistent, check TestPyPI status                      |
+| Install fails with dependency errors          | Dependency not on TestPyPI       | Verify `--extra-index-url https://pypi.org/simple/` is present                                                          |
+
+### PyPI Issues
+
+| Problem                                    | Cause                                     | Solution                                                       |
+| ------------------------------------------ | ----------------------------------------- | -------------------------------------------------------------- |
+| Publish step stuck on "Waiting for review" | Normal — requires approval                | A designated reviewer must approve in the Actions UI           |
+| Upload fails with 403                      | Invalid `PYPI_TOKEN`                      | Regenerate the token on pypi.org and update the GitHub secret  |
+| Version already exists on PyPI             | Tag points to an already-released version | PyPI versions are immutable. You must use a new version number |
+
+### Git and Version Issues
+
+| Problem                                     | Cause                          | Solution                                                                                         |
+| ------------------------------------------- | ------------------------------ | ------------------------------------------------------------------------------------------------ |
+| Wrong version in built package              | Tag not on the expected commit | Verify with `git log --oneline --decorate` that the tag is where you expect                      |
+| `pip install -e .` fails                    | `.git` directory missing       | Editable installs need git history for setuptools-scm. Clone the repo, don't just download a zip |
+| Merge conflicts between release and develop | Normal for concurrent work     | Resolve conflicts on the release branch before merging to main                                   |
+
+### Environment and Permissions
+
+| Problem                           | Cause                               | Solution                                                               |
+| --------------------------------- | ----------------------------------- | ---------------------------------------------------------------------- |
+| "Environment not found" error     | GitHub Environment not created      | Create `staging` and `release` environments in Settings → Environments |
+| "Secret not found" error          | Secret not added to the environment | Add secrets to the specific environment, not repository-level secrets  |
+| Reviewer can't approve deployment | Not listed as required reviewer     | Update the environment's required reviewers list                       |
+
+---
+
+## Architecture Diagram
+
+```
+  Feature Development              Staging                    Production Release
+  ─────────────────              ───────                    ──────────────────
+
+  feature/* ──PR──→ develop
+                      │
+                      ├──→ CI (ci.yml)
+                      │    lint + test + coverage
+                      │    + publish-dev → TestPyPI (dev version)
+                      │
+                      └──→ Cut Release (cut-release.yml)
+                           manual dispatch → enter version
+                           │
+                           └──→ release/v0.2.0
+                                │
+                                ├──→ Staging (staging.yml)
+                                │
+                                │    ┌──────────┐
+                                │    │  _build   │
+                                │    │ test+build│
+                                │    └────┬─────┘
+                                │         │
+                                │    ┌────▼────────┐
+                                │    │  TestPyPI    │
+                                │    │  publish     │
+                                │    └────┬────────┘
+                                │         │
+                                │    ┌────▼────────┐
+                                │    │  Verify      │
+                                │    │  install     │
+                                │    └─────────────┘
+                                │
+                                └──PR──→ main ──tag──→ v0.2.0
+                                                          │
+                                                          ├──→ Release (release.yml)
+                                                          │
+                                                          │    ┌──────────┐
+                                                          │    │  _build   │
+                                                          │    └────┬─────┘
+                                                          │         │
+                                                          │    ┌────▼────────┐
+                                                          │    │  TestPyPI    │
+                                                          │    └────┬────────┘
+                                                          │         │
+                                                          │    ┌────▼────────┐
+                                                          │    │  Verify      │
+                                                          │    └────┬────────┘
+                                                          │         │
+                                                          │    ┌────▼────────┐
+                                                          │    │  PyPI       │
+                                                          │    │  (approval) │
+                                                          │    └────┬────────┘
+                                                          │         │
+                                                          │    ┌────▼────────┐
+                                                          │    │  GitHub     │
+                                                          │    │  Release    │
+                                                          │    └────────────┘
+                                                          │
+                                                    main ──merge──→ develop
+```
diff --git a/docs/tutorial-baseline-comparison.md b/docs/tutorial-baseline-comparison.md
new file mode 100644
index 0000000..9c74deb
--- /dev/null
+++ b/docs/tutorial-baseline-comparison.md
@@ -0,0 +1,252 @@
+# Tutorial: Baseline Comparison
+
+This tutorial walks through comparing evaluation runs to catch regressions before they reach production. It covers the mechanics of the compare command, but also explores how comparisons behave differently depending on whether you are evaluating a model deployment directly or an agent — and when each approach makes sense.
+
+## Why compare runs?
+
+Every time you change something — a model deployment, an agent's instructions, a retrieval pipeline, or even the evaluation dataset itself — you risk degrading quality without realizing it. A single evaluation run tells you where you stand *now*. Comparing two runs tells you *what changed* and *whether it got worse*.
+
+This matters most in two situations:
+- **Before merging a PR**: did the change improve the agent, or break it?
+- **After deploying a new model version**: did quality hold, or did it regress?
+
+Without comparison, you're looking at absolute scores and hoping you remember what they were last time. With comparison, you get a structured diff that tells you exactly which metrics moved, which thresholds flipped, and which specific rows started failing.
+
+## Prerequisites
+
+- Python 3.11+
+- `pip install agentops-toolkit`
+- A Foundry project with at least one model deployment (for model-direct) or a deployed agent (for agent evaluation)
+- `az login` or equivalent Azure credentials
+- Two completed evaluation runs, or the willingness to run two evaluations now
+
+## Part 1: Choosing your evaluation target
+
+Before you compare, you need to decide what you're evaluating. AgentOps supports two targets, and they produce meaningfully different results.
+
+### Model-direct (`target: model`)
+
+Sends your dataset prompts straight to a model deployment and evaluates the raw completions. There is no agent layer — no system instructions, no tools, no retrieval. The model sees each prompt in isolation and responds.
+
+This is useful when you want to:
+- Benchmark a model deployment before building an agent on top of it
+- Detect model-level regressions when Azure deploys a new model version
+- Measure raw language capabilities (similarity, coherence, fluency) without agent complexity
+- Establish a quality floor that your agent should at least match
+
+In practice, model-direct evaluations tend to produce **higher similarity scores** because the model responds concisely and closely to the expected answer. There is no agent personality reshaping the response.
+
+Run configuration:
+```yaml
+backend:
+  type: foundry
+  target: model
+  model: gpt-5.1
+```
+
+### Agent (`target: agent`)
+
+Routes each prompt through a deployed Foundry agent. The agent applies its system instructions, may call tools, may consult a knowledge base, and produces a response shaped by its configuration.
+
+This is useful when you want to:
+- Evaluate the full end-to-end behavior your users actually experience
+- Test whether agent instructions and tool configurations work correctly together
+- Catch regressions caused by changes to agent settings, not just the underlying model
+- Measure real latency including agent orchestration overhead
+
+Agent evaluations typically produce **lower similarity scores** than model-direct, even on the same questions. This is expected — the agent adds context, rephrases answers in its own style, and may include extra information from tools. A SimilarityEvaluator score of 5.0 on model-direct might become 3.4 on an agent for the same prompt. That does not necessarily mean the agent is worse; it means the agent is doing its job differently.
+
+Run configuration:
+```yaml
+backend:
+  type: foundry
+  target: agent
+  agent_id: my-agent:1
+  model: gpt-5.1
+```
+
+### When to compare model-direct vs agent
+
+Comparing a model-direct run against an agent run is valid and sometimes valuable. It answers the question: *how much does the agent layer change the output quality?*
+
+Expect to see:
+- **Similarity drops** — the agent rephrases, which lowers textual similarity even when answers are correct
+- **Latency increases** — agent orchestration adds overhead (thread creation, polling, tool calls)
+- **Threshold flips** — thresholds set for model-direct may be too strict for agent responses
+
+If you see a large similarity drop (say, from 5.0 to 1.0), that is worth investigating — the agent may be hallucinating, ignoring the question, or hitting an error in its tool chain. But a moderate drop (5.0 to 3.5) is usually the agent adding its own framing, which is fine.
+
+For ongoing regression detection, compare **like against like**: model-direct against model-direct, or agent against agent. Cross-target comparisons are better suited to diagnosis than to gating a merge.
+
+## Part 2: Running two evaluations
+
+### Step 1: Run the baseline
+
+Pick your target and run:
+
+```bash
+# Model-direct baseline
+agentops eval run -c .agentops/run.yaml
+
+# Or agent baseline
+agentops eval run -c .agentops/run-agent.yaml
+```
+
+This creates a timestamped directory:
+```
+.agentops/results/2026-03-19_100000/
+├── results.json
+├── report.md
+└── backend_metrics.json
+```
+
+The run is also copied to `.agentops/results/latest/`.
+
+### Step 2: Make a change
+
+Now change something you want to evaluate:
+- Update the model deployment version
+- Modify the agent's system instructions
+- Add or remove a tool from the agent
+- Update the evaluation dataset with new test cases
+- Adjust a retrieval pipeline or knowledge base
+
+### Step 3: Run again
+
+```bash
+agentops eval run -c .agentops/run.yaml
+```
+
+You now have two runs under `.agentops/results/`.
+
+## Part 3: Comparing runs
+
+The compare command takes two run identifiers separated by a comma. The first is the baseline, the second is the current run.
+
+```bash
+# By timestamped folder name
+agentops eval compare --runs 2026-03-19_100000,2026-03-19_140000
+
+# Using 'latest' for the current run
+agentops eval compare --runs 2026-03-19_100000,latest
+
+# Write output to a specific directory
+agentops eval compare --runs 2026-03-19_100000,latest -o .agentops/results/my-comparison
+```
+
+Run identifiers can be:
+- **Timestamped folder names** like `2026-03-19_100000` — resolved under `.agentops/results/`
+- **`latest`** — points to the most recent run
+- **Paths** — relative or absolute path to a `results.json` file or a directory containing one
+
+The command produces two files in the current run's output directory (or the `-o` directory):
+- `comparison.json` — structured data for automation
+- `comparison.md` — readable report for humans and PR reviews
+
+### Exit codes
+
+| Code | Meaning |
+|---|---|
+| `0` | No regressions detected — safe to proceed |
+| `2` | Regressions detected — investigate before merging |
+| `1` | Error — bad run ID, missing file, or other problem |
+
+These are the same exit codes used by `agentops eval run`, so CI pipelines handle them consistently.
+
+## Part 4: Reading the comparison report
+
+### How metric direction works
+
+AgentOps figures out whether "up" or "down" is good for each metric by looking at the threshold criteria in your results:
+
+- Metrics with `>=` or `>` thresholds are **higher-is-better** (e.g., SimilarityEvaluator). A decrease is flagged as a regression.
+- Metrics with `<=` or `<` thresholds are **lower-is-better** (e.g., avg_latency_seconds). An increase is flagged as a regression.
+
+This means if your latency drops from 6s to 4s, the comparison correctly reports it as an **improvement**, not a regression.
+
+### The summary section
+
+The summary gives you the quick picture:
+
+```
+Metrics improved: 1
+Metrics regressed: 1
+Thresholds flipped pass→fail: 1
+Items newly failing: 3
+```
+
+If `has_regressions` is true (and exit code is 2), at least one of these is nonzero: metrics regressed, thresholds flipped to fail, or items started failing.
+
+### Metric deltas table
+
+Shows every metric that exists in both runs, with the delta and direction:
+
+```
+| SimilarityEvaluator | 5.00 | 1.80 | -3.20 | -64% | regressed |
+| avg_latency_seconds | 5.69 | 4.59 | -1.10 | -19% | improved  |
+```
+
+### Threshold changes table
+
+Only shows thresholds that **flipped** between runs. A stable threshold (pass→pass or fail→fail) is omitted for clarity.
+
+### Item changes table
+
+Only shows rows that changed pass/fail status. If row 3 was passing in both runs, it is not listed.
+
+## Part 5: Using comparison in CI
+
+A typical GitHub Actions pattern:
+
+```yaml
+- name: Run evaluation
+  run: agentops eval run -o .agentops/results/current
+
+- name: Compare with baseline
+  run: agentops eval compare --runs baseline,current
+  # Exit code 2 fails the job if regressions are detected
+```
+
+### Choosing a baseline strategy
+
+There is no single right way to manage baselines. Pick the one that fits your workflow:
+
+**Committed baseline** — check a `results.json` into your repo under a stable name (e.g., `.agentops/results/baseline/`). Every PR compares against it. Update the baseline when you intentionally accept a quality change. This is simple and predictable, but requires manual baseline updates.
+
+**Artifact-based baseline** — download the baseline `results.json` from a previous CI run's artifacts. Each merge to `main` uploads the current results as the new baseline. This keeps the baseline up to date automatically, but it depends on your CI artifact retention policy.
+
+**Rolling latest** — always compare against the previous run. This catches run-over-run regressions but can miss gradual degradation over many runs.
+
+For most teams, the committed baseline approach works well. It acts as a quality contract: merge only if you match or exceed the baseline.
+
+## Part 6: Investigating regressions
+
+When the comparison says regressions were detected, work through these steps:
+
+1. **Read `comparison.md`** — start with the summary. How many metrics regressed? How many thresholds flipped? How many items are newly failing?
+
+2. **Check concentration** — if 1 out of 50 items regressed, that might be a dataset edge case. If 40 out of 50 regressed, something fundamental changed.
+
+3. **Identify the variable** — what changed between the two runs? Only one thing should change at a time. If you changed the model *and* the dataset *and* the agent instructions simultaneously, you cannot attribute the regression to any single cause.
+
+4. **Look at the actual responses** — read `backend.stdout.log` in the run output directory. It shows the expected and predicted text for each row. Often the root cause is obvious when you see the actual model/agent output.
+
+5. **Rerun with the previous configuration** — if you suspect the model deployment changed, rerun the baseline dataset against the current deployment. If scores still drop, the model is the cause. If scores hold, something else changed.
+
+### Typical regression patterns
+
+**Across-the-board similarity drop** — usually means the model deployment was updated or the agent's system instructions changed in a way that alters response style. Check whether the answers are still *correct* even if they are less *similar* to the expected text.
+
+**A few rows regressed, most are fine** — likely dataset-specific. Check whether the failing rows have unusual inputs, edge cases, or ambiguous expected answers.
+
+**Latency increased but quality held** — infrastructure issue, throttling, or the agent is now calling more tools. Check whether new tool calls were added to the agent configuration.
+
+**Threshold was borderline and flipped** — the metric is near the threshold value and normal variance pushed it over. Consider whether the threshold is set too tightly, or whether the metric genuinely degraded.
+
+## Next steps
+
+- [Model-Direct Evaluation Tutorial](tutorial-model-direct.md) — evaluate a model deployment without agents
+- [RAG Evaluation Tutorial](tutorial-rag.md) — evaluate retrieval-augmented responses
+- [Foundry Agent Evaluation Tutorial](tutorial-basic-foundry-agent.md) — evaluate an agent end-to-end
+- [CI/CD Integration Guide](ci-github-actions.md) — set up automated evaluation in pipelines
+
diff --git a/docs/tutorial-basic-foundry-agent.md b/docs/tutorial-basic-foundry-agent.md
index 2fe266d..b2dbc6e 100644
--- a/docs/tutorial-basic-foundry-agent.md
+++ b/docs/tutorial-basic-foundry-agent.md
@@ -1,53 +1,82 @@
-# Tutorial (Basic): Foundry Agent + Similarity Evaluation
+# Tutorial: Foundry Agent Evaluation
 
-Goal: create a **Foundry** QA agent and run a minimal AgentOps evaluation using **SimilarityEvaluator** end-to-end.
+This tutorial evaluates a deployed Foundry agent end-to-end — system instructions, tools, retrieval, and all. You send prompts through the agent the same way a real user would, then measure the quality of the responses.
 
-> **New to AgentOps?** This tutorial uses the **agent** target. If you want to
-> evaluate a model deployment directly (no agent), see the
-> [Model-Direct Tutorial](tutorial-model-direct.md). For RAG evaluation,
-> see the [RAG Tutorial](tutorial-rag.md).
+Agent evaluation is different from model-direct evaluation in important ways. When you evaluate a model directly, you get the raw model output — concise, predictable, and closely matching expected answers. When you evaluate an agent, you get the *agent's* output, which is shaped by its instructions, may include information gathered from tools, and is phrased in the agent's style. This means agent similarity scores are typically lower than model-direct scores, even when the agent is working correctly.
+
+That difference is not a flaw — it is the point. Agent evaluation tells you whether the complete system behaves the way your users will experience it, not just whether the underlying model knows the right answer.
+
+## When to use agent evaluation
+
+Use agent evaluation when you want to:
+
+- **Test the end-to-end experience** your users will actually see, including agent instructions, tool calls, and knowledge base lookups
+- **Catch regressions caused by agent configuration changes** — modified system instructions, added/removed tools, updated knowledge sources
+- **Measure real latency** including the agent orchestration overhead (thread creation, tool execution, polling)
+- **Validate that tools work correctly** — if an agent has a calculator tool, does it actually call it and return the right answer?
+
+### How agent scores differ from model-direct
+
+In our testing, the same QA dataset scored **5.0** on model-direct (perfect similarity) and **3.4** on an agent. The agent was answering correctly, but it was rephrasing answers in its own style, adding context, and sometimes including extra details from its system instructions.
+
+A SimilarityEvaluator score of 3.4 on an agent is not a failure — it means the agent is producing responses that capture the core meaning but differ from the exact expected text. Set your thresholds accordingly. A threshold of ≥ 3 is usually appropriate for agents, while model-direct can sustain ≥ 4, or even a perfect 5, on clean datasets.
+
+If you see agent scores drop to 1.0 on questions that a model-direct run handles at 5.0, that is worth investigating. It usually means the agent's instructions conflict with the question, a tool call failed, or the agent is hallucinating.
+
+### Agent vs model-direct: quick decision guide
+
+| Question | Model-Direct | Agent |
+|---|---|---|
+| What does the raw model do with this prompt? | ✅ | |
+| Is the agent responding correctly to users? | | ✅ |
+| Did a model version change affect quality? | ✅ | ✅ |
+| Did agent instruction changes affect quality? | | ✅ |
+| What is the real latency users experience? | | ✅ |
+| Can I get a fast baseline with no agent setup? | ✅ | |
 
 ## Prerequisites
 
 - Python 3.11+
-- Azure CLI
-- Access to Azure AI Foundry
+- Azure CLI (`az login`)
+- A Foundry project with a deployed agent
+- A model deployment in the same project (used as the judge model for SimilarityEvaluator)
+- `pip install agentops-toolkit`
 
 ## Part 1: Create the agent in Foundry
 
-### 1) Create or open a Foundry project
-
-1. Open `https://ai.azure.com`.
-2. Create a new Foundry project (or open an existing one).
+If you already have an agent, skip to Part 2.
 
-### 2) Create an agent
+### 1) Open the Foundry portal
 
-1. In the project, go to **Build > Agents**.
-2. Click **New agent**.
+Go to `https://ai.azure.com` and open your Foundry project.
 
-### 3) Add agent instructions
+### 2) Create a new agent
 
-Paste the following instructions into the agent configuration:
+Navigate to **Build > Agents** and create a new agent. For this tutorial, a simple QA agent works well:
 
+**System instructions:**
 ```text
 You are a factual question-answering assistant.
 
-Mandatory rules:
+Rules:
 1. Answer short factual questions clearly and directly.
-2. Keep answers concise (one short sentence when possible).
-3. Do not invent facts. If uncertain, say you are not sure.
-4. Do not include markdown lists or extra formatting.
-5. Prefer canonical names and objective wording.
+2. Keep answers concise — one or two sentences when possible.
+3. Do not invent facts. If uncertain, say so.
+4. Do not use markdown formatting in responses.
 ```
 
-### 4) Save and collect values
+Choose a model deployment (e.g., `gpt-5.1`) and save the agent.
+
+### 3) Note the agent identifier
 
-After saving the agent, copy these values from the Foundry project/agent details:
+After saving, you need the agent's identifier for the run config. There are two types:
 
-- **Project endpoint**: `https://<resource>.services.ai.azure.com/api/projects/<project>`
-- **Agent ID**: use the exact value shown in your Foundry agent details.
+- **Named agents** (new Foundry experience): use the agent name, optionally with a version — e.g., `my-agent` or `my-agent:3`
+- **Legacy agents** (`asst_` prefix): use the full ID — e.g., `asst_ftDQySPlKUwcgR1eiXEzUEO5`
 
-## Part 2: Set up AgentOps locally
+AgentOps handles both. Named agents use the Foundry Responses API; legacy agents use the Threads API.
+
+## Part 2: Set up AgentOps
 
 ### 1) Azure login
 
@@ -55,82 +84,39 @@ After saving the agent, copy these values from the Foundry project/agent details
 az login
 ```
 
-### 2) Configure the project endpoint
+### 2) Set the project endpoint
 
 PowerShell:
-
 ```powershell
 $env:AZURE_AI_FOUNDRY_PROJECT_ENDPOINT = "https://<resource>.services.ai.azure.com/api/projects/<project>"
 ```
 
 Bash/zsh:
-
 ```bash
 export AZURE_AI_FOUNDRY_PROJECT_ENDPOINT="https://<resource>.services.ai.azure.com/api/projects/<project>"
 ```
 
-Authentication is passwordless via `DefaultAzureCredential` (local `az login`, or Managed Identity in Azure). Do not use API keys.
-
-> **Minimal setup:** For cloud evaluation, the only required environment variable is
-> `AZURE_AI_FOUNDRY_PROJECT_ENDPOINT`. AgentOps automatically defaults the OpenAI
-> API version to `2025-05-01`. For AI-assisted evaluators, explicitly configure a
-> model deployment that exists in your project via `backend.model` or
-> `AZURE_AI_MODEL_DEPLOYMENT_NAME`. No additional OpenAI env vars are needed
-> unless you want to override the defaults.
-
-#### Optional overrides
-
-| Variable | Purpose | Default |
-|---|---|---|
-| `AZURE_AI_MODEL_DEPLOYMENT_NAME` | Set the judge model used by AI-assisted evaluators when `backend.model` is not provided | No project-universal default deployment |
-
-### 3) Initialize AgentOps
+### 3) Initialize the workspace
 
 ```bash
 agentops init
 ```
 
-This creates the `.agentops/` workspace with the following structure:
-
-```
-.agentops/
-├── config.yaml                              # workspace defaults
-├── run.yaml                                 # default model-direct run
-├── run-rag.yaml                             # example run for RAG scenario
-├── run-agent.yaml                           # example run for agent scenario
-├── .gitignore
-├── bundles/
-│   ├── model_direct_baseline.yaml           # Model-Only: SimilarityEvaluator >= 3
-│   ├── rag_retrieval_baseline.yaml          # RAG: GroundednessEvaluator >= 3
-│   └── agent_tools_baseline.yaml            # Agent with Tools (placeholder)
-├── datasets/
-│   ├── smoke-model-direct.yaml              # simple QA definition for model-direct
-│   ├── smoke-rag.yaml                       # QA + context definition for RAG
-│   └── smoke-agent-tools.yaml               # placeholder definition for tools
-├── data/
-│   ├── smoke-model-direct.jsonl             # sample data (5 rows)
-│   ├── smoke-rag.jsonl                      # sample data with context field
-│   └── smoke-agent-tools.jsonl              # sample tool-calling data
-└── results/
-```
-
-If the workspace already exists, existing files are **not** overwritten (use `agentops init --force` to reset).
-
-### 4) Update `.agentops/run-agent.yaml`
+## Part 3: Configure the agent run
 
-For this tutorial, use the `smoke-model-direct.yaml` dataset spec with the agent target. Update `run-agent.yaml` to:
+Open `.agentops/run-agent.yaml` and fill in your agent details:
 
 ```yaml
 version: 1
 bundle:
-  path: bundles/model_direct_baseline.yaml
+  path: bundles/agent_tools_baseline.yaml
 dataset:
-  path: datasets/smoke-model-direct.yaml
+  path: datasets/smoke-agent-tools.yaml
 backend:
   type: foundry
   target: agent
-  agent_id: <your-agent-id>
-  model: <replace-with-your-foundry-model-deployment-name>
+  agent_id: my-agent:1                # ← your agent name or asst_ ID
+  model: gpt-5.1                      # ← used as judge model for evaluators
   project_endpoint_env: AZURE_AI_FOUNDRY_PROJECT_ENDPOINT
   api_version: "2025-05-01"
   poll_interval_seconds: 2
@@ -140,51 +126,112 @@ output:
   write_report: true
 ```
 
-### 5) Verify the sample dataset
+Key differences from model-direct:
+- `target: agent` — routes prompts through the agent instead of calling the model directly
+- `agent_id` — identifies which agent to invoke. Required for agent target.
+- `model` — still needed as the judge model for AI-assisted evaluators like SimilarityEvaluator. This is the model that *evaluates* the agent's responses, not the model the agent uses internally.
 
-`agentops init` already created `.agentops/data/smoke-model-direct.jsonl` with sample data:
+### Why both `agent_id` and `model`?
+
+The `agent_id` determines *what* you are evaluating (the agent). The `model` determines *how* you evaluate it (the judge model that runs SimilarityEvaluator). The judge does not have to be the same deployment the agent itself runs on. In practice, most teams use the agent's own deployment as the judge, but you could use a cheaper model as the judge if cost is a concern.
+
+## Part 4: Review the dataset
+
+The sample dataset at `.agentops/data/smoke-agent-tools.jsonl` contains five prompts designed for an agent with tool capabilities. The first two rows are shown here:
 
 ```jsonl
-{"id":"1","input":"What is the capital of France?","expected":"Paris is the capital of France."}
-{"id":"2","input":"Which planet is known as the Red Planet?","expected":"Mars is known as the Red Planet."}
-{"id":"3","input":"What is the chemical symbol for water?","expected":"The chemical symbol for water is H2O."}
-{"id":"4","input":"Who wrote Romeo and Juliet?","expected":"William Shakespeare wrote Romeo and Juliet."}
-{"id":"5","input":"What is the largest ocean on Earth?","expected":"The Pacific Ocean is the largest ocean on Earth."}
+{"id":"1","input":"What is the weather in Seattle today?","expected":"I'll check the weather for Seattle..."}
+{"id":"2","input":"Convert 100 USD to EUR","expected":"100 USD is approximately 92 EUR..."}
 ```
 
-This tutorial uses `model_direct_baseline`, which applies:
-- `SimilarityEvaluator >= 3` (ordinal scale 1–5)
+These prompts include questions that might trigger tool calls (weather, currency conversion, search). If your agent does not have these tools, it will answer based on its knowledge, which may score lower on similarity. That is expected — the evaluation measures what the agent *actually does*, not what it could do with the right tools.
+
+### Adapting the dataset to your agent
 
-## Part 3: Run evaluation
+For meaningful evaluation, your dataset should match what your agent is designed to do. If your agent is a customer support bot, test it with customer support questions. If it is a code assistant, test it with coding tasks. The smoke dataset is just a starting point.
 
-### 1) Run
+## Part 5: Run the evaluation
 
 ```bash
-agentops eval run --config .agentops/run-agent.yaml
+agentops eval run -c .agentops/run-agent.yaml
 ```
 
-### 2) Check results
+AgentOps will:
+1. Send each prompt to the agent via the Foundry API
+2. Wait for the agent to process the request (including any tool calls)
+3. Collect the agent's response
+4. Run SimilarityEvaluator comparing the response to the expected answer
+5. Measure latency per row
+6. Write results under `.agentops/results/latest/`
+
+### What to expect
+
+Agent evaluations take longer than model-direct because each prompt involves:
+- Thread or session creation
+- Message delivery
+- Agent processing (may include tool calls)
+- Response collection
+
+A 5-row agent evaluation typically takes 30–60 seconds in local mode, compared to 10–20 seconds for model-direct.
 
-- `.agentops/results/latest/results.json`
-- `.agentops/results/latest/report.md`
+### Reading the results
+
+Open `.agentops/results/latest/report.md`. For an agent with the simple QA instructions above, expect:
+
+- **SimilarityEvaluator** around 3–4 (the agent captures meaning but rephrases)
+- **avg_latency_seconds** around 5–15s per row (agent orchestration overhead)
+- Some rows may fail the ≥ 3 threshold if the agent's response diverges significantly
+
+If most rows score 4–5, your agent is working well. If most score 1–2, check the agent's instructions, verify it has access to the right tools, and look at the actual responses in `backend.stdout.log`.
+
+## Part 6: Compare with a baseline
+
+After you change the agent's instructions, add tools, or update the model deployment, run again and compare:
+
+```bash
+agentops eval run -c .agentops/run-agent.yaml
+agentops eval compare --runs <previous-timestamp>,latest
+```
+
+The comparison shows metric deltas, threshold flips, and per-row changes. See the [Baseline Comparison Tutorial](tutorial-baseline-comparison.md) for the full workflow.
+
+### Comparing agent vs model-direct
+
+You can also compare your agent run against a model-direct run on the same dataset:
+
+```bash
+agentops eval compare --runs model-direct-run,agent-run
+```
+
+This tells you how much the agent layer changes the output quality. Expect:
+- **Similarity drops** — the agent rephrases, which is normal
+- **Latency increases** — agent orchestration adds overhead
+- **Possible threshold flips** — thresholds set for model-direct may be too strict for agent responses
+
+This comparison is useful for diagnostics but should not be used as a CI gate. Gate model-direct runs against model-direct baselines, and agent runs against agent baselines.
 
 ## Evaluation scenarios
 
-AgentOps supports three evaluation scenarios:
+AgentOps supports three scenarios, each with a different bundle:
 
-| Scenario | Bundle | Target | Description |
-|---|---|---|---|
-| **Model-Only** | `model_direct_baseline.yaml` | `model` | Direct model calls, SimilarityEvaluator |
-| **RAG** | `rag_retrieval_baseline.yaml` | `agent` | Agent with retrieval, GroundednessEvaluator |
-| **Agent with Tools** | `agent_tools_baseline.yaml` | `agent` | Placeholder for tool-calling agents |
+| Scenario | Bundle | Target | Evaluator | Use case |
+|---|---|---|---|---|
+| **Model-Only** | `model_direct_baseline` | `model` | SimilarityEvaluator | Benchmark raw model quality |
+| **RAG** | `rag_retrieval_baseline` | `agent` | GroundednessEvaluator | Evaluate grounding against context |
+| **Agent with Tools** | `agent_tools_baseline` | `agent` | SimilarityEvaluator | Evaluate full agent behavior |
 
-- [Model-Direct Tutorial](tutorial-model-direct.md) — evaluate a model without an agent
-- [RAG Tutorial](tutorial-rag.md) — evaluate groundedness of RAG responses
+The RAG scenario uses GroundednessEvaluator instead of SimilarityEvaluator because the key question is whether the agent's response is grounded in the retrieved context, not whether it matches a specific expected answer.
 
 ## Notes
 
-- Authentication is automatic via `DefaultAzureCredential`.
-- For local development, `az login` is enough.
-- AgentOps defaults the OpenAI API version to `2025-05-01`.
-- For AI-assisted evaluators, set `backend.model` or `AZURE_AI_MODEL_DEPLOYMENT_NAME` to a deployment that exists in your Foundry project.
-- This tutorial intentionally keeps the flow minimal.
+- **Cloud vs local mode**: By default, AgentOps uses Foundry Cloud Evaluation with the `azure_ai_evaluator` API. Set `AGENTOPS_FOUNDRY_MODE=local` to invoke the agent row-by-row and run evaluators locally (requires `pip install azure-ai-evaluation`).
+- **Authentication**: `DefaultAzureCredential` handles auth automatically. For local dev, use `az login`. For CI, use a managed identity, or a service principal by setting `AZURE_CLIENT_ID`, `AZURE_TENANT_ID`, and `AZURE_CLIENT_SECRET`.
+- **Named vs legacy agents**: Named agents (e.g., `my-agent:3`) use the Responses API. Legacy agents (`asst_*`) use the Threads API. Both work transparently.
+- **Exit codes**: `0` = all thresholds passed, `2` = threshold failures, `1` = error.
+
+## Next steps
+
+- [Model-Direct Tutorial](tutorial-model-direct.md) — evaluate a model without agents
+- [RAG Tutorial](tutorial-rag.md) — evaluate retrieval-augmented responses
+- [Baseline Comparison Tutorial](tutorial-baseline-comparison.md) — compare runs and detect regressions
+- [Copilot Skills Tutorial](tutorial-copilot-skills.md) — install skills for AI-assisted guidance
diff --git a/docs/tutorial-copilot-skills.md b/docs/tutorial-copilot-skills.md
new file mode 100644
index 0000000..ad5345c
--- /dev/null
+++ b/docs/tutorial-copilot-skills.md
@@ -0,0 +1,151 @@
+# Tutorial: Installing AgentOps Copilot Skills
+
+This tutorial explains how to install the AgentOps Copilot skills, what each skill does, and how to verify they are working correctly — including using AgentOps itself to evaluate skill quality.
+
+## Why install skills?
+
+When you ask GitHub Copilot a question about running evaluations or investigating a regression, it does its best with general knowledge. But Copilot does not know the specifics of AgentOps — what commands exist, what flags they accept, what outputs they produce, and which commands are still planned but not implemented.
+
+Skills close that gap. Each skill is a structured document that tells Copilot *exactly* how to help with a particular workflow. After installation, Copilot stops guessing and starts giving accurate, specific guidance grounded in the actual CLI behavior.
+
+The difference is noticeable. Without the skill, Copilot might suggest `agentops monitor dashboard` (which is planned but not implemented). With the skill, Copilot will tell you honestly that monitoring is planned, and pivot to what you *can* do today — inspect `results.json` and `report.md`.
+
+## The three AgentOps skills
+
+| Skill | Purpose | When it activates |
+|---|---|---|
+| `agentops-run-evals` | Walks through the full evaluation workflow from workspace setup to report interpretation. Covers `init`, `eval run`, `report`, and `eval compare`. | You ask about running evaluations, finding configs, or understanding results. |
+| `agentops-investigate-regression` | Guides regression investigation using the comparison command. Structures findings into observations vs hypotheses and ends with actionable next steps. | You mention score drops, threshold failures, comparing runs, or quality degradation. |
+| `agentops-observability-triage` | Provides honest status on what observability features exist today versus what is planned. Redirects to available artifact-based triage instead of pretending monitoring commands exist. | You ask about tracing, monitoring, dashboards, or alerts. |
+
+The skills are complementary. In a typical workflow, `run-evals` helps you get started, `investigate-regression` helps when something goes wrong, and `observability-triage` sets expectations about what is and is not available yet.
+
+## Prerequisites
+
+- VS Code with the GitHub Copilot Chat extension
+- The AgentOps CLI installed: `pip install agentops-toolkit`
+
+The skills reference CLI commands, so Copilot's guidance only works if the CLI is actually available in your environment.
+
+## Installation
+
+### Option 1: Install from GitHub (recommended)
+
+The skills are distributed from the `Azure/agentops` repository, following the same pattern used by other Azure Copilot skills (like the ones in `microsoft/azure-skills`).
+
+In VS Code:
+
+1. Open **Copilot Chat**.
+2. Use the skill install flow and point to this repository:
+   - **Source:** `Azure/agentops`
+   - **Skill path:** `.github/plugins/agentops/skills/`
+3. Select the skills you want to install.
+
+Once installed, the skills appear in `~/.agents/skills/` and a lock file (`~/.agents/.skill-lock.json`) tracks where they came from. Skills are available across all workspaces.
+
+### Option 2: Manual copy
+
+If you prefer to manage skills manually:
+
+**macOS / Linux:**
+```bash
+git clone https://github.com/Azure/agentops.git /tmp/agentops
+cp -r /tmp/agentops/.github/plugins/agentops/skills/* ~/.agents/skills/
+rm -rf /tmp/agentops
+```
+
+**Windows (PowerShell):**
+```powershell
+git clone https://github.com/Azure/agentops.git $env:TEMP\agentops
+Copy-Item -Recurse "$env:TEMP\agentops\.github\plugins\agentops\skills\*" "$env:USERPROFILE\.agents\skills\"
+Remove-Item -Recurse -Force "$env:TEMP\agentops"
+```
+
+### Option 3: Project-scoped installation
+
+If you want the skills available only within a specific repository (useful for teams with different tool versions), copy them into the project:
+
+```bash
+mkdir -p .github/plugins/agentops/skills
+cp -r <agentops-repo>/.github/plugins/agentops/skills/* .github/plugins/agentops/skills/
+```
+
+This way the skills travel with the repo and every contributor gets them automatically.
+
+## Verifying the installation
+
+Check that the skill directories exist:
+
+```bash
+ls ~/.agents/skills/
+# Expected: agentops-run-evals/  agentops-investigate-regression/  agentops-observability-triage/
+```
+
+Each directory should contain a `SKILL.md` file with YAML frontmatter (the `name` and `description` fields that Copilot uses for skill matching).
+
+## Using the skills
+
+You do not need to invoke skills explicitly. Copilot matches your question to the right skill based on trigger phrases in the skill description. Just ask naturally.
+
+### Example: starting an evaluation
+
+> "How do I start running evaluations with AgentOps?"
+
+With the `agentops-run-evals` skill installed, Copilot will respond with the correct sequence: `agentops init` to scaffold the workspace, then `agentops eval run` to execute, then point you to `.agentops/results/latest/` for the outputs. It will not suggest commands that do not exist.
+
+### Example: investigating a regression
+
+> "My evaluation scores dropped after I switched model deployments. What should I do?"
+
+With `agentops-investigate-regression`, Copilot will suggest running `agentops eval compare --runs <baseline>,latest`, then walk you through interpreting the comparison report — which thresholds flipped, which metrics degraded, and whether the issue is broad or concentrated in specific rows. It separates factual observations from hypotheses and ends with concrete next steps.
+
+### Example: asking about monitoring
+
+> "Can I set up monitoring alerts for my evaluation quality?"
+
+With `agentops-observability-triage`, Copilot will tell you directly that `agentops monitor setup`, `dashboard`, and `alert` commands are planned but not yet implemented. Instead of giving wrong instructions, it pivots to what works today: running `agentops eval run` and `agentops report` to generate artifacts, then inspecting `results.json` and `report.md` for triage.
+
+## Updating skills
+
+Pull the latest version from the repository and re-copy:
+
+```bash
+git clone https://github.com/Azure/agentops.git /tmp/agentops
+cp -r /tmp/agentops/.github/plugins/agentops/skills/* ~/.agents/skills/
+rm -rf /tmp/agentops
+```
+
+If you installed via the VS Code skill install flow, the lock file tracks version hashes, so you are prompted to update when the source repo changes.
+
+## Evaluating skill quality with AgentOps
+
+This is an advanced use case, but a natural one: you can use AgentOps to evaluate the quality of its own Copilot skills.
+
+The idea is to create a dataset where each row contains a user question paired with the skill content as context, along with an expected answer that reflects correct guidance. Then SimilarityEvaluator measures whether the model (acting as Copilot) produces responses that align with those expectations.
+
+For example, one row might be:
+- **Input:** *"You are a Copilot assistant with this skill: [run-evals SKILL.md]. User asks: Is agentops eval compare available?"*
+- **Expected:** *"Yes, agentops eval compare --runs is available. You can compare two runs by providing run IDs separated by a comma."*
+
+Run it the same way as any other evaluation:
+
+```bash
+agentops eval run -c .agentops/run-skills.yaml
+```
+
+When we tested this against our three skills, the SimilarityEvaluator scored **4.2 out of 5** — the model consistently produced guidance aligned with what the skills intend.
+
+This approach is valuable when you are actively iterating on skill content. Before and after editing a skill, run the evaluation and compare:
+
+```bash
+agentops eval compare --runs skill-baseline,latest
+```
+
+If the score drops, the skill change may have introduced inaccurate or confusing guidance. This is the same regression-detection pattern used for agents and models, applied to the skills themselves.
+
+## Next steps
+
+- [Baseline Comparison Tutorial](tutorial-baseline-comparison.md) — compare runs and detect regressions
+- [Model-Direct Evaluation Tutorial](tutorial-model-direct.md) — evaluate a model deployment
+- [RAG Evaluation Tutorial](tutorial-rag.md) — evaluate retrieval-augmented responses
+- [CI/CD Integration Guide](ci-github-actions.md) — automate evaluation in pipelines
diff --git a/docs/tutorial-model-direct.md b/docs/tutorial-model-direct.md
index f74750e..d0792ef 100644
--- a/docs/tutorial-model-direct.md
+++ b/docs/tutorial-model-direct.md
@@ -1,12 +1,37 @@
-# Tutorial: Model-Direct Evaluation (No Agent)
+# Tutorial: Model-Direct Evaluation
 
-Goal: evaluate a **model deployment** directly using **SimilarityEvaluator** — no agent, no retrieval, no tools.
+This tutorial runs an evaluation against a model deployment directly — no agent, no retrieval, no tools. The model receives each prompt in isolation and responds. You evaluate those responses using SimilarityEvaluator, which compares the model's answer against an expected reference on an ordinal scale of 1 to 5.
+
+Model-direct evaluation is the simplest starting point. It tells you what the raw model can do before you add the complexity of an agent layer, and it serves as a quality floor for anything you build on top.
+
+## When model-direct makes sense
+
+Use this when you want to:
+
+- **Benchmark a model deployment** before building an agent. If the model itself cannot answer basic QA correctly, no amount of agent instructions will fix that.
+- **Detect model-level regressions** after Azure deploys a new model version or you switch deployments. Run the same dataset, compare results, and see if quality held.
+- **Compare model deployments** side by side. Run the same dataset against `gpt-4o` and `gpt-5.1`, then use `agentops eval compare` to see which scores higher.
+- **Establish a quality baseline** before investing in agent development. If model-direct scores 5.0 on your dataset and your agent scores 3.4, the gap tells you how much the agent layer is reshaping responses.
+
+Model-direct evaluations typically produce the **highest similarity scores** because the model responds concisely and directly. There is no agent personality rewriting the answer, no tool calls injecting extra context, and no system instructions shaping the tone. If your model-direct score is already low, the problem is either the dataset, the model, or the evaluator — not the agent.
+
+### What model-direct does *not* tell you
+
+Model-direct sends isolated prompts with no conversation history, no system instructions, and no memory of prior turns. It cannot evaluate:
+
+- Whether your agent handles multi-turn conversations correctly
+- Whether tool calls execute and return useful results
+- Whether retrieval augmentation improves groundedness
+- Whether the agent's personality and guardrails work as intended
+
+For those, you need agent evaluation. See the [Foundry Agent Tutorial](tutorial-basic-foundry-agent.md).
 
 ## Prerequisites
 
 - Python 3.11+
-- Azure CLI
-- Access to Azure AI Foundry with at least one deployed model in your project
+- Azure CLI (`az login`)
+- A Foundry project with at least one model deployment (e.g., `gpt-4o`, `gpt-5.1`)
+- `pip install agentops-toolkit`
 
 ## Part 1: Set up
 
@@ -16,55 +41,33 @@ Goal: evaluate a **model deployment** directly using **SimilarityEvaluator** —
 az login
 ```
 
-### 2) Configure the project endpoint
+AgentOps uses `DefaultAzureCredential` — no API keys, no manual token management. For local development, `az login` is all you need. In CI, use a service principal or managed identity.
 
-PowerShell:
+### 2) Set the project endpoint
 
+This is the only required environment variable. You can find it in the Foundry portal under your project settings.
+
+PowerShell:
 ```powershell
 $env:AZURE_AI_FOUNDRY_PROJECT_ENDPOINT = "https://<resource>.services.ai.azure.com/api/projects/<project>"
 ```
 
 Bash/zsh:
-
 ```bash
 export AZURE_AI_FOUNDRY_PROJECT_ENDPOINT="https://<resource>.services.ai.azure.com/api/projects/<project>"
 ```
 
-Authentication is passwordless via `DefaultAzureCredential`. No API keys needed.
-
-### 3) Initialize AgentOps
+### 3) Initialize the workspace
 
 ```bash
 agentops init
 ```
 
-This creates the `.agentops/` workspace:
-
-```
-.agentops/
-├── config.yaml
-├── run.yaml                                  # defaults to model-direct scenario
-├── run-rag.yaml                              # example run for RAG scenario
-├── run-agent.yaml                            # example run for agent scenario
-├── .gitignore
-├── bundles/
-│   ├── model_direct_baseline.yaml            # SimilarityEvaluator >= 3
-│   ├── rag_retrieval_baseline.yaml           # GroundednessEvaluator >= 3
-│   └── agent_tools_baseline.yaml             # placeholder (Agent with Tools)
-├── datasets/
-│   ├── smoke-model-direct.yaml               # model-direct dataset definition
-│   ├── smoke-rag.yaml                        # RAG dataset definition
-│   └── smoke-agent-tools.yaml                # tools dataset definition
-├── data/
-│   ├── smoke-model-direct.jsonl              # simple QA dataset for model-direct
-│   ├── smoke-rag.jsonl                       # QA + context for RAG
-│   └── smoke-agent-tools.jsonl               # placeholder dataset for tools
-└── results/
-```
+This creates `.agentops/` with starter configs, bundles, datasets, and sample data. The default `run.yaml` is already configured for model-direct evaluation.
 
 ## Part 2: Configure the run
 
-The default `run.yaml` is already set up for model-direct evaluation:
+Open `.agentops/run.yaml`. The only thing you need to change is the model deployment name:
 
 ```yaml
 version: 1
@@ -75,7 +78,7 @@ dataset:
 backend:
   type: foundry
   target: model
-  model: <replace-with-your-foundry-model-deployment-name>
+  model: gpt-5.1    # ← replace with your actual deployment name
   project_endpoint_env: AZURE_AI_FOUNDRY_PROJECT_ENDPOINT
   api_version: "2025-05-01"
   poll_interval_seconds: 2
@@ -85,65 +88,91 @@ output:
   write_report: true
 ```
 
-Key differences from agent evaluation:
-- `target: model` — calls the model deployment directly (no agent)
-- `model` — the deployment name to use, and it must match a model already deployed in your Foundry project
-- No `agent_id` needed
+The key fields:
+- `target: model` — this is what makes it model-direct (as opposed to `target: agent`)
+- `model` — must match an existing deployment in your Foundry project. AgentOps will fail with a clear error if the deployment does not exist.
+- No `agent_id` — not needed for model-direct
 
-### Update the model name
+### What the bundle evaluates
 
-Replace the placeholder with your actual deployment name, for example:
+The `model_direct_baseline` bundle uses two evaluators:
+- **SimilarityEvaluator** (source: foundry) — AI-assisted comparison of the model's response against the expected answer. Scores 1–5, threshold ≥ 3.
+- **avg_latency_seconds** (source: local) — average response time per row, threshold ≤ 10 seconds.
 
-```yaml
-backend:
-  model: gpt-4o
-```
+## Part 3: Review the dataset
 
-## Part 3: Verify the dataset
-
-`agentops init` already created `.agentops/data/smoke-model-direct.jsonl` with sample data:
+The sample dataset at `.agentops/data/smoke-model-direct.jsonl` contains five simple QA pairs (the first two are shown here):
 
 ```jsonl
 {"id":"1","input":"What is the capital of France?","expected":"Paris is the capital of France."}
 {"id":"2","input":"Which planet is known as the Red Planet?","expected":"Mars is known as the Red Planet."}
-{"id":"3","input":"What is the chemical symbol for water?","expected":"The chemical symbol for water is H2O."}
-{"id":"4","input":"Who wrote Romeo and Juliet?","expected":"William Shakespeare wrote Romeo and Juliet."}
-{"id":"5","input":"What is the largest ocean on Earth?","expected":"The Pacific Ocean is the largest ocean on Earth."}
 ```
 
 Each row has:
-- `input` — the prompt sent directly to the model
-- `expected` — the reference answer for similarity comparison
+- `input` — the prompt sent to the model
+- `expected` — the reference answer that SimilarityEvaluator compares against
+
+For model-direct evaluation, these prompts are sent raw with no system instructions. The model sees only the `input` text. This is intentional — it isolates the model's capability from any agent configuration.
+
+### Writing your own dataset
+
+When you create your own dataset, write the expected answers in the same style as the model's responses. If the model tends to start with "The answer is..." but your expected answers are terse one-word responses, SimilarityEvaluator will penalize the style mismatch even though the content is correct. Match the level of detail you expect from the model.
+
+## Part 4: Run the evaluation
+
+```bash
+agentops eval run
+```
+
+By default this uses `.agentops/run.yaml`. If you want to point to a different config:
+
+```bash
+agentops eval run -c .agentops/run.yaml
+```
+
+AgentOps will:
+1. Send each `input` to the model deployment via the Foundry Cloud Evaluation API
+2. Run SimilarityEvaluator on each response against the `expected` answer
+3. Check thresholds: SimilarityEvaluator ≥ 3 and avg_latency_seconds ≤ 10
+4. Write `results.json` and `report.md` under `.agentops/results/latest/`
+
+### Understanding the output
+
+Open `.agentops/results/latest/report.md` for the human-readable summary. You will see:
+
+- **Overall status** — PASS or FAIL based on all thresholds
+- **Metrics** — aggregate SimilarityEvaluator score and average latency
+- **Item verdicts** — per-row pass/fail showing which specific questions the model handled well or poorly
+- **Threshold checks** — which thresholds passed and which failed, with item counts
+
+A SimilarityEvaluator score of 5.0 means the model's response is semantically equivalent to the expected answer. Scores of 3–4 mean the response captures the core meaning but may differ in phrasing or detail. Below 3 indicates a meaningful divergence — the model may have missed the point, hallucinated, or provided an unrelated answer.
+
+## Part 5: Compare against a future run
 
-## Part 4: Run evaluation
+After you change model deployments, update the dataset, or modify any configuration, run the evaluation again and compare:
 
 ```bash
 agentops eval run
+agentops eval compare --runs <previous-timestamp>,latest
 ```
 
-This will:
-1. Send each `input` directly to the model deployment
-2. Evaluate response quality with `SimilarityEvaluator` (ordinal scale 1–5)
-3. Check the threshold: `SimilarityEvaluator >= 3`
+The comparison report shows exactly what changed — which metrics moved, which thresholds flipped, and which rows started failing. See the [Baseline Comparison Tutorial](tutorial-baseline-comparison.md) for the full workflow.
 
-### Check results
+## Transitioning to agent evaluation
 
-- `.agentops/results/latest/results.json` — machine-readable results
-- `.agentops/results/latest/report.md` — human-readable summary
+Once you are satisfied with model-direct quality, the next step is usually to build an agent and evaluate it. The transition is straightforward:
 
-## When to use Model-Direct
+1. Create an agent in the Foundry portal with system instructions and (optionally) tools
+2. Copy `run.yaml` to a new file, change `target: model` to `target: agent`, and add the `agent_id`
+3. Run the same dataset through the agent
+4. Compare model-direct vs agent results with `agentops eval compare`
 
-Use this scenario when you want to:
-- Evaluate a model deployment without any agent orchestration
-- Benchmark raw model quality on QA tasks
-- Compare different model deployments on the same dataset
-- Run quick smoke tests on model responses
+Expect similarity scores to drop somewhat — the agent rephrases answers in its own style and may add contextual information. A drop from 5.0 to 3.5 is typical and usually acceptable. A drop to 1.0 suggests the agent is not functioning correctly.
 
-For RAG evaluation (with retrieval context), see the [RAG Tutorial](tutorial-rag.md).
+See the [Foundry Agent Tutorial](tutorial-basic-foundry-agent.md) for the full guide.
 
 ## Notes
 
-- Authentication is automatic via `DefaultAzureCredential`.
-- For local development, `az login` is enough.
-- Replace the placeholder in `backend.model` with a deployment name that already exists in your Foundry project.
-- If you configure AI-assisted evaluators separately, you can also set `AZURE_AI_MODEL_DEPLOYMENT_NAME` to a deployment that exists in the same project.
+- Cloud evaluation (default mode) runs the model and evaluators server-side in Foundry. Results appear in the Foundry portal under **Build > Evaluations**.
+- Set `AGENTOPS_FOUNDRY_MODE=local` to run evaluators locally instead of via the cloud API. This requires `pip install azure-ai-evaluation`.
+- Exit codes: `0` = all thresholds passed, `2` = one or more thresholds failed, `1` = error.
diff --git a/plugins/agentops/skills/agentops-investigate-regression/SKILL.md b/plugins/agentops/skills/agentops-investigate-regression/SKILL.md
new file mode 100644
index 0000000..32f05a5
--- /dev/null
+++ b/plugins/agentops/skills/agentops-investigate-regression/SKILL.md
@@ -0,0 +1,107 @@
+---
+name: agentops-investigate-regression
+description: Help users investigate evaluation regressions in AgentOps by comparing runs, analyzing row-level scores, and identifying root causes. Trigger when users say "regression", "score dropped", "threshold failed", "compare runs", "why did this eval get worse", "which rows failed", "debug evaluation", "quality degradation". Install agentops-toolkit via pip. Commands are agentops eval run, agentops eval compare, and agentops report.
+---
+
+# AgentOps Investigate Regression
+
+> **Prerequisite:** Install the AgentOps CLI with `pip install agentops-toolkit`.
+
+## Purpose
+Guide users through regression investigation using N-run comparison, row-level score analysis, and structured root cause identification.
+
+## When to Use
+- User reports lower scores versus previous runs.
+- User reports new threshold failures (PASS → FAIL).
+- User asks to compare current and prior evaluation outcomes.
+- CI gating changed from pass to fail and root cause is unclear.
+- User asks which specific rows or questions are failing.
+
+## Available Commands
+
+```bash
+agentops eval run [-c <config>] [-f md|html|all]                    # Generate fresh results
+agentops report [--in <results.json>] [-f md|html|all]              # Regenerate report
+agentops eval compare --runs <id1>,<id2>[,...] [-f md|html|all]      # Compare N runs
+```
+
+Run identifiers for `--runs` can be:
+- Timestamped folder names (e.g. `2026-03-01_100000`)
+- The keyword `latest`
+- Absolute or relative paths to a `results.json` or a run directory
+
+## Investigation Workflow
+
+1. **Reproduce:** `agentops eval run -f html` to get fresh results with visual report.
+2. **Compare:** `agentops eval compare --runs <baseline>,latest -f html`
+3. **Check the verdict:** NO REGRESSIONS vs REGRESSIONS DETECTED
+4. **Read run config:** Check Status row — `FAIL (60% · 3/5)` means 3 of 5 rows passed, so exactly 2 rows failed.
+5. **Read Evaluators table:**
+   - ● green dot = Met threshold, ● red dot = Missed
+   - ↑ improved / ↓ regressed vs baseline
+   - `(3/5)` = row pass rate for this evaluator
+6. **Drill into Row Details:** Find exactly which rows scored below threshold and why.
+7. **Act:** Fix the identified issues (prompt tuning, dataset quality, model selection).
+
+## Understanding the Report
+
+### What REGRESSIONS DETECTED means
+A regression is detected ONLY when at least one of the following occurs:
+- A run's overall status flips from **PASS to FAIL** vs baseline
+- A previously-passing **row** now fails
+
+A minor numeric decrease (e.g., latency 4.84s → 6.00s) that stays within the threshold (≤ 10s) is **NOT** a regression. The verdict focuses on threshold-breaking changes, not noise.
+
+### Comparison types
+The report auto-detects what's being compared:
+- **Model Comparison** — same dataset, different models → full row-level analysis valid
+- **Agent Comparison** — same dataset, different agents → full row-level analysis valid
+- **Dataset Coverage** — different datasets → row details skipped (rows aren't comparable)
+- **General** — multiple things vary
+
+### Evaluators table
+Each cell shows: `● score ↑ delta (n/n rows)`
+- **● dot** = Met (green) or Missed (red) vs the absolute threshold target
+- **↑↓ delta** = direction vs baseline run (improved/regressed/unchanged)
+- **(n/n)** = how many rows met the threshold out of total
+- **Green highlight** = best score across all runs
+- Metrics without thresholds (like `samples_evaluated`) show as plain informational numbers
+
+### Row Details table
+Each cell shows per-evaluator scores: `● SimilarityEvaluator: 2`
+- Green ● = this row met the threshold
+- Red ● = this row missed — **this is why the run failed**
+
+### Status
+- `PASS (100% · 5/5)` = all rows met all thresholds
+- `FAIL (60% · 3/5)` = 3 of 5 rows passed, 2 failed → the specific rows that failed explain the FAIL
+
+## Root Cause Checklist
+When you find regressions:
+
+1. **Which rows failed?** → Check Row Details for red ● dots
+2. **Which evaluator failed?** → The evaluator with red dots tells you what's weak
+3. **Is it the model?** → Compare same dataset across models to isolate
+4. **Is it the dataset?** → Some questions are inherently harder (real-time, ambiguous)
+5. **Is it the agent instructions?** → Compare agent versions on same dataset
+6. **Is it random variance?** → Run the same config 2-3 times and compare
+
+## Guardrails
+- Do not infer causality from correlation alone.
+- Separate observations (data from artifacts) from hypotheses (plausible causes).
+- Keep remediation advice tied to reproducible checks.
+- When comparing runs with different datasets, do NOT analyze row-level changes — they're different questions.
+
+## Examples
+- "My eval went from PASS to FAIL after changing model"
+  → `agentops eval compare --runs <old>,<new> -f html`. Check Evaluators for ↓ regressed metrics and Row Details for newly-failing rows.
+- "Which specific questions are failing?"
+  → Open the HTML report, scroll to Row Details — each row shows the actual score per evaluator with ● Met/Missed.
+- "Is gpt-4.1 better than gpt-5.1 for my use case?"
+  → Create two run.yaml files (same dataset, different model), run both, compare. The Evaluators table with row pass rates tells you which model handles your questions better.
+- "Why is CI failing now?"
+  → `agentops eval compare --runs <last_pass>,latest -f html`. The Status line shows `FAIL (80% · 4/5)` — one row regressed. Row Details shows which.
+
+## Learn More
+- Documentation: https://github.com/Azure/agentops
+- PyPI: https://pypi.org/project/agentops-toolkit/
diff --git a/plugins/agentops/skills/agentops-observability-triage/SKILL.md b/plugins/agentops/skills/agentops-observability-triage/SKILL.md
new file mode 100644
index 0000000..451d13d
--- /dev/null
+++ b/plugins/agentops/skills/agentops-observability-triage/SKILL.md
@@ -0,0 +1,113 @@
+---
+name: agentops-observability-triage
+description: Guide users on observability and triage workflows for AgentOps evaluations. Trigger when users ask about tracing, monitoring, dashboards, alerts, run health, production triage, or understanding evaluation outputs. Common phrases include "set up tracing", "monitor evals", "create alerts", "triage failed evaluations", "observability", "understand eval results", "what do these scores mean". Install agentops-toolkit via pip. Tracing and monitoring commands are planned for a future release.
+---
+
+# AgentOps Observability Triage
+
+> **Prerequisite:** Install the AgentOps CLI with `pip install agentops-toolkit`.
+
+## Purpose
+Provide practical observability guidance using current reporting artifacts. Frame tracing/monitoring as planned future features while showing what's available today — including HTML reports with visual indicators and N-run comparison dashboards.
+
+## When to Use
+- User asks how to monitor ongoing evaluation quality.
+- User asks for tracing, dashboards, or alerts.
+- User needs triage steps after an unexpected evaluation outcome.
+- User asks what the evaluation scores and indicators mean.
+
+## Available Commands
+
+```bash
+agentops eval run [-c <config>] [-f md|html|all]                    # Generate results
+agentops report [--in <results.json>] [-f md|html|all]              # Regenerate report
+agentops eval compare --runs <id1>,<id2>[,...] [-f md|html|all]      # Compare N runs
+```
+
+## Planned Commands (Not Yet Available)
+
+```bash
+agentops trace init             # Initialize tracing
+agentops monitor setup          # Set up monitoring
+agentops monitor dashboard      # Configure dashboards
+agentops monitor alert          # Configure alerts
+```
+
+## Triage Workflow
+
+### Quick triage (single run)
+1. `agentops eval run -f html` — run and generate HTML report
+2. Open `report.html` — check overall status, threshold checks, item verdicts
+3. If FAIL: look at which evaluator thresholds were missed
+
+### Deep triage (comparison)
+1. `agentops eval compare --runs <baseline>,latest -f html`
+2. Open `comparison.html` — visual dashboard with:
+   - **Status**: `PASS (100% · 5/5)` or `FAIL (60% · 3/5)` — immediate pass rate
+   - **Evaluators**: ● dots (Met/Missed), ↑↓ arrows (direction vs baseline), (n/n) row rates
+   - **Row Details**: per-row scores showing exactly which questions failed
+3. Check if regression is real (threshold flip) or noise (minor shift within threshold)
+
+### Multi-run trending
+1. Run the same config multiple times over days/weeks
+2. Compare all: `agentops eval compare --runs <oldest>,<middle>,<latest> -f html`
+3. The Evaluators table shows trend direction for each metric across all runs
+
+### Model selection
+1. Create run configs for each candidate model (same dataset + bundle)
+2. Run each: `agentops eval run -c <model-config> -f html`
+3. Compare: `agentops eval compare --runs <model1>,<model2>,<model3> -f html`
+4. Report auto-detects "Model Comparison" and shows side-by-side with best highlighting
+5. Pick the model that meets thresholds at the best quality/latency/cost ratio
+
+## Understanding Report Indicators
+
+### HTML visual indicators
+- **● green dot** — evaluator score Met the threshold target
+- **● red dot** — evaluator score Missed the threshold target
+- **↑ green arrow** — score improved vs baseline
+- **↓ red arrow** — score regressed vs baseline
+- **→ gray arrow** — unchanged
+- **Green highlighted cell** — best score across all compared runs
+- **(3/5)** — 3 out of 5 rows met this evaluator's threshold
+- **Muted gray text** — informational metric (no threshold, e.g., samples_evaluated)
+
+### Status
+- `PASS (100% · 5/5)` — all 5 rows met all thresholds
+- `FAIL (80% · 4/5)` — 4 of 5 rows passed, 1 failed
+- PASS = all row thresholds met · FAIL = one or more rows missed
+
+### Verdict
+- **NO REGRESSIONS** — no run's status flipped PASS→FAIL and no previously-passing row now fails vs baseline
+- **REGRESSIONS DETECTED** — at least one run has newly-failing rows or status flipped
+
+### Comparison types (auto-detected)
+- **Model Comparison** — comparing different models on same dataset
+- **Agent Comparison** — comparing different agents on same dataset
+- **Dataset Coverage** — testing same model/agent on different datasets
+- **General** — multiple parameters vary
+
+## Report Formats
+- `-f md` — Markdown (default), good for PRs and CI logs
+- `-f html` — professional visual dashboard, best for analysis
+- `-f all` — generates both
+
+## Guardrails
+- Do not present tracing or monitoring commands as available today.
+- Do not imply real-time dashboards or alerts currently exist.
+- Always pivot to concrete available outputs when asked about unimplemented features.
+- The HTML report IS the current dashboard — it's self-contained, no server needed.
+
+## Examples
+- "How do I set up tracing?"
+  → Tracing (`agentops trace init`) is planned. For now, use `-f html` to generate visual reports with per-row score breakdowns.
+- "Can I monitor eval quality over time?"
+  → Run evals periodically and compare: `agentops eval compare --runs <old>,<mid>,<new> -f html`. The trend arrows show quality direction.
+- "What does FAIL (80% · 4/5) mean?"
+  → 4 of 5 dataset rows met all evaluator thresholds, 1 row missed. Check Row Details to see which row and which evaluator scored below target.
+- "What do the colored dots mean?"
+  → Green ● = score met the threshold target, Red ● = missed. In the Evaluators table, this is the aggregate score; in Row Details, it's per-row.
+
+## Learn More
+- Documentation: https://github.com/Azure/agentops
+- PyPI: https://pypi.org/project/agentops-toolkit/
diff --git a/plugins/agentops/skills/agentops-run-evals/SKILL.md b/plugins/agentops/skills/agentops-run-evals/SKILL.md
new file mode 100644
index 0000000..64340e9
--- /dev/null
+++ b/plugins/agentops/skills/agentops-run-evals/SKILL.md
@@ -0,0 +1,143 @@
+---
+name: agentops-run-evals
+description: Guide users through running AgentOps evaluations end to end — single runs, multi-model benchmarks, and N-run comparisons. Trigger when users ask to initialize AgentOps, run an evaluation, compare runs, benchmark models, regenerate a report, or summarize results. Common phrases include "run eval", "start agentops", "compare models", "benchmark agents", "run.yaml", "report", "evaluation results", "which model is best". Install agentops-toolkit via pip. Commands are agentops init, agentops eval run, agentops eval compare, and agentops report.
+---
+
+# AgentOps Run Evaluations
+
+> **Prerequisite:** Install the AgentOps CLI with `pip install agentops-toolkit`.
+
+## Purpose
+Guide users through the full AgentOps evaluation workflow — workspace setup, running evaluations, comparing N runs, benchmarking models/agents, and interpreting reports.
+
+## When to Use
+- User wants to start using AgentOps in a project.
+- User asks how to run an evaluation with `run.yaml`.
+- User wants to compare evaluation runs (2 or more).
+- User wants to benchmark multiple models or agents on the same dataset.
+- User asks how to regenerate reports or choose report format.
+- User asks where evaluation outputs are written.
+
+## Available Commands
+
+```bash
+pip install agentops-toolkit                          # Install the CLI
+agentops init [--path <dir>]                          # Scaffold workspace
+agentops eval run [-c <run.yaml>] [-f md|html|all]    # Run evaluation
+agentops report [--in <results.json>] [-f md|html|all] # Regenerate report
+agentops eval compare --runs <id1>,<id2>[,<id3>,...] [-f md|html|all]  # Compare N runs
+```
+
+### Key flags
+- `-c / --config` — path to run.yaml (default: `.agentops/run.yaml`)
+- `-f / --format` — report format: `md` (default), `html`, or `all`
+- `-o / --output` — output directory override
+- `--runs` — comma-separated run IDs (timestamps, `latest`, or paths)
+
+## Recommended Workflow
+
+### Single evaluation
+1. `agentops init` — scaffold `.agentops/` workspace
+2. Edit `.agentops/run.yaml` with bundle, dataset, and backend settings
+3. Set env: `$env:AZURE_AI_FOUNDRY_PROJECT_ENDPOINT = "https://..."`
+4. `agentops eval run` — run evaluation
+5. Check `.agentops/results/latest/results.json` and `report.md`
+
+### Multi-model benchmark
+1. Create one run.yaml per model (same dataset + bundle, different `model:`):
+   ```yaml
+   # run-gpt51.yaml — run-gpt41.yaml is identical except for `model: gpt-4.1`
+   backend:
+     type: foundry
+     target: model
+     model: gpt-5.1
+   ```
+2. Run each: `agentops eval run -c .agentops/run-gpt51.yaml -f html`
+3. Compare all: `agentops eval compare --runs <id1>,<id2>,<id3> -f html`
+4. Open the HTML report — shows side-by-side scores, ● Met/Missed dots, ↑↓ direction arrows, row pass rates, and best-run highlighting
+
+### Multi-agent comparison
+Same approach — create one run.yaml per agent version:
+```yaml
+backend:
+  type: foundry
+  target: agent
+  agent_id: my-agent:1    # or my-agent:2, my-agent:3
+```
+
+## Report Formats
+- **`md`** (default) — Markdown, suitable for PRs and CI logs
+- **`html`** — professional dashboard with visual indicators (● dots, ↑↓ arrows, color-coded badges, best highlighting)
+- **`all`** — generates both
+
+## Comparison Report Sections
+The comparison report contains:
+
+1. **Header** — verdict (NO REGRESSIONS / REGRESSIONS DETECTED), comparison type, varying parameter
+2. **Run Config** — identity fields (Target, Model, Agent) + Status with pass rate (e.g., `PASS (100% · 5/5)`)
+3. **Evaluators** — unified table showing per-evaluator:
+   - Target threshold (e.g., `>= 3`)
+   - Score per run with ● green/red dot (Met/Missed vs target)
+   - Delta + ↑↓ direction vs baseline (improved/regressed/unchanged)
+   - Row pass rate (e.g., `(4/5)`)
+   - Best run highlighted with green background
+   - Informational metrics (like `samples_evaluated`) shown as plain numbers
+4. **Row Details** — per-row evaluator scores with ● dots (only when same dataset across runs)
+5. **Fixed Parameters** — reference config info at bottom
+
+## Comparison Types (auto-detected)
+- **Model Comparison** — same dataset, model varies
+- **Agent Comparison** — same dataset, agent varies
+- **Dataset Coverage** — same agent/model, dataset varies (row details skipped)
+- **General Comparison** — multiple things vary
+
+## Regression Detection
+A regression is detected ONLY when at least one of the following occurs:
+- A run's overall status flips from PASS to FAIL vs baseline
+- A previously-passing row now fails
+
+Minor numeric shifts within passing thresholds are NOT regressions.
+
+## Evaluation Terminology
+- **Met** / **Missed** — evaluator score vs absolute threshold target
+- **improved** / **regressed** / **unchanged** — score direction vs baseline run
+- **PASS** / **FAIL** — overall run status (PASS = all row thresholds met, FAIL = any row missed)
+
+## Exit Codes
+- `0` — succeeded and all thresholds passed (eval run) / no regressions (compare)
+- `2` — thresholds failed (eval run) / regressions detected (compare)
+- `1` — runtime or configuration error
+
+## Expected Outputs
+- `results.json` — machine-readable normalized results
+- `report.md` / `report.html` — human-readable report (per format flag)
+- `cloud_evaluation.json` — Foundry portal URL (cloud eval only)
+- `comparison.json` + `comparison.md` / `comparison.html` — comparison outputs
+
+## Environment Setup
+```powershell
+# Required for Foundry backend
+$env:AZURE_AI_FOUNDRY_PROJECT_ENDPOINT = "https://<account>.services.ai.azure.com/api/projects/<project>"
+
+# Authentication
+az login  # local development
+# CI/CD: set AZURE_CLIENT_ID, AZURE_TENANT_ID, AZURE_CLIENT_SECRET
+```
+
+## Guardrails
+- Do not invent commands or flags beyond documented CLI behavior.
+- Planned commands (`run list`, `bundle show`, `trace init`, `monitor`) are NOT implemented — state they are planned.
+- The `--format` flag accepts only `md`, `html`, or `all`.
+- When comparing runs with different datasets, row-level comparison is not meaningful — the report handles this automatically.
+
+## Examples
+- "Compare 3 models on the same dataset"
+  → Create 3 run.yaml files (one per model), run each with `agentops eval run -c <config> -f html`, then `agentops eval compare --runs <id1>,<id2>,<id3> -f html`
+- "Which model should I use?"
+  → Run multi-model benchmark, check Evaluators table for best scores and latency, pick the model that meets thresholds at lowest cost
+- "Why did my eval fail?"
+  → Check the Row Details section — it shows per-row scores with ● Met/Missed so you can see exactly which rows scored below threshold
+
+## Learn More
+- Documentation: https://github.com/Azure/agentops
+- PyPI: https://pypi.org/project/agentops-toolkit/
diff --git a/pyproject.toml b/pyproject.toml
index 75026e4..ea203e8 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,10 +1,10 @@
 [build-system]
-requires = ["setuptools>=68", "wheel"]
+requires = ["setuptools>=68", "wheel", "setuptools-scm>=8"]
 build-backend = "setuptools.build_meta"
 
 [project]
 name = "agentops-toolkit"
-version = "0.1.2"
+dynamic = ["version"]
 description = "AgentOps CLI for standardized evaluation workflows"
 readme = "README.md"
 requires-python = ">=3.11"
@@ -12,6 +12,7 @@ dependencies = [
   "typer>=0.12,<1.0",
   "pydantic>=2,<3",
   "ruamel.yaml>=0.18,<1.0",
+  "azure-ai-projects>=2.0.1",
 ]
 license = { file = "LICENSE" }
 
@@ -40,8 +41,12 @@ where = ["src"]
 [dependency-groups]
 dev = [
     "mypy>=1.19.1",
+    "pre-commit>=4.0",
     "pytest>=8.0",
     "pytest-asyncio>=0.24",
     "pytest-cov>=5.0",
     "ruff>=0.9",
 ]
+
+[tool.setuptools_scm]
+local_scheme = "no-local-version"
diff --git a/src/agentops/__main__.py b/src/agentops/__main__.py
index 71c8f5a..08aec35 100644
--- a/src/agentops/__main__.py
+++ b/src/agentops/__main__.py
@@ -1,4 +1,5 @@
 """Entrypoint for `python -m agentops`."""
+
 from agentops.cli.app import app
 
 if __name__ == "__main__":
diff --git a/src/agentops/backends/base.py b/src/agentops/backends/base.py
index d81f8b5..2822b4d 100644
--- a/src/agentops/backends/base.py
+++ b/src/agentops/backends/base.py
@@ -1,4 +1,5 @@
 """Backend protocol and shared execution models."""
+
 from __future__ import annotations
 
 from dataclasses import dataclass
diff --git a/src/agentops/backends/foundry_backend.py b/src/agentops/backends/foundry_backend.py
index 9c7e06d..e64e374 100644
--- a/src/agentops/backends/foundry_backend.py
+++ b/src/agentops/backends/foundry_backend.py
@@ -108,32 +108,45 @@ def _parse_agent_name_version(agent_id: str) -> tuple[str, str | None]:
     return agent_id.strip(), None
 
 
-_NLP_ONLY_EVALUATORS = frozenset({
-    "f1_score",
-    "bleu",
-    "rouge",
-    "meteor",
-    "gleu",
-})
-
-_EVALUATORS_NEEDING_GROUND_TRUTH = frozenset({
-    "similarity",
-    "f1_score",
-    "bleu",
-    "rouge",
-    "meteor",
-    "gleu",
-})
-
-_EVALUATORS_NEEDING_CONTEXT = frozenset({
-    "groundedness",
-})
+_NLP_ONLY_EVALUATORS = frozenset(
+    {
+        "f1_score",
+        "bleu",
+        "rouge",
+        "meteor",
+        "gleu",
+    }
+)
+
+_EVALUATORS_NEEDING_GROUND_TRUTH = frozenset(
+    {
+        "similarity",
+        "f1_score",
+        "bleu",
+        "rouge",
+        "meteor",
+        "gleu",
+    }
+)
+
+_EVALUATORS_NEEDING_CONTEXT = frozenset(
+    {
+        "groundedness",
+    }
+)
+
+_EVALUATORS_NEEDING_TOOL_CALLS = frozenset(
+    {
+        "tool_call_accuracy",
+    }
+)
 
 
 def _cloud_evaluator_data_mapping(
     builtin_name: str,
     input_field: str,
     expected_field: str,
+    context_field: str | None = None,
 ) -> Dict[str, str]:
     """Build ``data_mapping`` for an ``azure_ai_evaluator`` testing criterion."""
     item_input = "{{item." + input_field + "}}"
@@ -147,7 +160,13 @@ def _cloud_evaluator_data_mapping(
     if builtin_name in _EVALUATORS_NEEDING_GROUND_TRUTH:
         mapping["ground_truth"] = item_expected
     elif builtin_name in _EVALUATORS_NEEDING_CONTEXT:
-        mapping["context"] = item_expected
+        # Use the dedicated context column when declared in dataset format;
+        # fall back to expected_field only when no context_field is configured.
+        context_item = "{{item." + (context_field or expected_field) + "}}"
+        mapping["context"] = context_item
+    elif builtin_name in _EVALUATORS_NEEDING_TOOL_CALLS:
+        mapping["tool_calls"] = "{{sample.tool_calls}}"
+        mapping["tool_definitions"] = "{{item.tool_definitions}}"
     return mapping
 
 
@@ -398,7 +417,22 @@ def _default_foundry_input_mapping(name: str) -> Dict[str, str]:
         return {
             "query": "$prompt",
             "response": "$prediction",
-            "context": "$expected",
+            # Use the dedicated 'context' row field (retrieved documents).
+            # Override via evaluators[].config.input_mapping in the bundle
+            # if your dataset column has a different name.
+            "context": "$row.context",
+        }
+    if name == "TaskCompletionEvaluator":
+        return {
+            "query": "$prompt",
+            "response": "$prediction",
+        }
+    if name == "ToolCallAccuracyEvaluator":
+        return {
+            "query": "$prompt",
+            "response": "$prediction",
+            "tool_calls": "$row.tool_calls",
+            "tool_definitions": "$row.tool_definitions",
         }
     return {}
 
@@ -1068,9 +1102,9 @@ def _invoke_model_direct(self, settings: FoundrySettings, prompt: str) -> str:
             from azure.identity import DefaultAzureCredential  # noqa: WPS433
         except ImportError as exc:
             raise ImportError(
-                "Model-direct evaluation requires 'azure-ai-projects>=2.0.0b1' "
+                "Model-direct evaluation requires 'azure-ai-projects>=2.0.1' "
                 "and 'azure-identity'. "
-                "Install with: pip install 'azure-ai-projects>=2.0.0b1' azure-identity openai"
+                "Install with: pip install 'azure-ai-projects>=2.0.1' azure-identity openai"
             ) from exc
 
         credential = DefaultAzureCredential(exclude_developer_cli_credential=True)
@@ -1106,22 +1140,17 @@ def _execute_cloud_evaluation(
         stderr_path: Path,
         metrics_path: Path,
     ) -> BackendExecutionResult:
-        """Run evaluation via the Foundry Cloud Evaluation API (New Experience).
+        """Run evaluation via the Foundry Project Evals API (New Experience).
 
-        Uses ``client.evals.create`` / ``client.evals.runs.create`` with
-        ``azure_ai_evaluator`` testing criteria and ``azure_ai_target_completions``
-        data source so results appear in the Foundry Evaluations page.
+        Uses the Foundry Project REST endpoint
+        ``{project_endpoint}/openai/evals?api-version=2025-11-15-preview``
+        with ``azure_ai_evaluator`` testing criteria so results appear in the
+        Foundry Evaluations page.
 
         Reference: https://learn.microsoft.com/azure/foundry/how-to/develop/cloud-evaluation
         """
-        try:
-            from azure.ai.projects import AIProjectClient  # noqa: WPS433
-            from azure.identity import DefaultAzureCredential  # noqa: WPS433
-        except ImportError as exc:
-            raise ImportError(
-                "Foundry Cloud Evaluation requires 'azure-ai-projects>=2.0.0b1' and 'azure-identity'. "
-                "Install with: pip install 'azure-ai-projects>=2.0.0b1' azure-identity openai"
-            ) from exc
+        # The Foundry Project Evals API version that supports azure_ai_evaluator.
+        _EVALS_API_VERSION = "2025-11-15-preview"
 
         rows = _load_jsonl(dataset_source_path)
         total_rows = len(rows)
@@ -1166,6 +1195,7 @@ def _execute_cloud_evaluation(
                     builtin_name,
                     input_field,
                     expected_field,
+                    context_field=dataset_config.format.context_field,
                 ),
             }
             if _cloud_evaluator_needs_model(builtin_name):
@@ -1179,19 +1209,42 @@ def _execute_cloud_evaluation(
                 }
             testing_criteria.append(criterion)
 
-        # --- Create OpenAI client (SDK picks correct api-version) -----------
+        # --- Acquire token for Foundry Project Evals API --------------------
         try:
-            credential = DefaultAzureCredential(exclude_developer_cli_credential=True)
-            project_client = AIProjectClient(
-                endpoint=settings.project_endpoint,
-                credential=credential,
-            )
-            openai_client = project_client.get_openai_client()
-        except ImportError:
-            raise
+            evals_token = _acquire_token("https://ai.azure.com/.default")
         except Exception as exc:
             raise RuntimeError(_CREDENTIAL_HELP_MESSAGE) from exc
 
+        evals_base_url = settings.project_endpoint.rstrip("/")
+        evals_headers = {
+            "Content-Type": "application/json",
+            "Authorization": f"Bearer {evals_token}",
+        }
+
+        def _evals_post(path: str, body: Dict[str, Any]) -> Dict[str, Any]:
+            url = (
+                f"{evals_base_url}/openai/evals{path}?api-version={_EVALS_API_VERSION}"
+            )
+            return self._request_json(
+                method="POST",
+                url=url,
+                headers=evals_headers,
+                timeout_seconds=60,
+                body=body,
+            )
+
+        def _evals_get(path: str, extra_params: str = "") -> Dict[str, Any]:
+            params = f"api-version={_EVALS_API_VERSION}"
+            if extra_params:
+                params = f"{params}&{extra_params}"
+            url = f"{evals_base_url}/openai/evals{path}?{params}"
+            return self._request_json(
+                method="GET",
+                url=url,
+                headers=evals_headers,
+                timeout_seconds=60,
+            )
+
         # --- Data schema ----------------------------------------------------
         item_schema: Dict[str, Any] = {
             "type": "object",
@@ -1203,16 +1256,20 @@ def _execute_cloud_evaluation(
         }
 
         eval_name = f"agentops-eval-{uuid.uuid4().hex[:8]}"
-        eval_object = openai_client.evals.create(
-            name=eval_name,
-            data_source_config={
-                "type": "custom",
-                "item_schema": item_schema,
-                "include_sample_schema": True,
+        eval_object = _evals_post(
+            "",
+            {
+                "name": eval_name,
+                "data_source_config": {
+                    "type": "custom",
+                    "item_schema": item_schema,
+                    "include_sample_schema": True,
+                },
+                "testing_criteria": testing_criteria,
             },
-            testing_criteria=testing_criteria,
         )
-        logger.info("Cloud evaluation created: %s", eval_object.id)
+        eval_id = eval_object["id"]
+        logger.info("Cloud evaluation created: %s", eval_id)
 
         # --- Target + input messages ----------------------------------------
         input_messages: Dict[str, Any] = {
@@ -1233,17 +1290,19 @@ def _execute_cloud_evaluation(
 
         if settings.target == "model":
             # Model-direct: use completions data source (no agent)
-            eval_run = openai_client.evals.runs.create(
-                eval_id=eval_object.id,
-                name=run_name,
-                data_source={
-                    "type": "completions",
-                    "source": {
-                        "type": "file_content",
-                        "content": [{"item": row} for row in rows],
+            eval_run = _evals_post(
+                f"/{eval_id}/runs",
+                {
+                    "name": run_name,
+                    "data_source": {
+                        "type": "completions",
+                        "source": {
+                            "type": "file_content",
+                            "content": [{"item": row} for row in rows],
+                        },
+                        "input_messages": input_messages,
+                        "model": settings.model,
                     },
-                    "input_messages": input_messages,
-                    "model": settings.model,
                 },
             )
         else:
@@ -1256,22 +1315,26 @@ def _execute_cloud_evaluation(
             if agent_version:
                 target["version"] = agent_version
 
-            eval_run = openai_client.evals.runs.create(
-                eval_id=eval_object.id,
-                name=run_name,
-                data_source={
-                    "type": "azure_ai_target_completions",
-                    "source": {
-                        "type": "file_content",
-                        "content": [{"item": row} for row in rows],
+            eval_run = _evals_post(
+                f"/{eval_id}/runs",
+                {
+                    "name": run_name,
+                    "data_source": {
+                        "type": "azure_ai_target_completions",
+                        "source": {
+                            "type": "file_content",
+                            "content": [{"item": row} for row in rows],
+                        },
+                        "input_messages": input_messages,
+                        "target": target,
                     },
-                    "input_messages": input_messages,
-                    "target": target,
                 },
             )
+
+        run_id = eval_run["id"]
         logger.info(
             "Cloud evaluation run started: %s  (polling every %.0fs, timeout %.0fs)",
-            eval_run.id,
+            run_id,
             settings.poll_interval_seconds,
             settings.poll_interval_seconds * settings.max_poll_attempts,
         )
@@ -1281,13 +1344,11 @@ def _execute_cloud_evaluation(
         terminal_failure = {"failed", "cancelled", "canceled", "expired", "error"}
         poll_start = perf_counter()
         last_logged_status: str | None = None
+        latest_run: Dict[str, Any] = eval_run
 
         for attempt in range(1, settings.max_poll_attempts + 1):
-            latest_run = openai_client.evals.runs.retrieve(
-                run_id=eval_run.id,
-                eval_id=eval_object.id,
-            )
-            run_status = str(getattr(latest_run, "status", "unknown")).lower()
+            latest_run = _evals_get(f"/{eval_id}/runs/{run_id}")
+            run_status = str(latest_run.get("status", "unknown")).lower()
 
             # Only log when the status changes to avoid flooding the console.
             if run_status != last_logged_status:
@@ -1314,14 +1375,11 @@ def _execute_cloud_evaluation(
             )
 
         # --- Collect output items -------------------------------------------
-        output_items = list(
-            openai_client.evals.runs.output_items.list(
-                run_id=eval_run.id,
-                eval_id=eval_object.id,
-                order="asc",
-                limit=100,
-            )
+        output_items_resp = _evals_get(
+            f"/{eval_id}/runs/{run_id}/output_items",
+            extra_params="order=asc&limit=100",
         )
+        output_items: List[Dict[str, Any]] = output_items_resp.get("data", [])
         if not output_items:
             raise RuntimeError(
                 "Foundry cloud evaluation completed with no output items"
@@ -1352,7 +1410,7 @@ def _execute_cloud_evaluation(
         stderr_lines: List[str] = []
 
         for index, item in enumerate(output_items, start=1):
-            datasource_item = getattr(item, "datasource_item", {}) or {}
+            datasource_item = item.get("datasource_item", {}) or {}
             row_data = (
                 datasource_item.get("item", datasource_item)
                 if isinstance(datasource_item, dict)
@@ -1363,15 +1421,17 @@ def _execute_cloud_evaluation(
             expected = _normalize_text(row_data.get(expected_field))
 
             # Extract prediction from sample
-            sample = getattr(item, "sample", None)
+            sample = item.get("sample", None)
             prediction = ""
             if isinstance(sample, dict):
                 prediction = _normalize_text(sample.get("output_text", ""))
 
             row_metric_entries: List[Dict[str, float]] = []
-            for result in getattr(item, "results", []) or []:
-                metric_name = getattr(result, "name", "")
-                metric_score = getattr(result, "score", None)
+            for result in item.get("results", []) or []:
+                metric_name = result.get("name", "") if isinstance(result, dict) else ""
+                metric_score = (
+                    result.get("score", None) if isinstance(result, dict) else None
+                )
                 if isinstance(metric_name, str) and isinstance(
                     metric_score, (int, float)
                 ):
@@ -1388,20 +1448,26 @@ def _execute_cloud_evaluation(
             # Only emit local evaluator metrics if they are configured in the bundle.
             if "exact_match" in enabled_local_names:
                 passed = prediction.lower() == expected.lower() if expected else False
-                row_metric_entries.append({
-                    "name": "exact_match",
-                    "value": 1.0 if passed else 0.0,
-                })
+                row_metric_entries.append(
+                    {
+                        "name": "exact_match",
+                        "value": 1.0 if passed else 0.0,
+                    }
+                )
             if "latency_seconds" in enabled_local_names:
-                row_metric_entries.append({
-                    "name": "latency_seconds",
-                    "value": approx_latency_per_row,
-                })
+                row_metric_entries.append(
+                    {
+                        "name": "latency_seconds",
+                        "value": approx_latency_per_row,
+                    }
+                )
             if "avg_latency_seconds" in enabled_local_names:
-                row_metric_entries.append({
-                    "name": "avg_latency_seconds",
-                    "value": approx_latency_per_row,
-                })
+                row_metric_entries.append(
+                    {
+                        "name": "avg_latency_seconds",
+                        "value": approx_latency_per_row,
+                    }
+                )
 
             # Update aggregate values for local evaluator metrics.
             for entry in row_metric_entries:
@@ -1410,14 +1476,16 @@ def _execute_cloud_evaluation(
                     evaluator_aggregate_values[agg_name].append(entry["value"])
 
             row_index = index
-            datasource_item_id = getattr(item, "datasource_item_id", None)
+            datasource_item_id = item.get("datasource_item_id", None)
             if isinstance(datasource_item_id, int) and datasource_item_id >= 0:
                 row_index = datasource_item_id + 1
 
-            row_metrics_payload.append({
-                "row_index": row_index,
-                "metrics": row_metric_entries,
-            })
+            row_metrics_payload.append(
+                {
+                    "row_index": row_index,
+                    "metrics": row_metric_entries,
+                }
+            )
             stdout_lines.append(
                 f"row={row_index} expected={expected!r} prediction={prediction!r}"
             )
@@ -1430,10 +1498,12 @@ def _execute_cloud_evaluation(
         for name in enabled_evaluator_order:
             values = evaluator_aggregate_values.get(name, [])
             if values:
-                metrics_entries.append({
-                    "name": name,
-                    "value": sum(values) / len(values),
-                })
+                metrics_entries.append(
+                    {
+                        "name": name,
+                        "value": sum(values) / len(values),
+                    }
+                )
         metrics_entries.append({"name": "samples_evaluated", "value": float(total)})
 
         metrics_path.write_text(
@@ -1447,17 +1517,14 @@ def _execute_cloud_evaluation(
         stderr_path.write_text("\n".join(stderr_lines), encoding="utf-8")
 
         # --- Report URL (deep-link to the New Foundry Experience) -----------
-        latest_run = openai_client.evals.runs.retrieve(
-            run_id=eval_run.id, eval_id=eval_object.id
-        )
-        report_url = getattr(latest_run, "report_url", None)
+        report_url = latest_run.get("report_url")
 
         cloud_meta_path = context.backend_output_dir / "cloud_evaluation.json"
         cloud_meta_path.write_text(
             json.dumps(
                 {
-                    "eval_id": eval_object.id,
-                    "run_id": eval_run.id,
+                    "eval_id": eval_id,
+                    "run_id": run_id,
                     "report_url": report_url,
                     "evaluation_name": eval_name,
                     "run_name": run_name,
@@ -1477,7 +1544,7 @@ def _execute_cloud_evaluation(
         else:
             command_display = (
                 "foundry.cloud_evaluation "
-                f"project_endpoint={settings.project_endpoint} agent_id={settings.agent_id}"
+                f"project_endpoint={settings.project_endpoint} target=agent agent_id={settings.agent_id} model={settings.model}"
             )
 
         logger.info("Cloud evaluation completed with %d output item(s)", total)
@@ -1604,20 +1671,26 @@ def _record_row_metrics(
             # Only emit local evaluator metrics that are configured in the bundle.
             if "exact_match" in enabled_local_names:
                 passed = prediction_normalized.lower() == expected_text.lower()
-                row_metric_entries.append({
-                    "name": "exact_match",
-                    "value": 1.0 if passed else 0.0,
-                })
+                row_metric_entries.append(
+                    {
+                        "name": "exact_match",
+                        "value": 1.0 if passed else 0.0,
+                    }
+                )
             if "latency_seconds" in enabled_local_names:
-                row_metric_entries.append({
-                    "name": "latency_seconds",
-                    "value": row_latency,
-                })
+                row_metric_entries.append(
+                    {
+                        "name": "latency_seconds",
+                        "value": row_latency,
+                    }
+                )
             if "avg_latency_seconds" in enabled_local_names:
-                row_metric_entries.append({
-                    "name": "avg_latency_seconds",
-                    "value": row_latency,
-                })
+                row_metric_entries.append(
+                    {
+                        "name": "avg_latency_seconds",
+                        "value": row_latency,
+                    }
+                )
 
             for metric_entry in row_metric_entries:
                 metric_name = metric_entry["name"]
@@ -1625,10 +1698,12 @@ def _record_row_metrics(
                 if metric_name in evaluator_aggregate_values:
                     evaluator_aggregate_values[metric_name].append(metric_value)
 
-            row_metrics_payload.append({
-                "row_index": row_index,
-                "metrics": row_metric_entries,
-            })
+            row_metrics_payload.append(
+                {
+                    "row_index": row_index,
+                    "metrics": row_metric_entries,
+                }
+            )
 
             stdout_lines.append(
                 f"row={row_index} expected={expected_text!r} prediction={prediction_normalized!r}"
@@ -1749,10 +1824,12 @@ def _record_row_metrics(
         for evaluator_name in enabled_evaluator_order:
             values = evaluator_aggregate_values.get(evaluator_name, [])
             if values:
-                metrics_entries.append({
-                    "name": evaluator_name,
-                    "value": sum(values) / len(values),
-                })
+                metrics_entries.append(
+                    {
+                        "name": evaluator_name,
+                        "value": sum(values) / len(values),
+                    }
+                )
 
         metrics_entries.append({"name": "samples_evaluated", "value": float(total)})
 
@@ -1771,13 +1848,13 @@ def _record_row_metrics(
         if settings.target == "model":
             command_display = (
                 "foundry.model_direct "
-                f"project_endpoint={settings.project_endpoint} model={settings.model}"
+                f"project_endpoint={settings.project_endpoint} target=model model={settings.model}"
             )
         else:
             command_display = (
                 "foundry.agent_service "
-                f"project_endpoint={settings.project_endpoint} agent_id={settings.agent_id} "
-                f"api_version={settings.api_version}"
+                f"project_endpoint={settings.project_endpoint} target=agent agent_id={settings.agent_id} "
+                f"model={settings.model} api_version={settings.api_version}"
             )
 
         return BackendExecutionResult(
diff --git a/src/agentops/cli/app.py b/src/agentops/cli/app.py
index d64ffe1..a9f9e7b 100644
--- a/src/agentops/cli/app.py
+++ b/src/agentops/cli/app.py
@@ -1,7 +1,7 @@
 from __future__ import annotations
 
 from pathlib import Path
-from typing import Annotated, Optional
+from typing import Annotated
 
 import typer
 
@@ -59,6 +59,7 @@ def _planned_command(command_name: str) -> None:
 # Global callback — configures logging before any command runs
 # ---------------------------------------------------------------------------
 
+
 def _version_callback(value: bool) -> None:
     if value:
         from agentops import __version__
@@ -90,9 +91,12 @@ def _main(
 # agentops init
 # ---------------------------------------------------------------------------
 
+
 @app.command("init")
 def cmd_init(
-    force: bool = typer.Option(False, "--force", help="Overwrite starter files if they exist."),
+    force: bool = typer.Option(
+        False, "--force", help="Overwrite starter files if they exist."
+    ),
     directory: Path = typer.Option(
         Path("."),
         "--dir",
@@ -129,29 +133,47 @@ def cmd_init(
 # agentops eval run
 # ---------------------------------------------------------------------------
 
+
 @eval_app.command("run")
 def cmd_eval_run(
     config: Annotated[
-        Optional[Path],
+        Path | None,
         typer.Option(
             "--config",
             "-c",
             help="Path to run.yaml (default: .agentops/run.yaml).",
         ),
     ] = None,
-    output: Annotated[Optional[Path], typer.Option("--output", "-o", help="Output directory for results.")] = None,
+    output: Annotated[
+        Path | None,
+        typer.Option("--output", "-o", help="Output directory for results."),
+    ] = None,
+    report_format: Annotated[
+        str, typer.Option("--format", "-f", help="Report format: md, html, or all.")
+    ] = "md",
 ) -> None:
     """Run an evaluation defined in a run.yaml file."""
-    log.debug("cmd_eval_run called config=%s output=%s", config, output)
+    if report_format not in ("md", "html", "all"):
+        typer.echo("Error: --format must be md, html, or all.", err=True)
+        raise typer.Exit(code=1)
+
+    log.debug(
+        "cmd_eval_run called config=%s output=%s format=%s",
+        config,
+        output,
+        report_format,
+    )
     try:
-        run_result = run_evaluation(config_path=config, output_override=output)
+        run_result = run_evaluation(
+            config_path=config, output_override=output, report_format=report_format
+        )
     except Exception as exc:
         typer.echo(f"Error: evaluation failed: {exc}", err=True)
         raise typer.Exit(code=1) from exc
 
     typer.echo(f"Evaluation output directory: {run_result.output_dir}")
     typer.echo(f"results.json: {run_result.results_path}")
-    typer.echo(f"report.md: {run_result.report_path}")
+    typer.echo(f"report: {run_result.report_path}")
 
     if run_result.exit_code == 2:
         typer.echo("Threshold status: FAILED")
@@ -164,23 +186,71 @@ def cmd_eval_run(
 def cmd_eval_compare(
     runs: Annotated[
         str,
-        typer.Option("--runs", help="Comma-separated run ids (example: ID1,ID2)."),
+        typer.Option(
+            "--runs", help="Comma-separated run ids (example: ID1,ID2 or ID1,ID2,ID3)."
+        ),
     ],
+    output: Annotated[
+        Path | None,
+        typer.Option("--output", "-o", help="Output directory for comparison results."),
+    ] = None,
+    report_format: Annotated[
+        str, typer.Option("--format", "-f", help="Report format: md, html, or all.")
+    ] = "md",
 ) -> None:
-    """Compare two past evaluation runs (planned)."""
-    _ = runs
-    _planned_command("agentops eval compare --runs ID1,ID2")
+    """Compare two or more past evaluation runs."""
+    from agentops.services.comparison import run_comparison
+
+    if report_format not in ("md", "html", "all"):
+        typer.echo("Error: --format must be md, html, or all.", err=True)
+        raise typer.Exit(code=1)
+
+    parts = [p.strip() for p in runs.split(",")]
+    if len(parts) < 2:
+        typer.echo(
+            "Error: --runs must contain at least two comma-separated run ids.", err=True
+        )
+        raise typer.Exit(code=1)
+
+    log.debug(
+        "cmd_eval_compare called runs=%s output=%s format=%s",
+        parts,
+        output,
+        report_format,
+    )
+    try:
+        result = run_comparison(
+            run_ids=parts,
+            output_dir=output,
+            report_format=report_format,
+        )
+    except Exception as exc:
+        typer.echo(f"Error: comparison failed: {exc}", err=True)
+        raise typer.Exit(code=1) from exc
+
+    typer.echo(f"comparison.json: {result.comparison_json_path}")
+    if result.comparison_md_path:
+        typer.echo(f"comparison.md: {result.comparison_md_path}")
+    if result.comparison_html_path:
+        typer.echo(f"comparison.html: {result.comparison_html_path}")
+
+    if result.has_regressions:
+        typer.echo("Comparison verdict: REGRESSIONS DETECTED")
+        raise typer.Exit(code=2)
+
+    typer.echo("Comparison verdict: NO REGRESSIONS")
 
 
 # ---------------------------------------------------------------------------
 # agentops report
 # ---------------------------------------------------------------------------
 
+
 @report_app.callback(invoke_without_command=True)
 def cmd_report(
     ctx: typer.Context,
     results_in: Annotated[
-        Optional[Path],
+        Path | None,
         typer.Option(
             "--in",
             help=(
@@ -189,18 +259,34 @@ def cmd_report(
             ),
         ),
     ] = None,
-    report_out: Annotated[Optional[Path], typer.Option("--out", help="Output path for report.md.")] = None,
+    report_out: Annotated[
+        Path | None,
+        typer.Option("--out", help="Output path for report."),
+    ] = None,
+    report_format: Annotated[
+        str, typer.Option("--format", "-f", help="Report format: md, html, or all.")
+    ] = "md",
 ) -> None:
-    """Regenerate report.md from a results.json file."""
+    """Regenerate report from a results.json file."""
     if ctx.invoked_subcommand is not None:
         return
 
+    if report_format not in ("md", "html", "all"):
+        typer.echo("Error: --format must be md, html, or all.", err=True)
+        raise typer.Exit(code=1)
+
     resolved_results_in = results_in or DEFAULT_REPORT_INPUT
-    log.debug("cmd_report called in=%s out=%s", resolved_results_in, report_out)
+    log.debug(
+        "cmd_report called in=%s out=%s format=%s",
+        resolved_results_in,
+        report_out,
+        report_format,
+    )
     try:
         report_result = generate_report_from_results(
             results_path=resolved_results_in,
             output_path=report_out,
+            report_format=report_format,
         )
     except Exception as exc:
         typer.echo(f"Error: report generation failed: {exc}", err=True)
@@ -208,6 +294,8 @@ def cmd_report(
 
     typer.echo(f"Loaded results: {report_result.input_results_path}")
     typer.echo(f"Generated report: {report_result.output_report_path}")
+    if report_result.html_report_path:
+        typer.echo(f"Generated report: {report_result.html_report_path}")
 
 
 @report_app.command("show")
@@ -238,7 +326,7 @@ def cmd_run_show() -> None:
 def cmd_run_view(
     run_id: str,
     entry: Annotated[
-        Optional[int],
+        int | None,
         typer.Option("--entry", help="Optional row/entry index for deep inspection."),
     ] = None,
 ) -> None:
@@ -291,7 +379,9 @@ def cmd_config_show() -> None:
 
 @config_app.command("cicd")
 def cmd_config_cicd(
-    force: bool = typer.Option(False, "--force", help="Overwrite existing workflow file."),
+    force: bool = typer.Option(
+        False, "--force", help="Overwrite existing workflow file."
+    ),
     directory: Path = typer.Option(
         Path("."),
         "--dir",
@@ -318,9 +408,15 @@ def cmd_config_cicd(
     if result.created_files or result.overwritten_files:
         typer.echo("")
         typer.echo("Next steps:")
-        typer.echo("  1. Set GitHub repository variables: AZURE_CLIENT_ID, AZURE_TENANT_ID, AZURE_SUBSCRIPTION_ID")
-        typer.echo("  2. Set GitHub repository secret: AZURE_AI_FOUNDRY_PROJECT_ENDPOINT")
-        typer.echo("  3. Configure Azure Workload Identity Federation (see docs/ci-github-actions.md)")
+        typer.echo(
+            "  1. Set GitHub repository variables: AZURE_CLIENT_ID, AZURE_TENANT_ID, AZURE_SUBSCRIPTION_ID"
+        )
+        typer.echo(
+            "  2. Set GitHub repository secret: AZURE_AI_FOUNDRY_PROJECT_ENDPOINT"
+        )
+        typer.echo(
+            "  3. Configure Azure Workload Identity Federation (see docs/ci-github-actions.md)"
+        )
         typer.echo("  4. Commit and push the workflow file")
     elif result.skipped_files:
         typer.echo("No files written. Use --force to overwrite existing workflow.")
diff --git a/src/agentops/core/config_loader.py b/src/agentops/core/config_loader.py
index e323140..13c5000 100644
--- a/src/agentops/core/config_loader.py
+++ b/src/agentops/core/config_loader.py
@@ -1,4 +1,5 @@
 """YAML config loaders for AgentOps schemas."""
+
 from __future__ import annotations
 
 from pathlib import Path
diff --git a/src/agentops/core/models.py b/src/agentops/core/models.py
index a8c378e..9540485 100644
--- a/src/agentops/core/models.py
+++ b/src/agentops/core/models.py
@@ -1,4 +1,5 @@
 """Pydantic models for AgentOps schemas."""
+
 from __future__ import annotations
 
 from pathlib import Path
@@ -136,6 +137,7 @@ class DatasetFormat(BaseModel):
     type: str
     input_field: str
     expected_field: str
+    context_field: Optional[str] = None
 
 
 class DatasetConfig(BaseModel):
@@ -185,9 +187,8 @@ def _reject_placeholder_model(cls, value: Optional[str]) -> Optional[str]:
 
         normalized = value.strip()
         looks_like_placeholder = (
-            (normalized.startswith("<") and normalized.endswith(">"))
-            or "replace-with" in normalized.lower()
-        )
+            normalized.startswith("<") and normalized.endswith(">")
+        ) or "replace-with" in normalized.lower()
         if looks_like_placeholder:
             raise ValueError(
                 "backend.model must be replaced with a real Foundry model deployment name"
@@ -204,17 +205,24 @@ def _validate_subprocess_requirements(self) -> "BackendConfig":
         elif self.type == "foundry":
             target = (self.target or "agent").strip().lower()
             if target not in {"agent", "model"}:
-                raise ValueError("backend.target must be 'agent' or 'model' for foundry")
+                raise ValueError(
+                    "backend.target must be 'agent' or 'model' for foundry"
+                )
 
             self.target = target
             if target == "agent":
                 if not self.agent_id or not self.agent_id.strip():
-                    raise ValueError("backend.agent_id is required for foundry target=agent")
+                    raise ValueError(
+                        "backend.agent_id is required for foundry target=agent"
+                    )
             # target=model does not require agent_id
 
             if self.max_poll_attempts is not None and self.max_poll_attempts <= 0:
                 raise ValueError("backend.max_poll_attempts must be > 0")
-            if self.poll_interval_seconds is not None and self.poll_interval_seconds <= 0:
+            if (
+                self.poll_interval_seconds is not None
+                and self.poll_interval_seconds <= 0
+            ):
                 raise ValueError("backend.poll_interval_seconds must be > 0")
         else:
             raise ValueError(f"Unsupported backend type: {self.type}")
@@ -364,3 +372,90 @@ class RunResult(BaseModel):
     thresholds: List[ThresholdEvaluationResult] = Field(default_factory=list)
     summary: Summary
     artifacts: Optional[Artifacts] = None
+
+
+# ---------------------------------------------------------------------------
+# Comparison models
+# ---------------------------------------------------------------------------
+
+Direction = Literal["improved", "regressed", "unchanged"]  # allowed values of ComparisonMetricRow.directions
+
+
+class RunReference(BaseModel):  # one compared run, as listed in ComparisonResult.runs
+    run_id: str  # used as the column label for this run in the comparison reports
+    bundle_name: str
+    dataset_name: str
+    started_at: str  # reports show only the first 19 characters — presumably an ISO-8601 timestamp; confirm
+    backend: Optional[str] = None
+    target: Optional[str] = None
+    model: Optional[str] = None
+    agent_id: Optional[str] = None
+    project_endpoint: Optional[str] = None
+    overall_passed: Optional[bool] = None  # None renders as "-" in the Markdown report's Status row
+
+
+class ComparisonMetricRow(BaseModel):
+    """One metric across all compared runs; the per-run lists are read in run order by the reporter."""
+
+    name: str  # the reporter looks up a ComparisonThresholdRow whose ``evaluator`` equals this name
+    values: List[float] = Field(default_factory=list)
+    deltas: List[Optional[float]] = Field(default_factory=list)  # presumably change vs. the baseline run — confirm; None entries are not rendered
+    delta_percents: List[Optional[float]] = Field(default_factory=list)
+    directions: List[Direction] = Field(default_factory=list)  # read at the same index as ``deltas``
+    best_run_index: Optional[int] = None  # index into the run list; None renders as "-"
+
+
+class ComparisonThresholdRow(BaseModel):
+    """One threshold across all compared runs."""
+
+    evaluator: str  # matched by the reporter against ComparisonMetricRow.name and ComparisonItemRow.scores keys
+    criteria: Criteria
+    target: Optional[str] = None  # the reporter parses this with float(); None or unparsable text counts as met
+    passed: List[bool] = Field(default_factory=list)  # presumably one verdict per run — confirm with the producer
+
+
+class ComparisonItemRow(BaseModel):
+    """One dataset item across all compared runs."""
+
+    row_index: int
+    passed_all: List[bool] = Field(default_factory=list)  # one entry per run, read in run order by the reporter
+    scores: Dict[str, List[Optional[float]]] = Field(default_factory=dict)  # evaluator name -> per-run score; None or missing entries are skipped when rendering
+
+
+ComparisonType = Literal[  # classification stored in ComparisonConditions.comparison_type
+    "agent",  # Same dataset, different agent or agent version
+    "model",  # Same dataset, different model
+    "dataset",  # Same agent/model, different datasets
+    "general",  # Multiple things differ
+]
+
+
+class ComparisonConditions(BaseModel):
+    """What's fixed vs varying across compared runs."""
+
+    comparison_type: ComparisonType
+    fixed: Dict[str, str] = Field(default_factory=dict)  # rendered as "key=value" pairs in the Markdown report
+    varying: List[str] = Field(default_factory=list)  # keys such as "backend", "project", "dataset", "bundle" gate optional Run Details rows
+    row_level_valid: bool = True  # False makes the Markdown report replace the per-item table with a "Skipped" note
+
+
+class ComparisonSummary(BaseModel):  # headline figures shown at the top of the comparison reports
+    run_count: int
+    any_regressions: bool  # True switches the report verdict to "REGRESSIONS DETECTED"
+    runs_with_regressions: List[int] = Field(default_factory=list)  # presumably indexes into ComparisonResult.runs — confirm
+
+
+class ComparisonResult(BaseModel):
+    """Unified comparison of 2 or more evaluation runs.
+
+    The first entry in ``runs`` is always the baseline.
+    """
+
+    version: int = 1
+    runs: List[RunReference] = Field(default_factory=list)
+    baseline_index: int = 0
+    conditions: Optional[ComparisonConditions] = None  # None: the Markdown report omits the Conditions section and still shows item rows
+    metric_rows: List[ComparisonMetricRow] = Field(default_factory=list)
+    threshold_rows: List[ComparisonThresholdRow] = Field(default_factory=list)
+    item_rows: List[ComparisonItemRow] = Field(default_factory=list)
+    summary: ComparisonSummary  # required; the only field declared without a default
+    summary: ComparisonSummary
diff --git a/src/agentops/core/reporter.py b/src/agentops/core/reporter.py
index 3f0c52f..8208075 100644
--- a/src/agentops/core/reporter.py
+++ b/src/agentops/core/reporter.py
@@ -1,90 +1,716 @@
-"""Markdown report generation for AgentOps."""
-from __future__ import annotations
-
-from agentops.core.models import RunResult
-
-
-def generate_report_markdown(result: RunResult) -> str:
-    overall_status = "PASS" if result.summary.overall_passed else "FAIL"
-
-    lines: list[str] = []
-    lines.append("# AgentOps Evaluation Report")
-    lines.append("")
-    lines.append("## Overview")
-    lines.append("")
-    lines.append(f"- Bundle: {result.bundle.name}")
-    lines.append(f"- Dataset: {result.dataset.name}")
-    lines.append(f"- Overall status: **{overall_status}**")
-    lines.append("")
-    lines.append("## Execution Summary")
-    lines.append("")
-    lines.append("| Field | Value |")
-    lines.append("|---|---|")
-    lines.append(f"| Backend | {result.execution.backend} |")
-    lines.append(f"| Duration (s) | {result.execution.duration_seconds:.3f} |")
-    lines.append(f"| Started at | {result.execution.started_at} |")
-    lines.append(f"| Finished at | {result.execution.finished_at} |")
-    lines.append(f"| Exit code | {result.execution.exit_code} |")
-    lines.append("")
-    lines.append("## Metrics")
-
-    if result.metrics:
-        lines.append("")
-        lines.append("| Metric | Value |")
-        lines.append("|---|---:|")
-        for metric in result.metrics:
-            lines.append(f"| {metric.name} | {metric.value:.6f} |")
-    else:
-        lines.append("- No metrics found")
-
-    lines.append("")
-    lines.append("## Run Metrics")
-    if result.run_metrics:
-        lines.append("")
-        lines.append("| Metric | Value |")
-        lines.append("|---|---:|")
-        for metric in result.run_metrics:
-            lines.append(f"| {metric.name} | {metric.value:.6f} |")
-    else:
-        lines.append("- No run metrics derived")
-
-    lines.append("")
-    lines.append("## Item Verdicts")
-    if result.item_evaluations:
-        passed_items = sum(1 for item in result.item_evaluations if item.passed_all)
-        lines.append(f"- Items passed all thresholds: {passed_items}/{len(result.item_evaluations)}")
-        lines.append("")
-        lines.append("| Row | Passed All | Passed Rules | Total Rules |")
-        lines.append("|---:|---|---:|---:|")
-        for item in result.item_evaluations:
-            passed_rules = sum(1 for threshold in item.thresholds if threshold.passed)
-            lines.append(
-                f"| {item.row_index} | {'PASS' if item.passed_all else 'FAIL'} | {passed_rules} | {len(item.thresholds)} |"
-            )
-    else:
-        lines.append("- No item-level evaluations found")
-
-    lines.append("")
-    lines.append("## Threshold Checks")
-    if result.thresholds:
-        lines.append("")
-        lines.append("| Evaluator | Criteria | Expected | Actual | Status |")
-        lines.append("|---|---|---:|---:|---|")
-        for threshold in result.thresholds:
-            mark = "PASS" if threshold.passed else "FAIL"
-            lines.append(f"| {threshold.evaluator} | {threshold.criteria} | {threshold.expected} | {threshold.actual} | {mark} |")
-    else:
-        lines.append("- No thresholds configured")
-
-    lines.append("")
-    lines.append("## Artifacts")
-    if result.artifacts is not None:
-        if result.artifacts.backend_stdout is not None:
-            lines.append(f"- backend_stdout: {result.artifacts.backend_stdout}")
-        if result.artifacts.backend_stderr is not None:
-            lines.append(f"- backend_stderr: {result.artifacts.backend_stderr}")
-        if result.artifacts.foundry_eval_studio_url is not None:
-            lines.append(f"- foundry_eval_studio_url: {result.artifacts.foundry_eval_studio_url}")
-        if result.artifacts.foundry_eval_name is not None:
-            lines.append(f"- foundry_eval_name: {result.artifacts.foundry_eval_name}")
-    return "\n".join(lines).rstrip() + "\n"
+"""Report generation for AgentOps (Markdown and HTML)."""
+
+from __future__ import annotations
+
+from agentops.core.models import ComparisonResult, RunResult
+
+
+def generate_report_markdown(result: RunResult) -> str:
+    """Render a single evaluation run as a Markdown report.
+
+    Sections appear in a fixed order: Overview, Execution Summary, Metrics,
+    Run Metrics, Item Verdicts, Threshold Checks and Artifacts. The metric,
+    item and threshold sections show a one-line placeholder bullet when they
+    have no data.
+
+    Args:
+        result: The evaluation run to describe.
+
+    Returns:
+        The Markdown text, ending in exactly one newline.
+    """
+    execution = result.execution
+    status_text = "PASS" if result.summary.overall_passed else "FAIL"
+    out: list[str] = [
+        "# AgentOps Evaluation Report",
+        "",
+        "## Overview",
+        "",
+        f"- Bundle: {result.bundle.name}",
+        f"- Dataset: {result.dataset.name}",
+        f"- Overall status: **{status_text}**",
+        "",
+        "## Execution Summary",
+        "",
+        "| Field | Value |",
+        "|---|---|",
+        f"| Backend | {execution.backend} |",
+        f"| Duration (s) | {execution.duration_seconds:.3f} |",
+        f"| Started at | {execution.started_at} |",
+        f"| Finished at | {execution.finished_at} |",
+        f"| Exit code | {execution.exit_code} |",
+        "",
+        "## Metrics",
+    ]
+
+    # Aggregate metrics and derived run metrics share one table layout.
+    for metric_list, placeholder, next_heading in (
+        (result.metrics, "- No metrics found", "## Run Metrics"),
+        (result.run_metrics, "- No run metrics derived", "## Item Verdicts"),
+    ):
+        if metric_list:
+            out += ["", "| Metric | Value |", "|---|---:|"]
+            out += [f"| {entry.name} | {_fmt(entry.value)} |" for entry in metric_list]
+        else:
+            out.append(placeholder)
+        out += ["", next_heading]
+
+    items = result.item_evaluations
+    if items:
+        passed_total = len([entry for entry in items if entry.passed_all])
+        out += [
+            f"- Items passed all thresholds: {passed_total}/{len(items)}",
+            "",
+            "| Row | Passed All | Passed Rules | Total Rules |",
+            "|---:|---|---:|---:|",
+        ]
+        for entry in items:
+            verdict = "PASS" if entry.passed_all else "FAIL"
+            rules_met = len([rule for rule in entry.thresholds if rule.passed])
+            out.append(
+                f"| {entry.row_index} | {verdict} | {rules_met} | {len(entry.thresholds)} |"
+            )
+    else:
+        out.append("- No item-level evaluations found")
+
+    out += ["", "## Threshold Checks"]
+    if result.thresholds:
+        out += ["", "| Evaluator | Criteria | Expected | Actual | Status |", "|---|---|---:|---:|---|"]
+        for check in result.thresholds:
+            label = _threshold_label(check.passed)
+            out.append(
+                f"| {check.evaluator} | {check.criteria} | {check.expected} | {check.actual} | {label} |"
+            )
+    else:
+        out.append("- No thresholds configured")
+
+    out += ["", "## Artifacts"]
+    artifacts = result.artifacts
+    if artifacts is not None:
+        # Only artifacts that are set get a bullet, always in this order.
+        artifact_fields = ("backend_stdout", "backend_stderr")
+        artifact_fields += ("foundry_eval_studio_url", "foundry_eval_name")
+        for field_name in artifact_fields:
+            field_value = getattr(artifacts, field_name)
+            if field_value is not None:
+                out.append(f"- {field_name}: {field_value}")
+    return "\n".join(out).rstrip() + "\n"
+
+
+# ---------------------------------------------------------------------------
+# Shared formatting helpers
+# ---------------------------------------------------------------------------
+
+
+def _fmt(value: float) -> str:
+    """Format a number for reports: integral values below 1e15 print without decimals, others with 2 dp."""
+    if value != value or value in (float("inf"), float("-inf")):
+        return f"{value:.2f}"  # NaN/infinity: int() would raise, so render "nan"/"inf"/"-inf" instead
+    return str(int(value)) if value == int(value) and abs(value) < 1e15 else f"{value:.2f}"
+
+
+def _fmt_delta(value: float) -> str:
+    """Format a delta with an explicit sign: integral values below 1e15 as ``+d``, others as ``+.2f``."""
+    if value != value or value in (float("inf"), float("-inf")):
+        return f"{value:+.2f}"  # NaN/infinity: int() would raise, so render "+nan"/"+inf"/"-inf" instead
+    return f"{int(value):+d}" if value == int(value) and abs(value) < 1e15 else f"{value:+.2f}"
+
+
+def _threshold_label(passed: bool) -> str:
+    return ("Missed", "Met")[bool(passed)]  # plain-text wording for a threshold verdict
+
+
+def _check_threshold(value: float, criteria: str, target: str | None) -> bool:
+    """Return whether ``value`` meets ``criteria`` against ``target``; anything uncheckable counts as met."""
+    try:
+        limit = float(target) if target is not None else None
+    except (ValueError, TypeError):
+        limit = None
+    if limit is None:
+        return True  # no target, or one that float() cannot parse
+    outcomes = (
+        (">=", lambda: value >= limit),
+        (">", lambda: value > limit),
+        ("<=", lambda: value <= limit),
+        ("<", lambda: value < limit),
+        ("==", lambda: value == limit),
+    )
+    for operator_text, outcome in outcomes:
+        if criteria == operator_text:
+            return outcome()
+    return True  # unrecognised operator
+
+
+def _fmt_target(criteria: str, target: str | None) -> str:
+    """Render a threshold as ``"<criteria> <target>"`` (e.g. ``">= 3"``), or just the criteria when there is no target."""
+    if target is None:
+        return criteria
+    try:
+        shown = _fmt(float(target))  # numeric targets get the shared number formatting
+    except (ValueError, TypeError):
+        shown = target  # not a number: show the raw text
+    return f"{criteria} {shown}"
+
+
+# ---------------------------------------------------------------------------
+# Shared HTML helpers: inline stylesheet, escaping, badges and the page wrapper
+# ---------------------------------------------------------------------------
+
+_CSS = """\
+:root {
+  --bg: #ffffff; --surface: #f6f8fa; --border: #d1d9e0;
+  --text: #1f2328; --muted: #656d76; --accent: #0969da;
+  --green: #1a7f37; --red: #cf222e; --yellow: #9a6700;
+  --green-bg: #dafbe1; --red-bg: #ffebe9; --yellow-bg: #fff8c5;
+  --font: -apple-system, BlinkMacSystemFont, 'Segoe UI', Helvetica, Arial, sans-serif;
+}
+*, *::before, *::after { box-sizing: border-box; margin: 0; padding: 0; }
+body { font-family: var(--font); background: var(--bg); color: var(--text); line-height: 1.6; padding: 2rem 2.5rem; max-width: 1020px; margin: 0 auto; }
+h1 { font-size: 1.6rem; font-weight: 600; margin-bottom: .4rem; }
+h2 { font-size: 1.1rem; font-weight: 600; color: var(--text); margin: 2rem 0 .6rem; padding-bottom: .35rem; border-bottom: 1px solid var(--border); }
+.badge { display: inline-block; padding: .15rem .6rem; border-radius: .25rem; font-weight: 600; font-size: .8rem; }
+.badge-pass { background: var(--green-bg); color: var(--green); }
+.badge-fail { background: var(--red-bg); color: var(--red); }
+.badge-improved { background: var(--green-bg); color: var(--green); }
+.badge-regressed { background: var(--red-bg); color: var(--red); }
+.badge-unchanged { background: var(--yellow-bg); color: var(--yellow); }
+.meta { color: var(--muted); font-size: .85rem; margin-bottom: 1.2rem; display: flex; flex-wrap: wrap; gap: .3rem 1.5rem; }
+.meta span { white-space: nowrap; }
+table { width: 100%; border-collapse: collapse; margin: .4rem 0 1rem; font-size: .875rem; }
+th, td { padding: .5rem .7rem; text-align: left; border-bottom: 1px solid var(--border); }
+th { background: var(--surface); font-size: .75rem; font-weight: 600; text-transform: uppercase; letter-spacing: .03em; color: var(--muted); }
+td { vertical-align: top; }
+tr:hover td { background: #f0f4f8; }
+.num { text-align: right; font-variant-numeric: tabular-nums; }
+.card-grid { display: grid; grid-template-columns: repeat(auto-fit, minmax(180px, 1fr)); gap: .6rem; margin: .4rem 0 1.2rem; }
+.card { background: var(--surface); border: 1px solid var(--border); border-radius: .5rem; padding: .8rem 1rem; }
+.card .label { font-size: .7rem; font-weight: 600; text-transform: uppercase; color: var(--muted); letter-spacing: .03em; }
+.card .value { font-size: 1.3rem; font-weight: 700; margin-top: .15rem; }
+footer { margin-top: 2.5rem; padding-top: .8rem; border-top: 1px solid var(--border); color: var(--muted); font-size: .75rem; text-align: center; }
+"""
+
+
+def _html_escape(text: str) -> str:
+    """Escape ``text`` for HTML element content and quoted attribute values.
+
+    Uses the standard library, which also covers single quotes; non-string input is passed through ``str()`` first.
+    """
+    import html  # function-scope import so the module's import header stays untouched
+    return html.escape(str(text), quote=True)
+
+
+def _badge(label: str, kind: str) -> str:
+    return '<span class="badge badge-{}">{}</span>'.format(kind, _html_escape(label))  # only the label is escaped
+
+
+def _status_badge(passed: bool) -> str:
+    return _badge(*(("PASS", "pass") if passed else ("FAIL", "fail")))  # PASS/FAIL verdict badge
+
+
+def _threshold_badge(passed: bool) -> str:
+    return _badge(*(("Met", "pass") if passed else ("Missed", "fail")))  # Met/Missed threshold badge
+
+
+def _direction_badge(direction: str) -> str:
+    return _badge(label=direction, kind=direction)  # the direction is both the text and the badge-<kind> CSS suffix
+
+
+def _wrap_page(title: str, body: str) -> str:
+    """Wrap ``body`` in a complete HTML document with the escaped ``title`` and the inline stylesheet."""
+    head_open = (
+        '<!DOCTYPE html>\n<html lang="en">\n<head>\n<meta charset="utf-8">\n'
+        '<meta name="viewport" content="width=device-width, initial-scale=1">\n'
+    )
+    page = [
+        f"{head_open}<title>{_html_escape(title)}</title>",
+        f"<style>{_CSS}</style>",
+        f"</head>\n<body>\n{body}",
+        "<footer>Generated by AgentOps</footer>\n</body>\n</html>\n",
+    ]
+    return "\n".join(page)
+
+
+# ---------------------------------------------------------------------------
+# Evaluation run HTML report
+# ---------------------------------------------------------------------------
+
+
+def generate_report_html(result: RunResult) -> str:
+    """Render a single evaluation run as a standalone HTML page.
+
+    Unlike the Markdown report, the Metrics, Run Metrics, Threshold Checks,
+    Item Verdicts and Artifacts sections are omitted when they have nothing
+    to show, and the only artifact rendered is the Foundry studio link.
+
+    Args:
+        result: The evaluation run to describe.
+
+    Returns:
+        The complete HTML document produced by ``_wrap_page``.
+    """
+    run = result.execution
+    title_html = f"<h1>AgentOps Evaluation Report {_status_badge(result.summary.overall_passed)}</h1>"
+    meta_html = (
+        '<div class="meta">'
+        f"<span><strong>Bundle:</strong> {_html_escape(result.bundle.name)}</span>"
+        f"<span><strong>Dataset:</strong> {_html_escape(result.dataset.name)}</span>"
+        f"<span><strong>Backend:</strong> {_html_escape(run.backend)}</span>"
+        "</div>"
+    )
+    html_parts: list[str] = [title_html, meta_html, "<h2>Execution</h2>", '<div class="card-grid">']
+
+    # Execution facts are shown as a small grid of cards.
+    cards = {
+        "Duration": f"{run.duration_seconds:.1f}s",
+        "Started": run.started_at[:19],
+        "Exit code": str(run.exit_code),
+    }
+    for caption, shown in cards.items():
+        html_parts.append(
+            f'<div class="card"><div class="label">{caption}</div><div class="value">{_html_escape(shown)}</div></div>'
+        )
+    html_parts.append("</div>")
+
+    # Aggregate metrics and derived run metrics use the same two-column table.
+    for heading, metric_list in (("Metrics", result.metrics), ("Run Metrics", result.run_metrics)):
+        if not metric_list:
+            continue
+        html_parts.append(f"<h2>{heading}</h2>")
+        html_parts.append(
+            '<table><thead><tr><th>Metric</th><th class="num">Value</th></tr></thead><tbody>'
+        )
+        html_parts.extend(
+            f'<tr><td>{_html_escape(entry.name)}</td><td class="num">{_fmt(entry.value)}</td></tr>'
+            for entry in metric_list
+        )
+        html_parts.append("</tbody></table>")
+
+    if result.thresholds:
+        html_parts.append("<h2>Threshold Checks</h2>")
+        html_parts.append(
+            '<table><thead><tr><th>Evaluator</th><th>Criteria</th><th class="num">Expected</th><th class="num">Actual</th><th>Status</th></tr></thead><tbody>'
+        )
+        for check in result.thresholds:
+            html_parts.append(
+                f"<tr><td>{_html_escape(check.evaluator)}</td><td>{_html_escape(check.criteria)}</td>"
+                f'<td class="num">{_html_escape(check.expected)}</td><td class="num">{_html_escape(check.actual)}</td>'
+                f"<td>{_threshold_badge(check.passed)}</td></tr>"
+            )
+        html_parts.append("</tbody></table>")
+
+    items = result.item_evaluations
+    if items:
+        passed_total = len([entry for entry in items if entry.passed_all])
+        html_parts.append(f"<h2>Item Verdicts ({passed_total}/{len(items)} passed)</h2>")
+        html_parts.append(
+            '<table><thead><tr><th class="num">Row</th><th>Status</th><th class="num">Passed Rules</th><th class="num">Total Rules</th></tr></thead><tbody>'
+        )
+        for entry in items:
+            rules_met = len([rule for rule in entry.thresholds if rule.passed])
+            html_parts.append(
+                f'<tr><td class="num">{entry.row_index}</td><td>{_status_badge(entry.passed_all)}</td>'
+                f'<td class="num">{rules_met}</td><td class="num">{len(entry.thresholds)}</td></tr>'
+            )
+        html_parts.append("</tbody></table>")
+
+    # backend_stdout, backend_stderr and foundry_eval_name are not shown in the HTML page.
+    studio_url = result.artifacts.foundry_eval_studio_url if result.artifacts else None
+    if studio_url:
+        link = f'<a href="{_html_escape(studio_url)}" style="color:var(--accent)">View in Foundry</a>'
+        html_parts.append("<h2>Artifacts</h2>")
+        html_parts.append("<p>" + link + "</p>")
+
+    return _wrap_page("AgentOps Evaluation Report", "\n".join(html_parts))
+
+
+# ---------------------------------------------------------------------------
+# Comparison Markdown report (N runs)
+# ---------------------------------------------------------------------------
+
+
+def generate_comparison_markdown(result: ComparisonResult) -> str:
+    """Render a multi-run comparison as Markdown, ending in exactly one newline.
+
+    Columns follow ``result.runs`` and are labelled with each run's ``run_id``; the
+    first run is presented as the baseline. Sections: Overview, Conditions (when
+    set), Run Details, Evaluators (when there are metric rows) and Item Verdicts.
+    """
+    verdict = (
+        "REGRESSIONS DETECTED" if result.summary.any_regressions else "NO REGRESSIONS"
+    )
+    run_labels = [r.run_id for r in result.runs]
+
+    lines: list[str] = []
+    lines.append("# AgentOps Comparison Report")
+    lines.append("")
+    lines.append("## Overview")
+    lines.append("")
+    lines.append(f"- Runs compared: **{result.summary.run_count}**")
+    lines.append(f"- Verdict: **{verdict}**")
+    lines.append("")
+
+    cond = result.conditions  # Conditions section (emitted only when conditions are set)
+    if cond:
+        type_labels = {
+            "agent": "Agent Comparison",
+            "model": "Model Comparison",
+            "dataset": "Dataset Coverage",
+            "general": "General Comparison",
+        }
+        lines.append("## Conditions")
+        lines.append("")
+        lines.append(
+            f"- Comparison type: **{type_labels.get(cond.comparison_type, cond.comparison_type)}**"
+        )
+        if cond.fixed:
+            fixed_items = ", ".join(f"{k}={v}" for k, v in cond.fixed.items())
+            lines.append(f"- Fixed: {fixed_items}")
+        if cond.varying:
+            lines.append(f"- Varying: {', '.join(cond.varying)}")
+        if not cond.row_level_valid:
+            lines.append(
+                "- Note: Row-level comparison is not meaningful because datasets differ across runs."
+            )
+        lines.append("")
+
+    # Run details table — each entry is (row label, condition key, getter); only varying fields + always-show fields are kept
+    varying_set = set(cond.varying) if cond else set()
+    detail_fields = [
+        ("Backend", "backend", lambda r: r.backend or "-"),
+        ("Target", None, lambda r: r.target or "-"),
+        ("Model", None, lambda r: r.model or "-"),
+        ("Agent", None, lambda r: r.agent_id or "-"),
+        ("Project", "project", lambda r: r.project_endpoint or "-"),
+        ("Dataset", "dataset", lambda r: r.dataset_name),
+        ("Bundle", "bundle", lambda r: r.bundle_name),
+        (
+            "Status",
+            None,
+            lambda r: (
+                "PASS"
+                if r.overall_passed
+                else "FAIL"
+                if r.overall_passed is not None
+                else "-"
+            ),
+        ),
+        ("Started", None, lambda r: r.started_at[:19] if r.started_at else "-"),
+    ]
+    visible_fields = [  # keep fields that are varying or always-show (condition_key is None)
+        (label, ckey, getter)
+        for label, ckey, getter in detail_fields
+        if ckey is None or ckey in varying_set
+    ]
+
+    lines.append("## Run Details")
+    lines.append("")
+    lines.append("| | " + " | ".join(run_labels) + " |")
+    lines.append("|---|" + "|".join("---" for _ in run_labels) + "|")
+    lines.append(
+        "| Role | Baseline | "
+        + " | ".join(f"Run {i}" for i in range(1, len(result.runs)))
+        + " |"
+    )
+    for field, _ckey, getter in visible_fields:
+        cells = [getter(r) for r in result.runs]
+        lines.append(f"| {field} | " + " | ".join(cells) + " |")
+    lines.append("")
+    lines.append(
+        "*Status is PASS when all thresholds are met, FAIL when any threshold is missed.*"
+    )
+    lines.append("")
+
+    # Unified Evaluators table (metrics + thresholds merged); thresholds are matched to metrics by name
+    if result.metric_rows:
+        threshold_map = {tr.evaluator: tr for tr in result.threshold_rows}
+        lines.append("## Evaluators")
+        lines.append("")
+        header = "| Evaluator | Target | " + " | ".join(run_labels) + " | Best |"
+        sep = "|---|---|" + "|".join("---:" for _ in run_labels) + "|---|"
+        lines.append(header)
+        lines.append(sep)
+        for mr in result.metric_rows:
+            tr = threshold_map.get(mr.name)
+            target = _fmt_target(tr.criteria, tr.target) if tr else "-"
+            cells = []
+            for i, v in enumerate(mr.values):
+                if not tr:  # informational metric — plain value, no delta/direction/best
+                    cells.append(_fmt(v))
+                    continue
+                parts_cell = [_fmt(v)]
+                if i > 0:  # direction vs baseline (skipped for the baseline itself)
+                    d = mr.deltas[i]  # NOTE(review): assumes deltas/directions are at least as long as values — confirm with the producer
+                    direction = mr.directions[i]
+                    if d is not None:
+                        parts_cell.append(f"({_fmt_delta(d)}, {direction})")
+                met = _check_threshold(v, tr.criteria, tr.target)  # threshold check vs absolute target
+                parts_cell.append(_threshold_label(met))
+                cells.append(" ".join(parts_cell))
+            best = (
+                run_labels[mr.best_run_index]
+                if (mr.best_run_index is not None and tr)
+                else "-"
+            )
+            lines.append(
+                f"| {mr.name} | {target} | " + " | ".join(cells) + f" | {best} |"
+            )
+        lines.append("")
+
+    show_items = result.conditions.row_level_valid if result.conditions else True
+    if result.item_rows and show_items:
+        threshold_map = {tr.evaluator: tr for tr in result.threshold_rows}
+        lines.append("## Item Verdicts")
+        lines.append("")
+        header = "| Row | " + " | ".join(run_labels) + " |"
+        sep = "|---:|" + "|".join("---" for _ in run_labels) + "|"
+        lines.append(header)
+        lines.append(sep)
+        for ir in result.item_rows:
+            cells = []
+            for run_idx, passed in enumerate(ir.passed_all):
+                parts_cell = []
+                for eval_name, scores_list in ir.scores.items():  # per-evaluator scores for this row
+                    score = scores_list[run_idx] if run_idx < len(scores_list) else None
+                    if score is not None:
+                        tr = threshold_map.get(eval_name)
+                        if tr:
+                            met = _check_threshold(score, tr.criteria, tr.target)
+                            parts_cell.append(
+                                f"{eval_name}: {_fmt(score)} {_threshold_label(met)}"
+                            )
+                        else:
+                            parts_cell.append(f"{eval_name}: {_fmt(score)}")
+                if not parts_cell:  # no scores for this run: fall back to the overall row verdict
+                    parts_cell.append("PASS" if passed else "FAIL")
+                cells.append("; ".join(parts_cell))
+            lines.append(f"| {ir.row_index} | " + " | ".join(cells) + " |")
+    elif result.item_rows and not show_items:
+        lines.append("## Item Verdicts")
+        lines.append("")
+        lines.append(
+            "*Skipped — datasets differ across runs so row-level comparison is not meaningful.*"
+        )
+
+    return "\n".join(lines).rstrip() + "\n"
+
+
+# ---------------------------------------------------------------------------
+# Comparison HTML report (N runs)
+# ---------------------------------------------------------------------------
+
+
+def generate_comparison_html(result: ComparisonResult) -> str:
+    has_reg = result.summary.any_regressions
+    verdict_badge = (
+        _badge("REGRESSIONS DETECTED", "regressed")
+        if has_reg
+        else _badge("NO REGRESSIONS", "improved")
+    )
+    run_labels = [r.run_id for r in result.runs]
+    cond = result.conditions
+    type_labels = {
+        "agent": "Agent Comparison",
+        "model": "Model Comparison",
+        "dataset": "Dataset Coverage",
+        "general": "General Comparison",
+    }
+    threshold_map = {tr.evaluator: tr for tr in result.threshold_rows}
+
+    # Pre-compute per-run row pass counts (across all threshold evaluators)
+    run_row_pass: list[tuple[int, int]] = []  # (passed, total) per run
+    for run_idx in range(len(result.runs)):
+        total = len(result.item_rows)
+        passed = sum(1 for ir in result.item_rows if ir.passed_all[run_idx])
+        run_row_pass.append((passed, total))
+
+    # Pre-compute per-evaluator row pass rates
+    eval_row_rates: dict[str, list[tuple[int, int]]] = {}
+    for tr in result.threshold_rows:
+        rates = []
+        for run_idx in range(len(result.runs)):
+            total = 0
+            passed = 0
+            for ir in result.item_rows:
+                scores_list = ir.scores.get(tr.evaluator, [])
+                score = scores_list[run_idx] if run_idx < len(scores_list) else None
+                if score is not None:
+                    total += 1
+                    if _check_threshold(score, tr.criteria, tr.target):
+                        passed += 1
+            rates.append((passed, total))
+        eval_row_rates[tr.evaluator] = rates
+
+    parts: list[str] = []
+
+    # --- Header ---
+    parts.append(f"<h1>AgentOps Comparison Report {verdict_badge}</h1>")
+    ctype = type_labels.get(cond.comparison_type, "") if cond else ""
+    varying_str = ", ".join(cond.varying) if cond and cond.varying else ""
+    parts.append(
+        f'<div class="meta"><span>{ctype}</span><span>Varying: <strong>{_html_escape(varying_str)}</strong></span><span>{result.summary.run_count} runs</span></div>'
+    )
+
+    # --- Run Config ---
+    varying_set = set(cond.varying) if cond else set()
+    detail_fields = [
+        ("Role", None, lambda r: ""),
+        ("Target", None, lambda r: r.target or "-"),
+        ("Model", None, lambda r: r.model or "-"),
+        ("Agent", None, lambda r: r.agent_id or "-"),
+        ("Dataset", "dataset", lambda r: r.dataset_name),
+        ("Status", None, lambda r: ""),
+    ]
+    visible_fields = [
+        (lbl, c, g) for lbl, c, g in detail_fields if c is None or c in varying_set
+    ]
+
+    cols = "".join(f"<th>{_html_escape(lbl)}</th>" for lbl in run_labels)
+    parts.append(f"<table><thead><tr><th></th>{cols}</tr></thead><tbody>")
+    for field, _ckey, getter in visible_fields:
+        cells = ""
+        for i, r in enumerate(result.runs):
+            if field == "Role":
+                cells += (
+                    f"<td>{'<strong>Baseline</strong>' if i == 0 else f'Run {i}'}</td>"
+                )
+            elif field == "Status":
+                p, t = run_row_pass[i]
+                pct = int(p / t * 100) if t > 0 else 0
+                if r.overall_passed:
+                    cells += f"<td>{_status_badge(True)} <small>({pct}% · {p}/{t})</small></td>"
+                else:
+                    cells += f"<td>{_status_badge(False)} <small>({pct}% · {p}/{t})</small></td>"
+            else:
+                cells += f"<td>{_html_escape(getter(r))}</td>"
+        parts.append(f"<tr><td><strong>{field}</strong></td>{cells}</tr>")
+    parts.append("</tbody></table>")
+
+    # --- Evaluators ---
+    if result.metric_rows:
+        parts.append("<h2>Evaluators</h2>")
+        cols = "".join(
+            f'<th class="num">{_html_escape(lbl)}</th>' for lbl in run_labels
+        )
+        parts.append(
+            f"<table><thead><tr><th>Evaluator</th><th>Target</th>{cols}</tr></thead><tbody>"
+        )
+        for mr in result.metric_rows:
+            tr = threshold_map.get(mr.name)
+            target = _fmt_target(tr.criteria, tr.target) if tr else "-"
+            cells = ""
+            for i, v in enumerate(mr.values):
+                if not tr:
+                    # Informational metric — plain value only
+                    cells += (
+                        f'<td class="num" style="color:var(--muted)">{_fmt(v)}</td>'
+                    )
+                    continue
+                is_best = mr.best_run_index == i
+                # Dot indicator
+                met = _check_threshold(v, tr.criteria, tr.target)
+                dot = (
+                    '<span style="color:var(--green)">●</span> '
+                    if met
+                    else '<span style="color:var(--red)">●</span> '
+                )
+                # Value
+                val_str = _fmt(v)
+                # Delta + arrow
+                delta_str = ""
+                if i > 0:
+                    d = mr.deltas[i]
+                    direction = mr.directions[i]
+                    if d is not None:
+                        arrow = (
+                            "↑"
+                            if direction == "improved"
+                            else ("↓" if direction == "regressed" else "→")
+                        )
+                        color = (
+                            "var(--green)"
+                            if direction == "improved"
+                            else (
+                                "var(--red)"
+                                if direction == "regressed"
+                                else "var(--muted)"
+                            )
+                        )
+                        delta_str = f' <small style="color:{color}">{arrow} {_fmt_delta(d)}</small>'
+                # Row pass rate
+                rate_str = ""
+                if mr.name in eval_row_rates:
+                    p, t = eval_row_rates[mr.name][i]
+                    if t > 0:
+                        rate_str = (
+                            f' <small style="color:var(--muted)">({p}/{t})</small>'
+                        )
+                # Best highlight
+                best_style = (
+                    "background:var(--green-bg);font-weight:600;border-radius:.25rem;padding:.1rem .3rem;"
+                    if is_best
+                    else ""
+                )
+                inner = f"{dot}{val_str}{delta_str}{rate_str}"
+                if is_best:
+                    cells += f'<td class="num"><span style="{best_style}">{inner}</span></td>'
+                else:
+                    cells += f'<td class="num">{inner}</td>'
+            parts.append(
+                f"<tr><td>{_html_escape(mr.name)}</td><td>{_html_escape(target)}</td>{cells}</tr>"
+            )
+        parts.append("</tbody></table>")
+
+    # --- Row Details ---
+    show_items = result.conditions.row_level_valid if result.conditions else True
+    if result.item_rows and show_items:
+        parts.append("<h2>Row Details</h2>")
+        cols = "".join(f"<th>{_html_escape(lbl)}</th>" for lbl in run_labels)
+        parts.append(
+            f'<table><thead><tr><th class="num">Row</th>{cols}</tr></thead><tbody>'
+        )
+        for ir in result.item_rows:
+            cells = ""
+            for run_idx in range(len(result.runs)):
+                parts_cell = []
+                for eval_name, scores_list in ir.scores.items():
+                    score = scores_list[run_idx] if run_idx < len(scores_list) else None
+                    if score is not None:
+                        tr = threshold_map.get(eval_name)
+                        if tr:
+                            met = _check_threshold(score, tr.criteria, tr.target)
+                            dot = (
+                                '<span style="color:var(--green)">●</span>'
+                                if met
+                                else '<span style="color:var(--red)">●</span>'
+                            )
+                            parts_cell.append(
+                                f"{dot} {_html_escape(eval_name)}: {_fmt(score)}"
+                            )
+                        else:
+                            parts_cell.append(
+                                f"{_html_escape(eval_name)}: {_fmt(score)}"
+                            )
+                if not parts_cell:
+                    parts_cell.append(_status_badge(ir.passed_all[run_idx]))
+                cells += f"<td>{'<br>'.join(parts_cell)}</td>"
+            parts.append(f'<tr><td class="num">{ir.row_index}</td>{cells}</tr>')
+        parts.append("</tbody></table>")
+    elif result.item_rows and not show_items:
+        parts.append("<h2>Row Details</h2>")
+        parts.append(
+            '<p style="color:var(--yellow);font-size:.85rem">Skipped — datasets differ across runs, row-level comparison not meaningful.</p>'
+        )
+
+    # --- Fixed Parameters ---
+    if cond and cond.fixed:
+        parts.append("<h2>Fixed Parameters</h2>")
+        parts.append(
+            "<table><thead><tr><th>Parameter</th><th>Value</th></tr></thead><tbody>"
+        )
+        for k, v in cond.fixed.items():
+            parts.append(
+                f"<tr><td>{_html_escape(k)}</td><td>{_html_escape(v)}</td></tr>"
+            )
+        parts.append("</tbody></table>")
+
+    return _wrap_page("AgentOps Comparison Report", "\n".join(parts))
diff --git a/src/agentops/core/thresholds.py b/src/agentops/core/thresholds.py
index 6870a54..ba43f86 100644
--- a/src/agentops/core/thresholds.py
+++ b/src/agentops/core/thresholds.py
@@ -1,4 +1,5 @@
 """Threshold evaluation logic for AgentOps."""
+
 from __future__ import annotations
 
 from typing import Dict, List
@@ -14,7 +15,9 @@ def evaluate_thresholds(
 
     for rule in threshold_rules:
         if rule.evaluator not in metrics_by_name:
-            raise ValueError(f"Missing evaluator score required by threshold: {rule.evaluator}")
+            raise ValueError(
+                f"Missing evaluator score required by threshold: {rule.evaluator}"
+            )
 
         actual_value = metrics_by_name[rule.evaluator]
 
@@ -41,7 +44,9 @@ def evaluate_thresholds(
             continue
 
         if rule.value is None:
-            raise ValueError(f"Threshold for evaluator '{rule.evaluator}' requires a numeric value")
+            raise ValueError(
+                f"Threshold for evaluator '{rule.evaluator}' requires a numeric value"
+            )
 
         target_value = float(rule.value)
 
diff --git a/src/agentops/services/cicd.py b/src/agentops/services/cicd.py
index eee93c8..8ab05d6 100644
--- a/src/agentops/services/cicd.py
+++ b/src/agentops/services/cicd.py
@@ -1,4 +1,5 @@
 """CI/CD workflow generation service for `agentops config cicd`."""
+
 from __future__ import annotations
 
 from dataclasses import dataclass, field
diff --git a/src/agentops/services/comparison.py b/src/agentops/services/comparison.py
new file mode 100644
index 0000000..388fecf
--- /dev/null
+++ b/src/agentops/services/comparison.py
@@ -0,0 +1,382 @@
+"""Comparison service for evaluating baseline vs current run results."""
+from __future__ import annotations
+
+import json
+from dataclasses import dataclass
+from pathlib import Path
+from typing import Dict, List, Optional
+
+from agentops.core.models import (
+    ComparisonConditions,
+    ComparisonItemRow,
+    ComparisonMetricRow,
+    ComparisonResult,
+    ComparisonSummary,
+    ComparisonThresholdRow,
+    RunReference,
+    RunResult,
+)
+
+
+@dataclass(frozen=True)
+class ComparisonServiceResult:  # outcome of run_comparison(): written paths + verdict
+    comparison_json_path: Path  # comparison.json; always written
+    comparison_md_path: Path | None  # set only for report_format "md" or "all"
+    comparison_html_path: Path | None  # set only for report_format "html" or "all"
+    has_regressions: bool  # copy of ComparisonResult.summary.any_regressions
+
+
+def _resolve_run_path(run_id: str, workspace_dir: Path | None = None) -> Path:
+    """Map a run identifier onto the ``results.json`` file it designates.
+
+    Lookup order:
+    1. ``run_id`` is a path to an existing file: that file.
+    2. ``run_id`` is a path to a directory containing ``results.json``.
+    3. ``<results dir>/<run_id>/results.json``; the results dir is ``workspace_dir``
+       (default ``./.agentops``) plus ``results`` unless it is already named that.
+
+    Absolute inputs stop after step 2 and are returned as given; relative hits
+    are returned resolved. Raises FileNotFoundError when nothing matches.
+    """
+    given = Path(run_id)
+    nested = given / "results.json"
+
+    if given.is_absolute():
+        if given.is_file():
+            return given
+        if nested.is_file():
+            return nested
+        raise FileNotFoundError(f"Cannot find results.json at: {given}")
+
+    if given.is_file():
+        return given.resolve()
+    if given.is_dir() and nested.is_file():
+        return nested.resolve()
+
+    base = workspace_dir or (Path.cwd() / ".agentops")
+    root = base if base.name == "results" else base / "results"
+    results_file = root / run_id / "results.json"
+    if results_file.is_file():
+        return results_file.resolve()
+
+    raise FileNotFoundError(
+        f"Cannot resolve run '{run_id}' to a results.json file. "
+        f"Searched: {results_file}"
+    )
+
+
+def _load_run_result(path: Path) -> RunResult:
+    """Read ``path`` as UTF-8 JSON and validate it into a ``RunResult``."""
+    return RunResult.model_validate(json.loads(path.read_text(encoding="utf-8")))
+
+
+def _parse_command_field(command: str) -> dict[str, str]:
+    """Collect ``key=value`` tokens from a whitespace-separated command string.
+
+    Tokens without ``=`` are ignored; each token is split at its first ``=``,
+    and a key that occurs more than once keeps the value of its last occurrence.
+    """
+    return dict(
+        token.split("=", 1) for token in command.split() if "=" in token
+    )
+
+
+def _run_reference(result: RunResult, run_id: str) -> RunReference:
+    """Summarise ``result`` as a ``RunReference`` labelled ``run_id``.
+
+    Target, model, agent id and project endpoint come from ``key=value`` tokens in
+    ``result.execution.command``; a missing target is inferred from agent_id, then model.
+    """
+    fields = _parse_command_field(result.execution.command)
+    model, agent_id = fields.get("model"), fields.get("agent_id")
+    target = fields.get("target")
+    if not target and agent_id:
+        target = "agent"
+    elif not target and model:
+        target = "model"
+    return RunReference(
+        run_id=run_id,
+        bundle_name=result.bundle.name, dataset_name=result.dataset.name,
+        started_at=result.execution.started_at, backend=result.execution.backend,
+        target=target, model=model, agent_id=agent_id,
+        project_endpoint=fields.get("project_endpoint"),
+        overall_passed=result.summary.overall_passed,
+    )
+
+
+def _lower_is_better_metrics(*results: RunResult) -> frozenset[str]:
+    """Return the evaluator names for which a smaller score is better.
+
+    An evaluator qualifies when any threshold in any run uses ``<=`` or ``<``.
+    """
+    return frozenset(
+        threshold.evaluator
+        for run in results
+        for threshold in run.thresholds
+        if threshold.criteria in {"<=", "<"}
+    )
+
+
+def _compute_metric_direction(delta: float, lower_is_better: bool) -> str:
+    """Classify a delta against the baseline as improved, regressed or unchanged."""
+    if delta == 0:
+        return "unchanged"
+    got_better = (delta < 0) if lower_is_better else (delta > 0)
+    return "improved" if got_better else "regressed"
+
+
+def _detect_conditions(refs: List[RunReference]) -> ComparisonConditions:
+    """Work out which run attributes are shared and which differ across ``refs``.
+
+    A dimension whose values are identical for every run is "fixed"; anything
+    else is "varying". The comparison type is "agent" or "model" when the
+    dataset is shared and that attribute varies (agent wins if both do),
+    "dataset" when the dataset varies but agent and model do not, and
+    "general" otherwise.
+    """
+    per_dimension = {
+        "dataset": [ref.dataset_name for ref in refs],
+        "agent": [ref.agent_id or "-" for ref in refs],
+        "model": [ref.model or "-" for ref in refs],
+        "backend": [ref.backend or "-" for ref in refs],
+        "target": [ref.target or "-" for ref in refs],
+        "bundle": [ref.bundle_name for ref in refs],
+        "project": [ref.project_endpoint or "-" for ref in refs],
+    }
+    is_shared = {dim: len(set(vals)) == 1 for dim, vals in per_dimension.items()}
+    # target/model/agent are always shown per run, so never list them as fixed
+    always_shown = {"target", "model", "agent"}
+    fixed: Dict[str, str] = {
+        dim: vals[0]
+        for dim, vals in per_dimension.items()
+        if is_shared[dim] and dim not in always_shown
+    }
+    varying: List[str] = [dim for dim in per_dimension if not is_shared[dim]]
+
+    same_dataset = "dataset" not in varying
+    if same_dataset and "agent" in varying:
+        ctype = "agent"
+    elif same_dataset and "model" in varying:
+        ctype = "model"
+    elif not same_dataset and "agent" not in varying and "model" not in varying:
+        ctype = "dataset"
+    else:
+        ctype = "general"
+    return ComparisonConditions(
+        comparison_type=ctype,
+        fixed=fixed,
+        varying=varying,
+        row_level_valid=same_dataset,  # row-by-row comparison needs one shared dataset
+    )
+
+
+def compare_runs(
+    run_paths: List[Path],
+    run_ids: List[str],
+) -> ComparisonResult:
+    """Compare N evaluation runs. The first run is the baseline.
+
+    Args:
+        run_paths: ``results.json`` files to load, baseline first.
+        run_ids: Identifier recorded for each run, parallel to ``run_paths``.
+
+    Returns:
+        A ``ComparisonResult`` holding, for every run, the metric values with
+        deltas against the baseline, threshold outcomes, per-row outcomes and
+        scores, the detected comparison conditions and a regression summary.
+
+    Raises:
+        ValueError: If ``run_paths`` and ``run_ids`` differ in length.
+    """
+    if len(run_paths) != len(run_ids):
+        # zip() below would silently drop the unmatched runs and leave ``refs``
+        # shorter than ``results``.
+        raise ValueError(
+            f"run_paths ({len(run_paths)}) and run_ids ({len(run_ids)}) "
+            "must have the same length"
+        )
+
+    results = [_load_run_result(p) for p in run_paths]
+    refs = [_run_reference(r, rid) for r, rid in zip(results, run_ids)]
+
+    lib_metrics = _lower_is_better_metrics(*results)
+
+    # Index each run once; every lookup below is by name, key or row index.
+    metric_maps = [{m.name: m.value for m in r.metrics} for r in results]
+    threshold_maps = [{(t.evaluator, t.criteria): t for t in r.thresholds} for r in results]
+    item_maps = [{item.row_index: item for item in r.item_evaluations} for r in results]
+    row_score_maps = [
+        {row.row_index: {m.name: m.value for m in row.metrics} for row in r.row_metrics}
+        for r in results
+    ]
+
+    # Collect all metric names preserving first-seen order
+    all_metric_names = list(dict.fromkeys(m.name for r in results for m in r.metrics))
+
+    # Build metric rows
+    metric_rows: List[ComparisonMetricRow] = []
+    for name in all_metric_names:
+        lower_is_better = name in lib_metrics
+        values: List[float] = []
+        deltas: List[Optional[float]] = []
+        delta_percents: List[Optional[float]] = []
+        directions: List[str] = []
+        baseline_val: Optional[float] = None
+
+        for i, metric_map in enumerate(metric_maps):
+            val = metric_map.get(name)
+            delta: Optional[float] = None
+            delta_pct: Optional[float] = None
+            direction = "unchanged"
+            if val is None:
+                # Keep the lists aligned with the runs; 0.0 is only a placeholder.
+                values.append(0.0)
+            else:
+                values.append(val)
+                if i == 0:
+                    baseline_val = val
+                elif baseline_val is not None:
+                    delta = val - baseline_val
+                    if baseline_val != 0:
+                        delta_pct = delta / abs(baseline_val) * 100
+                    direction = _compute_metric_direction(delta, lower_is_better)
+            deltas.append(delta)
+            delta_percents.append(delta_pct)
+            directions.append(direction)
+
+        # Best run: for lower-is-better pick min, otherwise pick max. Runs that
+        # do not report the metric at all are not candidates.
+        candidates = [(i, v) for i, v in enumerate(values) if name in metric_maps[i]]
+        best_idx: Optional[int] = None
+        if candidates:
+            pick = min if lower_is_better else max
+            best_idx = pick(candidates, key=lambda pair: pair[1])[0]
+
+        metric_rows.append(ComparisonMetricRow(
+            name=name,
+            values=values,
+            deltas=deltas,
+            delta_percents=delta_percents,
+            directions=directions,
+            best_run_index=best_idx,
+        ))
+
+    # Build threshold rows: one per distinct (evaluator, criteria), first-seen order
+    all_thresholds: List[tuple[str, str]] = list(
+        dict.fromkeys((t.evaluator, t.criteria) for r in results for t in r.thresholds)
+    )
+    threshold_rows: List[ComparisonThresholdRow] = []
+    for evaluator, criteria in all_thresholds:
+        passed_list: List[bool] = []
+        target_val: str | None = None
+        for t_map in threshold_maps:
+            t = t_map.get((evaluator, criteria))
+            # A run without this threshold counts as not passing it.
+            passed_list.append(t.passed if t else False)
+            if t and target_val is None:
+                target_val = t.expected
+        threshold_rows.append(ComparisonThresholdRow(
+            evaluator=evaluator,
+            criteria=criteria,
+            target=target_val,
+            passed=passed_list,
+        ))
+
+    # Build item rows
+    all_row_indices = {item.row_index for r in results for item in r.item_evaluations}
+
+    # Evaluator names that have thresholds (for row-level display). De-duplicated:
+    # an evaluator can own several threshold rows (one per criteria), and each
+    # score list must receive exactly one entry per run to stay run-indexed.
+    threshold_evaluator_names = list(
+        dict.fromkeys(tr.evaluator for tr in threshold_rows)
+    )
+
+    item_rows: List[ComparisonItemRow] = []
+    for idx in sorted(all_row_indices):
+        passed_list = []
+        # Per-evaluator scores for this row across all runs
+        scores: Dict[str, List[Optional[float]]] = {name: [] for name in threshold_evaluator_names}
+        for item_map, row_score_map in zip(item_maps, row_score_maps):
+            item = item_map.get(idx)
+            # A row missing from a run counts as not passing in that run.
+            passed_list.append(item.passed_all if item else False)
+            row_scores = row_score_map.get(idx) or {}
+            for name in threshold_evaluator_names:
+                scores[name].append(row_scores.get(name))
+        item_rows.append(ComparisonItemRow(row_index=idx, passed_all=passed_list, scores=scores))
+
+    # Summary: a run regresses when its overall status flipped from PASS (baseline)
+    # to FAIL, or when any row that passed in the baseline does not pass in it.
+    # Minor numeric shifts within passing thresholds are NOT regressions.
+    # NOTE(review): the row check also runs when the datasets differ
+    # (conditions.row_level_valid is False) — confirm that is intended.
+    runs_with_regressions: List[int] = []
+    for i in range(1, len(results)):
+        status_flipped = (
+            results[0].summary.overall_passed
+            and not results[i].summary.overall_passed
+        )
+        row_flipped = any(ir.passed_all[0] and not ir.passed_all[i] for ir in item_rows)
+        if status_flipped or row_flipped:
+            runs_with_regressions.append(i)
+
+    summary = ComparisonSummary(
+        run_count=len(results),
+        any_regressions=len(runs_with_regressions) > 0,
+        runs_with_regressions=runs_with_regressions,
+    )
+
+    return ComparisonResult(
+        version=1,
+        runs=refs,
+        baseline_index=0,
+        conditions=_detect_conditions(refs),
+        metric_rows=metric_rows,
+        threshold_rows=threshold_rows,
+        item_rows=item_rows,
+        summary=summary,
+    )
+
+
+def run_comparison(
+    run_ids: List[str],
+    output_dir: Path | None = None,
+    report_format: str = "md",
+) -> ComparisonServiceResult:
+    """Resolve run IDs, compare the runs, and write the comparison outputs.
+
+    Writes ``comparison.json`` always, ``comparison.md`` for ``report_format``
+    "md"/"all" and ``comparison.html`` for "html"/"all", into ``output_dir`` or,
+    when omitted, the directory of the last run's results.json.
+
+    Raises ValueError for an empty ``run_ids`` and FileNotFoundError for a run id
+    that cannot be resolved.
+    """
+    if not run_ids:  # otherwise paths[-1] below fails with a bare IndexError
+        raise ValueError("run_comparison requires at least one run id")
+    from agentops.core.reporter import generate_comparison_html, generate_comparison_markdown
+
+    paths = [_resolve_run_path(rid) for rid in run_ids]
+    result = compare_runs(run_paths=paths, run_ids=run_ids)
+
+    resolved_output = output_dir.resolve() if output_dir else paths[-1].parent
+    resolved_output.mkdir(parents=True, exist_ok=True)
+
+    def _write(filename: str, text: str) -> Path:
+        target = resolved_output / filename
+        target.write_text(text, encoding="utf-8")
+        return target
+
+    json_path = _write("comparison.json", json.dumps(result.model_dump(mode="json"), indent=2))
+    md_path = html_path = None
+    if report_format in ("md", "all"):
+        md_path = _write("comparison.md", generate_comparison_markdown(result))
+    if report_format in ("html", "all"):
+        html_path = _write("comparison.html", generate_comparison_html(result))
+    return ComparisonServiceResult(
+        comparison_json_path=json_path,
+        comparison_md_path=md_path,
+        comparison_html_path=html_path,
+        has_regressions=result.summary.any_regressions,
+    )
diff --git a/src/agentops/services/initializer.py b/src/agentops/services/initializer.py
index ef1e416..11c024b 100644
--- a/src/agentops/services/initializer.py
+++ b/src/agentops/services/initializer.py
@@ -1,4 +1,5 @@
 """Workspace initialization service for `agentops init`."""
+
 from __future__ import annotations
 
 from dataclasses import dataclass, field
diff --git a/src/agentops/services/reporting.py b/src/agentops/services/reporting.py
index 307d4ce..e3995e3 100644
--- a/src/agentops/services/reporting.py
+++ b/src/agentops/services/reporting.py
@@ -1,4 +1,5 @@
 """Report orchestration service."""
+
 from __future__ import annotations
 
 import json
@@ -6,16 +7,19 @@
 from pathlib import Path
 
 from agentops.core.models import RunResult
-from agentops.core.reporter import generate_report_markdown
+from agentops.core.reporter import generate_report_html, generate_report_markdown
 
 
 @dataclass(frozen=True)
 class ReportResult:
     input_results_path: Path
     output_report_path: Path
+    html_report_path: Path | None = None
 
 
-def generate_report_from_results(results_path: Path, output_path: Path | None = None) -> ReportResult:
+def generate_report_from_results(
+    results_path: Path, output_path: Path | None = None, report_format: str = "md"
+) -> ReportResult:
     resolved_results_path = results_path.resolve()
     if not resolved_results_path.exists():
         raise FileNotFoundError(f"results.json not found: {resolved_results_path}")
@@ -23,11 +27,34 @@ def generate_report_from_results(results_path: Path, output_path: Path | None =
     payload = json.loads(resolved_results_path.read_text(encoding="utf-8"))
     result = RunResult.model_validate(payload)
 
-    resolved_output_path = output_path.resolve() if output_path is not None else resolved_results_path.with_name("report.md")
+    default_suffix = ".html" if report_format == "html" else ".md"
+    resolved_output_path = (
+        output_path.resolve()
+        if output_path is not None
+        else resolved_results_path.with_name(f"report{default_suffix}")
+    )
     resolved_output_path.parent.mkdir(parents=True, exist_ok=True)
-    resolved_output_path.write_text(generate_report_markdown(result), encoding="utf-8")
+
+    primary_path = resolved_output_path
+    html_report_path: Path | None = None
+    if report_format in ("md", "all"):
+        md_path = (
+            resolved_output_path
+            if resolved_output_path.suffix == ".md"
+            else resolved_output_path.with_suffix(".md")
+        )
+        md_path.write_text(generate_report_markdown(result), encoding="utf-8")
+        primary_path = md_path
+    if report_format in ("html", "all"):
+        html_path = resolved_output_path.with_suffix(".html")
+        html_path.write_text(generate_report_html(result), encoding="utf-8")
+        primary_path = html_path
+        html_report_path = html_path
+    if report_format == "all":
+        primary_path = resolved_output_path.with_suffix(".md")
 
     return ReportResult(
         input_results_path=resolved_results_path,
-        output_report_path=resolved_output_path,
+        output_report_path=primary_path,
+        html_report_path=html_report_path,
     )
diff --git a/src/agentops/services/runner.py b/src/agentops/services/runner.py
index e74e5dd..37731ae 100644
--- a/src/agentops/services/runner.py
+++ b/src/agentops/services/runner.py
@@ -28,7 +28,7 @@
     ThresholdEvaluationResult,
     ThresholdRule,
 )
-from agentops.core.reporter import generate_report_markdown
+from agentops.core.reporter import generate_report_html, generate_report_markdown
 from agentops.services.foundry_evals import publish_foundry_evaluation
 
 
@@ -367,7 +367,7 @@ def _append_run_metric(name: str, value: float) -> None:
 
 
 def run_evaluation(
-    config_path: Path | None = None, output_override: Path | None = None
+    config_path: Path | None = None, output_override: Path | None = None, report_format: str = "md",
 ) -> EvalRunServiceResult:
     run_config_path = (
         config_path.resolve() if config_path is not None else _default_run_config_path()
@@ -498,15 +498,26 @@ def run_evaluation(
     )
 
     results_path = output_dir / "results.json"
-    report_path = output_dir / "report.md"
+    report_path: Path
 
     results_path.write_text(
         json.dumps(normalized_result.model_dump(mode="json"), indent=2),
         encoding="utf-8",
     )
-    report_path.write_text(
-        generate_report_markdown(normalized_result), encoding="utf-8"
-    )
+    if report_format in ("md", "all"):
+        md_path = output_dir / "report.md"
+        md_path.write_text(
+            generate_report_markdown(normalized_result), encoding="utf-8"
+        )
+        report_path = md_path
+    if report_format in ("html", "all"):
+        html_path = output_dir / "report.html"
+        html_path.write_text(
+            generate_report_html(normalized_result), encoding="utf-8"
+        )
+        report_path = html_path
+    if report_format == "all":
+        report_path = md_path
 
     latest_dir = _latest_output_dir(run_config_path)
     _sync_latest_output(output_dir, latest_dir)
diff --git a/src/agentops/templates/bundles/agent_tools_baseline.yaml b/src/agentops/templates/bundles/agent_tools_baseline.yaml
index e4e0856..f85ea99 100644
--- a/src/agentops/templates/bundles/agent_tools_baseline.yaml
+++ b/src/agentops/templates/bundles/agent_tools_baseline.yaml
@@ -1,18 +1,24 @@
 version: 1
 name: agent_tools_baseline
 description: >
-  Placeholder evaluation bundle for Agent with Tools scenarios.
-  This bundle will be expanded in a future release to include tool-call accuracy
-  and task-completion evaluators. For now it validates basic response quality.
+  Evaluation bundle for Agent with Tools scenarios.
+  Measures task completion quality and tool call accuracy using
+  AI-assisted evaluators from the Foundry evaluation suite.
 evaluators:
-  - name: SimilarityEvaluator
+  - name: TaskCompletionEvaluator
+    source: foundry
+    enabled: true
+  - name: ToolCallAccuracyEvaluator
     source: foundry
     enabled: true
   - name: avg_latency_seconds
     source: local
     enabled: true
 thresholds:
-  - evaluator: SimilarityEvaluator
+  - evaluator: TaskCompletionEvaluator
+    criteria: ">="
+    value: 3
+  - evaluator: ToolCallAccuracyEvaluator
     criteria: ">="
     value: 3
   - evaluator: avg_latency_seconds
@@ -25,4 +31,5 @@ metadata:
     - baseline
     - agent
     - tools
-    - placeholder
+    - task-completion
+    - tool-call-accuracy
diff --git a/src/agentops/templates/data/smoke-agent-tools.jsonl b/src/agentops/templates/data/smoke-agent-tools.jsonl
index a8a4c43..17d9a5f 100644
--- a/src/agentops/templates/data/smoke-agent-tools.jsonl
+++ b/src/agentops/templates/data/smoke-agent-tools.jsonl
@@ -1,5 +1,5 @@
-{"id":"1","input":"What is the weather in Seattle today?","expected":"I'll check the weather for Seattle. The current temperature is 55°F with partly cloudy skies."}
-{"id":"2","input":"Convert 100 USD to EUR","expected":"100 USD is approximately 92 EUR at the current exchange rate."}
-{"id":"3","input":"Search for the latest news about AI regulation","expected":"Here are the latest news articles about AI regulation from trusted sources."}
-{"id":"4","input":"Calculate the compound interest on $10000 at 5% for 3 years","expected":"The compound interest on $10,000 at 5% annual rate for 3 years is $1,576.25, for a total of $11,576.25."}
-{"id":"5","input":"Book a flight from New York to London for next Monday","expected":"I found several flights from New York to London for next Monday. Here are the best options."}
\ No newline at end of file
+{"id":"1","input":"What is the weather in Seattle today?","expected":"I'll check the weather for Seattle. The current temperature is 55°F with partly cloudy skies.","tool_definitions":[{"name":"get_weather","description":"Get current weather for a city","parameters":{"type":"object","properties":{"city":{"type":"string"}},"required":["city"]}}],"tool_calls":[{"name":"get_weather","arguments":{"city":"Seattle"}}]}
+{"id":"2","input":"Convert 100 USD to EUR","expected":"100 USD is approximately 92 EUR at the current exchange rate.","tool_definitions":[{"name":"convert_currency","description":"Convert an amount from one currency to another","parameters":{"type":"object","properties":{"amount":{"type":"number"},"from_currency":{"type":"string"},"to_currency":{"type":"string"}},"required":["amount","from_currency","to_currency"]}}],"tool_calls":[{"name":"convert_currency","arguments":{"amount":100,"from_currency":"USD","to_currency":"EUR"}}]}
+{"id":"3","input":"Search for the latest news about AI regulation","expected":"Here are the latest news articles about AI regulation from trusted sources.","tool_definitions":[{"name":"search_news","description":"Search for recent news articles","parameters":{"type":"object","properties":{"query":{"type":"string"},"max_results":{"type":"integer"}},"required":["query"]}}],"tool_calls":[{"name":"search_news","arguments":{"query":"AI regulation","max_results":5}}]}
+{"id":"4","input":"Calculate the compound interest on $10000 at 5% for 3 years","expected":"The compound interest on $10,000 at 5% annual rate for 3 years is $1,576.25, for a total of $11,576.25.","tool_definitions":[{"name":"calculate_compound_interest","description":"Calculate compound interest","parameters":{"type":"object","properties":{"principal":{"type":"number"},"rate":{"type":"number"},"years":{"type":"integer"}},"required":["principal","rate","years"]}}],"tool_calls":[{"name":"calculate_compound_interest","arguments":{"principal":10000,"rate":0.05,"years":3}}]}
+{"id":"5","input":"Book a flight from New York to London for next Monday","expected":"I found several flights from New York to London for next Monday. Here are the best options.","tool_definitions":[{"name":"search_flights","description":"Search for available flights","parameters":{"type":"object","properties":{"origin":{"type":"string"},"destination":{"type":"string"},"date":{"type":"string"}},"required":["origin","destination","date"]}}],"tool_calls":[{"name":"search_flights","arguments":{"origin":"New York","destination":"London","date":"next Monday"}}]}
\ No newline at end of file
diff --git a/src/agentops/templates/datasets/smoke-rag.yaml b/src/agentops/templates/datasets/smoke-rag.yaml
index 5e20c9a..2aab88e 100644
--- a/src/agentops/templates/datasets/smoke-rag.yaml
+++ b/src/agentops/templates/datasets/smoke-rag.yaml
@@ -8,6 +8,7 @@ format:
   type: jsonl
   input_field: input
   expected_field: expected
+  context_field: context
 metadata:
   scenario: rag_retrieval
   size_hint: 5
diff --git a/src/agentops/utils/logging.py b/src/agentops/utils/logging.py
index 5c4e7a0..52bde03 100644
--- a/src/agentops/utils/logging.py
+++ b/src/agentops/utils/logging.py
@@ -3,6 +3,7 @@
 No side effects at import time — call setup_logging() explicitly from the
 CLI callback before any command runs.
 """
+
 from __future__ import annotations
 
 import logging
@@ -34,7 +35,9 @@ def setup_logging(verbose: bool = False) -> None:
         logging.getLogger("azure.identity").setLevel(logging.WARNING)
         logging.getLogger("azure.core").setLevel(logging.WARNING)
         logging.getLogger("azure.core.pipeline").setLevel(logging.WARNING)
-        logging.getLogger("azure.core.pipeline.policies.http_logging_policy").setLevel(logging.WARNING)
+        logging.getLogger("azure.core.pipeline.policies.http_logging_policy").setLevel(
+            logging.WARNING
+        )
         logging.getLogger("azure.ai.evaluation").setLevel(logging.WARNING)
         logging.getLogger("httpx").setLevel(logging.WARNING)
         logging.getLogger("openai").setLevel(logging.WARNING)
diff --git a/src/agentops/utils/yaml.py b/src/agentops/utils/yaml.py
index febdde4..2f964d9 100644
--- a/src/agentops/utils/yaml.py
+++ b/src/agentops/utils/yaml.py
@@ -1,4 +1,5 @@
 """YAML load/save helpers using ruamel.yaml."""
+
 from __future__ import annotations
 
 from pathlib import Path
diff --git a/tests/integration/test_eval_run_integration.py b/tests/integration/test_eval_run_integration.py
index cb31ef9..b42a9c9 100644
--- a/tests/integration/test_eval_run_integration.py
+++ b/tests/integration/test_eval_run_integration.py
@@ -39,7 +39,11 @@ def _write_project_files(tmp_path: Path, *, fail_thresholds: bool) -> Path:
                 {"name": "fluency", "source": "local", "enabled": True},
             ],
             "thresholds": [
-                {"evaluator": "groundedness", "criteria": ">=", "value": threshold_value},
+                {
+                    "evaluator": "groundedness",
+                    "criteria": ">=",
+                    "value": threshold_value,
+                },
                 {"evaluator": "relevance", "criteria": ">=", "value": threshold_value},
                 {"evaluator": "coherence", "criteria": ">=", "value": threshold_value},
                 {"evaluator": "fluency", "criteria": ">=", "value": threshold_value},
@@ -55,19 +59,23 @@ def _write_project_files(tmp_path: Path, *, fail_thresholds: bool) -> Path:
             "name": "smoke",
             "description": "Integration dataset",
             "source": {"type": "file", "path": "../data/smoke.jsonl"},
-            "format": {"type": "jsonl", "input_field": "input", "expected_field": "expected"},
+            "format": {
+                "type": "jsonl",
+                "input_field": "input",
+                "expected_field": "expected",
+            },
             "metadata": {"owner": "tests"},
         },
     )
 
     (data_dir / "smoke.jsonl").write_text(
-        '\n'.join(
+        "\n".join(
             [
                 '{"id":"1","input":"hello","expected":"hello"}',
                 '{"id":"2","input":"world","expected":"world"}',
             ]
         )
-        + '\n',
+        + "\n",
         encoding="utf-8",
     )
 
@@ -166,7 +174,9 @@ def test_eval_run_integration_threshold_fail(tmp_path: Path, monkeypatch) -> Non
     assert run_metrics["items_pass_rate"] == 0.0
 
 
-def test_eval_run_integration_uses_default_run_yaml_and_updates_latest(tmp_path: Path, monkeypatch) -> None:
+def test_eval_run_integration_uses_default_run_yaml_and_updates_latest(
+    tmp_path: Path, monkeypatch
+) -> None:
     _write_project_files(tmp_path, fail_thresholds=False)
 
     monkeypatch.chdir(tmp_path)
@@ -182,7 +192,9 @@ def test_eval_run_integration_uses_default_run_yaml_and_updates_latest(tmp_path:
     assert (latest_dir / "report.md").is_file()
 
     timestamp_dirs = [
-        path for path in results_root.iterdir() if path.is_dir() and path.name != "latest"
+        path
+        for path in results_root.iterdir()
+        if path.is_dir() and path.name != "latest"
     ]
     assert len(timestamp_dirs) == 1
     assert (timestamp_dirs[0] / "results.json").is_file()
diff --git a/tests/unit/test_cli_commands.py b/tests/unit/test_cli_commands.py
index 2ef8b87..4676f84 100644
--- a/tests/unit/test_cli_commands.py
+++ b/tests/unit/test_cli_commands.py
@@ -20,11 +20,11 @@ def test_init_help_exposes_path_alias() -> None:
     assert "--path" in _strip_ansi(result.stdout)
 
 
-def test_eval_compare_is_planned_stub() -> None:
-    result = runner.invoke(app, ["eval", "compare", "--runs", "r1,r2"])
+def test_eval_compare_rejects_wrong_run_count() -> None:
+    result = runner.invoke(app, ["eval", "compare", "--runs", "only_one"])
 
     assert result.exit_code == 1
-    assert "planned but not implemented" in result.stdout.lower()
+    assert "at least two" in result.output.lower()  # stdout+stderr as shown
 
 
 def test_trace_init_is_planned_stub() -> None:
diff --git a/tests/unit/test_comparison.py b/tests/unit/test_comparison.py
new file mode 100644
index 0000000..2258829
--- /dev/null
+++ b/tests/unit/test_comparison.py
@@ -0,0 +1,486 @@
+"""Unit tests for the unified comparison service and models."""
+
+from __future__ import annotations
+
+import json
+from pathlib import Path
+
+import pytest
+
+from agentops.core.models import (
+    ComparisonItemRow,
+    ComparisonMetricRow,
+    ComparisonResult,
+    ComparisonSummary,
+    ComparisonThresholdRow,
+    RunReference,
+    RunResult,
+)
+from agentops.core.reporter import generate_comparison_markdown
+from agentops.services.comparison import (
+    _compute_metric_direction,
+    _resolve_run_path,
+    compare_runs,
+)
+
+
+# ---------------------------------------------------------------------------
+# Helpers
+# ---------------------------------------------------------------------------
+
+
+def _sample_result(  # RunResult fixture: two metrics, two dataset rows
+    *,
+    groundedness: float = 0.84,
+    relevance: float = 0.83,
+    overall_passed: bool = True,
+    row1_groundedness: float = 0.90,
+    row2_groundedness: float = 0.78,
+) -> RunResult:
+    return RunResult.model_validate(
+        {
+            "version": 1,
+            "status": "completed",
+            "bundle": {
+                "name": "rag_baseline",
+                "path": ".agentops/bundles/rag_baseline.yaml",
+            },
+            "dataset": {"name": "smoke", "path": ".agentops/datasets/smoke.yaml"},
+            "execution": {
+                "backend": "subprocess",
+                "command": "python -m fake_eval_runner",
+                "started_at": "2026-03-01T10:00:00Z",
+                "finished_at": "2026-03-01T10:00:05Z",
+                "duration_seconds": 5.0,
+                "exit_code": 0,
+            },
+            "metrics": [
+                {"name": "groundedness", "value": groundedness},
+                {"name": "relevance", "value": relevance},
+            ],
+            "row_metrics": [
+                {
+                    "row_index": 1,
+                    "metrics": [{"name": "groundedness", "value": row1_groundedness}],
+                },
+                {
+                    "row_index": 2,
+                    "metrics": [{"name": "groundedness", "value": row2_groundedness}],
+                },
+            ],
+            "item_evaluations": [
+                {
+                    "row_index": 1,
+                    "passed_all": True,  # constant; ignores row1_groundedness
+                    "thresholds": [
+                        {
+                            "row_index": 1,
+                            "evaluator": "groundedness",
+                            "criteria": ">=",
+                            "expected": "0.800000",
+                            "actual": str(row1_groundedness),
+                            "passed": row1_groundedness >= 0.8,
+                        },
+                    ],
+                },
+                {
+                    "row_index": 2,
+                    "passed_all": overall_passed,  # not derived from row value
+                    "thresholds": [
+                        {
+                            "row_index": 2,
+                            "evaluator": "groundedness",
+                            "criteria": ">=",
+                            "expected": "0.800000",
+                            "actual": str(row2_groundedness),
+                            "passed": row2_groundedness >= 0.8,
+                        },
+                    ],
+                },
+            ],
+            "thresholds": [  # run-level thresholds: both use ">=" against 0.8
+                {
+                    "evaluator": "groundedness",
+                    "criteria": ">=",
+                    "expected": "0.800000",
+                    "actual": f"{groundedness:.6f}",
+                    "passed": groundedness >= 0.8,
+                },
+                {
+                    "evaluator": "relevance",
+                    "criteria": ">=",
+                    "expected": "0.800000",
+                    "actual": f"{relevance:.6f}",
+                    "passed": relevance >= 0.8,
+                },
+            ],
+            "summary": {  # NOTE(review): counts follow the flag, not the values
+                "metrics_count": 2,
+                "thresholds_count": 2,
+                "thresholds_passed": 2 if overall_passed else 1,
+                "thresholds_failed": 0 if overall_passed else 1,
+                "overall_passed": overall_passed,
+            },
+        }
+    )
+
+
+def _sample_result_with_latency(  # RunResult fixture with a "<=" latency threshold
+    *, similarity: float = 5.0, latency: float = 5.0
+) -> RunResult:
+    return RunResult.model_validate(
+        {
+            "version": 1,
+            "status": "completed",
+            "bundle": {
+                "name": "model_direct",
+                "path": ".agentops/bundles/model_direct.yaml",
+            },
+            "dataset": {"name": "smoke", "path": ".agentops/datasets/smoke.yaml"},
+            "execution": {
+                "backend": "foundry",
+                "command": "foundry.cloud_evaluation",
+                "started_at": "2026-03-01T10:00:00Z",
+                "finished_at": "2026-03-01T10:00:05Z",
+                "duration_seconds": 5.0,
+                "exit_code": 0,
+            },
+            "metrics": [
+                {"name": "SimilarityEvaluator", "value": similarity},
+                {"name": "avg_latency_seconds", "value": latency},
+            ],
+            "row_metrics": [  # single row mirroring the run-level values
+                {
+                    "row_index": 1,
+                    "metrics": [
+                        {"name": "SimilarityEvaluator", "value": similarity},
+                        {"name": "avg_latency_seconds", "value": latency},
+                    ],
+                },
+            ],
+            "item_evaluations": [
+                {
+                    "row_index": 1,
+                    "passed_all": True,  # constant; not derived from the checks
+                    "thresholds": [
+                        {
+                            "row_index": 1,
+                            "evaluator": "SimilarityEvaluator",
+                            "criteria": ">=",
+                            "expected": "3.000000",
+                            "actual": str(similarity),
+                            "passed": similarity >= 3,
+                        },
+                        {
+                            "row_index": 1,
+                            "evaluator": "avg_latency_seconds",
+                            "criteria": "<=",
+                            "expected": "10.000000",
+                            "actual": str(latency),
+                            "passed": latency <= 10,
+                        },
+                    ],
+                },
+            ],
+            "thresholds": [
+                {
+                    "evaluator": "SimilarityEvaluator",
+                    "criteria": ">=",
+                    "expected": "3.000000",
+                    "actual": f"{similarity:.6f}",
+                    "passed": similarity >= 3,
+                },
+                {
+                    "evaluator": "avg_latency_seconds",
+                    "criteria": "<=",
+                    "expected": "10.000000",
+                    "actual": f"{latency:.6f}",
+                    "passed": latency <= 10,
+                },
+            ],
+            "summary": {  # fixed values; not recomputed from the arguments
+                "metrics_count": 2,
+                "thresholds_count": 2,
+                "thresholds_passed": 2,
+                "thresholds_failed": 0,
+                "overall_passed": True,
+            },
+        }
+    )
+
+
+def _write_result(path: Path, result: RunResult) -> Path:  # returns `path`
+    path.parent.mkdir(parents=True, exist_ok=True)  # create the run directory
+    path.write_text(json.dumps(result.model_dump(mode="json"), indent=2))
+    return path
+
+
+# ---------------------------------------------------------------------------
+# Model tests
+# ---------------------------------------------------------------------------
+
+
+class TestComparisonModels:  # model-only checks; no files are written
+    def test_comparison_result_roundtrip(self) -> None:
+        result = ComparisonResult(
+            version=1,
+            runs=[
+                RunReference(
+                    run_id="run1", bundle_name="b", dataset_name="d", started_at="t1"
+                ),
+                RunReference(
+                    run_id="run2", bundle_name="b", dataset_name="d", started_at="t2"
+                ),
+            ],
+            metric_rows=[],
+            threshold_rows=[],
+            item_rows=[],
+            summary=ComparisonSummary(
+                run_count=2, any_regressions=False, runs_with_regressions=[]
+            ),
+        )
+        payload = json.loads(result.model_dump_json())  # dump to JSON, re-parse
+        restored = ComparisonResult.model_validate(payload)
+        assert restored.version == 1
+        assert restored.summary.any_regressions is False
+        assert len(restored.runs) == 2
+
+
+# ---------------------------------------------------------------------------
+# Direction helpers
+# ---------------------------------------------------------------------------
+
+
+class TestComputeMetricDirection:  # delta sign combined with lower_is_better
+    def test_higher_is_better_positive_delta(self) -> None:
+        assert _compute_metric_direction(0.05, lower_is_better=False) == "improved"
+
+    def test_higher_is_better_negative_delta(self) -> None:
+        assert _compute_metric_direction(-0.05, lower_is_better=False) == "regressed"
+
+    def test_lower_is_better_negative_delta_is_improved(self) -> None:
+        assert _compute_metric_direction(-0.05, lower_is_better=True) == "improved"
+
+    def test_lower_is_better_positive_delta_is_regressed(self) -> None:
+        assert _compute_metric_direction(0.05, lower_is_better=True) == "regressed"
+
+    def test_zero_is_unchanged(self) -> None:  # holds for both flag values
+        assert _compute_metric_direction(0.0, lower_is_better=False) == "unchanged"
+        assert _compute_metric_direction(0.0, lower_is_better=True) == "unchanged"
+
+
+# ---------------------------------------------------------------------------
+# compare_runs (2 runs)
+# ---------------------------------------------------------------------------
+
+
+class TestCompareRunsTwoRuns:  # baseline is position 0, current is position 1
+    def test_regression_detected(self, tmp_path: Path) -> None:
+        baseline = _sample_result(
+            groundedness=0.90, relevance=0.90, overall_passed=True
+        )
+        current = _sample_result(
+            groundedness=0.70, relevance=0.95, overall_passed=False
+        )
+
+        bp = _write_result(tmp_path / "baseline" / "results.json", baseline)
+        cp = _write_result(tmp_path / "current" / "results.json", current)
+
+        result = compare_runs([bp, cp], ["baseline", "current"])
+        # Index 1 is "current"; it must be the run flagged, not just any run.
+        assert result.summary.any_regressions is True
+        assert 1 in result.summary.runs_with_regressions
+        g_row = next(r for r in result.metric_rows if r.name == "groundedness")
+        assert g_row.directions[1] == "regressed"
+        r_row = next(r for r in result.metric_rows if r.name == "relevance")
+        assert r_row.directions[1] == "improved"
+
+    def test_no_regression(self, tmp_path: Path) -> None:
+        baseline = _sample_result(
+            groundedness=0.80, relevance=0.80, overall_passed=True
+        )
+        current = _sample_result(groundedness=0.90, relevance=0.90, overall_passed=True)
+
+        bp = _write_result(tmp_path / "baseline" / "results.json", baseline)
+        cp = _write_result(tmp_path / "current" / "results.json", current)
+
+        result = compare_runs([bp, cp], ["baseline", "current"])
+
+        assert result.summary.any_regressions is False
+
+    def test_lower_is_better_latency(self, tmp_path: Path) -> None:
+        baseline = _sample_result_with_latency(similarity=5.0, latency=6.0)
+        current = _sample_result_with_latency(similarity=5.0, latency=4.0)
+
+        bp = _write_result(tmp_path / "baseline" / "results.json", baseline)
+        cp = _write_result(tmp_path / "current" / "results.json", current)
+
+        result = compare_runs([bp, cp], ["baseline", "current"])
+        # Latency dropped from 6.0 to 4.0: a negative delta that counts as better.
+        lat = next(r for r in result.metric_rows if r.name == "avg_latency_seconds")
+        assert lat.directions[1] == "improved"
+        assert lat.deltas[1] == pytest.approx(-2.0, abs=1e-6)
+
+
+# ---------------------------------------------------------------------------
+# compare_runs (3+ runs)
+# ---------------------------------------------------------------------------
+
+
+class TestCompareRunsMultiple:  # three runs occupy positions 0, 1 and 2
+    def test_three_runs(self, tmp_path: Path) -> None:
+        r1 = _sample_result(groundedness=0.80, relevance=0.80, overall_passed=True)
+        r2 = _sample_result(groundedness=0.90, relevance=0.85, overall_passed=True)
+        r3 = _sample_result(groundedness=0.70, relevance=0.95, overall_passed=False)
+
+        p1 = _write_result(tmp_path / "run1" / "results.json", r1)
+        p2 = _write_result(tmp_path / "run2" / "results.json", r2)
+        p3 = _write_result(tmp_path / "run3" / "results.json", r3)
+
+        result = compare_runs([p1, p2, p3], ["run1", "run2", "run3"])
+
+        assert result.summary.run_count == 3
+        assert len(result.runs) == 3
+
+        # run2 improved groundedness, run3 regressed
+        g_row = next(r for r in result.metric_rows if r.name == "groundedness")
+        assert g_row.directions[1] == "improved"
+        assert g_row.directions[2] == "regressed"
+
+        # run3 should be in regressions list
+        assert result.summary.any_regressions is True
+        assert 2 in result.summary.runs_with_regressions
+        # run2 should not have regressions
+        assert 1 not in result.summary.runs_with_regressions
+
+    def test_best_run_index(self, tmp_path: Path) -> None:  # best = highest here
+        r1 = _sample_result(groundedness=0.80, relevance=0.80)
+        r2 = _sample_result(groundedness=0.95, relevance=0.70)
+        r3 = _sample_result(groundedness=0.85, relevance=0.90)
+
+        p1 = _write_result(tmp_path / "run1" / "results.json", r1)
+        p2 = _write_result(tmp_path / "run2" / "results.json", r2)
+        p3 = _write_result(tmp_path / "run3" / "results.json", r3)
+
+        result = compare_runs([p1, p2, p3], ["run1", "run2", "run3"])
+
+        g_row = next(r for r in result.metric_rows if r.name == "groundedness")
+        assert g_row.best_run_index == 1  # run2 has 0.95
+
+        r_row = next(r for r in result.metric_rows if r.name == "relevance")
+        assert r_row.best_run_index == 2  # run3 has 0.90
+
+
+# ---------------------------------------------------------------------------
+# Run path resolution
+# ---------------------------------------------------------------------------
+
+
+class TestResolveRunPath:  # accepted inputs: file path, directory, run id
+    def test_resolve_absolute_file(self, tmp_path: Path) -> None:
+        f = tmp_path / "results.json"
+        f.write_text("{}")
+        assert _resolve_run_path(str(f)) == f  # a file path comes back as-is
+
+    def test_resolve_absolute_dir(self, tmp_path: Path) -> None:
+        d = tmp_path / "run1"
+        d.mkdir()
+        f = d / "results.json"
+        f.write_text("{}")
+        assert _resolve_run_path(str(d)) == f  # directory -> its results.json
+
+    def test_resolve_by_run_id(self, tmp_path: Path) -> None:
+        results_dir = tmp_path / "results" / "2026-03-01_100000"
+        results_dir.mkdir(parents=True)
+        f = results_dir / "results.json"
+        f.write_text("{}")  # lives at <workspace>/results/<run id>/results.json
+        resolved = _resolve_run_path("2026-03-01_100000", workspace_dir=tmp_path)
+        assert resolved == f.resolve()
+
+    def test_resolve_missing_raises(self, tmp_path: Path) -> None:
+        with pytest.raises(FileNotFoundError):  # nothing was created for this id
+            _resolve_run_path("nonexistent_run", workspace_dir=tmp_path)
+
+
+# ---------------------------------------------------------------------------
+# Comparison report markdown
+# ---------------------------------------------------------------------------
+
+
+class TestComparisonReport:  # markdown rendering of hand-built results
+    def test_report_contains_required_sections(self) -> None:
+        result = ComparisonResult(
+            version=1,
+            runs=[
+                RunReference(
+                    run_id="run1",
+                    bundle_name="rag_baseline",
+                    dataset_name="smoke",
+                    started_at="t1",
+                ),
+                RunReference(
+                    run_id="run2",
+                    bundle_name="rag_baseline",
+                    dataset_name="smoke",
+                    started_at="t2",
+                ),
+            ],
+            metric_rows=[
+                ComparisonMetricRow(
+                    name="groundedness",
+                    values=[0.9, 0.7],
+                    deltas=[None, -0.2],
+                    delta_percents=[None, -22.22],
+                    directions=["unchanged", "regressed"],
+                    best_run_index=0,
+                ),
+            ],
+            threshold_rows=[
+                ComparisonThresholdRow(
+                    evaluator="groundedness", criteria=">=", passed=[True, False]
+                ),
+            ],
+            item_rows=[
+                ComparisonItemRow(row_index=1, passed_all=[True, False]),
+            ],
+            summary=ComparisonSummary(
+                run_count=2, any_regressions=True, runs_with_regressions=[1]
+            ),
+        )
+
+        md = generate_comparison_markdown(result)
+        # Substring checks only; the exact layout of the report is not pinned.
+        assert "# AgentOps Comparison Report" in md
+        assert "REGRESSIONS DETECTED" in md
+        assert "groundedness" in md
+        assert "regressed" in md
+        assert "FAIL" in md
+
+    def test_report_no_regressions(self) -> None:
+        result = ComparisonResult(
+            version=1,
+            runs=[
+                RunReference(
+                    run_id="run1", bundle_name="b", dataset_name="d", started_at="t1"
+                ),
+                RunReference(
+                    run_id="run2", bundle_name="b", dataset_name="d", started_at="t2"
+                ),
+            ],
+            metric_rows=[
+                ComparisonMetricRow(  # delta_percents and best_run_index omitted
+                    name="g",
+                    values=[0.7, 0.9],
+                    deltas=[None, 0.2],
+                    directions=["unchanged", "improved"],
+                ),
+            ],
+            threshold_rows=[],
+            item_rows=[],
+            summary=ComparisonSummary(
+                run_count=2, any_regressions=False, runs_with_regressions=[]
+            ),
+        )
+
+        md = generate_comparison_markdown(result)
+        assert "NO REGRESSIONS" in md
diff --git a/tests/unit/test_foundry_backend.py b/tests/unit/test_foundry_backend.py
index e63f296..128a387 100644
--- a/tests/unit/test_foundry_backend.py
+++ b/tests/unit/test_foundry_backend.py
@@ -5,7 +5,12 @@
 from unittest.mock import patch
 
 from agentops.backends.base import BackendRunContext
-from agentops.backends.foundry_backend import FoundryBackend, FoundryEvaluatorRuntime
+from agentops.backends.foundry_backend import (
+    FoundryBackend,
+    FoundryEvaluatorRuntime,
+    _cloud_evaluator_data_mapping,
+    _default_foundry_input_mapping,
+)
 from agentops.core.models import BackendConfig
 from agentops.utils.yaml import save_yaml
 
@@ -43,7 +48,11 @@ def _dataset_yaml(tmp_path: Path) -> Path:
             "version": 1,
             "name": "smoke",
             "source": {"type": "file", "path": str(dataset_file)},
-            "format": {"type": "jsonl", "input_field": "input", "expected_field": "expected"},
+            "format": {
+                "type": "jsonl",
+                "input_field": "input",
+                "expected_field": "expected",
+            },
         },
     )
     return config_path
@@ -60,8 +69,17 @@ def _bundle_yaml(tmp_path: Path, *, similarity_source: str | None = None) -> Pat
     ]
 
     if similarity_source is not None:
-        evaluators.insert(0, {"name": "SimilarityEvaluator", "source": similarity_source, "enabled": True})
-        thresholds.insert(0, {"evaluator": "SimilarityEvaluator", "criteria": ">=", "value": 3})
+        evaluators.insert(
+            0,
+            {
+                "name": "SimilarityEvaluator",
+                "source": similarity_source,
+                "enabled": True,
+            },
+        )
+        thresholds.insert(
+            0, {"evaluator": "SimilarityEvaluator", "criteria": ">=", "value": 3}
+        )
 
     bundle_path = tmp_path / "bundle.yaml"
     save_yaml(
@@ -95,7 +113,10 @@ def test_foundry_backend_uses_default_azure_credential(tmp_path: Path) -> None:
     )
 
     # When _acquire_token raises, the error should propagate clearly
-    with patch("agentops.backends.foundry_backend._acquire_token", side_effect=RuntimeError("azure-identity not installed")):
+    with patch(
+        "agentops.backends.foundry_backend._acquire_token",
+        side_effect=RuntimeError("azure-identity not installed"),
+    ):
         try:
             FoundryBackend().execute(context)
             assert False, "expected RuntimeError"
@@ -127,17 +148,29 @@ def test_foundry_backend_agent_service_target(tmp_path: Path) -> None:
         _FakeHttpResponse({"id": "msg_1"}),
         _FakeHttpResponse({"id": "run_1"}),
         _FakeHttpResponse({"status": "completed"}),
-        _FakeHttpResponse({"data": [{"role": "assistant", "content": [{"text": {"value": "4"}}]}]}),
+        _FakeHttpResponse(
+            {"data": [{"role": "assistant", "content": [{"text": {"value": "4"}}]}]}
+        ),
         _FakeHttpResponse({"id": "thread_2"}),
         _FakeHttpResponse({"id": "msg_2"}),
         _FakeHttpResponse({"id": "run_2"}),
         _FakeHttpResponse({"status": "completed"}),
-        _FakeHttpResponse({"data": [{"role": "assistant", "content": [{"text": {"value": "8"}}]}]}),
+        _FakeHttpResponse(
+            {"data": [{"role": "assistant", "content": [{"text": {"value": "8"}}]}]}
+        ),
     ]
 
-    with patch("agentops.backends.foundry_backend._acquire_token", return_value="fake-agent-token"):
-        with patch("agentops.backends.foundry_backend.urllib.request.urlopen", side_effect=responses):
-            result = FoundryBackend().execute(context)
+    with (
+        patch(
+            "agentops.backends.foundry_backend._acquire_token",
+            return_value="fake-agent-token",
+        ),
+        patch(
+            "agentops.backends.foundry_backend.urllib.request.urlopen",
+            side_effect=responses,
+        ),
+    ):
+        result = FoundryBackend().execute(context)
 
     assert result.backend == "foundry"
     assert result.exit_code == 0
@@ -152,12 +185,16 @@ def test_foundry_backend_agent_service_target(tmp_path: Path) -> None:
     assert "GroundednessEvaluator" not in metrics_by_name
     assert metrics_by_name["samples_evaluated"] == 2.0
     assert len(payload["row_metrics"]) == 2
-    first_row_metrics = {item["name"]: item["value"] for item in payload["row_metrics"][0]["metrics"]}
+    first_row_metrics = {
+        item["name"]: item["value"] for item in payload["row_metrics"][0]["metrics"]
+    }
     assert "GroundednessEvaluator" not in first_row_metrics
     assert first_row_metrics["exact_match"] == 1.0
 
 
-def test_foundry_backend_uses_similarity_evaluator_when_source_is_foundry(tmp_path: Path) -> None:
+def test_foundry_backend_uses_similarity_evaluator_when_source_is_foundry(
+    tmp_path: Path,
+) -> None:
     dataset_path = _dataset_yaml(tmp_path)
     bundle_path = _bundle_yaml(tmp_path, similarity_source="foundry")
     context = BackendRunContext(
@@ -181,12 +218,16 @@ def test_foundry_backend_uses_similarity_evaluator_when_source_is_foundry(tmp_pa
         _FakeHttpResponse({"id": "msg_1"}),
         _FakeHttpResponse({"id": "run_1"}),
         _FakeHttpResponse({"status": "completed"}),
-        _FakeHttpResponse({"data": [{"role": "assistant", "content": [{"text": {"value": "4"}}]}]}),
+        _FakeHttpResponse(
+            {"data": [{"role": "assistant", "content": [{"text": {"value": "4"}}]}]}
+        ),
         _FakeHttpResponse({"id": "thread_2"}),
         _FakeHttpResponse({"id": "msg_2"}),
         _FakeHttpResponse({"id": "run_2"}),
         _FakeHttpResponse({"status": "completed"}),
-        _FakeHttpResponse({"data": [{"role": "assistant", "content": [{"text": {"value": "8"}}]}]}),
+        _FakeHttpResponse(
+            {"data": [{"role": "assistant", "content": [{"text": {"value": "8"}}]}]}
+        ),
     ]
 
     class _FakeSimilarityEvaluator:
@@ -196,8 +237,12 @@ def __call__(self, **kwargs):
             assert "ground_truth" in kwargs
             return {"similarity": 4.0}
 
-    with patch("agentops.backends.foundry_backend._acquire_token", return_value="fake-agent-token"):
-        with patch(
+    with (
+        patch(
+            "agentops.backends.foundry_backend._acquire_token",
+            return_value="fake-agent-token",
+        ),
+        patch(
             "agentops.backends.foundry_backend._build_foundry_evaluator_runtimes",
             return_value=[
                 FoundryEvaluatorRuntime(
@@ -211,9 +256,13 @@ def __call__(self, **kwargs):
                     score_keys=["similarity"],
                 )
             ],
-        ):
-            with patch("agentops.backends.foundry_backend.urllib.request.urlopen", side_effect=responses):
-                result = FoundryBackend().execute(context)
+        ),
+        patch(
+            "agentops.backends.foundry_backend.urllib.request.urlopen",
+            side_effect=responses,
+        ),
+    ):
+        result = FoundryBackend().execute(context)
 
     assert result.backend == "foundry"
     assert result.exit_code == 0
@@ -243,7 +292,10 @@ def test_foundry_backend_rejects_unsupported_local_evaluator(tmp_path: Path) ->
         backend_output_dir=tmp_path / "out-agent-unsupported-local",
     )
 
-    with patch("agentops.backends.foundry_backend._acquire_token", return_value="fake-agent-token"):
+    with patch(
+        "agentops.backends.foundry_backend._acquire_token",
+        return_value="fake-agent-token",
+    ):
         try:
             FoundryBackend().execute(context)
             assert False, "expected ValueError"
@@ -276,9 +328,14 @@ def _fake_invoke_model_direct(self_backend, settings, prompt):
             return "4"
         return "8"
 
-    with patch("agentops.backends.foundry_backend._acquire_token", return_value="fake-token"):
-        with patch.object(FoundryBackend, "_invoke_model_direct", _fake_invoke_model_direct):
-            result = FoundryBackend().execute(context)
+    with (
+        patch(
+            "agentops.backends.foundry_backend._acquire_token",
+            return_value="fake-token",
+        ),
+        patch.object(FoundryBackend, "_invoke_model_direct", _fake_invoke_model_direct),
+    ):
+        result = FoundryBackend().execute(context)
 
     assert result.backend == "foundry"
     assert result.exit_code == 0
@@ -315,3 +372,73 @@ def test_foundry_backend_model_target_requires_explicit_model(tmp_path: Path) ->
     except ValueError as exc:
         assert "target=model" in str(exc)
         assert "backend.model" in str(exc)
+
+
+# ---------------------------------------------------------------------------
+# Unit tests for _cloud_evaluator_data_mapping and _default_foundry_input_mapping
+# ---------------------------------------------------------------------------
+
+
+def test_cloud_evaluator_data_mapping_similarity() -> None:
+    mapping = _cloud_evaluator_data_mapping("similarity", "input", "expected")
+    assert mapping["query"] == "{{item.input}}"
+    assert mapping["response"] == "{{sample.output_text}}"
+    assert mapping["ground_truth"] == "{{item.expected}}"
+    assert "context" not in mapping
+
+
+def test_cloud_evaluator_data_mapping_groundedness_uses_expected_when_no_context_field() -> (
+    None
+):
+    mapping = _cloud_evaluator_data_mapping("groundedness", "input", "expected")
+    assert mapping["context"] == "{{item.expected}}"
+
+
+def test_cloud_evaluator_data_mapping_groundedness_uses_context_field_when_set() -> (
+    None
+):
+    mapping = _cloud_evaluator_data_mapping(
+        "groundedness", "input", "expected", context_field="context"
+    )
+    assert mapping["context"] == "{{item.context}}"
+    assert "ground_truth" not in mapping
+
+
+def test_cloud_evaluator_data_mapping_task_completion() -> None:
+    mapping = _cloud_evaluator_data_mapping("task_completion", "input", "expected")
+    assert mapping["query"] == "{{item.input}}"
+    assert mapping["response"] == "{{sample.output_text}}"
+    assert "ground_truth" not in mapping
+    assert "context" not in mapping
+    assert "tool_calls" not in mapping
+
+
+def test_cloud_evaluator_data_mapping_tool_call_accuracy() -> None:
+    mapping = _cloud_evaluator_data_mapping("tool_call_accuracy", "input", "expected")
+    assert mapping["query"] == "{{item.input}}"
+    assert mapping["response"] == "{{sample.output_text}}"
+    assert mapping["tool_calls"] == "{{sample.tool_calls}}"
+    assert mapping["tool_definitions"] == "{{item.tool_definitions}}"
+
+
+def test_default_foundry_input_mapping_groundedness_uses_row_context() -> None:
+    mapping = _default_foundry_input_mapping("GroundednessEvaluator")
+    assert mapping["context"] == "$row.context"
+    assert mapping["query"] == "$prompt"
+    assert mapping["response"] == "$prediction"
+
+
+def test_default_foundry_input_mapping_task_completion() -> None:
+    mapping = _default_foundry_input_mapping("TaskCompletionEvaluator")
+    assert mapping["query"] == "$prompt"
+    assert mapping["response"] == "$prediction"
+    assert "ground_truth" not in mapping
+    assert "context" not in mapping
+
+
+def test_default_foundry_input_mapping_tool_call_accuracy() -> None:
+    mapping = _default_foundry_input_mapping("ToolCallAccuracyEvaluator")
+    assert mapping["query"] == "$prompt"
+    assert mapping["response"] == "$prediction"
+    assert mapping["tool_calls"] == "$row.tool_calls"
+    assert mapping["tool_definitions"] == "$row.tool_definitions"
diff --git a/tests/unit/test_initializer.py b/tests/unit/test_initializer.py
index 0f50182..390489a 100644
--- a/tests/unit/test_initializer.py
+++ b/tests/unit/test_initializer.py
@@ -15,7 +15,9 @@ def test_init_creates_expected_files(tmp_path: Path) -> None:
 
     assert (tmp_path / ".agentops" / "config.yaml").is_file()
     assert (tmp_path / ".agentops" / "bundles" / "model_direct_baseline.yaml").is_file()
-    assert (tmp_path / ".agentops" / "bundles" / "rag_retrieval_baseline.yaml").is_file()
+    assert (
+        tmp_path / ".agentops" / "bundles" / "rag_retrieval_baseline.yaml"
+    ).is_file()
     assert (tmp_path / ".agentops" / "bundles" / "agent_tools_baseline.yaml").is_file()
     assert (tmp_path / ".agentops" / "datasets" / "smoke-model-direct.yaml").is_file()
     assert (tmp_path / ".agentops" / "datasets" / "smoke-rag.yaml").is_file()
diff --git a/tests/unit/test_models.py b/tests/unit/test_models.py
index 00208cf..c757f5e 100644
--- a/tests/unit/test_models.py
+++ b/tests/unit/test_models.py
@@ -126,6 +126,24 @@ def test_dataset_config_parses() -> None:
 
     dataset = DatasetConfig.model_validate(data)
     assert dataset.source.path.name == "smoke.jsonl"
+    assert dataset.format.context_field is None
+
+
+def test_dataset_config_parses_context_field() -> None:
+    data = {
+        "version": 1,
+        "name": "smoke-rag",
+        "source": {"type": "file", "path": "./data/smoke-rag.jsonl"},
+        "format": {
+            "type": "jsonl",
+            "input_field": "input",
+            "expected_field": "expected",
+            "context_field": "context",
+        },
+    }
+
+    dataset = DatasetConfig.model_validate(data)
+    assert dataset.format.context_field == "context"
 
 
 def test_backend_requires_command_and_args_for_subprocess() -> None:
diff --git a/tests/unit/test_reporter.py b/tests/unit/test_reporter.py
index 269c40c..3183571 100644
--- a/tests/unit/test_reporter.py
+++ b/tests/unit/test_reporter.py
@@ -7,7 +7,10 @@ def _sample_result(overall_passed: bool = True) -> RunResult:
         {
             "version": 1,
             "status": "completed",
-            "bundle": {"name": "rag_baseline", "path": ".agentops/bundles/rag_baseline.yaml"},
+            "bundle": {
+                "name": "rag_baseline",
+                "path": ".agentops/bundles/rag_baseline.yaml",
+            },
             "dataset": {"name": "smoke", "path": ".agentops/datasets/smoke-agent.yaml"},
             "execution": {
                 "backend": "subprocess",
@@ -23,11 +26,20 @@ def _sample_result(overall_passed: bool = True) -> RunResult:
             ],
             "run_metrics": [
                 {"name": "run_pass", "value": 0.0 if not overall_passed else 1.0},
-                {"name": "threshold_pass_rate", "value": 0.5 if not overall_passed else 1.0},
+                {
+                    "name": "threshold_pass_rate",
+                    "value": 0.5 if not overall_passed else 1.0,
+                },
                 {"name": "accuracy", "value": 0.84},
             ],
             "thresholds": [
-                {"evaluator": "groundedness", "criteria": ">=", "expected": "0.800000", "actual": "0.840000", "passed": True},
+                {
+                    "evaluator": "groundedness",
+                    "criteria": ">=",
+                    "expected": "0.800000",
+                    "actual": "0.840000",
+                    "passed": True,
+                },
                 {
                     "evaluator": "relevance",
                     "criteria": ">=",
@@ -67,14 +79,14 @@ def test_report_markdown_contains_required_sections_and_tables() -> None:
 
     assert "## Metrics" in markdown
     assert "| Metric | Value |" in markdown
-    assert "| groundedness | 0.840000 |" in markdown
+    assert "| groundedness | 0.84 |" in markdown
 
     assert "## Run Metrics" in markdown
-    assert "| run_pass | 0.000000 |" in markdown
+    assert "| run_pass | 0 |" in markdown
 
     assert "## Threshold Checks" in markdown
     assert "| Evaluator | Criteria | Expected | Actual | Status |" in markdown
-    assert "| relevance | >= | 0.950000 | 0.830000 | FAIL |" in markdown
+    assert "| relevance | >= | 0.950000 | 0.830000 | Missed |" in markdown
 
 
 def test_report_markdown_pass_status() -> None:
diff --git a/tests/unit/test_subprocess_backend.py b/tests/unit/test_subprocess_backend.py
index 6b3cb08..eda93cb 100644
--- a/tests/unit/test_subprocess_backend.py
+++ b/tests/unit/test_subprocess_backend.py
@@ -58,7 +58,10 @@ def test_execute_builds_command_and_writes_logs(tmp_path: Path) -> None:
         stderr="ok stderr",
     )
 
-    with patch("agentops.backends.subprocess_backend.subprocess.run", return_value=fake_completed) as run_mock:
+    with patch(
+        "agentops.backends.subprocess_backend.subprocess.run",
+        return_value=fake_completed,
+    ) as run_mock:
         result = backend.execute(context)
 
     run_kwargs = run_mock.call_args.kwargs
diff --git a/uv.lock b/uv.lock
index 681c722..855bb09 100644
--- a/uv.lock
+++ b/uv.lock
@@ -4,9 +4,9 @@ requires-python = ">=3.11"
 
 [[package]]
 name = "agentops-toolkit"
-version = "0.1.2"
 source = { editable = "." }
 dependencies = [
+    { name = "azure-ai-projects" },
     { name = "pydantic" },
     { name = "ruamel-yaml" },
     { name = "typer" },
@@ -15,6 +15,7 @@ dependencies = [
 [package.dev-dependencies]
 dev = [
     { name = "mypy" },
+    { name = "pre-commit" },
     { name = "pytest" },
     { name = "pytest-asyncio" },
     { name = "pytest-cov" },
@@ -23,6 +24,7 @@ dev = [
 
 [package.metadata]
 requires-dist = [
+    { name = "azure-ai-projects", specifier = ">=2.0.1" },
     { name = "pydantic", specifier = ">=2,<3" },
     { name = "ruamel-yaml", specifier = ">=0.18,<1.0" },
     { name = "typer", specifier = ">=0.12,<1.0" },
@@ -31,6 +33,7 @@ requires-dist = [
 [package.metadata.requires-dev]
 dev = [
     { name = "mypy", specifier = ">=1.19.1" },
+    { name = "pre-commit", specifier = ">=4.0" },
     { name = "pytest", specifier = ">=8.0" },
     { name = "pytest-asyncio", specifier = ">=0.24" },
     { name = "pytest-cov", specifier = ">=5.0" },
@@ -55,6 +58,257 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/78/b6/6307fbef88d9b5ee7421e68d78a9f162e0da4900bc5f5793f6d3d0e34fb8/annotated_types-0.7.0-py3-none-any.whl", hash = "sha256:1f02e8b43a8fbbc3f3e0d4f0f4bfc8131bcb4eebe8849b8e5c773f3a1c582a53", size = 13643, upload-time = "2024-05-20T21:33:24.1Z" },
 ]
 
+[[package]]
+name = "anyio"
+version = "4.12.1"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "idna" },
+    { name = "typing-extensions", marker = "python_full_version < '3.13'" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/96/f0/5eb65b2bb0d09ac6776f2eb54adee6abe8228ea05b20a5ad0e4945de8aac/anyio-4.12.1.tar.gz", hash = "sha256:41cfcc3a4c85d3f05c932da7c26d0201ac36f72abd4435ba90d0464a3ffed703", size = 228685, upload-time = "2026-01-06T11:45:21.246Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/38/0e/27be9fdef66e72d64c0cdc3cc2823101b80585f8119b5c112c2e8f5f7dab/anyio-4.12.1-py3-none-any.whl", hash = "sha256:d405828884fc140aa80a3c667b8beed277f1dfedec42ba031bd6ac3db606ab6c", size = 113592, upload-time = "2026-01-06T11:45:19.497Z" },
+]
+
+[[package]]
+name = "azure-ai-projects"
+version = "2.0.1"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "azure-core" },
+    { name = "azure-identity" },
+    { name = "azure-storage-blob" },
+    { name = "isodate" },
+    { name = "openai" },
+    { name = "typing-extensions" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/86/f9/a15c8a16e35e6d620faebabc6cc4f9e2f4b7f1d962cc6f58931c46947e24/azure_ai_projects-2.0.1.tar.gz", hash = "sha256:c8c64870aa6b89903af69a4ff28b4eff3df9744f14615ea572cae87394946a0c", size = 491774, upload-time = "2026-03-12T19:59:02.712Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/8d/f7/290ca39501c06c6e23b46ba9f7f3dfb05ecc928cde105fed85d6845060dd/azure_ai_projects-2.0.1-py3-none-any.whl", hash = "sha256:dfda540d256e67a52bf81c75418b6bf92b811b96693fe45787e154a888ad2396", size = 236560, upload-time = "2026-03-12T19:59:04.249Z" },
+]
+
+[[package]]
+name = "azure-core"
+version = "1.39.0"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "requests" },
+    { name = "typing-extensions" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/34/83/bbde3faa84ddcb8eb0eca4b3ffb3221252281db4ce351300fe248c5c70b1/azure_core-1.39.0.tar.gz", hash = "sha256:8a90a562998dd44ce84597590fff6249701b98c0e8797c95fcdd695b54c35d74", size = 367531, upload-time = "2026-03-19T01:31:29.461Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/7e/d6/8ebcd05b01a580f086ac9a97fb9fac65c09a4b012161cc97c21a336e880b/azure_core-1.39.0-py3-none-any.whl", hash = "sha256:4ac7b70fab5438c3f68770649a78daf97833caa83827f91df9c14e0e0ea7d34f", size = 218318, upload-time = "2026-03-19T01:31:31.25Z" },
+]
+
+[[package]]
+name = "azure-identity"
+version = "1.25.3"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "azure-core" },
+    { name = "cryptography" },
+    { name = "msal" },
+    { name = "msal-extensions" },
+    { name = "typing-extensions" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/c5/0e/3a63efb48aa4a5ae2cfca61ee152fbcb668092134d3eb8bfda472dd5c617/azure_identity-1.25.3.tar.gz", hash = "sha256:ab23c0d63015f50b630ef6c6cf395e7262f439ce06e5d07a64e874c724f8d9e6", size = 286304, upload-time = "2026-03-13T01:12:20.892Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/49/9a/417b3a533e01953a7c618884df2cb05a71e7b68bdbce4fbdb62349d2a2e8/azure_identity-1.25.3-py3-none-any.whl", hash = "sha256:f4d0b956a8146f30333e071374171f3cfa7bdb8073adb8c3814b65567aa7447c", size = 192138, upload-time = "2026-03-13T01:12:22.951Z" },
+]
+
+[[package]]
+name = "azure-storage-blob"
+version = "12.28.0"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "azure-core" },
+    { name = "cryptography" },
+    { name = "isodate" },
+    { name = "typing-extensions" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/71/24/072ba8e27b0e2d8fec401e9969b429d4f5fc4c8d4f0f05f4661e11f7234a/azure_storage_blob-12.28.0.tar.gz", hash = "sha256:e7d98ea108258d29aa0efbfd591b2e2075fa1722a2fae8699f0b3c9de11eff41", size = 604225, upload-time = "2026-01-06T23:48:57.282Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/d8/3a/6ef2047a072e54e1142718d433d50e9514c999a58f51abfff7902f3a72f8/azure_storage_blob-12.28.0-py3-none-any.whl", hash = "sha256:00fb1db28bf6a7b7ecaa48e3b1d5c83bfadacc5a678b77826081304bd87d6461", size = 431499, upload-time = "2026-01-06T23:48:58.995Z" },
+]
+
+[[package]]
+name = "certifi"
+version = "2026.2.25"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/af/2d/7bf41579a8986e348fa033a31cdd0e4121114f6bce2457e8876010b092dd/certifi-2026.2.25.tar.gz", hash = "sha256:e887ab5cee78ea814d3472169153c2d12cd43b14bd03329a39a9c6e2e80bfba7", size = 155029, upload-time = "2026-02-25T02:54:17.342Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/9a/3c/c17fb3ca2d9c3acff52e30b309f538586f9f5b9c9cf454f3845fc9af4881/certifi-2026.2.25-py3-none-any.whl", hash = "sha256:027692e4402ad994f1c42e52a4997a9763c646b73e4096e4d5d6db8af1d6f0fa", size = 153684, upload-time = "2026-02-25T02:54:15.766Z" },
+]
+
+[[package]]
+name = "cffi"
+version = "2.0.0"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "pycparser", marker = "implementation_name != 'PyPy'" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/eb/56/b1ba7935a17738ae8453301356628e8147c79dbb825bcbc73dc7401f9846/cffi-2.0.0.tar.gz", hash = "sha256:44d1b5909021139fe36001ae048dbdde8214afa20200eda0f64c068cac5d5529", size = 523588, upload-time = "2025-09-08T23:24:04.541Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/12/4a/3dfd5f7850cbf0d06dc84ba9aa00db766b52ca38d8b86e3a38314d52498c/cffi-2.0.0-cp311-cp311-macosx_10_13_x86_64.whl", hash = "sha256:b4c854ef3adc177950a8dfc81a86f5115d2abd545751a304c5bcf2c2c7283cfe", size = 184344, upload-time = "2025-09-08T23:22:26.456Z" },
+    { url = "https://files.pythonhosted.org/packages/4f/8b/f0e4c441227ba756aafbe78f117485b25bb26b1c059d01f137fa6d14896b/cffi-2.0.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:2de9a304e27f7596cd03d16f1b7c72219bd944e99cc52b84d0145aefb07cbd3c", size = 180560, upload-time = "2025-09-08T23:22:28.197Z" },
+    { url = "https://files.pythonhosted.org/packages/b1/b7/1200d354378ef52ec227395d95c2576330fd22a869f7a70e88e1447eb234/cffi-2.0.0-cp311-cp311-manylinux1_i686.manylinux2014_i686.manylinux_2_17_i686.manylinux_2_5_i686.whl", hash = "sha256:baf5215e0ab74c16e2dd324e8ec067ef59e41125d3eade2b863d294fd5035c92", size = 209613, upload-time = "2025-09-08T23:22:29.475Z" },
+    { url = "https://files.pythonhosted.org/packages/b8/56/6033f5e86e8cc9bb629f0077ba71679508bdf54a9a5e112a3c0b91870332/cffi-2.0.0-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:730cacb21e1bdff3ce90babf007d0a0917cc3e6492f336c2f0134101e0944f93", size = 216476, upload-time = "2025-09-08T23:22:31.063Z" },
+    { url = "https://files.pythonhosted.org/packages/dc/7f/55fecd70f7ece178db2f26128ec41430d8720f2d12ca97bf8f0a628207d5/cffi-2.0.0-cp311-cp311-manylinux2014_ppc64le.manylinux_2_17_ppc64le.whl", hash = "sha256:6824f87845e3396029f3820c206e459ccc91760e8fa24422f8b0c3d1731cbec5", size = 203374, upload-time = "2025-09-08T23:22:32.507Z" },
+    { url = "https://files.pythonhosted.org/packages/84/ef/a7b77c8bdc0f77adc3b46888f1ad54be8f3b7821697a7b89126e829e676a/cffi-2.0.0-cp311-cp311-manylinux2014_s390x.manylinux_2_17_s390x.whl", hash = "sha256:9de40a7b0323d889cf8d23d1ef214f565ab154443c42737dfe52ff82cf857664", size = 202597, upload-time = "2025-09-08T23:22:34.132Z" },
+    { url = "https://files.pythonhosted.org/packages/d7/91/500d892b2bf36529a75b77958edfcd5ad8e2ce4064ce2ecfeab2125d72d1/cffi-2.0.0-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:8941aaadaf67246224cee8c3803777eed332a19d909b47e29c9842ef1e79ac26", size = 215574, upload-time = "2025-09-08T23:22:35.443Z" },
+    { url = "https://files.pythonhosted.org/packages/44/64/58f6255b62b101093d5df22dcb752596066c7e89dd725e0afaed242a61be/cffi-2.0.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:a05d0c237b3349096d3981b727493e22147f934b20f6f125a3eba8f994bec4a9", size = 218971, upload-time = "2025-09-08T23:22:36.805Z" },
+    { url = "https://files.pythonhosted.org/packages/ab/49/fa72cebe2fd8a55fbe14956f9970fe8eb1ac59e5df042f603ef7c8ba0adc/cffi-2.0.0-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:94698a9c5f91f9d138526b48fe26a199609544591f859c870d477351dc7b2414", size = 211972, upload-time = "2025-09-08T23:22:38.436Z" },
+    { url = "https://files.pythonhosted.org/packages/0b/28/dd0967a76aab36731b6ebfe64dec4e981aff7e0608f60c2d46b46982607d/cffi-2.0.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:5fed36fccc0612a53f1d4d9a816b50a36702c28a2aa880cb8a122b3466638743", size = 217078, upload-time = "2025-09-08T23:22:39.776Z" },
+    { url = "https://files.pythonhosted.org/packages/2b/c0/015b25184413d7ab0a410775fdb4a50fca20f5589b5dab1dbbfa3baad8ce/cffi-2.0.0-cp311-cp311-win32.whl", hash = "sha256:c649e3a33450ec82378822b3dad03cc228b8f5963c0c12fc3b1e0ab940f768a5", size = 172076, upload-time = "2025-09-08T23:22:40.95Z" },
+    { url = "https://files.pythonhosted.org/packages/ae/8f/dc5531155e7070361eb1b7e4c1a9d896d0cb21c49f807a6c03fd63fc877e/cffi-2.0.0-cp311-cp311-win_amd64.whl", hash = "sha256:66f011380d0e49ed280c789fbd08ff0d40968ee7b665575489afa95c98196ab5", size = 182820, upload-time = "2025-09-08T23:22:42.463Z" },
+    { url = "https://files.pythonhosted.org/packages/95/5c/1b493356429f9aecfd56bc171285a4c4ac8697f76e9bbbbb105e537853a1/cffi-2.0.0-cp311-cp311-win_arm64.whl", hash = "sha256:c6638687455baf640e37344fe26d37c404db8b80d037c3d29f58fe8d1c3b194d", size = 177635, upload-time = "2025-09-08T23:22:43.623Z" },
+    { url = "https://files.pythonhosted.org/packages/ea/47/4f61023ea636104d4f16ab488e268b93008c3d0bb76893b1b31db1f96802/cffi-2.0.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:6d02d6655b0e54f54c4ef0b94eb6be0607b70853c45ce98bd278dc7de718be5d", size = 185271, upload-time = "2025-09-08T23:22:44.795Z" },
+    { url = "https://files.pythonhosted.org/packages/df/a2/781b623f57358e360d62cdd7a8c681f074a71d445418a776eef0aadb4ab4/cffi-2.0.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:8eca2a813c1cb7ad4fb74d368c2ffbbb4789d377ee5bb8df98373c2cc0dee76c", size = 181048, upload-time = "2025-09-08T23:22:45.938Z" },
+    { url = "https://files.pythonhosted.org/packages/ff/df/a4f0fbd47331ceeba3d37c2e51e9dfc9722498becbeec2bd8bc856c9538a/cffi-2.0.0-cp312-cp312-manylinux1_i686.manylinux2014_i686.manylinux_2_17_i686.manylinux_2_5_i686.whl", hash = "sha256:21d1152871b019407d8ac3985f6775c079416c282e431a4da6afe7aefd2bccbe", size = 212529, upload-time = "2025-09-08T23:22:47.349Z" },
+    { url = "https://files.pythonhosted.org/packages/d5/72/12b5f8d3865bf0f87cf1404d8c374e7487dcf097a1c91c436e72e6badd83/cffi-2.0.0-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:b21e08af67b8a103c71a250401c78d5e0893beff75e28c53c98f4de42f774062", size = 220097, upload-time = "2025-09-08T23:22:48.677Z" },
+    { url = "https://files.pythonhosted.org/packages/c2/95/7a135d52a50dfa7c882ab0ac17e8dc11cec9d55d2c18dda414c051c5e69e/cffi-2.0.0-cp312-cp312-manylinux2014_ppc64le.manylinux_2_17_ppc64le.whl", hash = "sha256:1e3a615586f05fc4065a8b22b8152f0c1b00cdbc60596d187c2a74f9e3036e4e", size = 207983, upload-time = "2025-09-08T23:22:50.06Z" },
+    { url = "https://files.pythonhosted.org/packages/3a/c8/15cb9ada8895957ea171c62dc78ff3e99159ee7adb13c0123c001a2546c1/cffi-2.0.0-cp312-cp312-manylinux2014_s390x.manylinux_2_17_s390x.whl", hash = "sha256:81afed14892743bbe14dacb9e36d9e0e504cd204e0b165062c488942b9718037", size = 206519, upload-time = "2025-09-08T23:22:51.364Z" },
+    { url = "https://files.pythonhosted.org/packages/78/2d/7fa73dfa841b5ac06c7b8855cfc18622132e365f5b81d02230333ff26e9e/cffi-2.0.0-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:3e17ed538242334bf70832644a32a7aae3d83b57567f9fd60a26257e992b79ba", size = 219572, upload-time = "2025-09-08T23:22:52.902Z" },
+    { url = "https://files.pythonhosted.org/packages/07/e0/267e57e387b4ca276b90f0434ff88b2c2241ad72b16d31836adddfd6031b/cffi-2.0.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:3925dd22fa2b7699ed2617149842d2e6adde22b262fcbfada50e3d195e4b3a94", size = 222963, upload-time = "2025-09-08T23:22:54.518Z" },
+    { url = "https://files.pythonhosted.org/packages/b6/75/1f2747525e06f53efbd878f4d03bac5b859cbc11c633d0fb81432d98a795/cffi-2.0.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:2c8f814d84194c9ea681642fd164267891702542f028a15fc97d4674b6206187", size = 221361, upload-time = "2025-09-08T23:22:55.867Z" },
+    { url = "https://files.pythonhosted.org/packages/7b/2b/2b6435f76bfeb6bbf055596976da087377ede68df465419d192acf00c437/cffi-2.0.0-cp312-cp312-win32.whl", hash = "sha256:da902562c3e9c550df360bfa53c035b2f241fed6d9aef119048073680ace4a18", size = 172932, upload-time = "2025-09-08T23:22:57.188Z" },
+    { url = "https://files.pythonhosted.org/packages/f8/ed/13bd4418627013bec4ed6e54283b1959cf6db888048c7cf4b4c3b5b36002/cffi-2.0.0-cp312-cp312-win_amd64.whl", hash = "sha256:da68248800ad6320861f129cd9c1bf96ca849a2771a59e0344e88681905916f5", size = 183557, upload-time = "2025-09-08T23:22:58.351Z" },
+    { url = "https://files.pythonhosted.org/packages/95/31/9f7f93ad2f8eff1dbc1c3656d7ca5bfd8fb52c9d786b4dcf19b2d02217fa/cffi-2.0.0-cp312-cp312-win_arm64.whl", hash = "sha256:4671d9dd5ec934cb9a73e7ee9676f9362aba54f7f34910956b84d727b0d73fb6", size = 177762, upload-time = "2025-09-08T23:22:59.668Z" },
+    { url = "https://files.pythonhosted.org/packages/4b/8d/a0a47a0c9e413a658623d014e91e74a50cdd2c423f7ccfd44086ef767f90/cffi-2.0.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:00bdf7acc5f795150faa6957054fbbca2439db2f775ce831222b66f192f03beb", size = 185230, upload-time = "2025-09-08T23:23:00.879Z" },
+    { url = "https://files.pythonhosted.org/packages/4a/d2/a6c0296814556c68ee32009d9c2ad4f85f2707cdecfd7727951ec228005d/cffi-2.0.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:45d5e886156860dc35862657e1494b9bae8dfa63bf56796f2fb56e1679fc0bca", size = 181043, upload-time = "2025-09-08T23:23:02.231Z" },
+    { url = "https://files.pythonhosted.org/packages/b0/1e/d22cc63332bd59b06481ceaac49d6c507598642e2230f201649058a7e704/cffi-2.0.0-cp313-cp313-manylinux1_i686.manylinux2014_i686.manylinux_2_17_i686.manylinux_2_5_i686.whl", hash = "sha256:07b271772c100085dd28b74fa0cd81c8fb1a3ba18b21e03d7c27f3436a10606b", size = 212446, upload-time = "2025-09-08T23:23:03.472Z" },
+    { url = "https://files.pythonhosted.org/packages/a9/f5/a2c23eb03b61a0b8747f211eb716446c826ad66818ddc7810cc2cc19b3f2/cffi-2.0.0-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:d48a880098c96020b02d5a1f7d9251308510ce8858940e6fa99ece33f610838b", size = 220101, upload-time = "2025-09-08T23:23:04.792Z" },
+    { url = "https://files.pythonhosted.org/packages/f2/7f/e6647792fc5850d634695bc0e6ab4111ae88e89981d35ac269956605feba/cffi-2.0.0-cp313-cp313-manylinux2014_ppc64le.manylinux_2_17_ppc64le.whl", hash = "sha256:f93fd8e5c8c0a4aa1f424d6173f14a892044054871c771f8566e4008eaa359d2", size = 207948, upload-time = "2025-09-08T23:23:06.127Z" },
+    { url = "https://files.pythonhosted.org/packages/cb/1e/a5a1bd6f1fb30f22573f76533de12a00bf274abcdc55c8edab639078abb6/cffi-2.0.0-cp313-cp313-manylinux2014_s390x.manylinux_2_17_s390x.whl", hash = "sha256:dd4f05f54a52fb558f1ba9f528228066954fee3ebe629fc1660d874d040ae5a3", size = 206422, upload-time = "2025-09-08T23:23:07.753Z" },
+    { url = "https://files.pythonhosted.org/packages/98/df/0a1755e750013a2081e863e7cd37e0cdd02664372c754e5560099eb7aa44/cffi-2.0.0-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:c8d3b5532fc71b7a77c09192b4a5a200ea992702734a2e9279a37f2478236f26", size = 219499, upload-time = "2025-09-08T23:23:09.648Z" },
+    { url = "https://files.pythonhosted.org/packages/50/e1/a969e687fcf9ea58e6e2a928ad5e2dd88cc12f6f0ab477e9971f2309b57c/cffi-2.0.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:d9b29c1f0ae438d5ee9acb31cadee00a58c46cc9c0b2f9038c6b0b3470877a8c", size = 222928, upload-time = "2025-09-08T23:23:10.928Z" },
+    { url = "https://files.pythonhosted.org/packages/36/54/0362578dd2c9e557a28ac77698ed67323ed5b9775ca9d3fe73fe191bb5d8/cffi-2.0.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:6d50360be4546678fc1b79ffe7a66265e28667840010348dd69a314145807a1b", size = 221302, upload-time = "2025-09-08T23:23:12.42Z" },
+    { url = "https://files.pythonhosted.org/packages/eb/6d/bf9bda840d5f1dfdbf0feca87fbdb64a918a69bca42cfa0ba7b137c48cb8/cffi-2.0.0-cp313-cp313-win32.whl", hash = "sha256:74a03b9698e198d47562765773b4a8309919089150a0bb17d829ad7b44b60d27", size = 172909, upload-time = "2025-09-08T23:23:14.32Z" },
+    { url = "https://files.pythonhosted.org/packages/37/18/6519e1ee6f5a1e579e04b9ddb6f1676c17368a7aba48299c3759bbc3c8b3/cffi-2.0.0-cp313-cp313-win_amd64.whl", hash = "sha256:19f705ada2530c1167abacb171925dd886168931e0a7b78f5bffcae5c6b5be75", size = 183402, upload-time = "2025-09-08T23:23:15.535Z" },
+    { url = "https://files.pythonhosted.org/packages/cb/0e/02ceeec9a7d6ee63bb596121c2c8e9b3a9e150936f4fbef6ca1943e6137c/cffi-2.0.0-cp313-cp313-win_arm64.whl", hash = "sha256:256f80b80ca3853f90c21b23ee78cd008713787b1b1e93eae9f3d6a7134abd91", size = 177780, upload-time = "2025-09-08T23:23:16.761Z" },
+    { url = "https://files.pythonhosted.org/packages/92/c4/3ce07396253a83250ee98564f8d7e9789fab8e58858f35d07a9a2c78de9f/cffi-2.0.0-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:fc33c5141b55ed366cfaad382df24fe7dcbc686de5be719b207bb248e3053dc5", size = 185320, upload-time = "2025-09-08T23:23:18.087Z" },
+    { url = "https://files.pythonhosted.org/packages/59/dd/27e9fa567a23931c838c6b02d0764611c62290062a6d4e8ff7863daf9730/cffi-2.0.0-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:c654de545946e0db659b3400168c9ad31b5d29593291482c43e3564effbcee13", size = 181487, upload-time = "2025-09-08T23:23:19.622Z" },
+    { url = "https://files.pythonhosted.org/packages/d6/43/0e822876f87ea8a4ef95442c3d766a06a51fc5298823f884ef87aaad168c/cffi-2.0.0-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:24b6f81f1983e6df8db3adc38562c83f7d4a0c36162885ec7f7b77c7dcbec97b", size = 220049, upload-time = "2025-09-08T23:23:20.853Z" },
+    { url = "https://files.pythonhosted.org/packages/b4/89/76799151d9c2d2d1ead63c2429da9ea9d7aac304603de0c6e8764e6e8e70/cffi-2.0.0-cp314-cp314-manylinux2014_ppc64le.manylinux_2_17_ppc64le.whl", hash = "sha256:12873ca6cb9b0f0d3a0da705d6086fe911591737a59f28b7936bdfed27c0d47c", size = 207793, upload-time = "2025-09-08T23:23:22.08Z" },
+    { url = "https://files.pythonhosted.org/packages/bb/dd/3465b14bb9e24ee24cb88c9e3730f6de63111fffe513492bf8c808a3547e/cffi-2.0.0-cp314-cp314-manylinux2014_s390x.manylinux_2_17_s390x.whl", hash = "sha256:d9b97165e8aed9272a6bb17c01e3cc5871a594a446ebedc996e2397a1c1ea8ef", size = 206300, upload-time = "2025-09-08T23:23:23.314Z" },
+    { url = "https://files.pythonhosted.org/packages/47/d9/d83e293854571c877a92da46fdec39158f8d7e68da75bf73581225d28e90/cffi-2.0.0-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:afb8db5439b81cf9c9d0c80404b60c3cc9c3add93e114dcae767f1477cb53775", size = 219244, upload-time = "2025-09-08T23:23:24.541Z" },
+    { url = "https://files.pythonhosted.org/packages/2b/0f/1f177e3683aead2bb00f7679a16451d302c436b5cbf2505f0ea8146ef59e/cffi-2.0.0-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:737fe7d37e1a1bffe70bd5754ea763a62a066dc5913ca57e957824b72a85e205", size = 222828, upload-time = "2025-09-08T23:23:26.143Z" },
+    { url = "https://files.pythonhosted.org/packages/c6/0f/cafacebd4b040e3119dcb32fed8bdef8dfe94da653155f9d0b9dc660166e/cffi-2.0.0-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:38100abb9d1b1435bc4cc340bb4489635dc2f0da7456590877030c9b3d40b0c1", size = 220926, upload-time = "2025-09-08T23:23:27.873Z" },
+    { url = "https://files.pythonhosted.org/packages/3e/aa/df335faa45b395396fcbc03de2dfcab242cd61a9900e914fe682a59170b1/cffi-2.0.0-cp314-cp314-win32.whl", hash = "sha256:087067fa8953339c723661eda6b54bc98c5625757ea62e95eb4898ad5e776e9f", size = 175328, upload-time = "2025-09-08T23:23:44.61Z" },
+    { url = "https://files.pythonhosted.org/packages/bb/92/882c2d30831744296ce713f0feb4c1cd30f346ef747b530b5318715cc367/cffi-2.0.0-cp314-cp314-win_amd64.whl", hash = "sha256:203a48d1fb583fc7d78a4c6655692963b860a417c0528492a6bc21f1aaefab25", size = 185650, upload-time = "2025-09-08T23:23:45.848Z" },
+    { url = "https://files.pythonhosted.org/packages/9f/2c/98ece204b9d35a7366b5b2c6539c350313ca13932143e79dc133ba757104/cffi-2.0.0-cp314-cp314-win_arm64.whl", hash = "sha256:dbd5c7a25a7cb98f5ca55d258b103a2054f859a46ae11aaf23134f9cc0d356ad", size = 180687, upload-time = "2025-09-08T23:23:47.105Z" },
+    { url = "https://files.pythonhosted.org/packages/3e/61/c768e4d548bfa607abcda77423448df8c471f25dbe64fb2ef6d555eae006/cffi-2.0.0-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:9a67fc9e8eb39039280526379fb3a70023d77caec1852002b4da7e8b270c4dd9", size = 188773, upload-time = "2025-09-08T23:23:29.347Z" },
+    { url = "https://files.pythonhosted.org/packages/2c/ea/5f76bce7cf6fcd0ab1a1058b5af899bfbef198bea4d5686da88471ea0336/cffi-2.0.0-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:7a66c7204d8869299919db4d5069a82f1561581af12b11b3c9f48c584eb8743d", size = 185013, upload-time = "2025-09-08T23:23:30.63Z" },
+    { url = "https://files.pythonhosted.org/packages/be/b4/c56878d0d1755cf9caa54ba71e5d049479c52f9e4afc230f06822162ab2f/cffi-2.0.0-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:7cc09976e8b56f8cebd752f7113ad07752461f48a58cbba644139015ac24954c", size = 221593, upload-time = "2025-09-08T23:23:31.91Z" },
+    { url = "https://files.pythonhosted.org/packages/e0/0d/eb704606dfe8033e7128df5e90fee946bbcb64a04fcdaa97321309004000/cffi-2.0.0-cp314-cp314t-manylinux2014_ppc64le.manylinux_2_17_ppc64le.whl", hash = "sha256:92b68146a71df78564e4ef48af17551a5ddd142e5190cdf2c5624d0c3ff5b2e8", size = 209354, upload-time = "2025-09-08T23:23:33.214Z" },
+    { url = "https://files.pythonhosted.org/packages/d8/19/3c435d727b368ca475fb8742ab97c9cb13a0de600ce86f62eab7fa3eea60/cffi-2.0.0-cp314-cp314t-manylinux2014_s390x.manylinux_2_17_s390x.whl", hash = "sha256:b1e74d11748e7e98e2f426ab176d4ed720a64412b6a15054378afdb71e0f37dc", size = 208480, upload-time = "2025-09-08T23:23:34.495Z" },
+    { url = "https://files.pythonhosted.org/packages/d0/44/681604464ed9541673e486521497406fadcc15b5217c3e326b061696899a/cffi-2.0.0-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:28a3a209b96630bca57cce802da70c266eb08c6e97e5afd61a75611ee6c64592", size = 221584, upload-time = "2025-09-08T23:23:36.096Z" },
+    { url = "https://files.pythonhosted.org/packages/25/8e/342a504ff018a2825d395d44d63a767dd8ebc927ebda557fecdaca3ac33a/cffi-2.0.0-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:7553fb2090d71822f02c629afe6042c299edf91ba1bf94951165613553984512", size = 224443, upload-time = "2025-09-08T23:23:37.328Z" },
+    { url = "https://files.pythonhosted.org/packages/e1/5e/b666bacbbc60fbf415ba9988324a132c9a7a0448a9a8f125074671c0f2c3/cffi-2.0.0-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:6c6c373cfc5c83a975506110d17457138c8c63016b563cc9ed6e056a82f13ce4", size = 223437, upload-time = "2025-09-08T23:23:38.945Z" },
+    { url = "https://files.pythonhosted.org/packages/a0/1d/ec1a60bd1a10daa292d3cd6bb0b359a81607154fb8165f3ec95fe003b85c/cffi-2.0.0-cp314-cp314t-win32.whl", hash = "sha256:1fc9ea04857caf665289b7a75923f2c6ed559b8298a1b8c49e59f7dd95c8481e", size = 180487, upload-time = "2025-09-08T23:23:40.423Z" },
+    { url = "https://files.pythonhosted.org/packages/bf/41/4c1168c74fac325c0c8156f04b6749c8b6a8f405bbf91413ba088359f60d/cffi-2.0.0-cp314-cp314t-win_amd64.whl", hash = "sha256:d68b6cef7827e8641e8ef16f4494edda8b36104d79773a334beaa1e3521430f6", size = 191726, upload-time = "2025-09-08T23:23:41.742Z" },
+    { url = "https://files.pythonhosted.org/packages/ae/3a/dbeec9d1ee0844c679f6bb5d6ad4e9f198b1224f4e7a32825f47f6192b0c/cffi-2.0.0-cp314-cp314t-win_arm64.whl", hash = "sha256:0a1527a803f0a659de1af2e1fd700213caba79377e27e4693648c2923da066f9", size = 184195, upload-time = "2025-09-08T23:23:43.004Z" },
+]
+
+[[package]]
+name = "cfgv"
+version = "3.5.0"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/4e/b5/721b8799b04bf9afe054a3899c6cf4e880fcf8563cc71c15610242490a0c/cfgv-3.5.0.tar.gz", hash = "sha256:d5b1034354820651caa73ede66a6294d6e95c1b00acc5e9b098e917404669132", size = 7334, upload-time = "2025-11-19T20:55:51.612Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/db/3c/33bac158f8ab7f89b2e59426d5fe2e4f63f7ed25df84c036890172b412b5/cfgv-3.5.0-py2.py3-none-any.whl", hash = "sha256:a8dc6b26ad22ff227d2634a65cb388215ce6cc96bbcc5cfde7641ae87e8dacc0", size = 7445, upload-time = "2025-11-19T20:55:50.744Z" },
+]
+
+[[package]]
+name = "charset-normalizer"
+version = "3.4.6"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/7b/60/e3bec1881450851b087e301bedc3daa9377a4d45f1c26aa90b0b235e38aa/charset_normalizer-3.4.6.tar.gz", hash = "sha256:1ae6b62897110aa7c79ea2f5dd38d1abca6db663687c0b1ad9aed6f6bae3d9d6", size = 143363, upload-time = "2026-03-15T18:53:25.478Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/62/28/ff6f234e628a2de61c458be2779cb182bc03f6eec12200d4a525bbfc9741/charset_normalizer-3.4.6-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:82060f995ab5003a2d6e0f4ad29065b7672b6593c8c63559beefe5b443242c3e", size = 293582, upload-time = "2026-03-15T18:50:25.454Z" },
+    { url = "https://files.pythonhosted.org/packages/1c/b7/b1a117e5385cbdb3205f6055403c2a2a220c5ea80b8716c324eaf75c5c95/charset_normalizer-3.4.6-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:60c74963d8350241a79cb8feea80e54d518f72c26db618862a8f53e5023deaf9", size = 197240, upload-time = "2026-03-15T18:50:27.196Z" },
+    { url = "https://files.pythonhosted.org/packages/a1/5f/2574f0f09f3c3bc1b2f992e20bce6546cb1f17e111c5be07308dc5427956/charset_normalizer-3.4.6-cp311-cp311-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:f6e4333fb15c83f7d1482a76d45a0818897b3d33f00efd215528ff7c51b8e35d", size = 217363, upload-time = "2026-03-15T18:50:28.601Z" },
+    { url = "https://files.pythonhosted.org/packages/4a/d1/0ae20ad77bc949ddd39b51bf383b6ca932f2916074c95cad34ae465ab71f/charset_normalizer-3.4.6-cp311-cp311-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:bc72863f4d9aba2e8fd9085e63548a324ba706d2ea2c83b260da08a59b9482de", size = 212994, upload-time = "2026-03-15T18:50:30.102Z" },
+    { url = "https://files.pythonhosted.org/packages/60/ac/3233d262a310c1b12633536a07cde5ddd16985e6e7e238e9f3f9423d8eb9/charset_normalizer-3.4.6-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:9cc4fc6c196d6a8b76629a70ddfcd4635a6898756e2d9cac5565cf0654605d73", size = 204697, upload-time = "2026-03-15T18:50:31.654Z" },
+    { url = "https://files.pythonhosted.org/packages/25/3c/8a18fc411f085b82303cfb7154eed5bd49c77035eb7608d049468b53f87c/charset_normalizer-3.4.6-cp311-cp311-manylinux_2_31_armv7l.whl", hash = "sha256:0c173ce3a681f309f31b87125fecec7a5d1347261ea11ebbb856fa6006b23c8c", size = 191673, upload-time = "2026-03-15T18:50:33.433Z" },
+    { url = "https://files.pythonhosted.org/packages/ff/a7/11cfe61d6c5c5c7438d6ba40919d0306ed83c9ab957f3d4da2277ff67836/charset_normalizer-3.4.6-cp311-cp311-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:c907cdc8109f6c619e6254212e794d6548373cc40e1ec75e6e3823d9135d29cc", size = 201120, upload-time = "2026-03-15T18:50:35.105Z" },
+    { url = "https://files.pythonhosted.org/packages/b5/10/cf491fa1abd47c02f69687046b896c950b92b6cd7337a27e6548adbec8e4/charset_normalizer-3.4.6-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:404a1e552cf5b675a87f0651f8b79f5f1e6fd100ee88dc612f89aa16abd4486f", size = 200911, upload-time = "2026-03-15T18:50:36.819Z" },
+    { url = "https://files.pythonhosted.org/packages/28/70/039796160b48b18ed466fde0af84c1b090c4e288fae26cd674ad04a2d703/charset_normalizer-3.4.6-cp311-cp311-musllinux_1_2_armv7l.whl", hash = "sha256:e3c701e954abf6fc03a49f7c579cc80c2c6cc52525340ca3186c41d3f33482ef", size = 192516, upload-time = "2026-03-15T18:50:38.228Z" },
+    { url = "https://files.pythonhosted.org/packages/ff/34/c56f3223393d6ff3124b9e78f7de738047c2d6bc40a4f16ac0c9d7a1cb3c/charset_normalizer-3.4.6-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:7a6967aaf043bceabab5412ed6bd6bd26603dae84d5cb75bf8d9a74a4959d398", size = 218795, upload-time = "2026-03-15T18:50:39.664Z" },
+    { url = "https://files.pythonhosted.org/packages/e8/3b/ce2d4f86c5282191a041fdc5a4ce18f1c6bd40a5bd1f74cf8625f08d51c1/charset_normalizer-3.4.6-cp311-cp311-musllinux_1_2_riscv64.whl", hash = "sha256:5feb91325bbceade6afab43eb3b508c63ee53579fe896c77137ded51c6b6958e", size = 201833, upload-time = "2026-03-15T18:50:41.552Z" },
+    { url = "https://files.pythonhosted.org/packages/3b/9b/b6a9f76b0fd7c5b5ec58b228ff7e85095370282150f0bd50b3126f5506d6/charset_normalizer-3.4.6-cp311-cp311-musllinux_1_2_s390x.whl", hash = "sha256:f820f24b09e3e779fe84c3c456cb4108a7aa639b0d1f02c28046e11bfcd088ed", size = 213920, upload-time = "2026-03-15T18:50:43.33Z" },
+    { url = "https://files.pythonhosted.org/packages/ae/98/7bc23513a33d8172365ed30ee3a3b3fe1ece14a395e5fc94129541fc6003/charset_normalizer-3.4.6-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:b35b200d6a71b9839a46b9b7fff66b6638bb52fc9658aa58796b0326595d3021", size = 206951, upload-time = "2026-03-15T18:50:44.789Z" },
+    { url = "https://files.pythonhosted.org/packages/32/73/c0b86f3d1458468e11aec870e6b3feac931facbe105a894b552b0e518e79/charset_normalizer-3.4.6-cp311-cp311-win32.whl", hash = "sha256:9ca4c0b502ab399ef89248a2c84c54954f77a070f28e546a85e91da627d1301e", size = 143703, upload-time = "2026-03-15T18:50:46.103Z" },
+    { url = "https://files.pythonhosted.org/packages/c6/e3/76f2facfe8eddee0bbd38d2594e709033338eae44ebf1738bcefe0a06185/charset_normalizer-3.4.6-cp311-cp311-win_amd64.whl", hash = "sha256:a9e68c9d88823b274cf1e72f28cb5dc89c990edf430b0bfd3e2fb0785bfeabf4", size = 153857, upload-time = "2026-03-15T18:50:47.563Z" },
+    { url = "https://files.pythonhosted.org/packages/e2/dc/9abe19c9b27e6cd3636036b9d1b387b78c40dedbf0b47f9366737684b4b0/charset_normalizer-3.4.6-cp311-cp311-win_arm64.whl", hash = "sha256:97d0235baafca5f2b09cf332cc275f021e694e8362c6bb9c96fc9a0eb74fc316", size = 142751, upload-time = "2026-03-15T18:50:49.234Z" },
+    { url = "https://files.pythonhosted.org/packages/e5/62/c0815c992c9545347aeea7859b50dc9044d147e2e7278329c6e02ac9a616/charset_normalizer-3.4.6-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:2ef7fedc7a6ecbe99969cd09632516738a97eeb8bd7258bf8a0f23114c057dab", size = 295154, upload-time = "2026-03-15T18:50:50.88Z" },
+    { url = "https://files.pythonhosted.org/packages/a8/37/bdca6613c2e3c58c7421891d80cc3efa1d32e882f7c4a7ee6039c3fc951a/charset_normalizer-3.4.6-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:a4ea868bc28109052790eb2b52a9ab33f3aa7adc02f96673526ff47419490e21", size = 199191, upload-time = "2026-03-15T18:50:52.658Z" },
+    { url = "https://files.pythonhosted.org/packages/6c/92/9934d1bbd69f7f398b38c5dae1cbf9cc672e7c34a4adf7b17c0a9c17d15d/charset_normalizer-3.4.6-cp312-cp312-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:836ab36280f21fc1a03c99cd05c6b7af70d2697e374c7af0b61ed271401a72a2", size = 218674, upload-time = "2026-03-15T18:50:54.102Z" },
+    { url = "https://files.pythonhosted.org/packages/af/90/25f6ab406659286be929fd89ab0e78e38aa183fc374e03aa3c12d730af8a/charset_normalizer-3.4.6-cp312-cp312-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:f1ce721c8a7dfec21fcbdfe04e8f68174183cf4e8188e0645e92aa23985c57ff", size = 215259, upload-time = "2026-03-15T18:50:55.616Z" },
+    { url = "https://files.pythonhosted.org/packages/4e/ef/79a463eb0fff7f96afa04c1d4c51f8fc85426f918db467854bfb6a569ce3/charset_normalizer-3.4.6-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:0e28d62a8fc7a1fa411c43bd65e346f3bce9716dc51b897fbe930c5987b402d5", size = 207276, upload-time = "2026-03-15T18:50:57.054Z" },
+    { url = "https://files.pythonhosted.org/packages/f7/72/d0426afec4b71dc159fa6b4e68f868cd5a3ecd918fec5813a15d292a7d10/charset_normalizer-3.4.6-cp312-cp312-manylinux_2_31_armv7l.whl", hash = "sha256:530d548084c4a9f7a16ed4a294d459b4f229db50df689bfe92027452452943a0", size = 195161, upload-time = "2026-03-15T18:50:58.686Z" },
+    { url = "https://files.pythonhosted.org/packages/bf/18/c82b06a68bfcb6ce55e508225d210c7e6a4ea122bfc0748892f3dc4e8e11/charset_normalizer-3.4.6-cp312-cp312-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:30f445ae60aad5e1f8bdbb3108e39f6fbc09f4ea16c815c66578878325f8f15a", size = 203452, upload-time = "2026-03-15T18:51:00.196Z" },
+    { url = "https://files.pythonhosted.org/packages/44/d6/0c25979b92f8adafdbb946160348d8d44aa60ce99afdc27df524379875cb/charset_normalizer-3.4.6-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:ac2393c73378fea4e52aa56285a3d64be50f1a12395afef9cce47772f60334c2", size = 202272, upload-time = "2026-03-15T18:51:01.703Z" },
+    { url = "https://files.pythonhosted.org/packages/2e/3d/7fea3e8fe84136bebbac715dd1221cc25c173c57a699c030ab9b8900cbb7/charset_normalizer-3.4.6-cp312-cp312-musllinux_1_2_armv7l.whl", hash = "sha256:90ca27cd8da8118b18a52d5f547859cc1f8354a00cd1e8e5120df3e30d6279e5", size = 195622, upload-time = "2026-03-15T18:51:03.526Z" },
+    { url = "https://files.pythonhosted.org/packages/57/8a/d6f7fd5cb96c58ef2f681424fbca01264461336d2a7fc875e4446b1f1346/charset_normalizer-3.4.6-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:8e5a94886bedca0f9b78fecd6afb6629142fd2605aa70a125d49f4edc6037ee6", size = 220056, upload-time = "2026-03-15T18:51:05.269Z" },
+    { url = "https://files.pythonhosted.org/packages/16/50/478cdda782c8c9c3fb5da3cc72dd7f331f031e7f1363a893cdd6ca0f8de0/charset_normalizer-3.4.6-cp312-cp312-musllinux_1_2_riscv64.whl", hash = "sha256:695f5c2823691a25f17bc5d5ffe79fa90972cc34b002ac6c843bb8a1720e950d", size = 203751, upload-time = "2026-03-15T18:51:06.858Z" },
+    { url = "https://files.pythonhosted.org/packages/75/fc/cc2fcac943939c8e4d8791abfa139f685e5150cae9f94b60f12520feaa9b/charset_normalizer-3.4.6-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:231d4da14bcd9301310faf492051bee27df11f2bc7549bc0bb41fef11b82daa2", size = 216563, upload-time = "2026-03-15T18:51:08.564Z" },
+    { url = "https://files.pythonhosted.org/packages/a8/b7/a4add1d9a5f68f3d037261aecca83abdb0ab15960a3591d340e829b37298/charset_normalizer-3.4.6-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:a056d1ad2633548ca18ffa2f85c202cfb48b68615129143915b8dc72a806a923", size = 209265, upload-time = "2026-03-15T18:51:10.312Z" },
+    { url = "https://files.pythonhosted.org/packages/6c/18/c094561b5d64a24277707698e54b7f67bd17a4f857bbfbb1072bba07c8bf/charset_normalizer-3.4.6-cp312-cp312-win32.whl", hash = "sha256:c2274ca724536f173122f36c98ce188fd24ce3dad886ec2b7af859518ce008a4", size = 144229, upload-time = "2026-03-15T18:51:11.694Z" },
+    { url = "https://files.pythonhosted.org/packages/ab/20/0567efb3a8fd481b8f34f739ebddc098ed062a59fed41a8d193a61939e8f/charset_normalizer-3.4.6-cp312-cp312-win_amd64.whl", hash = "sha256:c8ae56368f8cc97c7e40a7ee18e1cedaf8e780cd8bc5ed5ac8b81f238614facb", size = 154277, upload-time = "2026-03-15T18:51:13.004Z" },
+    { url = "https://files.pythonhosted.org/packages/15/57/28d79b44b51933119e21f65479d0864a8d5893e494cf5daab15df0247c17/charset_normalizer-3.4.6-cp312-cp312-win_arm64.whl", hash = "sha256:899d28f422116b08be5118ef350c292b36fc15ec2daeb9ea987c89281c7bb5c4", size = 142817, upload-time = "2026-03-15T18:51:14.408Z" },
+    { url = "https://files.pythonhosted.org/packages/1e/1d/4fdabeef4e231153b6ed7567602f3b68265ec4e5b76d6024cf647d43d981/charset_normalizer-3.4.6-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:11afb56037cbc4b1555a34dd69151e8e069bee82e613a73bef6e714ce733585f", size = 294823, upload-time = "2026-03-15T18:51:15.755Z" },
+    { url = "https://files.pythonhosted.org/packages/47/7b/20e809b89c69d37be748d98e84dce6820bf663cf19cf6b942c951a3e8f41/charset_normalizer-3.4.6-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:423fb7e748a08f854a08a222b983f4df1912b1daedce51a72bd24fe8f26a1843", size = 198527, upload-time = "2026-03-15T18:51:17.177Z" },
+    { url = "https://files.pythonhosted.org/packages/37/a6/4f8d27527d59c039dce6f7622593cdcd3d70a8504d87d09eb11e9fdc6062/charset_normalizer-3.4.6-cp313-cp313-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:d73beaac5e90173ac3deb9928a74763a6d230f494e4bfb422c217a0ad8e629bf", size = 218388, upload-time = "2026-03-15T18:51:18.934Z" },
+    { url = "https://files.pythonhosted.org/packages/f6/9b/4770ccb3e491a9bacf1c46cc8b812214fe367c86a96353ccc6daf87b01ec/charset_normalizer-3.4.6-cp313-cp313-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:d60377dce4511655582e300dc1e5a5f24ba0cb229005a1d5c8d0cb72bb758ab8", size = 214563, upload-time = "2026-03-15T18:51:20.374Z" },
+    { url = "https://files.pythonhosted.org/packages/2b/58/a199d245894b12db0b957d627516c78e055adc3a0d978bc7f65ddaf7c399/charset_normalizer-3.4.6-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:530e8cebeea0d76bdcf93357aa5e41336f48c3dc709ac52da2bb167c5b8271d9", size = 206587, upload-time = "2026-03-15T18:51:21.807Z" },
+    { url = "https://files.pythonhosted.org/packages/7e/70/3def227f1ec56f5c69dfc8392b8bd63b11a18ca8178d9211d7cc5e5e4f27/charset_normalizer-3.4.6-cp313-cp313-manylinux_2_31_armv7l.whl", hash = "sha256:a26611d9987b230566f24a0a125f17fe0de6a6aff9f25c9f564aaa2721a5fb88", size = 194724, upload-time = "2026-03-15T18:51:23.508Z" },
+    { url = "https://files.pythonhosted.org/packages/58/ab/9318352e220c05efd31c2779a23b50969dc94b985a2efa643ed9077bfca5/charset_normalizer-3.4.6-cp313-cp313-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:34315ff4fc374b285ad7f4a0bf7dcbfe769e1b104230d40f49f700d4ab6bbd84", size = 202956, upload-time = "2026-03-15T18:51:25.239Z" },
+    { url = "https://files.pythonhosted.org/packages/75/13/f3550a3ac25b70f87ac98c40d3199a8503676c2f1620efbf8d42095cfc40/charset_normalizer-3.4.6-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:5f8ddd609f9e1af8c7bd6e2aca279c931aefecd148a14402d4e368f3171769fd", size = 201923, upload-time = "2026-03-15T18:51:26.682Z" },
+    { url = "https://files.pythonhosted.org/packages/1b/db/c5c643b912740b45e8eec21de1bbab8e7fc085944d37e1e709d3dcd9d72f/charset_normalizer-3.4.6-cp313-cp313-musllinux_1_2_armv7l.whl", hash = "sha256:80d0a5615143c0b3225e5e3ef22c8d5d51f3f72ce0ea6fb84c943546c7b25b6c", size = 195366, upload-time = "2026-03-15T18:51:28.129Z" },
+    { url = "https://files.pythonhosted.org/packages/5a/67/3b1c62744f9b2448443e0eb160d8b001c849ec3fef591e012eda6484787c/charset_normalizer-3.4.6-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:92734d4d8d187a354a556626c221cd1a892a4e0802ccb2af432a1d85ec012194", size = 219752, upload-time = "2026-03-15T18:51:29.556Z" },
+    { url = "https://files.pythonhosted.org/packages/f6/98/32ffbaf7f0366ffb0445930b87d103f6b406bc2c271563644bde8a2b1093/charset_normalizer-3.4.6-cp313-cp313-musllinux_1_2_riscv64.whl", hash = "sha256:613f19aa6e082cf96e17e3ffd89383343d0d589abda756b7764cf78361fd41dc", size = 203296, upload-time = "2026-03-15T18:51:30.921Z" },
+    { url = "https://files.pythonhosted.org/packages/41/12/5d308c1bbe60cabb0c5ef511574a647067e2a1f631bc8634fcafaccd8293/charset_normalizer-3.4.6-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:2b1a63e8224e401cafe7739f77efd3f9e7f5f2026bda4aead8e59afab537784f", size = 215956, upload-time = "2026-03-15T18:51:32.399Z" },
+    { url = "https://files.pythonhosted.org/packages/53/e9/5f85f6c5e20669dbe56b165c67b0260547dea97dba7e187938833d791687/charset_normalizer-3.4.6-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:6cceb5473417d28edd20c6c984ab6fee6c6267d38d906823ebfe20b03d607dc2", size = 208652, upload-time = "2026-03-15T18:51:34.214Z" },
+    { url = "https://files.pythonhosted.org/packages/f1/11/897052ea6af56df3eef3ca94edafee410ca699ca0c7b87960ad19932c55e/charset_normalizer-3.4.6-cp313-cp313-win32.whl", hash = "sha256:d7de2637729c67d67cf87614b566626057e95c303bc0a55ffe391f5205e7003d", size = 143940, upload-time = "2026-03-15T18:51:36.15Z" },
+    { url = "https://files.pythonhosted.org/packages/a1/5c/724b6b363603e419829f561c854b87ed7c7e31231a7908708ac086cdf3e2/charset_normalizer-3.4.6-cp313-cp313-win_amd64.whl", hash = "sha256:572d7c822caf521f0525ba1bce1a622a0b85cf47ffbdae6c9c19e3b5ac3c4389", size = 154101, upload-time = "2026-03-15T18:51:37.876Z" },
+    { url = "https://files.pythonhosted.org/packages/01/a5/7abf15b4c0968e47020f9ca0935fb3274deb87cb288cd187cad92e8cdffd/charset_normalizer-3.4.6-cp313-cp313-win_arm64.whl", hash = "sha256:a4474d924a47185a06411e0064b803c68be044be2d60e50e8bddcc2649957c1f", size = 143109, upload-time = "2026-03-15T18:51:39.565Z" },
+    { url = "https://files.pythonhosted.org/packages/25/6f/ffe1e1259f384594063ea1869bfb6be5cdb8bc81020fc36c3636bc8302a1/charset_normalizer-3.4.6-cp314-cp314-macosx_10_15_universal2.whl", hash = "sha256:9cc6e6d9e571d2f863fa77700701dae73ed5f78881efc8b3f9a4398772ff53e8", size = 294458, upload-time = "2026-03-15T18:51:41.134Z" },
+    { url = "https://files.pythonhosted.org/packages/56/60/09bb6c13a8c1016c2ed5c6a6488e4ffef506461aa5161662bd7636936fb1/charset_normalizer-3.4.6-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:ef5960d965e67165d75b7c7ffc60a83ec5abfc5c11b764ec13ea54fbef8b4421", size = 199277, upload-time = "2026-03-15T18:51:42.953Z" },
+    { url = "https://files.pythonhosted.org/packages/00/50/dcfbb72a5138bbefdc3332e8d81a23494bf67998b4b100703fd15fa52d81/charset_normalizer-3.4.6-cp314-cp314-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:b3694e3f87f8ac7ce279d4355645b3c878d24d1424581b46282f24b92f5a4ae2", size = 218758, upload-time = "2026-03-15T18:51:44.339Z" },
+    { url = "https://files.pythonhosted.org/packages/03/b3/d79a9a191bb75f5aa81f3aaaa387ef29ce7cb7a9e5074ba8ea095cc073c2/charset_normalizer-3.4.6-cp314-cp314-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:5d11595abf8dd942a77883a39d81433739b287b6aa71620f15164f8096221b30", size = 215299, upload-time = "2026-03-15T18:51:45.871Z" },
+    { url = "https://files.pythonhosted.org/packages/76/7e/bc8911719f7084f72fd545f647601ea3532363927f807d296a8c88a62c0d/charset_normalizer-3.4.6-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:7bda6eebafd42133efdca535b04ccb338ab29467b3f7bf79569883676fc628db", size = 206811, upload-time = "2026-03-15T18:51:47.308Z" },
+    { url = "https://files.pythonhosted.org/packages/e2/40/c430b969d41dda0c465aa36cc7c2c068afb67177bef50905ac371b28ccc7/charset_normalizer-3.4.6-cp314-cp314-manylinux_2_31_armv7l.whl", hash = "sha256:bbc8c8650c6e51041ad1be191742b8b421d05bbd3410f43fa2a00c8db87678e8", size = 193706, upload-time = "2026-03-15T18:51:48.849Z" },
+    { url = "https://files.pythonhosted.org/packages/48/15/e35e0590af254f7df984de1323640ef375df5761f615b6225ba8deb9799a/charset_normalizer-3.4.6-cp314-cp314-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:22c6f0c2fbc31e76c3b8a86fba1a56eda6166e238c29cdd3d14befdb4a4e4815", size = 202706, upload-time = "2026-03-15T18:51:50.257Z" },
+    { url = "https://files.pythonhosted.org/packages/5e/bd/f736f7b9cc5e93a18b794a50346bb16fbfd6b37f99e8f306f7951d27c17c/charset_normalizer-3.4.6-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:7edbed096e4a4798710ed6bc75dcaa2a21b68b6c356553ac4823c3658d53743a", size = 202497, upload-time = "2026-03-15T18:51:52.012Z" },
+    { url = "https://files.pythonhosted.org/packages/9d/ba/2cc9e3e7dfdf7760a6ed8da7446d22536f3d0ce114ac63dee2a5a3599e62/charset_normalizer-3.4.6-cp314-cp314-musllinux_1_2_armv7l.whl", hash = "sha256:7f9019c9cb613f084481bd6a100b12e1547cf2efe362d873c2e31e4035a6fa43", size = 193511, upload-time = "2026-03-15T18:51:53.723Z" },
+    { url = "https://files.pythonhosted.org/packages/9e/cb/5be49b5f776e5613be07298c80e1b02a2d900f7a7de807230595c85a8b2e/charset_normalizer-3.4.6-cp314-cp314-musllinux_1_2_ppc64le.whl", hash = "sha256:58c948d0d086229efc484fe2f30c2d382c86720f55cd9bc33591774348ad44e0", size = 220133, upload-time = "2026-03-15T18:51:55.333Z" },
+    { url = "https://files.pythonhosted.org/packages/83/43/99f1b5dad345accb322c80c7821071554f791a95ee50c1c90041c157ae99/charset_normalizer-3.4.6-cp314-cp314-musllinux_1_2_riscv64.whl", hash = "sha256:419a9d91bd238052642a51938af8ac05da5b3343becde08d5cdeab9046df9ee1", size = 203035, upload-time = "2026-03-15T18:51:56.736Z" },
+    { url = "https://files.pythonhosted.org/packages/87/9a/62c2cb6a531483b55dddff1a68b3d891a8b498f3ca555fbcf2978e804d9d/charset_normalizer-3.4.6-cp314-cp314-musllinux_1_2_s390x.whl", hash = "sha256:5273b9f0b5835ff0350c0828faea623c68bfa65b792720c453e22b25cc72930f", size = 216321, upload-time = "2026-03-15T18:51:58.17Z" },
+    { url = "https://files.pythonhosted.org/packages/6e/79/94a010ff81e3aec7c293eb82c28f930918e517bc144c9906a060844462eb/charset_normalizer-3.4.6-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:0e901eb1049fdb80f5bd11ed5ea1e498ec423102f7a9b9e4645d5b8204ff2815", size = 208973, upload-time = "2026-03-15T18:51:59.998Z" },
+    { url = "https://files.pythonhosted.org/packages/2a/57/4ecff6d4ec8585342f0c71bc03efaa99cb7468f7c91a57b105bcd561cea8/charset_normalizer-3.4.6-cp314-cp314-win32.whl", hash = "sha256:b4ff1d35e8c5bd078be89349b6f3a845128e685e751b6ea1169cf2160b344c4d", size = 144610, upload-time = "2026-03-15T18:52:02.213Z" },
+    { url = "https://files.pythonhosted.org/packages/80/94/8434a02d9d7f168c25767c64671fead8d599744a05d6a6c877144c754246/charset_normalizer-3.4.6-cp314-cp314-win_amd64.whl", hash = "sha256:74119174722c4349af9708993118581686f343adc1c8c9c007d59be90d077f3f", size = 154962, upload-time = "2026-03-15T18:52:03.658Z" },
+    { url = "https://files.pythonhosted.org/packages/46/4c/48f2cdbfd923026503dfd67ccea45c94fd8fe988d9056b468579c66ed62b/charset_normalizer-3.4.6-cp314-cp314-win_arm64.whl", hash = "sha256:e5bcc1a1ae744e0bb59641171ae53743760130600da8db48cbb6e4918e186e4e", size = 143595, upload-time = "2026-03-15T18:52:05.123Z" },
+    { url = "https://files.pythonhosted.org/packages/31/93/8878be7569f87b14f1d52032946131bcb6ebbd8af3e20446bc04053dc3f1/charset_normalizer-3.4.6-cp314-cp314t-macosx_10_15_universal2.whl", hash = "sha256:ad8faf8df23f0378c6d527d8b0b15ea4a2e23c89376877c598c4870d1b2c7866", size = 314828, upload-time = "2026-03-15T18:52:06.831Z" },
+    { url = "https://files.pythonhosted.org/packages/06/b6/fae511ca98aac69ecc35cde828b0a3d146325dd03d99655ad38fc2cc3293/charset_normalizer-3.4.6-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:f5ea69428fa1b49573eef0cc44a1d43bebd45ad0c611eb7d7eac760c7ae771bc", size = 208138, upload-time = "2026-03-15T18:52:08.239Z" },
+    { url = "https://files.pythonhosted.org/packages/54/57/64caf6e1bf07274a1e0b7c160a55ee9e8c9ec32c46846ce59b9c333f7008/charset_normalizer-3.4.6-cp314-cp314t-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:06a7e86163334edfc5d20fe104db92fcd666e5a5df0977cb5680a506fe26cc8e", size = 224679, upload-time = "2026-03-15T18:52:10.043Z" },
+    { url = "https://files.pythonhosted.org/packages/aa/cb/9ff5a25b9273ef160861b41f6937f86fae18b0792fe0a8e75e06acb08f1d/charset_normalizer-3.4.6-cp314-cp314t-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:e1f6e2f00a6b8edb562826e4632e26d063ac10307e80f7461f7de3ad8ef3f077", size = 223475, upload-time = "2026-03-15T18:52:11.854Z" },
+    { url = "https://files.pythonhosted.org/packages/fc/97/440635fc093b8d7347502a377031f9605a1039c958f3cd18dcacffb37743/charset_normalizer-3.4.6-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:95b52c68d64c1878818687a473a10547b3292e82b6f6fe483808fb1468e2f52f", size = 215230, upload-time = "2026-03-15T18:52:13.325Z" },
+    { url = "https://files.pythonhosted.org/packages/cd/24/afff630feb571a13f07c8539fbb502d2ab494019492aaffc78ef41f1d1d0/charset_normalizer-3.4.6-cp314-cp314t-manylinux_2_31_armv7l.whl", hash = "sha256:7504e9b7dc05f99a9bbb4525c67a2c155073b44d720470a148b34166a69c054e", size = 199045, upload-time = "2026-03-15T18:52:14.752Z" },
+    { url = "https://files.pythonhosted.org/packages/e5/17/d1399ecdaf7e0498c327433e7eefdd862b41236a7e484355b8e0e5ebd64b/charset_normalizer-3.4.6-cp314-cp314t-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:172985e4ff804a7ad08eebec0a1640ece87ba5041d565fff23c8f99c1f389484", size = 211658, upload-time = "2026-03-15T18:52:16.278Z" },
+    { url = "https://files.pythonhosted.org/packages/b5/38/16baa0affb957b3d880e5ac2144caf3f9d7de7bc4a91842e447fbb5e8b67/charset_normalizer-3.4.6-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:4be9f4830ba8741527693848403e2c457c16e499100963ec711b1c6f2049b7c7", size = 210769, upload-time = "2026-03-15T18:52:17.782Z" },
+    { url = "https://files.pythonhosted.org/packages/05/34/c531bc6ac4c21da9ddfddb3107be2287188b3ea4b53b70fc58f2a77ac8d8/charset_normalizer-3.4.6-cp314-cp314t-musllinux_1_2_armv7l.whl", hash = "sha256:79090741d842f564b1b2827c0b82d846405b744d31e84f18d7a7b41c20e473ff", size = 201328, upload-time = "2026-03-15T18:52:19.553Z" },
+    { url = "https://files.pythonhosted.org/packages/fa/73/a5a1e9ca5f234519c1953608a03fe109c306b97fdfb25f09182babad51a7/charset_normalizer-3.4.6-cp314-cp314t-musllinux_1_2_ppc64le.whl", hash = "sha256:87725cfb1a4f1f8c2fc9890ae2f42094120f4b44db9360be5d99a4c6b0e03a9e", size = 225302, upload-time = "2026-03-15T18:52:21.043Z" },
+    { url = "https://files.pythonhosted.org/packages/ba/f6/cd782923d112d296294dea4bcc7af5a7ae0f86ab79f8fefbda5526b6cfc0/charset_normalizer-3.4.6-cp314-cp314t-musllinux_1_2_riscv64.whl", hash = "sha256:fcce033e4021347d80ed9c66dcf1e7b1546319834b74445f561d2e2221de5659", size = 211127, upload-time = "2026-03-15T18:52:22.491Z" },
+    { url = "https://files.pythonhosted.org/packages/0e/c5/0b6898950627af7d6103a449b22320372c24c6feda91aa24e201a478d161/charset_normalizer-3.4.6-cp314-cp314t-musllinux_1_2_s390x.whl", hash = "sha256:ca0276464d148c72defa8bb4390cce01b4a0e425f3b50d1435aa6d7a18107602", size = 222840, upload-time = "2026-03-15T18:52:24.113Z" },
+    { url = "https://files.pythonhosted.org/packages/7d/25/c4bba773bef442cbdc06111d40daa3de5050a676fa26e85090fc54dd12f0/charset_normalizer-3.4.6-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:197c1a244a274bb016dd8b79204850144ef77fe81c5b797dc389327adb552407", size = 216890, upload-time = "2026-03-15T18:52:25.541Z" },
+    { url = "https://files.pythonhosted.org/packages/35/1a/05dacadb0978da72ee287b0143097db12f2e7e8d3ffc4647da07a383b0b7/charset_normalizer-3.4.6-cp314-cp314t-win32.whl", hash = "sha256:2a24157fa36980478dd1770b585c0f30d19e18f4fb0c47c13aa568f871718579", size = 155379, upload-time = "2026-03-15T18:52:27.05Z" },
+    { url = "https://files.pythonhosted.org/packages/5d/7a/d269d834cb3a76291651256f3b9a5945e81d0a49ab9f4a498964e83c0416/charset_normalizer-3.4.6-cp314-cp314t-win_amd64.whl", hash = "sha256:cd5e2801c89992ed8c0a3f0293ae83c159a60d9a5d685005383ef4caca77f2c4", size = 169043, upload-time = "2026-03-15T18:52:28.502Z" },
+    { url = "https://files.pythonhosted.org/packages/23/06/28b29fba521a37a8932c6a84192175c34d49f84a6d4773fa63d05f9aff22/charset_normalizer-3.4.6-cp314-cp314t-win_arm64.whl", hash = "sha256:47955475ac79cc504ef2704b192364e51d0d473ad452caedd0002605f780101c", size = 148523, upload-time = "2026-03-15T18:52:29.956Z" },
+    { url = "https://files.pythonhosted.org/packages/2a/68/687187c7e26cb24ccbd88e5069f5ef00eba804d36dde11d99aad0838ab45/charset_normalizer-3.4.6-py3-none-any.whl", hash = "sha256:947cf925bc916d90adba35a64c82aace04fa39b46b52d4630ece166655905a69", size = 61455, upload-time = "2026-03-15T18:53:23.833Z" },
+]
+
 [[package]]
 name = "click"
 version = "8.3.1"
@@ -180,6 +434,147 @@ toml = [
     { name = "tomli", marker = "python_full_version <= '3.11'" },
 ]
 
+[[package]]
+name = "cryptography"
+version = "46.0.5"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "cffi", marker = "platform_python_implementation != 'PyPy'" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/60/04/ee2a9e8542e4fa2773b81771ff8349ff19cdd56b7258a0cc442639052edb/cryptography-46.0.5.tar.gz", hash = "sha256:abace499247268e3757271b2f1e244b36b06f8515cf27c4d49468fc9eb16e93d", size = 750064, upload-time = "2026-02-10T19:18:38.255Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/f7/81/b0bb27f2ba931a65409c6b8a8b358a7f03c0e46eceacddff55f7c84b1f3b/cryptography-46.0.5-cp311-abi3-macosx_10_9_universal2.whl", hash = "sha256:351695ada9ea9618b3500b490ad54c739860883df6c1f555e088eaf25b1bbaad", size = 7176289, upload-time = "2026-02-10T19:17:08.274Z" },
+    { url = "https://files.pythonhosted.org/packages/ff/9e/6b4397a3e3d15123de3b1806ef342522393d50736c13b20ec4c9ea6693a6/cryptography-46.0.5-cp311-abi3-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:c18ff11e86df2e28854939acde2d003f7984f721eba450b56a200ad90eeb0e6b", size = 4275637, upload-time = "2026-02-10T19:17:10.53Z" },
+    { url = "https://files.pythonhosted.org/packages/63/e7/471ab61099a3920b0c77852ea3f0ea611c9702f651600397ac567848b897/cryptography-46.0.5-cp311-abi3-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:4d7e3d356b8cd4ea5aff04f129d5f66ebdc7b6f8eae802b93739ed520c47c79b", size = 4424742, upload-time = "2026-02-10T19:17:12.388Z" },
+    { url = "https://files.pythonhosted.org/packages/37/53/a18500f270342d66bf7e4d9f091114e31e5ee9e7375a5aba2e85a91e0044/cryptography-46.0.5-cp311-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:50bfb6925eff619c9c023b967d5b77a54e04256c4281b0e21336a130cd7fc263", size = 4277528, upload-time = "2026-02-10T19:17:13.853Z" },
+    { url = "https://files.pythonhosted.org/packages/22/29/c2e812ebc38c57b40e7c583895e73c8c5adb4d1e4a0cc4c5a4fdab2b1acc/cryptography-46.0.5-cp311-abi3-manylinux_2_28_ppc64le.whl", hash = "sha256:803812e111e75d1aa73690d2facc295eaefd4439be1023fefc4995eaea2af90d", size = 4947993, upload-time = "2026-02-10T19:17:15.618Z" },
+    { url = "https://files.pythonhosted.org/packages/6b/e7/237155ae19a9023de7e30ec64e5d99a9431a567407ac21170a046d22a5a3/cryptography-46.0.5-cp311-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:3ee190460e2fbe447175cda91b88b84ae8322a104fc27766ad09428754a618ed", size = 4456855, upload-time = "2026-02-10T19:17:17.221Z" },
+    { url = "https://files.pythonhosted.org/packages/2d/87/fc628a7ad85b81206738abbd213b07702bcbdada1dd43f72236ef3cffbb5/cryptography-46.0.5-cp311-abi3-manylinux_2_31_armv7l.whl", hash = "sha256:f145bba11b878005c496e93e257c1e88f154d278d2638e6450d17e0f31e558d2", size = 3984635, upload-time = "2026-02-10T19:17:18.792Z" },
+    { url = "https://files.pythonhosted.org/packages/84/29/65b55622bde135aedf4565dc509d99b560ee4095e56989e815f8fd2aa910/cryptography-46.0.5-cp311-abi3-manylinux_2_34_aarch64.whl", hash = "sha256:e9251e3be159d1020c4030bd2e5f84d6a43fe54b6c19c12f51cde9542a2817b2", size = 4277038, upload-time = "2026-02-10T19:17:20.256Z" },
+    { url = "https://files.pythonhosted.org/packages/bc/36/45e76c68d7311432741faf1fbf7fac8a196a0a735ca21f504c75d37e2558/cryptography-46.0.5-cp311-abi3-manylinux_2_34_ppc64le.whl", hash = "sha256:47fb8a66058b80e509c47118ef8a75d14c455e81ac369050f20ba0d23e77fee0", size = 4912181, upload-time = "2026-02-10T19:17:21.825Z" },
+    { url = "https://files.pythonhosted.org/packages/6d/1a/c1ba8fead184d6e3d5afcf03d569acac5ad063f3ac9fb7258af158f7e378/cryptography-46.0.5-cp311-abi3-manylinux_2_34_x86_64.whl", hash = "sha256:4c3341037c136030cb46e4b1e17b7418ea4cbd9dd207e4a6f3b2b24e0d4ac731", size = 4456482, upload-time = "2026-02-10T19:17:25.133Z" },
+    { url = "https://files.pythonhosted.org/packages/f9/e5/3fb22e37f66827ced3b902cf895e6a6bc1d095b5b26be26bd13c441fdf19/cryptography-46.0.5-cp311-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:890bcb4abd5a2d3f852196437129eb3667d62630333aacc13dfd470fad3aaa82", size = 4405497, upload-time = "2026-02-10T19:17:26.66Z" },
+    { url = "https://files.pythonhosted.org/packages/1a/df/9d58bb32b1121a8a2f27383fabae4d63080c7ca60b9b5c88be742be04ee7/cryptography-46.0.5-cp311-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:80a8d7bfdf38f87ca30a5391c0c9ce4ed2926918e017c29ddf643d0ed2778ea1", size = 4667819, upload-time = "2026-02-10T19:17:28.569Z" },
+    { url = "https://files.pythonhosted.org/packages/ea/ed/325d2a490c5e94038cdb0117da9397ece1f11201f425c4e9c57fe5b9f08b/cryptography-46.0.5-cp311-abi3-win32.whl", hash = "sha256:60ee7e19e95104d4c03871d7d7dfb3d22ef8a9b9c6778c94e1c8fcc8365afd48", size = 3028230, upload-time = "2026-02-10T19:17:30.518Z" },
+    { url = "https://files.pythonhosted.org/packages/e9/5a/ac0f49e48063ab4255d9e3b79f5def51697fce1a95ea1370f03dc9db76f6/cryptography-46.0.5-cp311-abi3-win_amd64.whl", hash = "sha256:38946c54b16c885c72c4f59846be9743d699eee2b69b6988e0a00a01f46a61a4", size = 3480909, upload-time = "2026-02-10T19:17:32.083Z" },
+    { url = "https://files.pythonhosted.org/packages/00/13/3d278bfa7a15a96b9dc22db5a12ad1e48a9eb3d40e1827ef66a5df75d0d0/cryptography-46.0.5-cp314-cp314t-macosx_10_9_universal2.whl", hash = "sha256:94a76daa32eb78d61339aff7952ea819b1734b46f73646a07decb40e5b3448e2", size = 7119287, upload-time = "2026-02-10T19:17:33.801Z" },
+    { url = "https://files.pythonhosted.org/packages/67/c8/581a6702e14f0898a0848105cbefd20c058099e2c2d22ef4e476dfec75d7/cryptography-46.0.5-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:5be7bf2fb40769e05739dd0046e7b26f9d4670badc7b032d6ce4db64dddc0678", size = 4265728, upload-time = "2026-02-10T19:17:35.569Z" },
+    { url = "https://files.pythonhosted.org/packages/dd/4a/ba1a65ce8fc65435e5a849558379896c957870dd64fecea97b1ad5f46a37/cryptography-46.0.5-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:fe346b143ff9685e40192a4960938545c699054ba11d4f9029f94751e3f71d87", size = 4408287, upload-time = "2026-02-10T19:17:36.938Z" },
+    { url = "https://files.pythonhosted.org/packages/f8/67/8ffdbf7b65ed1ac224d1c2df3943553766914a8ca718747ee3871da6107e/cryptography-46.0.5-cp314-cp314t-manylinux_2_28_aarch64.whl", hash = "sha256:c69fd885df7d089548a42d5ec05be26050ebcd2283d89b3d30676eb32ff87dee", size = 4270291, upload-time = "2026-02-10T19:17:38.748Z" },
+    { url = "https://files.pythonhosted.org/packages/f8/e5/f52377ee93bc2f2bba55a41a886fd208c15276ffbd2569f2ddc89d50e2c5/cryptography-46.0.5-cp314-cp314t-manylinux_2_28_ppc64le.whl", hash = "sha256:8293f3dea7fc929ef7240796ba231413afa7b68ce38fd21da2995549f5961981", size = 4927539, upload-time = "2026-02-10T19:17:40.241Z" },
+    { url = "https://files.pythonhosted.org/packages/3b/02/cfe39181b02419bbbbcf3abdd16c1c5c8541f03ca8bda240debc467d5a12/cryptography-46.0.5-cp314-cp314t-manylinux_2_28_x86_64.whl", hash = "sha256:1abfdb89b41c3be0365328a410baa9df3ff8a9110fb75e7b52e66803ddabc9a9", size = 4442199, upload-time = "2026-02-10T19:17:41.789Z" },
+    { url = "https://files.pythonhosted.org/packages/c0/96/2fcaeb4873e536cf71421a388a6c11b5bc846e986b2b069c79363dc1648e/cryptography-46.0.5-cp314-cp314t-manylinux_2_31_armv7l.whl", hash = "sha256:d66e421495fdb797610a08f43b05269e0a5ea7f5e652a89bfd5a7d3c1dee3648", size = 3960131, upload-time = "2026-02-10T19:17:43.379Z" },
+    { url = "https://files.pythonhosted.org/packages/d8/d2/b27631f401ddd644e94c5cf33c9a4069f72011821cf3dc7309546b0642a0/cryptography-46.0.5-cp314-cp314t-manylinux_2_34_aarch64.whl", hash = "sha256:4e817a8920bfbcff8940ecfd60f23d01836408242b30f1a708d93198393a80b4", size = 4270072, upload-time = "2026-02-10T19:17:45.481Z" },
+    { url = "https://files.pythonhosted.org/packages/f4/a7/60d32b0370dae0b4ebe55ffa10e8599a2a59935b5ece1b9f06edb73abdeb/cryptography-46.0.5-cp314-cp314t-manylinux_2_34_ppc64le.whl", hash = "sha256:68f68d13f2e1cb95163fa3b4db4bf9a159a418f5f6e7242564fc75fcae667fd0", size = 4892170, upload-time = "2026-02-10T19:17:46.997Z" },
+    { url = "https://files.pythonhosted.org/packages/d2/b9/cf73ddf8ef1164330eb0b199a589103c363afa0cf794218c24d524a58eab/cryptography-46.0.5-cp314-cp314t-manylinux_2_34_x86_64.whl", hash = "sha256:a3d1fae9863299076f05cb8a778c467578262fae09f9dc0ee9b12eb4268ce663", size = 4441741, upload-time = "2026-02-10T19:17:48.661Z" },
+    { url = "https://files.pythonhosted.org/packages/5f/eb/eee00b28c84c726fe8fa0158c65afe312d9c3b78d9d01daf700f1f6e37ff/cryptography-46.0.5-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:c4143987a42a2397f2fc3b4d7e3a7d313fbe684f67ff443999e803dd75a76826", size = 4396728, upload-time = "2026-02-10T19:17:50.058Z" },
+    { url = "https://files.pythonhosted.org/packages/65/f4/6bc1a9ed5aef7145045114b75b77c2a8261b4d38717bd8dea111a63c3442/cryptography-46.0.5-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:7d731d4b107030987fd61a7f8ab512b25b53cef8f233a97379ede116f30eb67d", size = 4652001, upload-time = "2026-02-10T19:17:51.54Z" },
+    { url = "https://files.pythonhosted.org/packages/86/ef/5d00ef966ddd71ac2e6951d278884a84a40ffbd88948ef0e294b214ae9e4/cryptography-46.0.5-cp314-cp314t-win32.whl", hash = "sha256:c3bcce8521d785d510b2aad26ae2c966092b7daa8f45dd8f44734a104dc0bc1a", size = 3003637, upload-time = "2026-02-10T19:17:52.997Z" },
+    { url = "https://files.pythonhosted.org/packages/b7/57/f3f4160123da6d098db78350fdfd9705057aad21de7388eacb2401dceab9/cryptography-46.0.5-cp314-cp314t-win_amd64.whl", hash = "sha256:4d8ae8659ab18c65ced284993c2265910f6c9e650189d4e3f68445ef82a810e4", size = 3469487, upload-time = "2026-02-10T19:17:54.549Z" },
+    { url = "https://files.pythonhosted.org/packages/e2/fa/a66aa722105ad6a458bebd64086ca2b72cdd361fed31763d20390f6f1389/cryptography-46.0.5-cp38-abi3-macosx_10_9_universal2.whl", hash = "sha256:4108d4c09fbbf2789d0c926eb4152ae1760d5a2d97612b92d508d96c861e4d31", size = 7170514, upload-time = "2026-02-10T19:17:56.267Z" },
+    { url = "https://files.pythonhosted.org/packages/0f/04/c85bdeab78c8bc77b701bf0d9bdcf514c044e18a46dcff330df5448631b0/cryptography-46.0.5-cp38-abi3-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:7d1f30a86d2757199cb2d56e48cce14deddf1f9c95f1ef1b64ee91ea43fe2e18", size = 4275349, upload-time = "2026-02-10T19:17:58.419Z" },
+    { url = "https://files.pythonhosted.org/packages/5c/32/9b87132a2f91ee7f5223b091dc963055503e9b442c98fc0b8a5ca765fab0/cryptography-46.0.5-cp38-abi3-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:039917b0dc418bb9f6edce8a906572d69e74bd330b0b3fea4f79dab7f8ddd235", size = 4420667, upload-time = "2026-02-10T19:18:00.619Z" },
+    { url = "https://files.pythonhosted.org/packages/a1/a6/a7cb7010bec4b7c5692ca6f024150371b295ee1c108bdc1c400e4c44562b/cryptography-46.0.5-cp38-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:ba2a27ff02f48193fc4daeadf8ad2590516fa3d0adeeb34336b96f7fa64c1e3a", size = 4276980, upload-time = "2026-02-10T19:18:02.379Z" },
+    { url = "https://files.pythonhosted.org/packages/8e/7c/c4f45e0eeff9b91e3f12dbd0e165fcf2a38847288fcfd889deea99fb7b6d/cryptography-46.0.5-cp38-abi3-manylinux_2_28_ppc64le.whl", hash = "sha256:61aa400dce22cb001a98014f647dc21cda08f7915ceb95df0c9eaf84b4b6af76", size = 4939143, upload-time = "2026-02-10T19:18:03.964Z" },
+    { url = "https://files.pythonhosted.org/packages/37/19/e1b8f964a834eddb44fa1b9a9976f4e414cbb7aa62809b6760c8803d22d1/cryptography-46.0.5-cp38-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:3ce58ba46e1bc2aac4f7d9290223cead56743fa6ab94a5d53292ffaac6a91614", size = 4453674, upload-time = "2026-02-10T19:18:05.588Z" },
+    { url = "https://files.pythonhosted.org/packages/db/ed/db15d3956f65264ca204625597c410d420e26530c4e2943e05a0d2f24d51/cryptography-46.0.5-cp38-abi3-manylinux_2_31_armv7l.whl", hash = "sha256:420d0e909050490d04359e7fdb5ed7e667ca5c3c402b809ae2563d7e66a92229", size = 3978801, upload-time = "2026-02-10T19:18:07.167Z" },
+    { url = "https://files.pythonhosted.org/packages/41/e2/df40a31d82df0a70a0daf69791f91dbb70e47644c58581d654879b382d11/cryptography-46.0.5-cp38-abi3-manylinux_2_34_aarch64.whl", hash = "sha256:582f5fcd2afa31622f317f80426a027f30dc792e9c80ffee87b993200ea115f1", size = 4276755, upload-time = "2026-02-10T19:18:09.813Z" },
+    { url = "https://files.pythonhosted.org/packages/33/45/726809d1176959f4a896b86907b98ff4391a8aa29c0aaaf9450a8a10630e/cryptography-46.0.5-cp38-abi3-manylinux_2_34_ppc64le.whl", hash = "sha256:bfd56bb4b37ed4f330b82402f6f435845a5f5648edf1ad497da51a8452d5d62d", size = 4901539, upload-time = "2026-02-10T19:18:11.263Z" },
+    { url = "https://files.pythonhosted.org/packages/99/0f/a3076874e9c88ecb2ecc31382f6e7c21b428ede6f55aafa1aa272613e3cd/cryptography-46.0.5-cp38-abi3-manylinux_2_34_x86_64.whl", hash = "sha256:a3d507bb6a513ca96ba84443226af944b0f7f47dcc9a399d110cd6146481d24c", size = 4452794, upload-time = "2026-02-10T19:18:12.914Z" },
+    { url = "https://files.pythonhosted.org/packages/02/ef/ffeb542d3683d24194a38f66ca17c0a4b8bf10631feef44a7ef64e631b1a/cryptography-46.0.5-cp38-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:9f16fbdf4da055efb21c22d81b89f155f02ba420558db21288b3d0035bafd5f4", size = 4404160, upload-time = "2026-02-10T19:18:14.375Z" },
+    { url = "https://files.pythonhosted.org/packages/96/93/682d2b43c1d5f1406ed048f377c0fc9fc8f7b0447a478d5c65ab3d3a66eb/cryptography-46.0.5-cp38-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:ced80795227d70549a411a4ab66e8ce307899fad2220ce5ab2f296e687eacde9", size = 4667123, upload-time = "2026-02-10T19:18:15.886Z" },
+    { url = "https://files.pythonhosted.org/packages/45/2d/9c5f2926cb5300a8eefc3f4f0b3f3df39db7f7ce40c8365444c49363cbda/cryptography-46.0.5-cp38-abi3-win32.whl", hash = "sha256:02f547fce831f5096c9a567fd41bc12ca8f11df260959ecc7c3202555cc47a72", size = 3010220, upload-time = "2026-02-10T19:18:17.361Z" },
+    { url = "https://files.pythonhosted.org/packages/48/ef/0c2f4a8e31018a986949d34a01115dd057bf536905dca38897bacd21fac3/cryptography-46.0.5-cp38-abi3-win_amd64.whl", hash = "sha256:556e106ee01aa13484ce9b0239bca667be5004efb0aabbed28d353df86445595", size = 3467050, upload-time = "2026-02-10T19:18:18.899Z" },
+    { url = "https://files.pythonhosted.org/packages/eb/dd/2d9fdb07cebdf3d51179730afb7d5e576153c6744c3ff8fded23030c204e/cryptography-46.0.5-pp311-pypy311_pp73-macosx_11_0_arm64.whl", hash = "sha256:3b4995dc971c9fb83c25aa44cf45f02ba86f71ee600d81091c2f0cbae116b06c", size = 3476964, upload-time = "2026-02-10T19:18:20.687Z" },
+    { url = "https://files.pythonhosted.org/packages/e9/6f/6cc6cc9955caa6eaf83660b0da2b077c7fe8ff9950a3c5e45d605038d439/cryptography-46.0.5-pp311-pypy311_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:bc84e875994c3b445871ea7181d424588171efec3e185dced958dad9e001950a", size = 4218321, upload-time = "2026-02-10T19:18:22.349Z" },
+    { url = "https://files.pythonhosted.org/packages/3e/5d/c4da701939eeee699566a6c1367427ab91a8b7088cc2328c09dbee940415/cryptography-46.0.5-pp311-pypy311_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:2ae6971afd6246710480e3f15824ed3029a60fc16991db250034efd0b9fb4356", size = 4381786, upload-time = "2026-02-10T19:18:24.529Z" },
+    { url = "https://files.pythonhosted.org/packages/ac/97/a538654732974a94ff96c1db621fa464f455c02d4bb7d2652f4edc21d600/cryptography-46.0.5-pp311-pypy311_pp73-manylinux_2_34_aarch64.whl", hash = "sha256:d861ee9e76ace6cf36a6a89b959ec08e7bc2493ee39d07ffe5acb23ef46d27da", size = 4217990, upload-time = "2026-02-10T19:18:25.957Z" },
+    { url = "https://files.pythonhosted.org/packages/ae/11/7e500d2dd3ba891197b9efd2da5454b74336d64a7cc419aa7327ab74e5f6/cryptography-46.0.5-pp311-pypy311_pp73-manylinux_2_34_x86_64.whl", hash = "sha256:2b7a67c9cd56372f3249b39699f2ad479f6991e62ea15800973b956f4b73e257", size = 4381252, upload-time = "2026-02-10T19:18:27.496Z" },
+    { url = "https://files.pythonhosted.org/packages/bc/58/6b3d24e6b9bc474a2dcdee65dfd1f008867015408a271562e4b690561a4d/cryptography-46.0.5-pp311-pypy311_pp73-win_amd64.whl", hash = "sha256:8456928655f856c6e1533ff59d5be76578a7157224dbd9ce6872f25055ab9ab7", size = 3407605, upload-time = "2026-02-10T19:18:29.233Z" },
+]
+
+[[package]]
+name = "distlib"
+version = "0.4.0"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/96/8e/709914eb2b5749865801041647dc7f4e6d00b549cfe88b65ca192995f07c/distlib-0.4.0.tar.gz", hash = "sha256:feec40075be03a04501a973d81f633735b4b69f98b05450592310c0f401a4e0d", size = 614605, upload-time = "2025-07-17T16:52:00.465Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/33/6b/e0547afaf41bf2c42e52430072fa5658766e3d65bd4b03a563d1b6336f57/distlib-0.4.0-py2.py3-none-any.whl", hash = "sha256:9659f7d87e46584a30b5780e43ac7a2143098441670ff0a49d5f9034c54a6c16", size = 469047, upload-time = "2025-07-17T16:51:58.613Z" },
+]
+
+[[package]]
+name = "distro"
+version = "1.9.0"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/fc/f8/98eea607f65de6527f8a2e8885fc8015d3e6f5775df186e443e0964a11c3/distro-1.9.0.tar.gz", hash = "sha256:2fa77c6fd8940f116ee1d6b94a2f90b13b5ea8d019b98bc8bafdcabcdd9bdbed", size = 60722, upload-time = "2023-12-24T09:54:32.31Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/12/b3/231ffd4ab1fc9d679809f356cebee130ac7daa00d6d6f3206dd4fd137e9e/distro-1.9.0-py3-none-any.whl", hash = "sha256:7bffd925d65168f85027d8da9af6bddab658135b840670a223589bc0c8ef02b2", size = 20277, upload-time = "2023-12-24T09:54:30.421Z" },
+]
+
+[[package]]
+name = "filelock"
+version = "3.25.2"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/94/b8/00651a0f559862f3bb7d6f7477b192afe3f583cc5e26403b44e59a55ab34/filelock-3.25.2.tar.gz", hash = "sha256:b64ece2b38f4ca29dd3e810287aa8c48182bbecd1ae6e9ae126c9b35f1382694", size = 40480, upload-time = "2026-03-11T20:45:38.487Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/a4/a5/842ae8f0c08b61d6484b52f99a03510a3a72d23141942d216ebe81fefbce/filelock-3.25.2-py3-none-any.whl", hash = "sha256:ca8afb0da15f229774c9ad1b455ed96e85a81373065fb10446672f64444ddf70", size = 26759, upload-time = "2026-03-11T20:45:37.437Z" },
+]
+
+[[package]]
+name = "h11"
+version = "0.16.0"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/01/ee/02a2c011bdab74c6fb3c75474d40b3052059d95df7e73351460c8588d963/h11-0.16.0.tar.gz", hash = "sha256:4e35b956cf45792e4caa5885e69fba00bdbc6ffafbfa020300e549b208ee5ff1", size = 101250, upload-time = "2025-04-24T03:35:25.427Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/04/4b/29cac41a4d98d144bf5f6d33995617b185d14b22401f75ca86f384e87ff1/h11-0.16.0-py3-none-any.whl", hash = "sha256:63cf8bbe7522de3bf65932fda1d9c2772064ffb3dae62d55932da54b31cb6c86", size = 37515, upload-time = "2025-04-24T03:35:24.344Z" },
+]
+
+[[package]]
+name = "httpcore"
+version = "1.0.9"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "certifi" },
+    { name = "h11" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/06/94/82699a10bca87a5556c9c59b5963f2d039dbd239f25bc2a63907a05a14cb/httpcore-1.0.9.tar.gz", hash = "sha256:6e34463af53fd2ab5d807f399a9b45ea31c3dfa2276f15a2c3f00afff6e176e8", size = 85484, upload-time = "2025-04-24T22:06:22.219Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/7e/f5/f66802a942d491edb555dd61e3a9961140fd64c90bce1eafd741609d334d/httpcore-1.0.9-py3-none-any.whl", hash = "sha256:2d400746a40668fc9dec9810239072b40b4484b640a8c38fd654a024c7a1bf55", size = 78784, upload-time = "2025-04-24T22:06:20.566Z" },
+]
+
+[[package]]
+name = "httpx"
+version = "0.28.1"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "anyio" },
+    { name = "certifi" },
+    { name = "httpcore" },
+    { name = "idna" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/b1/df/48c586a5fe32a0f01324ee087459e112ebb7224f646c0b5023f5e79e9956/httpx-0.28.1.tar.gz", hash = "sha256:75e98c5f16b0f35b567856f597f06ff2270a374470a5c2392242528e3e3e42fc", size = 141406, upload-time = "2024-12-06T15:37:23.222Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/2a/39/e50c7c3a983047577ee07d2a9e53faf5a69493943ec3f6a384bdc792deb2/httpx-0.28.1-py3-none-any.whl", hash = "sha256:d909fcccc110f8c7faf814ca82a9a4d816bc5a6dbfea25d6591d6985b8ba59ad", size = 73517, upload-time = "2024-12-06T15:37:21.509Z" },
+]
+
+[[package]]
+name = "identify"
+version = "2.6.18"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/46/c4/7fb4db12296cdb11893d61c92048fe617ee853f8523b9b296ac03b43757e/identify-2.6.18.tar.gz", hash = "sha256:873ac56a5e3fd63e7438a7ecbc4d91aca692eb3fefa4534db2b7913f3fc352fd", size = 99580, upload-time = "2026-03-15T18:39:50.319Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/46/33/92ef41c6fad0233e41d3d84ba8e8ad18d1780f1e5d99b3c683e6d7f98b63/identify-2.6.18-py2.py3-none-any.whl", hash = "sha256:8db9d3c8ea9079db92cafb0ebf97abdc09d52e97f4dcf773a2e694048b7cd737", size = 99394, upload-time = "2026-03-15T18:39:48.915Z" },
+]
+
+[[package]]
+name = "idna"
+version = "3.11"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/6f/6d/0703ccc57f3a7233505399edb88de3cbd678da106337b9fcde432b65ed60/idna-3.11.tar.gz", hash = "sha256:795dafcc9c04ed0c1fb032c2aa73654d8e8c5023a7df64a53f39190ada629902", size = 194582, upload-time = "2025-10-12T14:55:20.501Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/0e/61/66938bbb5fc52dbdf84594873d5b51fb1f7c7794e9c0f5bd885f30bc507b/idna-3.11-py3-none-any.whl", hash = "sha256:771a87f49d9defaf64091e6e6fe9c18d4833f140bd19464795bc32d966ca37ea", size = 71008, upload-time = "2025-10-12T14:55:18.883Z" },
+]
+
 [[package]]
 name = "iniconfig"
 version = "2.3.0"
@@ -189,6 +584,100 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/cb/b1/3846dd7f199d53cb17f49cba7e651e9ce294d8497c8c150530ed11865bb8/iniconfig-2.3.0-py3-none-any.whl", hash = "sha256:f631c04d2c48c52b84d0d0549c99ff3859c98df65b3101406327ecc7d53fbf12", size = 7484, upload-time = "2025-10-18T21:55:41.639Z" },
 ]
 
+[[package]]
+name = "isodate"
+version = "0.7.2"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/54/4d/e940025e2ce31a8ce1202635910747e5a87cc3a6a6bb2d00973375014749/isodate-0.7.2.tar.gz", hash = "sha256:4cd1aa0f43ca76f4a6c6c0292a85f40b35ec2e43e315b59f06e6d32171a953e6", size = 29705, upload-time = "2024-10-08T23:04:11.5Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/15/aa/0aca39a37d3c7eb941ba736ede56d689e7be91cab5d9ca846bde3999eba6/isodate-0.7.2-py3-none-any.whl", hash = "sha256:28009937d8031054830160fce6d409ed342816b543597cece116d966c6d99e15", size = 22320, upload-time = "2024-10-08T23:04:09.501Z" },
+]
+
+[[package]]
+name = "jiter"
+version = "0.13.0"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/0d/5e/4ec91646aee381d01cdb9974e30882c9cd3b8c5d1079d6b5ff4af522439a/jiter-0.13.0.tar.gz", hash = "sha256:f2839f9c2c7e2dffc1bc5929a510e14ce0a946be9365fd1219e7ef342dae14f4", size = 164847, upload-time = "2026-02-02T12:37:56.441Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/71/29/499f8c9eaa8a16751b1c0e45e6f5f1761d180da873d417996cc7bddc8eef/jiter-0.13.0-cp311-cp311-macosx_10_12_x86_64.whl", hash = "sha256:ea026e70a9a28ebbdddcbcf0f1323128a8db66898a06eaad3a4e62d2f554d096", size = 311157, upload-time = "2026-02-02T12:35:37.758Z" },
+    { url = "https://files.pythonhosted.org/packages/50/f6/566364c777d2ab450b92100bea11333c64c38d32caf8dc378b48e5b20c46/jiter-0.13.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:66aa3e663840152d18cc8ff1e4faad3dd181373491b9cfdc6004b92198d67911", size = 319729, upload-time = "2026-02-02T12:35:39.246Z" },
+    { url = "https://files.pythonhosted.org/packages/73/dd/560f13ec5e4f116d8ad2658781646cca91b617ae3b8758d4a5076b278f70/jiter-0.13.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c3524798e70655ff19aec58c7d05adb1f074fecff62da857ea9be2b908b6d701", size = 354766, upload-time = "2026-02-02T12:35:40.662Z" },
+    { url = "https://files.pythonhosted.org/packages/7c/0d/061faffcfe94608cbc28a0d42a77a74222bdf5055ccdbe5fd2292b94f510/jiter-0.13.0-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:ec7e287d7fbd02cb6e22f9a00dd9c9cd504c40a61f2c61e7e1f9690a82726b4c", size = 362587, upload-time = "2026-02-02T12:35:42.025Z" },
+    { url = "https://files.pythonhosted.org/packages/92/c9/c66a7864982fd38a9773ec6e932e0398d1262677b8c60faecd02ffb67bf3/jiter-0.13.0-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:47455245307e4debf2ce6c6e65a717550a0244231240dcf3b8f7d64e4c2f22f4", size = 487537, upload-time = "2026-02-02T12:35:43.459Z" },
+    { url = "https://files.pythonhosted.org/packages/6c/86/84eb4352cd3668f16d1a88929b5888a3fe0418ea8c1dfc2ad4e7bf6e069a/jiter-0.13.0-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:ee9da221dca6e0429c2704c1b3655fe7b025204a71d4d9b73390c759d776d165", size = 373717, upload-time = "2026-02-02T12:35:44.928Z" },
+    { url = "https://files.pythonhosted.org/packages/6e/09/9fe4c159358176f82d4390407a03f506a8659ed13ca3ac93a843402acecf/jiter-0.13.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:24ab43126d5e05f3d53a36a8e11eb2f23304c6c1117844aaaf9a0aa5e40b5018", size = 362683, upload-time = "2026-02-02T12:35:46.636Z" },
+    { url = "https://files.pythonhosted.org/packages/c9/5e/85f3ab9caca0c1d0897937d378b4a515cae9e119730563572361ea0c48ae/jiter-0.13.0-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:9da38b4fedde4fb528c740c2564628fbab737166a0e73d6d46cb4bb5463ff411", size = 392345, upload-time = "2026-02-02T12:35:48.088Z" },
+    { url = "https://files.pythonhosted.org/packages/12/4c/05b8629ad546191939e6f0c2f17e29f542a398f4a52fb987bc70b6d1eb8b/jiter-0.13.0-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:0b34c519e17658ed88d5047999a93547f8889f3c1824120c26ad6be5f27b6cf5", size = 517775, upload-time = "2026-02-02T12:35:49.482Z" },
+    { url = "https://files.pythonhosted.org/packages/4d/88/367ea2eb6bc582c7052e4baf5ddf57ebe5ab924a88e0e09830dfb585c02d/jiter-0.13.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:d2a6394e6af690d462310a86b53c47ad75ac8c21dc79f120714ea449979cb1d3", size = 551325, upload-time = "2026-02-02T12:35:51.104Z" },
+    { url = "https://files.pythonhosted.org/packages/f3/12/fa377ffb94a2f28c41afaed093e0d70cfe512035d5ecb0cad0ae4792d35e/jiter-0.13.0-cp311-cp311-win32.whl", hash = "sha256:0f0c065695f616a27c920a56ad0d4fc46415ef8b806bf8fc1cacf25002bd24e1", size = 204709, upload-time = "2026-02-02T12:35:52.467Z" },
+    { url = "https://files.pythonhosted.org/packages/cb/16/8e8203ce92f844dfcd3d9d6a5a7322c77077248dbb12da52d23193a839cd/jiter-0.13.0-cp311-cp311-win_amd64.whl", hash = "sha256:0733312953b909688ae3c2d58d043aa040f9f1a6a75693defed7bc2cc4bf2654", size = 204560, upload-time = "2026-02-02T12:35:53.925Z" },
+    { url = "https://files.pythonhosted.org/packages/44/26/97cc40663deb17b9e13c3a5cf29251788c271b18ee4d262c8f94798b8336/jiter-0.13.0-cp311-cp311-win_arm64.whl", hash = "sha256:5d9b34ad56761b3bf0fbe8f7e55468704107608512350962d3317ffd7a4382d5", size = 189608, upload-time = "2026-02-02T12:35:55.304Z" },
+    { url = "https://files.pythonhosted.org/packages/2e/30/7687e4f87086829955013ca12a9233523349767f69653ebc27036313def9/jiter-0.13.0-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:0a2bd69fc1d902e89925fc34d1da51b2128019423d7b339a45d9e99c894e0663", size = 307958, upload-time = "2026-02-02T12:35:57.165Z" },
+    { url = "https://files.pythonhosted.org/packages/c3/27/e57f9a783246ed95481e6749cc5002a8a767a73177a83c63ea71f0528b90/jiter-0.13.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:f917a04240ef31898182f76a332f508f2cc4b57d2b4d7ad2dbfebbfe167eb505", size = 318597, upload-time = "2026-02-02T12:35:58.591Z" },
+    { url = "https://files.pythonhosted.org/packages/cf/52/e5719a60ac5d4d7c5995461a94ad5ef962a37c8bf5b088390e6fad59b2ff/jiter-0.13.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c1e2b199f446d3e82246b4fd9236d7cb502dc2222b18698ba0d986d2fecc6152", size = 348821, upload-time = "2026-02-02T12:36:00.093Z" },
+    { url = "https://files.pythonhosted.org/packages/61/db/c1efc32b8ba4c740ab3fc2d037d8753f67685f475e26b9d6536a4322bcdd/jiter-0.13.0-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:04670992b576fa65bd056dbac0c39fe8bd67681c380cb2b48efa885711d9d726", size = 364163, upload-time = "2026-02-02T12:36:01.937Z" },
+    { url = "https://files.pythonhosted.org/packages/55/8a/fb75556236047c8806995671a18e4a0ad646ed255276f51a20f32dceaeec/jiter-0.13.0-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:5a1aff1fbdb803a376d4d22a8f63f8e7ccbce0b4890c26cc7af9e501ab339ef0", size = 483709, upload-time = "2026-02-02T12:36:03.41Z" },
+    { url = "https://files.pythonhosted.org/packages/7e/16/43512e6ee863875693a8e6f6d532e19d650779d6ba9a81593ae40a9088ff/jiter-0.13.0-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:3b3fb8c2053acaef8580809ac1d1f7481a0a0bdc012fd7f5d8b18fb696a5a089", size = 370480, upload-time = "2026-02-02T12:36:04.791Z" },
+    { url = "https://files.pythonhosted.org/packages/f8/4c/09b93e30e984a187bc8aaa3510e1ec8dcbdcd71ca05d2f56aac0492453aa/jiter-0.13.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bdaba7d87e66f26a2c45d8cbadcbfc4bf7884182317907baf39cfe9775bb4d93", size = 360735, upload-time = "2026-02-02T12:36:06.994Z" },
+    { url = "https://files.pythonhosted.org/packages/1a/1b/46c5e349019874ec5dfa508c14c37e29864ea108d376ae26d90bee238cd7/jiter-0.13.0-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:7b88d649135aca526da172e48083da915ec086b54e8e73a425ba50999468cc08", size = 391814, upload-time = "2026-02-02T12:36:08.368Z" },
+    { url = "https://files.pythonhosted.org/packages/15/9e/26184760e85baee7162ad37b7912797d2077718476bf91517641c92b3639/jiter-0.13.0-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:e404ea551d35438013c64b4f357b0474c7abf9f781c06d44fcaf7a14c69ff9e2", size = 513990, upload-time = "2026-02-02T12:36:09.993Z" },
+    { url = "https://files.pythonhosted.org/packages/e9/34/2c9355247d6debad57a0a15e76ab1566ab799388042743656e566b3b7de1/jiter-0.13.0-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:1f4748aad1b4a93c8bdd70f604d0f748cdc0e8744c5547798acfa52f10e79228", size = 548021, upload-time = "2026-02-02T12:36:11.376Z" },
+    { url = "https://files.pythonhosted.org/packages/ac/4a/9f2c23255d04a834398b9c2e0e665382116911dc4d06b795710503cdad25/jiter-0.13.0-cp312-cp312-win32.whl", hash = "sha256:0bf670e3b1445fc4d31612199f1744f67f889ee1bbae703c4b54dc097e5dd394", size = 203024, upload-time = "2026-02-02T12:36:12.682Z" },
+    { url = "https://files.pythonhosted.org/packages/09/ee/f0ae675a957ae5a8f160be3e87acea6b11dc7b89f6b7ab057e77b2d2b13a/jiter-0.13.0-cp312-cp312-win_amd64.whl", hash = "sha256:15db60e121e11fe186c0b15236bd5d18381b9ddacdcf4e659feb96fc6c969c92", size = 205424, upload-time = "2026-02-02T12:36:13.93Z" },
+    { url = "https://files.pythonhosted.org/packages/1b/02/ae611edf913d3cbf02c97cdb90374af2082c48d7190d74c1111dde08bcdd/jiter-0.13.0-cp312-cp312-win_arm64.whl", hash = "sha256:41f92313d17989102f3cb5dd533a02787cdb99454d494344b0361355da52fcb9", size = 186818, upload-time = "2026-02-02T12:36:15.308Z" },
+    { url = "https://files.pythonhosted.org/packages/91/9c/7ee5a6ff4b9991e1a45263bfc46731634c4a2bde27dfda6c8251df2d958c/jiter-0.13.0-cp313-cp313-macosx_10_12_x86_64.whl", hash = "sha256:1f8a55b848cbabf97d861495cd65f1e5c590246fabca8b48e1747c4dfc8f85bf", size = 306897, upload-time = "2026-02-02T12:36:16.748Z" },
+    { url = "https://files.pythonhosted.org/packages/7c/02/be5b870d1d2be5dd6a91bdfb90f248fbb7dcbd21338f092c6b89817c3dbf/jiter-0.13.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:f556aa591c00f2c45eb1b89f68f52441a016034d18b65da60e2d2875bbbf344a", size = 317507, upload-time = "2026-02-02T12:36:18.351Z" },
+    { url = "https://files.pythonhosted.org/packages/da/92/b25d2ec333615f5f284f3a4024f7ce68cfa0604c322c6808b2344c7f5d2b/jiter-0.13.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f7e1d61da332ec412350463891923f960c3073cf1aae93b538f0bb4c8cd46efb", size = 350560, upload-time = "2026-02-02T12:36:19.746Z" },
+    { url = "https://files.pythonhosted.org/packages/be/ec/74dcb99fef0aca9fbe56b303bf79f6bd839010cb18ad41000bf6cc71eec0/jiter-0.13.0-cp313-cp313-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:3097d665a27bc96fd9bbf7f86178037db139f319f785e4757ce7ccbf390db6c2", size = 363232, upload-time = "2026-02-02T12:36:21.243Z" },
+    { url = "https://files.pythonhosted.org/packages/1b/37/f17375e0bb2f6a812d4dd92d7616e41917f740f3e71343627da9db2824ce/jiter-0.13.0-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:9d01ecc3a8cbdb6f25a37bd500510550b64ddf9f7d64a107d92f3ccb25035d0f", size = 483727, upload-time = "2026-02-02T12:36:22.688Z" },
+    { url = "https://files.pythonhosted.org/packages/77/d2/a71160a5ae1a1e66c1395b37ef77da67513b0adba73b993a27fbe47eb048/jiter-0.13.0-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:ed9bbc30f5d60a3bdf63ae76beb3f9db280d7f195dfcfa61af792d6ce912d159", size = 370799, upload-time = "2026-02-02T12:36:24.106Z" },
+    { url = "https://files.pythonhosted.org/packages/01/99/ed5e478ff0eb4e8aa5fd998f9d69603c9fd3f32de3bd16c2b1194f68361c/jiter-0.13.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:98fbafb6e88256f4454de33c1f40203d09fc33ed19162a68b3b257b29ca7f663", size = 359120, upload-time = "2026-02-02T12:36:25.519Z" },
+    { url = "https://files.pythonhosted.org/packages/16/be/7ffd08203277a813f732ba897352797fa9493faf8dc7995b31f3d9cb9488/jiter-0.13.0-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:5467696f6b827f1116556cb0db620440380434591e93ecee7fd14d1a491b6daa", size = 390664, upload-time = "2026-02-02T12:36:26.866Z" },
+    { url = "https://files.pythonhosted.org/packages/d1/84/e0787856196d6d346264d6dcccb01f741e5f0bd014c1d9a2ebe149caf4f3/jiter-0.13.0-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:2d08c9475d48b92892583df9da592a0e2ac49bcd41fae1fec4f39ba6cf107820", size = 513543, upload-time = "2026-02-02T12:36:28.217Z" },
+    { url = "https://files.pythonhosted.org/packages/65/50/ecbd258181c4313cf79bca6c88fb63207d04d5bf5e4f65174114d072aa55/jiter-0.13.0-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:aed40e099404721d7fcaf5b89bd3b4568a4666358bcac7b6b15c09fb6252ab68", size = 547262, upload-time = "2026-02-02T12:36:29.678Z" },
+    { url = "https://files.pythonhosted.org/packages/27/da/68f38d12e7111d2016cd198161b36e1f042bd115c169255bcb7ec823a3bf/jiter-0.13.0-cp313-cp313-win32.whl", hash = "sha256:36ebfbcffafb146d0e6ffb3e74d51e03d9c35ce7c625c8066cdbfc7b953bdc72", size = 200630, upload-time = "2026-02-02T12:36:31.808Z" },
+    { url = "https://files.pythonhosted.org/packages/25/65/3bd1a972c9a08ecd22eb3b08a95d1941ebe6938aea620c246cf426ae09c2/jiter-0.13.0-cp313-cp313-win_amd64.whl", hash = "sha256:8d76029f077379374cf0dbc78dbe45b38dec4a2eb78b08b5194ce836b2517afc", size = 202602, upload-time = "2026-02-02T12:36:33.679Z" },
+    { url = "https://files.pythonhosted.org/packages/15/fe/13bd3678a311aa67686bb303654792c48206a112068f8b0b21426eb6851e/jiter-0.13.0-cp313-cp313-win_arm64.whl", hash = "sha256:bb7613e1a427cfcb6ea4544f9ac566b93d5bf67e0d48c787eca673ff9c9dff2b", size = 185939, upload-time = "2026-02-02T12:36:35.065Z" },
+    { url = "https://files.pythonhosted.org/packages/49/19/a929ec002ad3228bc97ca01dbb14f7632fffdc84a95ec92ceaf4145688ae/jiter-0.13.0-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:fa476ab5dd49f3bf3a168e05f89358c75a17608dbabb080ef65f96b27c19ab10", size = 316616, upload-time = "2026-02-02T12:36:36.579Z" },
+    { url = "https://files.pythonhosted.org/packages/52/56/d19a9a194afa37c1728831e5fb81b7722c3de18a3109e8f282bfc23e587a/jiter-0.13.0-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ade8cb6ff5632a62b7dbd4757d8c5573f7a2e9ae285d6b5b841707d8363205ef", size = 346850, upload-time = "2026-02-02T12:36:38.058Z" },
+    { url = "https://files.pythonhosted.org/packages/36/4a/94e831c6bf287754a8a019cb966ed39ff8be6ab78cadecf08df3bb02d505/jiter-0.13.0-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9950290340acc1adaded363edd94baebcee7dabdfa8bee4790794cd5cfad2af6", size = 358551, upload-time = "2026-02-02T12:36:39.417Z" },
+    { url = "https://files.pythonhosted.org/packages/a2/ec/a4c72c822695fa80e55d2b4142b73f0012035d9fcf90eccc56bc060db37c/jiter-0.13.0-cp313-cp313t-win_amd64.whl", hash = "sha256:2b4972c6df33731aac0742b64fd0d18e0a69bc7d6e03108ce7d40c85fd9e3e6d", size = 201950, upload-time = "2026-02-02T12:36:40.791Z" },
+    { url = "https://files.pythonhosted.org/packages/b6/00/393553ec27b824fbc29047e9c7cd4a3951d7fbe4a76743f17e44034fa4e4/jiter-0.13.0-cp313-cp313t-win_arm64.whl", hash = "sha256:701a1e77d1e593c1b435315ff625fd071f0998c5f02792038a5ca98899261b7d", size = 185852, upload-time = "2026-02-02T12:36:42.077Z" },
+    { url = "https://files.pythonhosted.org/packages/6e/f5/f1997e987211f6f9bd71b8083047b316208b4aca0b529bb5f8c96c89ef3e/jiter-0.13.0-cp314-cp314-macosx_10_12_x86_64.whl", hash = "sha256:cc5223ab19fe25e2f0bf2643204ad7318896fe3729bf12fde41b77bfc4fafff0", size = 308804, upload-time = "2026-02-02T12:36:43.496Z" },
+    { url = "https://files.pythonhosted.org/packages/cd/8f/5482a7677731fd44881f0204981ce2d7175db271f82cba2085dd2212e095/jiter-0.13.0-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:9776ebe51713acf438fd9b4405fcd86893ae5d03487546dae7f34993217f8a91", size = 318787, upload-time = "2026-02-02T12:36:45.071Z" },
+    { url = "https://files.pythonhosted.org/packages/f3/b9/7257ac59778f1cd025b26a23c5520a36a424f7f1b068f2442a5b499b7464/jiter-0.13.0-cp314-cp314-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:879e768938e7b49b5e90b7e3fecc0dbec01b8cb89595861fb39a8967c5220d09", size = 353880, upload-time = "2026-02-02T12:36:47.365Z" },
+    { url = "https://files.pythonhosted.org/packages/c3/87/719eec4a3f0841dad99e3d3604ee4cba36af4419a76f3cb0b8e2e691ad67/jiter-0.13.0-cp314-cp314-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:682161a67adea11e3aae9038c06c8b4a9a71023228767477d683f69903ebc607", size = 366702, upload-time = "2026-02-02T12:36:48.871Z" },
+    { url = "https://files.pythonhosted.org/packages/d2/65/415f0a75cf6921e43365a1bc227c565cb949caca8b7532776e430cbaa530/jiter-0.13.0-cp314-cp314-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:a13b68cd1cd8cc9de8f244ebae18ccb3e4067ad205220ef324c39181e23bbf66", size = 486319, upload-time = "2026-02-02T12:36:53.006Z" },
+    { url = "https://files.pythonhosted.org/packages/54/a2/9e12b48e82c6bbc6081fd81abf915e1443add1b13d8fc586e1d90bb02bb8/jiter-0.13.0-cp314-cp314-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:87ce0f14c6c08892b610686ae8be350bf368467b6acd5085a5b65441e2bf36d2", size = 372289, upload-time = "2026-02-02T12:36:54.593Z" },
+    { url = "https://files.pythonhosted.org/packages/4e/c1/e4693f107a1789a239c759a432e9afc592366f04e901470c2af89cfd28e1/jiter-0.13.0-cp314-cp314-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0c365005b05505a90d1c47856420980d0237adf82f70c4aff7aebd3c1cc143ad", size = 360165, upload-time = "2026-02-02T12:36:56.112Z" },
+    { url = "https://files.pythonhosted.org/packages/17/08/91b9ea976c1c758240614bd88442681a87672eebc3d9a6dde476874e706b/jiter-0.13.0-cp314-cp314-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:1317fdffd16f5873e46ce27d0e0f7f4f90f0cdf1d86bf6abeaea9f63ca2c401d", size = 389634, upload-time = "2026-02-02T12:36:57.495Z" },
+    { url = "https://files.pythonhosted.org/packages/18/23/58325ef99390d6d40427ed6005bf1ad54f2577866594bcf13ce55675f87d/jiter-0.13.0-cp314-cp314-musllinux_1_1_aarch64.whl", hash = "sha256:c05b450d37ba0c9e21c77fef1f205f56bcee2330bddca68d344baebfc55ae0df", size = 514933, upload-time = "2026-02-02T12:36:58.909Z" },
+    { url = "https://files.pythonhosted.org/packages/5b/25/69f1120c7c395fd276c3996bb8adefa9c6b84c12bb7111e5c6ccdcd8526d/jiter-0.13.0-cp314-cp314-musllinux_1_1_x86_64.whl", hash = "sha256:775e10de3849d0631a97c603f996f518159272db00fdda0a780f81752255ee9d", size = 548842, upload-time = "2026-02-02T12:37:00.433Z" },
+    { url = "https://files.pythonhosted.org/packages/18/05/981c9669d86850c5fbb0d9e62bba144787f9fba84546ba43d624ee27ef29/jiter-0.13.0-cp314-cp314-win32.whl", hash = "sha256:632bf7c1d28421c00dd8bbb8a3bac5663e1f57d5cd5ed962bce3c73bf62608e6", size = 202108, upload-time = "2026-02-02T12:37:01.718Z" },
+    { url = "https://files.pythonhosted.org/packages/8d/96/cdcf54dd0b0341db7d25413229888a346c7130bd20820530905fdb65727b/jiter-0.13.0-cp314-cp314-win_amd64.whl", hash = "sha256:f22ef501c3f87ede88f23f9b11e608581c14f04db59b6a801f354397ae13739f", size = 204027, upload-time = "2026-02-02T12:37:03.075Z" },
+    { url = "https://files.pythonhosted.org/packages/fb/f9/724bcaaab7a3cd727031fe4f6995cb86c4bd344909177c186699c8dec51a/jiter-0.13.0-cp314-cp314-win_arm64.whl", hash = "sha256:07b75fe09a4ee8e0c606200622e571e44943f47254f95e2436c8bdcaceb36d7d", size = 187199, upload-time = "2026-02-02T12:37:04.414Z" },
+    { url = "https://files.pythonhosted.org/packages/62/92/1661d8b9fd6a3d7a2d89831db26fe3c1509a287d83ad7838831c7b7a5c7e/jiter-0.13.0-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:964538479359059a35fb400e769295d4b315ae61e4105396d355a12f7fef09f0", size = 318423, upload-time = "2026-02-02T12:37:05.806Z" },
+    { url = "https://files.pythonhosted.org/packages/4f/3b/f77d342a54d4ebcd128e520fc58ec2f5b30a423b0fd26acdfc0c6fef8e26/jiter-0.13.0-cp314-cp314t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e104da1db1c0991b3eaed391ccd650ae8d947eab1480c733e5a3fb28d4313e40", size = 351438, upload-time = "2026-02-02T12:37:07.189Z" },
+    { url = "https://files.pythonhosted.org/packages/76/b3/ba9a69f0e4209bd3331470c723c2f5509e6f0482e416b612431a5061ed71/jiter-0.13.0-cp314-cp314t-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:0e3a5f0cde8ff433b8e88e41aa40131455420fb3649a3c7abdda6145f8cb7202", size = 364774, upload-time = "2026-02-02T12:37:08.579Z" },
+    { url = "https://files.pythonhosted.org/packages/b3/16/6cdb31fa342932602458dbb631bfbd47f601e03d2e4950740e0b2100b570/jiter-0.13.0-cp314-cp314t-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:57aab48f40be1db920a582b30b116fe2435d184f77f0e4226f546794cedd9cf0", size = 487238, upload-time = "2026-02-02T12:37:10.066Z" },
+    { url = "https://files.pythonhosted.org/packages/ed/b1/956cc7abaca8d95c13aa8d6c9b3f3797241c246cd6e792934cc4c8b250d2/jiter-0.13.0-cp314-cp314t-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:7772115877c53f62beeb8fd853cab692dbc04374ef623b30f997959a4c0e7e95", size = 372892, upload-time = "2026-02-02T12:37:11.656Z" },
+    { url = "https://files.pythonhosted.org/packages/26/c4/97ecde8b1e74f67b8598c57c6fccf6df86ea7861ed29da84629cdbba76c4/jiter-0.13.0-cp314-cp314t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1211427574b17b633cfceba5040de8081e5abf114f7a7602f73d2e16f9fdaa59", size = 360309, upload-time = "2026-02-02T12:37:13.244Z" },
+    { url = "https://files.pythonhosted.org/packages/4b/d7/eabe3cf46715854ccc80be2cd78dd4c36aedeb30751dbf85a1d08c14373c/jiter-0.13.0-cp314-cp314t-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:7beae3a3d3b5212d3a55d2961db3c292e02e302feb43fce6a3f7a31b90ea6dfe", size = 389607, upload-time = "2026-02-02T12:37:14.881Z" },
+    { url = "https://files.pythonhosted.org/packages/df/2d/03963fc0804e6109b82decfb9974eb92df3797fe7222428cae12f8ccaa0c/jiter-0.13.0-cp314-cp314t-musllinux_1_1_aarch64.whl", hash = "sha256:e5562a0f0e90a6223b704163ea28e831bd3a9faa3512a711f031611e6b06c939", size = 514986, upload-time = "2026-02-02T12:37:16.326Z" },
+    { url = "https://files.pythonhosted.org/packages/f6/6c/8c83b45eb3eb1c1e18d841fe30b4b5bc5619d781267ca9bc03e005d8fd0a/jiter-0.13.0-cp314-cp314t-musllinux_1_1_x86_64.whl", hash = "sha256:6c26a424569a59140fb51160a56df13f438a2b0967365e987889186d5fc2f6f9", size = 548756, upload-time = "2026-02-02T12:37:17.736Z" },
+    { url = "https://files.pythonhosted.org/packages/47/66/eea81dfff765ed66c68fd2ed8c96245109e13c896c2a5015c7839c92367e/jiter-0.13.0-cp314-cp314t-win32.whl", hash = "sha256:24dc96eca9f84da4131cdf87a95e6ce36765c3b156fc9ae33280873b1c32d5f6", size = 201196, upload-time = "2026-02-02T12:37:19.101Z" },
+    { url = "https://files.pythonhosted.org/packages/ff/32/4ac9c7a76402f8f00d00842a7f6b83b284d0cf7c1e9d4227bc95aa6d17fa/jiter-0.13.0-cp314-cp314t-win_amd64.whl", hash = "sha256:0a8d76c7524087272c8ae913f5d9d608bd839154b62c4322ef65723d2e5bb0b8", size = 204215, upload-time = "2026-02-02T12:37:20.495Z" },
+    { url = "https://files.pythonhosted.org/packages/f9/8e/7def204fea9f9be8b3c21a6f2dd6c020cf56c7d5ff753e0e23ed7f9ea57e/jiter-0.13.0-cp314-cp314t-win_arm64.whl", hash = "sha256:2c26cf47e2cad140fa23b6d58d435a7c0161f5c514284802f25e87fddfe11024", size = 187152, upload-time = "2026-02-02T12:37:22.124Z" },
+    { url = "https://files.pythonhosted.org/packages/79/b3/3c29819a27178d0e461a8571fb63c6ae38be6dc36b78b3ec2876bbd6a910/jiter-0.13.0-graalpy311-graalpy242_311_native-macosx_10_12_x86_64.whl", hash = "sha256:b1cbfa133241d0e6bdab48dcdc2604e8ba81512f6bbd68ec3e8e1357dd3c316c", size = 307016, upload-time = "2026-02-02T12:37:42.755Z" },
+    { url = "https://files.pythonhosted.org/packages/eb/ae/60993e4b07b1ac5ebe46da7aa99fdbb802eb986c38d26e3883ac0125c4e0/jiter-0.13.0-graalpy311-graalpy242_311_native-macosx_11_0_arm64.whl", hash = "sha256:db367d8be9fad6e8ebbac4a7578b7af562e506211036cba2c06c3b998603c3d2", size = 305024, upload-time = "2026-02-02T12:37:44.774Z" },
+    { url = "https://files.pythonhosted.org/packages/77/fa/2227e590e9cf98803db2811f172b2d6460a21539ab73006f251c66f44b14/jiter-0.13.0-graalpy311-graalpy242_311_native-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:45f6f8efb2f3b0603092401dc2df79fa89ccbc027aaba4174d2d4133ed661434", size = 339337, upload-time = "2026-02-02T12:37:46.668Z" },
+    { url = "https://files.pythonhosted.org/packages/2d/92/015173281f7eb96c0ef580c997da8ef50870d4f7f4c9e03c845a1d62ae04/jiter-0.13.0-graalpy311-graalpy242_311_native-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:597245258e6ad085d064780abfb23a284d418d3e61c57362d9449c6c7317ee2d", size = 346395, upload-time = "2026-02-02T12:37:48.09Z" },
+    { url = "https://files.pythonhosted.org/packages/80/60/e50fa45dd7e2eae049f0ce964663849e897300433921198aef94b6ffa23a/jiter-0.13.0-graalpy312-graalpy250_312_native-macosx_10_12_x86_64.whl", hash = "sha256:3d744a6061afba08dd7ae375dcde870cffb14429b7477e10f67e9e6d68772a0a", size = 305169, upload-time = "2026-02-02T12:37:50.376Z" },
+    { url = "https://files.pythonhosted.org/packages/d2/73/a009f41c5eed71c49bec53036c4b33555afcdee70682a18c6f66e396c039/jiter-0.13.0-graalpy312-graalpy250_312_native-macosx_11_0_arm64.whl", hash = "sha256:ff732bd0a0e778f43d5009840f20b935e79087b4dc65bd36f1cd0f9b04b8ff7f", size = 303808, upload-time = "2026-02-02T12:37:52.092Z" },
+    { url = "https://files.pythonhosted.org/packages/c4/10/528b439290763bff3d939268085d03382471b442f212dca4ff5f12802d43/jiter-0.13.0-graalpy312-graalpy250_312_native-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ab44b178f7981fcaea7e0a5df20e773c663d06ffda0198f1a524e91b2fde7e59", size = 337384, upload-time = "2026-02-02T12:37:53.582Z" },
+    { url = "https://files.pythonhosted.org/packages/67/8a/a342b2f0251f3dac4ca17618265d93bf244a2a4d089126e81e4c1056ac50/jiter-0.13.0-graalpy312-graalpy250_312_native-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7bb00b6d26db67a05fe3e12c76edc75f32077fb51deed13822dc648fa373bc19", size = 343768, upload-time = "2026-02-02T12:37:55.055Z" },
+]
+
 [[package]]
 name = "librt"
 version = "0.8.1"
@@ -283,6 +772,32 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/b3/38/89ba8ad64ae25be8de66a6d463314cf1eb366222074cfda9ee839c56a4b4/mdurl-0.1.2-py3-none-any.whl", hash = "sha256:84008a41e51615a49fc9966191ff91509e3c40b939176e643fd50a5c2196b8f8", size = 9979, upload-time = "2022-08-14T12:40:09.779Z" },
 ]
 
+[[package]]
+name = "msal"
+version = "1.35.1"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "cryptography" },
+    { name = "pyjwt", extra = ["crypto"] },
+    { name = "requests" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/3c/aa/5a646093ac218e4a329391d5a31e5092a89db7d2ef1637a90b82cd0b6f94/msal-1.35.1.tar.gz", hash = "sha256:70cac18ab80a053bff86219ba64cfe3da1f307c74b009e2da57ef040eb1b5656", size = 165658, upload-time = "2026-03-04T23:38:51.812Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/96/86/16815fddf056ca998853c6dc525397edf0b43559bb4073a80d2bc7fe8009/msal-1.35.1-py3-none-any.whl", hash = "sha256:8f4e82f34b10c19e326ec69f44dc6b30171f2f7098f3720ea8a9f0c11832caa3", size = 119909, upload-time = "2026-03-04T23:38:50.452Z" },
+]
+
+[[package]]
+name = "msal-extensions"
+version = "1.3.1"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "msal" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/01/99/5d239b6156eddf761a636bded1118414d161bd6b7b37a9335549ed159396/msal_extensions-1.3.1.tar.gz", hash = "sha256:c5b0fd10f65ef62b5f1d62f4251d51cbcaf003fcedae8c91b040a488614be1a4", size = 23315, upload-time = "2025-03-14T23:51:03.902Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/5e/75/bd9b7bb966668920f06b200e84454c8f3566b102183bc55c5473d96cb2b9/msal_extensions-1.3.1-py3-none-any.whl", hash = "sha256:96d3de4d034504e969ac5e85bae8106c8373b5c6568e4c8fa7af2eca9dbe6bca", size = 20583, upload-time = "2025-03-14T23:51:03.016Z" },
+]
+
 [[package]]
 name = "mypy"
 version = "1.19.1"
@@ -331,6 +846,34 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/79/7b/2c79738432f5c924bef5071f933bcc9efd0473bac3b4aa584a6f7c1c8df8/mypy_extensions-1.1.0-py3-none-any.whl", hash = "sha256:1be4cccdb0f2482337c4743e60421de3a356cd97508abadd57d47403e94f5505", size = 4963, upload-time = "2025-04-22T14:54:22.983Z" },
 ]
 
+[[package]]
+name = "nodeenv"
+version = "1.10.0"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/24/bf/d1bda4f6168e0b2e9e5958945e01910052158313224ada5ce1fb2e1113b8/nodeenv-1.10.0.tar.gz", hash = "sha256:996c191ad80897d076bdfba80a41994c2b47c68e224c542b48feba42ba00f8bb", size = 55611, upload-time = "2025-12-20T14:08:54.006Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/88/b2/d0896bdcdc8d28a7fc5717c305f1a861c26e18c05047949fb371034d98bd/nodeenv-1.10.0-py2.py3-none-any.whl", hash = "sha256:5bb13e3eed2923615535339b3c620e76779af4cb4c6a90deccc9e36b274d3827", size = 23438, upload-time = "2025-12-20T14:08:52.782Z" },
+]
+
+[[package]]
+name = "openai"
+version = "2.29.0"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "anyio" },
+    { name = "distro" },
+    { name = "httpx" },
+    { name = "jiter" },
+    { name = "pydantic" },
+    { name = "sniffio" },
+    { name = "tqdm" },
+    { name = "typing-extensions" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/b4/15/203d537e58986b5673e7f232453a2a2f110f22757b15921cbdeea392e520/openai-2.29.0.tar.gz", hash = "sha256:32d09eb2f661b38d3edd7d7e1a2943d1633f572596febe64c0cd370c86d52bec", size = 671128, upload-time = "2026-03-17T17:53:49.599Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/d0/b1/35b6f9c8cf9318e3dbb7146cc82dab4cf61182a8d5406fc9b50864362895/openai-2.29.0-py3-none-any.whl", hash = "sha256:b7c5de513c3286d17c5e29b92c4c98ceaf0d775244ac8159aeb1bddf840eb42a", size = 1141533, upload-time = "2026-03-17T17:53:47.348Z" },
+]
+
 [[package]]
 name = "packaging"
 version = "26.0"
@@ -349,6 +892,15 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/ef/3c/2c197d226f9ea224a9ab8d197933f9da0ae0aac5b6e0f884e2b8d9c8e9f7/pathspec-1.0.4-py3-none-any.whl", hash = "sha256:fb6ae2fd4e7c921a165808a552060e722767cfa526f99ca5156ed2ce45a5c723", size = 55206, upload-time = "2026-01-27T03:59:45.137Z" },
 ]
 
+[[package]]
+name = "platformdirs"
+version = "4.9.4"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/19/56/8d4c30c8a1d07013911a8fdbd8f89440ef9f08d07a1b50ab8ca8be5a20f9/platformdirs-4.9.4.tar.gz", hash = "sha256:1ec356301b7dc906d83f371c8f487070e99d3ccf9e501686456394622a01a934", size = 28737, upload-time = "2026-03-05T18:34:13.271Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/63/d7/97f7e3a6abb67d8080dd406fd4df842c2be0efaf712d1c899c32a075027c/platformdirs-4.9.4-py3-none-any.whl", hash = "sha256:68a9a4619a666ea6439f2ff250c12a853cd1cbd5158d258bd824a7df6be2f868", size = 21216, upload-time = "2026-03-05T18:34:12.172Z" },
+]
+
 [[package]]
 name = "pluggy"
 version = "1.6.0"
@@ -358,6 +910,31 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/54/20/4d324d65cc6d9205fabedc306948156824eb9f0ee1633355a8f7ec5c66bf/pluggy-1.6.0-py3-none-any.whl", hash = "sha256:e920276dd6813095e9377c0bc5566d94c932c33b27a3e3945d8389c374dd4746", size = 20538, upload-time = "2025-05-15T12:30:06.134Z" },
 ]
 
+[[package]]
+name = "pre-commit"
+version = "4.5.1"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "cfgv" },
+    { name = "identify" },
+    { name = "nodeenv" },
+    { name = "pyyaml" },
+    { name = "virtualenv" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/40/f1/6d86a29246dfd2e9b6237f0b5823717f60cad94d47ddc26afa916d21f525/pre_commit-4.5.1.tar.gz", hash = "sha256:eb545fcff725875197837263e977ea257a402056661f09dae08e4b149b030a61", size = 198232, upload-time = "2025-12-16T21:14:33.552Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/5d/19/fd3ef348460c80af7bb4669ea7926651d1f95c23ff2df18b9d24bab4f3fa/pre_commit-4.5.1-py2.py3-none-any.whl", hash = "sha256:3b3afd891e97337708c1674210f8eba659b52a38ea5f822ff142d10786221f77", size = 226437, upload-time = "2025-12-16T21:14:32.409Z" },
+]
+
+[[package]]
+name = "pycparser"
+version = "3.0"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/1b/7d/92392ff7815c21062bea51aa7b87d45576f649f16458d78b7cf94b9ab2e6/pycparser-3.0.tar.gz", hash = "sha256:600f49d217304a5902ac3c37e1281c9fe94e4d0489de643a9504c5cdfdfc6b29", size = 103492, upload-time = "2026-01-21T14:26:51.89Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/0c/c3/44f3fbbfa403ea2a7c779186dc20772604442dde72947e7d01069cbe98e3/pycparser-3.0-py3-none-any.whl", hash = "sha256:b727414169a36b7d524c1c3e31839a521725078d7b2ff038656844266160a992", size = 48172, upload-time = "2026-01-21T14:26:50.693Z" },
+]
+
 [[package]]
 name = "pydantic"
 version = "2.12.5"
@@ -479,6 +1056,20 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/c7/21/705964c7812476f378728bdf590ca4b771ec72385c533964653c68e86bdc/pygments-2.19.2-py3-none-any.whl", hash = "sha256:86540386c03d588bb81d44bc3928634ff26449851e99741617ecb9037ee5ec0b", size = 1225217, upload-time = "2025-06-21T13:39:07.939Z" },
 ]
 
+[[package]]
+name = "pyjwt"
+version = "2.12.1"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/c2/27/a3b6e5bf6ff856d2509292e95c8f57f0df7017cf5394921fc4e4ef40308a/pyjwt-2.12.1.tar.gz", hash = "sha256:c74a7a2adf861c04d002db713dd85f84beb242228e671280bf709d765b03672b", size = 102564, upload-time = "2026-03-13T19:27:37.25Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/e5/7a/8dd906bd22e79e47397a61742927f6747fe93242ef86645ee9092e610244/pyjwt-2.12.1-py3-none-any.whl", hash = "sha256:28ca37c070cad8ba8cd9790cd940535d40274d22f80ab87f3ac6a713e6e8454c", size = 29726, upload-time = "2026-03-13T19:27:35.677Z" },
+]
+
+[package.optional-dependencies]
+crypto = [
+    { name = "cryptography" },
+]
+
 [[package]]
 name = "pytest"
 version = "9.0.2"
@@ -522,6 +1113,89 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/ee/49/1377b49de7d0c1ce41292161ea0f721913fa8722c19fb9c1e3aa0367eecb/pytest_cov-7.0.0-py3-none-any.whl", hash = "sha256:3b8e9558b16cc1479da72058bdecf8073661c7f57f7d3c5f22a1c23507f2d861", size = 22424, upload-time = "2025-09-09T10:57:00.695Z" },
 ]
 
+[[package]]
+name = "python-discovery"
+version = "1.2.0"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "filelock" },
+    { name = "platformdirs" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/9c/90/bcce6b46823c9bec1757c964dc37ed332579be512e17a30e9698095dcae4/python_discovery-1.2.0.tar.gz", hash = "sha256:7d33e350704818b09e3da2bd419d37e21e7c30db6e0977bb438916e06b41b5b1", size = 58055, upload-time = "2026-03-19T01:43:08.248Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/c2/3c/2005227cb951df502412de2fa781f800663cccbef8d90ec6f1b371ac2c0d/python_discovery-1.2.0-py3-none-any.whl", hash = "sha256:1e108f1bbe2ed0ef089823d28805d5ad32be8e734b86a5f212bf89b71c266e4a", size = 31524, upload-time = "2026-03-19T01:43:07.045Z" },
+]
+
+[[package]]
+name = "pyyaml"
+version = "6.0.3"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/05/8e/961c0007c59b8dd7729d542c61a4d537767a59645b82a0b521206e1e25c2/pyyaml-6.0.3.tar.gz", hash = "sha256:d76623373421df22fb4cf8817020cbb7ef15c725b9d5e45f17e189bfc384190f", size = 130960, upload-time = "2025-09-25T21:33:16.546Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/6d/16/a95b6757765b7b031c9374925bb718d55e0a9ba8a1b6a12d25962ea44347/pyyaml-6.0.3-cp311-cp311-macosx_10_13_x86_64.whl", hash = "sha256:44edc647873928551a01e7a563d7452ccdebee747728c1080d881d68af7b997e", size = 185826, upload-time = "2025-09-25T21:31:58.655Z" },
+    { url = "https://files.pythonhosted.org/packages/16/19/13de8e4377ed53079ee996e1ab0a9c33ec2faf808a4647b7b4c0d46dd239/pyyaml-6.0.3-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:652cb6edd41e718550aad172851962662ff2681490a8a711af6a4d288dd96824", size = 175577, upload-time = "2025-09-25T21:32:00.088Z" },
+    { url = "https://files.pythonhosted.org/packages/0c/62/d2eb46264d4b157dae1275b573017abec435397aa59cbcdab6fc978a8af4/pyyaml-6.0.3-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:10892704fc220243f5305762e276552a0395f7beb4dbf9b14ec8fd43b57f126c", size = 775556, upload-time = "2025-09-25T21:32:01.31Z" },
+    { url = "https://files.pythonhosted.org/packages/10/cb/16c3f2cf3266edd25aaa00d6c4350381c8b012ed6f5276675b9eba8d9ff4/pyyaml-6.0.3-cp311-cp311-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:850774a7879607d3a6f50d36d04f00ee69e7fc816450e5f7e58d7f17f1ae5c00", size = 882114, upload-time = "2025-09-25T21:32:03.376Z" },
+    { url = "https://files.pythonhosted.org/packages/71/60/917329f640924b18ff085ab889a11c763e0b573da888e8404ff486657602/pyyaml-6.0.3-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:b8bb0864c5a28024fac8a632c443c87c5aa6f215c0b126c449ae1a150412f31d", size = 806638, upload-time = "2025-09-25T21:32:04.553Z" },
+    { url = "https://files.pythonhosted.org/packages/dd/6f/529b0f316a9fd167281a6c3826b5583e6192dba792dd55e3203d3f8e655a/pyyaml-6.0.3-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:1d37d57ad971609cf3c53ba6a7e365e40660e3be0e5175fa9f2365a379d6095a", size = 767463, upload-time = "2025-09-25T21:32:06.152Z" },
+    { url = "https://files.pythonhosted.org/packages/f2/6a/b627b4e0c1dd03718543519ffb2f1deea4a1e6d42fbab8021936a4d22589/pyyaml-6.0.3-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:37503bfbfc9d2c40b344d06b2199cf0e96e97957ab1c1b546fd4f87e53e5d3e4", size = 794986, upload-time = "2025-09-25T21:32:07.367Z" },
+    { url = "https://files.pythonhosted.org/packages/45/91/47a6e1c42d9ee337c4839208f30d9f09caa9f720ec7582917b264defc875/pyyaml-6.0.3-cp311-cp311-win32.whl", hash = "sha256:8098f252adfa6c80ab48096053f512f2321f0b998f98150cea9bd23d83e1467b", size = 142543, upload-time = "2025-09-25T21:32:08.95Z" },
+    { url = "https://files.pythonhosted.org/packages/da/e3/ea007450a105ae919a72393cb06f122f288ef60bba2dc64b26e2646fa315/pyyaml-6.0.3-cp311-cp311-win_amd64.whl", hash = "sha256:9f3bfb4965eb874431221a3ff3fdcddc7e74e3b07799e0e84ca4a0f867d449bf", size = 158763, upload-time = "2025-09-25T21:32:09.96Z" },
+    { url = "https://files.pythonhosted.org/packages/d1/33/422b98d2195232ca1826284a76852ad5a86fe23e31b009c9886b2d0fb8b2/pyyaml-6.0.3-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:7f047e29dcae44602496db43be01ad42fc6f1cc0d8cd6c83d342306c32270196", size = 182063, upload-time = "2025-09-25T21:32:11.445Z" },
+    { url = "https://files.pythonhosted.org/packages/89/a0/6cf41a19a1f2f3feab0e9c0b74134aa2ce6849093d5517a0c550fe37a648/pyyaml-6.0.3-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:fc09d0aa354569bc501d4e787133afc08552722d3ab34836a80547331bb5d4a0", size = 173973, upload-time = "2025-09-25T21:32:12.492Z" },
+    { url = "https://files.pythonhosted.org/packages/ed/23/7a778b6bd0b9a8039df8b1b1d80e2e2ad78aa04171592c8a5c43a56a6af4/pyyaml-6.0.3-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:9149cad251584d5fb4981be1ecde53a1ca46c891a79788c0df828d2f166bda28", size = 775116, upload-time = "2025-09-25T21:32:13.652Z" },
+    { url = "https://files.pythonhosted.org/packages/65/30/d7353c338e12baef4ecc1b09e877c1970bd3382789c159b4f89d6a70dc09/pyyaml-6.0.3-cp312-cp312-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:5fdec68f91a0c6739b380c83b951e2c72ac0197ace422360e6d5a959d8d97b2c", size = 844011, upload-time = "2025-09-25T21:32:15.21Z" },
+    { url = "https://files.pythonhosted.org/packages/8b/9d/b3589d3877982d4f2329302ef98a8026e7f4443c765c46cfecc8858c6b4b/pyyaml-6.0.3-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:ba1cc08a7ccde2d2ec775841541641e4548226580ab850948cbfda66a1befcdc", size = 807870, upload-time = "2025-09-25T21:32:16.431Z" },
+    { url = "https://files.pythonhosted.org/packages/05/c0/b3be26a015601b822b97d9149ff8cb5ead58c66f981e04fedf4e762f4bd4/pyyaml-6.0.3-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:8dc52c23056b9ddd46818a57b78404882310fb473d63f17b07d5c40421e47f8e", size = 761089, upload-time = "2025-09-25T21:32:17.56Z" },
+    { url = "https://files.pythonhosted.org/packages/be/8e/98435a21d1d4b46590d5459a22d88128103f8da4c2d4cb8f14f2a96504e1/pyyaml-6.0.3-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:41715c910c881bc081f1e8872880d3c650acf13dfa8214bad49ed4cede7c34ea", size = 790181, upload-time = "2025-09-25T21:32:18.834Z" },
+    { url = "https://files.pythonhosted.org/packages/74/93/7baea19427dcfbe1e5a372d81473250b379f04b1bd3c4c5ff825e2327202/pyyaml-6.0.3-cp312-cp312-win32.whl", hash = "sha256:96b533f0e99f6579b3d4d4995707cf36df9100d67e0c8303a0c55b27b5f99bc5", size = 137658, upload-time = "2025-09-25T21:32:20.209Z" },
+    { url = "https://files.pythonhosted.org/packages/86/bf/899e81e4cce32febab4fb42bb97dcdf66bc135272882d1987881a4b519e9/pyyaml-6.0.3-cp312-cp312-win_amd64.whl", hash = "sha256:5fcd34e47f6e0b794d17de1b4ff496c00986e1c83f7ab2fb8fcfe9616ff7477b", size = 154003, upload-time = "2025-09-25T21:32:21.167Z" },
+    { url = "https://files.pythonhosted.org/packages/1a/08/67bd04656199bbb51dbed1439b7f27601dfb576fb864099c7ef0c3e55531/pyyaml-6.0.3-cp312-cp312-win_arm64.whl", hash = "sha256:64386e5e707d03a7e172c0701abfb7e10f0fb753ee1d773128192742712a98fd", size = 140344, upload-time = "2025-09-25T21:32:22.617Z" },
+    { url = "https://files.pythonhosted.org/packages/d1/11/0fd08f8192109f7169db964b5707a2f1e8b745d4e239b784a5a1dd80d1db/pyyaml-6.0.3-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:8da9669d359f02c0b91ccc01cac4a67f16afec0dac22c2ad09f46bee0697eba8", size = 181669, upload-time = "2025-09-25T21:32:23.673Z" },
+    { url = "https://files.pythonhosted.org/packages/b1/16/95309993f1d3748cd644e02e38b75d50cbc0d9561d21f390a76242ce073f/pyyaml-6.0.3-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:2283a07e2c21a2aa78d9c4442724ec1eb15f5e42a723b99cb3d822d48f5f7ad1", size = 173252, upload-time = "2025-09-25T21:32:25.149Z" },
+    { url = "https://files.pythonhosted.org/packages/50/31/b20f376d3f810b9b2371e72ef5adb33879b25edb7a6d072cb7ca0c486398/pyyaml-6.0.3-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:ee2922902c45ae8ccada2c5b501ab86c36525b883eff4255313a253a3160861c", size = 767081, upload-time = "2025-09-25T21:32:26.575Z" },
+    { url = "https://files.pythonhosted.org/packages/49/1e/a55ca81e949270d5d4432fbbd19dfea5321eda7c41a849d443dc92fd1ff7/pyyaml-6.0.3-cp313-cp313-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:a33284e20b78bd4a18c8c2282d549d10bc8408a2a7ff57653c0cf0b9be0afce5", size = 841159, upload-time = "2025-09-25T21:32:27.727Z" },
+    { url = "https://files.pythonhosted.org/packages/74/27/e5b8f34d02d9995b80abcef563ea1f8b56d20134d8f4e5e81733b1feceb2/pyyaml-6.0.3-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:0f29edc409a6392443abf94b9cf89ce99889a1dd5376d94316ae5145dfedd5d6", size = 801626, upload-time = "2025-09-25T21:32:28.878Z" },
+    { url = "https://files.pythonhosted.org/packages/f9/11/ba845c23988798f40e52ba45f34849aa8a1f2d4af4b798588010792ebad6/pyyaml-6.0.3-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:f7057c9a337546edc7973c0d3ba84ddcdf0daa14533c2065749c9075001090e6", size = 753613, upload-time = "2025-09-25T21:32:30.178Z" },
+    { url = "https://files.pythonhosted.org/packages/3d/e0/7966e1a7bfc0a45bf0a7fb6b98ea03fc9b8d84fa7f2229e9659680b69ee3/pyyaml-6.0.3-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:eda16858a3cab07b80edaf74336ece1f986ba330fdb8ee0d6c0d68fe82bc96be", size = 794115, upload-time = "2025-09-25T21:32:31.353Z" },
+    { url = "https://files.pythonhosted.org/packages/de/94/980b50a6531b3019e45ddeada0626d45fa85cbe22300844a7983285bed3b/pyyaml-6.0.3-cp313-cp313-win32.whl", hash = "sha256:d0eae10f8159e8fdad514efdc92d74fd8d682c933a6dd088030f3834bc8e6b26", size = 137427, upload-time = "2025-09-25T21:32:32.58Z" },
+    { url = "https://files.pythonhosted.org/packages/97/c9/39d5b874e8b28845e4ec2202b5da735d0199dbe5b8fb85f91398814a9a46/pyyaml-6.0.3-cp313-cp313-win_amd64.whl", hash = "sha256:79005a0d97d5ddabfeeea4cf676af11e647e41d81c9a7722a193022accdb6b7c", size = 154090, upload-time = "2025-09-25T21:32:33.659Z" },
+    { url = "https://files.pythonhosted.org/packages/73/e8/2bdf3ca2090f68bb3d75b44da7bbc71843b19c9f2b9cb9b0f4ab7a5a4329/pyyaml-6.0.3-cp313-cp313-win_arm64.whl", hash = "sha256:5498cd1645aa724a7c71c8f378eb29ebe23da2fc0d7a08071d89469bf1d2defb", size = 140246, upload-time = "2025-09-25T21:32:34.663Z" },
+    { url = "https://files.pythonhosted.org/packages/9d/8c/f4bd7f6465179953d3ac9bc44ac1a8a3e6122cf8ada906b4f96c60172d43/pyyaml-6.0.3-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:8d1fab6bb153a416f9aeb4b8763bc0f22a5586065f86f7664fc23339fc1c1fac", size = 181814, upload-time = "2025-09-25T21:32:35.712Z" },
+    { url = "https://files.pythonhosted.org/packages/bd/9c/4d95bb87eb2063d20db7b60faa3840c1b18025517ae857371c4dd55a6b3a/pyyaml-6.0.3-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:34d5fcd24b8445fadc33f9cf348c1047101756fd760b4dacb5c3e99755703310", size = 173809, upload-time = "2025-09-25T21:32:36.789Z" },
+    { url = "https://files.pythonhosted.org/packages/92/b5/47e807c2623074914e29dabd16cbbdd4bf5e9b2db9f8090fa64411fc5382/pyyaml-6.0.3-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:501a031947e3a9025ed4405a168e6ef5ae3126c59f90ce0cd6f2bfc477be31b7", size = 766454, upload-time = "2025-09-25T21:32:37.966Z" },
+    { url = "https://files.pythonhosted.org/packages/02/9e/e5e9b168be58564121efb3de6859c452fccde0ab093d8438905899a3a483/pyyaml-6.0.3-cp314-cp314-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:b3bc83488de33889877a0f2543ade9f70c67d66d9ebb4ac959502e12de895788", size = 836355, upload-time = "2025-09-25T21:32:39.178Z" },
+    { url = "https://files.pythonhosted.org/packages/88/f9/16491d7ed2a919954993e48aa941b200f38040928474c9e85ea9e64222c3/pyyaml-6.0.3-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:c458b6d084f9b935061bc36216e8a69a7e293a2f1e68bf956dcd9e6cbcd143f5", size = 794175, upload-time = "2025-09-25T21:32:40.865Z" },
+    { url = "https://files.pythonhosted.org/packages/dd/3f/5989debef34dc6397317802b527dbbafb2b4760878a53d4166579111411e/pyyaml-6.0.3-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:7c6610def4f163542a622a73fb39f534f8c101d690126992300bf3207eab9764", size = 755228, upload-time = "2025-09-25T21:32:42.084Z" },
+    { url = "https://files.pythonhosted.org/packages/d7/ce/af88a49043cd2e265be63d083fc75b27b6ed062f5f9fd6cdc223ad62f03e/pyyaml-6.0.3-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:5190d403f121660ce8d1d2c1bb2ef1bd05b5f68533fc5c2ea899bd15f4399b35", size = 789194, upload-time = "2025-09-25T21:32:43.362Z" },
+    { url = "https://files.pythonhosted.org/packages/23/20/bb6982b26a40bb43951265ba29d4c246ef0ff59c9fdcdf0ed04e0687de4d/pyyaml-6.0.3-cp314-cp314-win_amd64.whl", hash = "sha256:4a2e8cebe2ff6ab7d1050ecd59c25d4c8bd7e6f400f5f82b96557ac0abafd0ac", size = 156429, upload-time = "2025-09-25T21:32:57.844Z" },
+    { url = "https://files.pythonhosted.org/packages/f4/f4/a4541072bb9422c8a883ab55255f918fa378ecf083f5b85e87fc2b4eda1b/pyyaml-6.0.3-cp314-cp314-win_arm64.whl", hash = "sha256:93dda82c9c22deb0a405ea4dc5f2d0cda384168e466364dec6255b293923b2f3", size = 143912, upload-time = "2025-09-25T21:32:59.247Z" },
+    { url = "https://files.pythonhosted.org/packages/7c/f9/07dd09ae774e4616edf6cda684ee78f97777bdd15847253637a6f052a62f/pyyaml-6.0.3-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:02893d100e99e03eda1c8fd5c441d8c60103fd175728e23e431db1b589cf5ab3", size = 189108, upload-time = "2025-09-25T21:32:44.377Z" },
+    { url = "https://files.pythonhosted.org/packages/4e/78/8d08c9fb7ce09ad8c38ad533c1191cf27f7ae1effe5bb9400a46d9437fcf/pyyaml-6.0.3-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:c1ff362665ae507275af2853520967820d9124984e0f7466736aea23d8611fba", size = 183641, upload-time = "2025-09-25T21:32:45.407Z" },
+    { url = "https://files.pythonhosted.org/packages/7b/5b/3babb19104a46945cf816d047db2788bcaf8c94527a805610b0289a01c6b/pyyaml-6.0.3-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:6adc77889b628398debc7b65c073bcb99c4a0237b248cacaf3fe8a557563ef6c", size = 831901, upload-time = "2025-09-25T21:32:48.83Z" },
+    { url = "https://files.pythonhosted.org/packages/8b/cc/dff0684d8dc44da4d22a13f35f073d558c268780ce3c6ba1b87055bb0b87/pyyaml-6.0.3-cp314-cp314t-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:a80cb027f6b349846a3bf6d73b5e95e782175e52f22108cfa17876aaeff93702", size = 861132, upload-time = "2025-09-25T21:32:50.149Z" },
+    { url = "https://files.pythonhosted.org/packages/b1/5e/f77dc6b9036943e285ba76b49e118d9ea929885becb0a29ba8a7c75e29fe/pyyaml-6.0.3-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:00c4bdeba853cc34e7dd471f16b4114f4162dc03e6b7afcc2128711f0eca823c", size = 839261, upload-time = "2025-09-25T21:32:51.808Z" },
+    { url = "https://files.pythonhosted.org/packages/ce/88/a9db1376aa2a228197c58b37302f284b5617f56a5d959fd1763fb1675ce6/pyyaml-6.0.3-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:66e1674c3ef6f541c35191caae2d429b967b99e02040f5ba928632d9a7f0f065", size = 805272, upload-time = "2025-09-25T21:32:52.941Z" },
+    { url = "https://files.pythonhosted.org/packages/da/92/1446574745d74df0c92e6aa4a7b0b3130706a4142b2d1a5869f2eaa423c6/pyyaml-6.0.3-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:16249ee61e95f858e83976573de0f5b2893b3677ba71c9dd36b9cf8be9ac6d65", size = 829923, upload-time = "2025-09-25T21:32:54.537Z" },
+    { url = "https://files.pythonhosted.org/packages/f0/7a/1c7270340330e575b92f397352af856a8c06f230aa3e76f86b39d01b416a/pyyaml-6.0.3-cp314-cp314t-win_amd64.whl", hash = "sha256:4ad1906908f2f5ae4e5a8ddfce73c320c2a1429ec52eafd27138b7f1cbe341c9", size = 174062, upload-time = "2025-09-25T21:32:55.767Z" },
+    { url = "https://files.pythonhosted.org/packages/f1/12/de94a39c2ef588c7e6455cfbe7343d3b2dc9d6b6b2f40c4c6565744c873d/pyyaml-6.0.3-cp314-cp314t-win_arm64.whl", hash = "sha256:ebc55a14a21cb14062aa4162f906cd962b28e2e9ea38f9b4391244cd8de4ae0b", size = 149341, upload-time = "2025-09-25T21:32:56.828Z" },
+]
+
+[[package]]
+name = "requests"
+version = "2.32.5"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "certifi" },
+    { name = "charset-normalizer" },
+    { name = "idna" },
+    { name = "urllib3" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/c9/74/b3ff8e6c8446842c3f5c837e9c3dfcfe2018ea6ecef224c710c85ef728f4/requests-2.32.5.tar.gz", hash = "sha256:dbba0bac56e100853db0ea71b82b4dfd5fe2bf6d3754a8893c3af500cec7d7cf", size = 134517, upload-time = "2025-08-18T20:46:02.573Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/1e/db/4254e3eabe8020b458f1a747140d32277ec7a271daf1d235b70dc0b4e6e3/requests-2.32.5-py3-none-any.whl", hash = "sha256:2462f94637a34fd532264295e186976db0f5d453d1cdd31473c85a6a161affb6", size = 64738, upload-time = "2025-08-18T20:46:00.542Z" },
+]
+
 [[package]]
 name = "rich"
 version = "14.3.3"
@@ -578,6 +1252,15 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/e0/f9/0595336914c5619e5f28a1fb793285925a8cd4b432c9da0a987836c7f822/shellingham-1.5.4-py2.py3-none-any.whl", hash = "sha256:7ecfff8f2fd72616f7481040475a65b2bf8af90a56c89140852d1120324e8686", size = 9755, upload-time = "2023-10-24T04:13:38.866Z" },
 ]
 
+[[package]]
+name = "sniffio"
+version = "1.3.1"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/a2/87/a6771e1546d97e7e041b6ae58d80074f81b7d5121207425c964ddf5cfdbd/sniffio-1.3.1.tar.gz", hash = "sha256:f4324edc670a0f49750a81b895f35c3adb843cca46f0530f79fc1babb23789dc", size = 20372, upload-time = "2024-02-25T23:20:04.057Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/e9/44/75a9c9421471a6c4805dbf2356f7c181a29c1879239abab1ea2cc8f38b40/sniffio-1.3.1-py3-none-any.whl", hash = "sha256:2f6da418d1f1e0fddd844478f41680e794e6051915791a034ff65e5f100525a2", size = 10235, upload-time = "2024-02-25T23:20:01.196Z" },
+]
+
 [[package]]
 name = "tomli"
 version = "2.4.0"
@@ -632,6 +1315,18 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/23/d1/136eb2cb77520a31e1f64cbae9d33ec6df0d78bdf4160398e86eec8a8754/tomli-2.4.0-py3-none-any.whl", hash = "sha256:1f776e7d669ebceb01dee46484485f43a4048746235e683bcdffacdf1fb4785a", size = 14477, upload-time = "2026-01-11T11:22:37.446Z" },
 ]
 
+[[package]]
+name = "tqdm"
+version = "4.67.3"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "colorama", marker = "sys_platform == 'win32'" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/09/a9/6ba95a270c6f1fbcd8dac228323f2777d886cb206987444e4bce66338dd4/tqdm-4.67.3.tar.gz", hash = "sha256:7d825f03f89244ef73f1d4ce193cb1774a8179fd96f31d7e1dcde62092b960bb", size = 169598, upload-time = "2026-02-03T17:35:53.048Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/16/e1/3079a9ff9b8e11b846c6ac5c8b5bfb7ff225eee721825310c91b3b50304f/tqdm-4.67.3-py3-none-any.whl", hash = "sha256:ee1e4c0e59148062281c49d80b25b67771a127c85fc9676d3be5f243206826bf", size = 78374, upload-time = "2026-02-03T17:35:50.982Z" },
+]
+
 [[package]]
 name = "typer"
 version = "0.24.1"
@@ -667,3 +1362,27 @@ sdist = { url = "https://files.pythonhosted.org/packages/55/e3/70399cb7dd41c10ac
 wheels = [
     { url = "https://files.pythonhosted.org/packages/dc/9b/47798a6c91d8bdb567fe2698fe81e0c6b7cb7ef4d13da4114b41d239f65d/typing_inspection-0.4.2-py3-none-any.whl", hash = "sha256:4ed1cacbdc298c220f1bd249ed5287caa16f34d44ef4e9c3d0cbad5b521545e7", size = 14611, upload-time = "2025-10-01T02:14:40.154Z" },
 ]
+
+[[package]]
+name = "urllib3"
+version = "2.6.3"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/c7/24/5f1b3bdffd70275f6661c76461e25f024d5a38a46f04aaca912426a2b1d3/urllib3-2.6.3.tar.gz", hash = "sha256:1b62b6884944a57dbe321509ab94fd4d3b307075e0c2eae991ac71ee15ad38ed", size = 435556, upload-time = "2026-01-07T16:24:43.925Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/39/08/aaaad47bc4e9dc8c725e68f9d04865dbcb2052843ff09c97b08904852d84/urllib3-2.6.3-py3-none-any.whl", hash = "sha256:bf272323e553dfb2e87d9bfd225ca7b0f467b919d7bbd355436d3fd37cb0acd4", size = 131584, upload-time = "2026-01-07T16:24:42.685Z" },
+]
+
+[[package]]
+name = "virtualenv"
+version = "21.2.0"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "distlib" },
+    { name = "filelock" },
+    { name = "platformdirs" },
+    { name = "python-discovery" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/aa/92/58199fe10049f9703c2666e809c4f686c54ef0a68b0f6afccf518c0b1eb9/virtualenv-21.2.0.tar.gz", hash = "sha256:1720dc3a62ef5b443092e3f499228599045d7fea4c79199770499df8becf9098", size = 5840618, upload-time = "2026-03-09T17:24:38.013Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/c6/59/7d02447a55b2e55755011a647479041bc92a82e143f96a8195cb33bd0a1c/virtualenv-21.2.0-py3-none-any.whl", hash = "sha256:1bd755b504931164a5a496d217c014d098426cddc79363ad66ac78125f9d908f", size = 5825084, upload-time = "2026-03-09T17:24:35.378Z" },
+]


{'Baseline' if i == 0 else f'Run {i}'}	{_status_badge(True)} ({pct}% · {p}/{t})	{_status_badge(False)} ({pct}% · {p}/{t})	{_html_escape(getter(r))}
{field}
Evaluator	Target
{_fmt(v)}	{inner}	{inner}
{_html_escape(mr.name)}	{_html_escape(target)}