Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
23 commits
Select commit Hold shift + click to select a range
aff4343
Simplify run-eval workflow by removing polling logic
openhands-agent Nov 25, 2025
edeab93
Add eval_branch parameter for testing feature branches
openhands-agent Nov 25, 2025
12b9613
Move model configs to SDK and pass full configs to evaluation
openhands-agent Nov 25, 2025
7b1bc9a
Add authorization validation for workflow_dispatch
openhands-agent Nov 26, 2025
fd7cc3d
Add trigger_reason propagation to evaluation workflow
openhands-agent Nov 26, 2025
c64080a
Remove model stubs JSON and use models.json as single source of truth
openhands-agent Nov 26, 2025
e7c1787
Address code review comments
openhands-agent Nov 26, 2025
8ecec55
Add tests for find_models_by_id() function
openhands-agent Nov 26, 2025
a00a3ed
change file name
simonrosenberg Nov 26, 2025
1eb3c5c
Merge branch 'main' into openhands/orchestration-refactor
simonrosenberg Nov 26, 2025
d17b85d
Implement evaluation workflow improvements
openhands-agent Nov 26, 2025
644805b
Add benchmarks_branch parameter to support feature branch testing
openhands-agent Nov 26, 2025
7782d48
Fix module name in workflow from resolve_model_configs to resolve_mod…
openhands-agent Nov 26, 2025
fc143ed
Rename resolve_model_configs.py to resolve_model_config.py for consis…
openhands-agent Nov 26, 2025
36755b7
update step name for clarity
simonrosenberg Nov 27, 2025
df3d693
Merge branch 'main' into openhands/orchestration-refactor
simonrosenberg Nov 27, 2025
45afbc3
Add temporary pr_number input to workflow_dispatch for testing PR com…
simonrosenberg Nov 27, 2025
31a5464
Rename test file to match module name
simonrosenberg Nov 28, 2025
a64cf88
Simplify SDK SHA resolution and remove temporary parameters
simonrosenberg Nov 28, 2025
87d29d5
Fix initial checkout to handle short SHA references
simonrosenberg Nov 28, 2025
e5bbafe
Add temporary pr_number input for testing clickable links
simonrosenberg Nov 28, 2025
506abb6
Revert "Add temporary pr_number input for testing clickable links"
simonrosenberg Dec 1, 2025
9898814
Merge branch 'main' into openhands/orchestration-refactor
simonrosenberg Dec 2, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 0 additions & 7 deletions .github/run-eval/allowed-model-stubs.json

This file was deleted.

14 changes: 0 additions & 14 deletions .github/run-eval/authorized-labelers.txt

This file was deleted.

112 changes: 112 additions & 0 deletions .github/run-eval/resolve_model_config.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,112 @@
#!/usr/bin/env python3
"""
Resolve model IDs to full model configurations.

Reads:
- MODEL_IDS: comma-separated model IDs

Outputs to GITHUB_OUTPUT:
- models_json: JSON array of full model configs with display names
"""

import json
import os
import sys
from typing import NoReturn


# Known model configurations, keyed by model ID.
# Every entry carries its own "id" field, so the mapping key is taken from the
# entry itself rather than being written out a second time.
MODELS = {
    entry["id"]: entry
    for entry in (
        {
            "id": "claude-sonnet-4-5-20250929",
            "display_name": "Claude Sonnet 4.5",
            "llm_config": {
                "model": "litellm_proxy/claude-sonnet-4-5-20250929",
                "temperature": 0.0,
            },
        },
        {
            "id": "claude-haiku-4-5-20251001",
            "display_name": "Claude Haiku 4.5",
            "llm_config": {
                "model": "litellm_proxy/claude-haiku-4-5-20251001",
                "temperature": 0.0,
            },
        },
        {
            "id": "gpt-5-mini-2025-08-07",
            "display_name": "GPT-5 Mini",
            "llm_config": {
                "model": "litellm_proxy/gpt-5-mini-2025-08-07",
                "temperature": 1.0,
            },
        },
        {
            "id": "deepseek-chat",
            "display_name": "DeepSeek Chat",
            "llm_config": {"model": "litellm_proxy/deepseek/deepseek-chat"},
        },
        {
            "id": "kimi-k2-thinking",
            "display_name": "Kimi K2 Thinking",
            "llm_config": {"model": "litellm_proxy/moonshot/kimi-k2-thinking"},
        },
    )
}


def error_exit(msg: str, exit_code: int = 1) -> NoReturn:
    """Print an error message to stderr and terminate the process.

    Args:
        msg: Text to report; written to stderr prefixed with "ERROR: ".
        exit_code: Status passed to ``sys.exit``. Defaults to 1.

    Raises:
        SystemExit: Always. The function never returns to its caller, which
            is why the return annotation is ``NoReturn`` rather than ``None``.
    """
    print(f"ERROR: {msg}", file=sys.stderr)
    sys.exit(exit_code)


def get_required_env(key: str) -> str:
    """Return the value of environment variable ``key`` or abort.

    Args:
        key: Name of the environment variable to read.

    Returns:
        The variable's value.

    Raises:
        SystemExit: Via ``error_exit`` when the variable is unset or empty.
    """
    if not (value := os.getenv(key)):
        # An unset variable and an empty one are handled the same way.
        error_exit(f"{key} not set")
    return value


def find_models_by_id(model_ids: list[str]) -> list[dict]:
    """Look up the configuration for each requested model ID.

    The lookup stops at the first ID that is not a key of ``MODELS``.

    Args:
        model_ids: Model IDs to resolve, in the order they should be returned.

    Returns:
        The ``MODELS`` entries for ``model_ids``, in the same order.

    Raises:
        SystemExit: Via ``error_exit`` as soon as an unknown ID is met; the
            message lists the known IDs in sorted order.
    """
    configs = []
    for requested in model_ids:
        if requested in MODELS:
            configs.append(MODELS[requested])
            continue
        known_ids = ", ".join(sorted(MODELS))
        error_exit(
            f"Model ID '{requested}' not found. Available models: {known_ids}"
        )
    return configs


def main() -> None:
    """Resolve the IDs in MODEL_IDS and append the result to GITHUB_OUTPUT.

    Reads the comma-separated MODEL_IDS environment variable, resolves each ID
    with ``find_models_by_id``, and appends one ``models_json=<json>`` line to
    the file named by the GITHUB_OUTPUT environment variable. A summary line
    is printed to stdout on success.

    Raises:
        SystemExit: If either environment variable is unset or empty, if
            MODEL_IDS holds no IDs once blank entries are dropped, or if any
            ID is unknown.
    """
    model_ids_str = get_required_env("MODEL_IDS")
    github_output = get_required_env("GITHUB_OUTPUT")

    # Parse requested model IDs, dropping surrounding whitespace and blanks
    model_ids = [mid.strip() for mid in model_ids_str.split(",") if mid.strip()]

    # A value such as "," or " , " passes the non-empty check above yet names
    # no model; fail loudly instead of writing an empty models_json array.
    if not model_ids:
        error_exit("MODEL_IDS does not contain any model IDs")

    # Resolve model configs
    resolved = find_models_by_id(model_ids)

    # Output as compact JSON; json.dumps without indent yields a single line
    models_json = json.dumps(resolved, separators=(",", ":"))
    with open(github_output, "a", encoding="utf-8") as f:
        f.write(f"models_json={models_json}\n")

    print(f"Resolved {len(resolved)} model(s): {', '.join(model_ids)}")


# Script entry point: run main() only when this file is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
Loading
Loading