From e7e8ad4fa25d349fa699dee793e7ac41b6132f24 Mon Sep 17 00:00:00 2001
From: "user.email"
Date: Sun, 26 Apr 2026 17:19:16 -0700
Subject: [PATCH 1/3] Allow running multiple domains

---
 src/eva/cli.py | 106 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++----
 1 file changed, 100 insertions(+), 6 deletions(-)

diff --git a/src/eva/cli.py b/src/eva/cli.py
index 94c1ca95..d91265b2 100644
--- a/src/eva/cli.py
+++ b/src/eva/cli.py
@@ -5,26 +5,120 @@
 """
 
 import asyncio
+import os
 import sys
 
 from pydantic import ValidationError
 
 
+def _extract_domain_spec() -> tuple[str | None, bool]:
+    """Return (raw_domain_spec, came_from_argv).
+
+    Looks at --domain / --domain=... in sys.argv first, then EVA_DOMAIN env var.
+    The spec is returned unparsed; came_from_argv is True only for a sys.argv hit.
+    """
+    argv = sys.argv
+    for i, arg in enumerate(argv):
+        if arg == "--domain" and i + 1 < len(argv):
+            return argv[i + 1], True
+        if arg.startswith("--domain="):
+            return arg.split("=", 1)[1], True
+    env = os.environ.get("EVA_DOMAIN")
+    if env is not None:
+        return env, False
+    return None, False
+
+
+def _strip_domain_from_argv() -> None:
+    """Remove --domain (and its value) from sys.argv in place."""
+    argv = sys.argv
+    out = argv[:1]
+    i = 1
+    while i < len(argv):
+        a = argv[i]
+        if a == "--domain":
+            # Skip the flag together with the value after it.
+            i += 2
+            continue
+        if a.startswith("--domain="):
+            i += 1
+            continue
+        out.append(a)
+        i += 1
+    # Slice assignment mutates the existing list, so the removal really is in place.
+    sys.argv[:] = out
+
+
+def _has_explicit_run_id() -> bool:
+    """Return True if --run-id is on the command line or EVA_RUN_ID is set."""
+    if "EVA_RUN_ID" in os.environ:
+        return True
+    return any(a == "--run-id" or a.startswith("--run-id=") for a in sys.argv)
+
+
 def main():
     """Entry point for the `eva` console script."""
     # Import config first (lightweight) for fast --help and validation errors.
     # Heavy deps (pipecat, litellm, etc.) are imported only in run_benchmark.
     from eva.models.config import RunConfig
 
-    try:
-        config = RunConfig(_cli_parse_args=True, _env_file=".env")
-    except ValidationError as e:
-        print(e, file=sys.stderr)
-        sys.exit(1)
+    spec, _ = _extract_domain_spec()
+    domains = [d.strip() for d in spec.split(",")] if spec and "," in spec else None
+
+    if domains is None:
+        # Single-domain path — unchanged behavior.
+        try:
+            config = RunConfig(_cli_parse_args=True, _env_file=".env")
+        except ValidationError as e:
+            print(e, file=sys.stderr)
+            sys.exit(1)
+
+        from eva.run_benchmark import run_benchmark
+
+        sys.exit(asyncio.run(run_benchmark(config)))
+
+    # Multi-domain path: loop, one RunConfig + run_benchmark per domain.
+    # Dedupe preserving order, drop empties.
+    ordered = list(dict.fromkeys(d for d in domains if d))
+    if not ordered:
+        # A spec made only of commas/blanks names no domain; fail rather than
+        # exit 0 without having run anything.
+        print(f"No domains found in domain spec {spec!r}", file=sys.stderr)
+        sys.exit(1)
+
+    explicit_run_id = _has_explicit_run_id()
+    original_env = os.environ.get("EVA_DOMAIN")
+    _strip_domain_from_argv()  # prevent pydantic-settings from re-reading the comma list
 
     from eva.run_benchmark import run_benchmark
 
-    sys.exit(asyncio.run(run_benchmark(config)))
+    worst_exit = 0
+    try:
+        for domain in ordered:
+            os.environ["EVA_DOMAIN"] = domain
+            try:
+                config = RunConfig(_cli_parse_args=True, _env_file=".env")
+            except ValidationError as e:
+                print(f"[domain={domain}] {e}", file=sys.stderr)
+                worst_exit = max(worst_exit, 1)
+                continue
+
+            if explicit_run_id:
+                # The explicit run id is the same for every domain; append the domain to it.
+                config.run_id = f"{config.run_id}_{domain}"
+
+            print(f"\n=== Running domain: {domain} ===\n", file=sys.stderr)
+            exit_code = asyncio.run(run_benchmark(config))
+            # A None result counts as success, as sys.exit(None) does in the single-domain path.
+            worst_exit = max(worst_exit, exit_code or 0)
+    finally:
+        # Put EVA_DOMAIN back to its pre-loop state.
+        if original_env is None:
+            os.environ.pop("EVA_DOMAIN", None)
+        else:
+            os.environ["EVA_DOMAIN"] = original_env
+
+    sys.exit(worst_exit)
 
 
 if __name__ == "__main__":

From 1240f465f9f0ec3a2fcace9ca29aaee7643a3db8 Mon Sep 17 00:00:00 2001
From: "user.email"
Date: Sun, 26 Apr 2026 17:19:26 -0700
Subject: [PATCH 2/3] Fix name of prompt

---
configs/prompts/simulation.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/configs/prompts/simulation.yaml b/configs/prompts/simulation.yaml index 27d429dc..3f646848 100644 --- a/configs/prompts/simulation.yaml +++ b/configs/prompts/simulation.yaml @@ -359,7 +359,7 @@ user_simulator: IMPORTANT: Before ending the conversation, confirm with the agent that there are no outstanding actions. The end_call tool should only be called in a turn that is a brief goodbye — never in the same turn where you are providing the agent with data, an identifier, a request to transfer to a live agent, an approval to proceed, or any kind of additional information. - system_prompt_hr: | + system_prompt_medical_hr: | You are an employee or credentialed provider at a medical organization calling HR to complete an administrative task. You are communicating through a voice channel. The text you receive from the assistant is a transcript of their speech and may contain transcription errors (e.g., misheard words, garbled phrases). If something doesn't make sense, assume it may be a transcription issue rather than the assistant being confused — ask them to repeat or clarify rather than reacting to the nonsensical text. From d17485ad48cb7c552b97629eecbe16541b389794 Mon Sep 17 00:00:00 2001 From: "user.email" Date: Sun, 26 Apr 2026 17:58:46 -0700 Subject: [PATCH 3/3] Bump project version for new runs --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 715dec36..fd91e6d5 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "hatchling.build" [project] name = "eva" -version = "0.1.2" +version = "0.1.3" description = "A New End-to-end Framework for Evaluating Voice Agents (EVA)" authors = [ { name = "Tara Bogavelli" },