From e7e8ad4fa25d349fa699dee793e7ac41b6132f24 Mon Sep 17 00:00:00 2001
From: "user.email" <tara.bogavelli@servicenow.com>
Date: Sun, 26 Apr 2026 17:19:16 -0700
Subject: [PATCH 1/3] Allow running multiple domains

---
 src/eva/cli.py | 97 ++++++++++++++++++++++++++++++++++++++++++++++----
 1 file changed, 91 insertions(+), 6 deletions(-)

diff --git a/src/eva/cli.py b/src/eva/cli.py
index 94c1ca95..d91265b2 100644
--- a/src/eva/cli.py
+++ b/src/eva/cli.py
@@ -5,26 +5,111 @@
 """
 
 import asyncio
+import os
 import sys
 
 from pydantic import ValidationError
 
 
+def _extract_domain_spec() -> tuple[str | None, bool]:
+    """Find the raw domain spec and report whether it was given on the command line.
+
+    The first --domain VALUE / --domain=VALUE in sys.argv wins; EVA_DOMAIN is the fallback.
+    """
+    args = sys.argv
+    last = len(args) - 1
+    for pos, token in enumerate(args):
+        if token == "--domain" and pos < last:
+            return args[pos + 1], True
+        if token.startswith("--domain="):
+            return token.partition("=")[2], True
+    if "EVA_DOMAIN" in os.environ:
+        return os.environ["EVA_DOMAIN"], False
+    return None, False
+
+
+def _strip_domain_from_argv() -> None:
+    """Rebuild sys.argv without --domain flags; a bare --domain also drops the next token."""
+    original = sys.argv
+    kept = [original[0]]
+    drop_next = False  # set after a bare --domain so that its value is dropped too
+    for token in original[1:]:
+        if drop_next:
+            # Discarded unconditionally, even if it looks like another flag.
+            drop_next = False
+        elif token == "--domain":
+            drop_next = True
+        elif not token.startswith("--domain="):
+            # Every other argument is kept, in its original order.
+            kept.append(token)
+        # Remaining case: the --domain=VALUE form, dropped as a single token.
+    sys.argv = kept
+
+
+def _has_explicit_run_id() -> bool:  # True if --run-id is in sys.argv or EVA_RUN_ID is set
+    return "EVA_RUN_ID" in os.environ or any(a.partition("=")[0] == "--run-id" for a in sys.argv)
+
+
 def main():
     """Entry point for the `eva` console script."""
     # Import config first (lightweight) for fast --help and validation errors.
     # Heavy deps (pipecat, litellm, etc.) are imported only in run_benchmark.
     from eva.models.config import RunConfig
 
-    try:
-        config = RunConfig(_cli_parse_args=True, _env_file=".env")
-    except ValidationError as e:
-        print(e, file=sys.stderr)
-        sys.exit(1)
+    spec, _ = _extract_domain_spec()
+    domains = [d.strip() for d in spec.split(",")] if spec and "," in spec else None
+
+    if domains is None:
+        # Single-domain path — unchanged behavior.
+        try:
+            config = RunConfig(_cli_parse_args=True, _env_file=".env")
+        except ValidationError as e:
+            print(e, file=sys.stderr)
+            sys.exit(1)
+
+        from eva.run_benchmark import run_benchmark
+
+        sys.exit(asyncio.run(run_benchmark(config)))
+
+    # Multi-domain path: loop, one RunConfig + run_benchmark per domain.
+    # Dedupe preserving order, drop empties.
+    ordered = list(dict.fromkeys(d for d in domains if d))
+    if not ordered:
+        # A spec made only of separators (e.g. ",") names no domain; exit non-zero
+        # rather than report success without having run anything.
+        print(f"No domains found in domain spec {spec!r}", file=sys.stderr)
+        sys.exit(1)
+
+    explicit_run_id = _has_explicit_run_id()
+    original_env = os.environ.get("EVA_DOMAIN")
+    _strip_domain_from_argv()  # prevent pydantic-settings from re-reading the comma list
 
     from eva.run_benchmark import run_benchmark
 
-    sys.exit(asyncio.run(run_benchmark(config)))
+    worst_exit = 0
+    try:
+        for domain in ordered:
+            os.environ["EVA_DOMAIN"] = domain
+            try:
+                config = RunConfig(_cli_parse_args=True, _env_file=".env")
+            except ValidationError as e:
+                print(f"[domain={domain}] {e}", file=sys.stderr)
+                worst_exit = max(worst_exit, 1)
+                continue
+
+            if explicit_run_id:  # suffix an explicitly supplied run id so each domain gets its own
+                config.run_id = f"{config.run_id}_{domain}"
+
+            print(f"\n=== Running domain: {domain} ===\n", file=sys.stderr)
+            exit_code = asyncio.run(run_benchmark(config))
+            worst_exit = max(worst_exit, exit_code or 0)  # None means success, like sys.exit(None)
+    finally:
+        if original_env is None:
+            os.environ.pop("EVA_DOMAIN", None)
+        else:
+            os.environ["EVA_DOMAIN"] = original_env
+
+    sys.exit(worst_exit)
 
 
 if __name__ == "__main__":

From 1240f465f9f0ec3a2fcace9ca29aaee7643a3db8 Mon Sep 17 00:00:00 2001
From: "user.email" <tara.bogavelli@servicenow.com>
Date: Sun, 26 Apr 2026 17:19:26 -0700
Subject: [PATCH 2/3] Fix name of prompt

---
 configs/prompts/simulation.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/configs/prompts/simulation.yaml b/configs/prompts/simulation.yaml
index 27d429dc..3f646848 100644
--- a/configs/prompts/simulation.yaml
+++ b/configs/prompts/simulation.yaml
@@ -359,7 +359,7 @@ user_simulator:
     
     IMPORTANT: Before ending the conversation, confirm with the agent that there are no outstanding actions. The end_call tool should only be called in a turn that is a brief goodbye — never in the same turn where you are providing the agent with data, an identifier, a request to transfer to a live agent, an approval to proceed, or any kind of additional information.
 
-  system_prompt_hr: |
+  system_prompt_medical_hr: |
     You are an employee or credentialed provider at a medical organization calling HR to complete an administrative task.
 
     You are communicating through a voice channel. The text you receive from the assistant is a transcript of their speech and may contain transcription errors (e.g., misheard words, garbled phrases). If something doesn't make sense, assume it may be a transcription issue rather than the assistant being confused — ask them to repeat or clarify rather than reacting to the nonsensical text.

From d17485ad48cb7c552b97629eecbe16541b389794 Mon Sep 17 00:00:00 2001
From: "user.email" <tara.bogavelli@servicenow.com>
Date: Sun, 26 Apr 2026 17:58:46 -0700
Subject: [PATCH 3/3] Bump project version for new runs

---
 pyproject.toml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pyproject.toml b/pyproject.toml
index 715dec36..fd91e6d5 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
 
 [project]
 name = "eva"
-version = "0.1.2"
+version = "0.1.3"
 description = "A New End-to-end Framework for Evaluating Voice Agents (EVA)"
 authors = [
     { name = "Tara Bogavelli" },