From cd59f0e69c30433779ab2d8c03ec9c859c10d1eb Mon Sep 17 00:00:00 2001
From: ethenotethan <42627790+ethenotethan@users.noreply.github.com>
Date: Mon, 20 Apr 2026 15:30:04 +0700
Subject: [PATCH] Make LLM backend any OpenAI-compatible endpoint

Drops the hardcoded OpenRouter base URL and Claude Agent SDK framing in
favor of generic OpenAI Chat Completions config (OPENAI_API_KEY,
OPENAI_BASE_URL, OPENAI_MODEL), so Flashlight works against OpenAI,
OpenRouter, vLLM, LM Studio, Ollama, and anything else that speaks the
same protocol + tool-calling schema.

- agent/burr_app.py: parametrize base URL, rename client helper to
  _chat_completion (avoid collision with the Burr call_llm action),
  only emit OpenRouter attribution headers when routing through
  openrouter.ai, default model -> gpt-4o-mini
- agent/cli.py, agent/agent.py: swap API-key checks to OPENAI_API_KEY
  with provider-agnostic error messages
- .env.example: document OPENAI_* vars + per-provider examples
- README.md: drop Claude Agent SDK references and add an LLM
  configuration section with an env-var table and example configs
---
 .env.example      | 41 ++++++++++++++++++-------
 README.md         | 49 +++++++++++++++++++++++-------
 agent/agent.py    |  9 ++++--
 agent/burr_app.py | 76 +++++++++++++++++++++++++++++++----------------
 agent/cli.py      | 10 +++++--
 5 files changed, 132 insertions(+), 53 deletions(-)

diff --git a/.env.example b/.env.example
index e8ac797..51cec64 100644
--- a/.env.example
+++ b/.env.example
@@ -1,21 +1,40 @@
-# OpenRouter API Configuration
-# Get your key at: https://openrouter.ai/keys
-OPENROUTER_API_KEY=your_api_key_here
+# Flashlight LLM configuration
+#
+# Flashlight speaks the OpenAI Chat Completions protocol, so it works with
+# any OpenAI-compatible endpoint — OpenAI, OpenRouter, vLLM, LM Studio,
+# Ollama, Together, Groq, etc.
+# ---------------------------------------------------------------------------
 
-# Model Configuration
-# OpenRouter model identifier (see https://openrouter.ai/models)
-# Examples: anthropic/claude-sonnet-4, anthropic/claude-3.5-sonnet, openai/gpt-4o
-OPENROUTER_MODEL=anthropic/claude-sonnet-4
+# Required: bearer token for the target endpoint (must be non-empty; use any placeholder for local servers that ignore it).
+OPENAI_API_KEY=your_api_key_here
 
-# Logging Configuration (optional)
-# Enable verbose logging to see detailed interactions in the terminal
+# Optional: OpenAI-compatible base URL. Defaults to https://api.openai.com/v1.
+# Examples:
+#   OpenAI        https://api.openai.com/v1
+#   OpenRouter    https://openrouter.ai/api/v1
+#   vLLM (local)  http://localhost:8000/v1
+#   LM Studio     http://localhost:1234/v1
+#   Ollama        http://localhost:11434/v1
+# OPENAI_BASE_URL=https://api.openai.com/v1
+
+# Optional: model identifier. Defaults to gpt-4o-mini.
+# Must be a model served by whichever endpoint you chose above.
+# Examples: gpt-4o, gpt-4o-mini, anthropic/claude-sonnet-4 (OpenRouter),
+#           meta-llama/Llama-3.1-70B-Instruct (vLLM/Together), ...
+# OPENAI_MODEL=gpt-4o-mini
+
+# ---------------------------------------------------------------------------
+# Logging (optional)
+# ---------------------------------------------------------------------------
+
+# Enable verbose logging to see detailed interactions in the terminal.
 # AGENT_VERBOSE=true
 
-# Enable debug logging for full trace-level logging
+# Enable debug logging for full trace-level logging.
 # AGENT_DEBUG=true
 
 # When verbose/debug mode is enabled, you'll see:
-# - API requests via OpenRouter
+# - LLM API requests and responses
 # - Subagent spawning and lifecycle
 # - Tool calls with parameters
 # - Tool results and success/failure status
diff --git a/README.md b/README.md
index 33fadca..680c6e9 100644
--- a/README.md
+++ b/README.md
@@ -1,6 +1,6 @@
 # GitHub Flashlight
 
-A sophisticated multi-agent processing pipeline using the Claude Agent SDK that performs dependency-aware codebase analysis and visualization through multi-agent composition.
+A multi-agent processing pipeline that performs dependency-aware codebase analysis and visualization. Built on [Burr](https://github.com/apache/burr) for explicit state-machine orchestration, and speaks the OpenAI Chat Completions protocol — so it works with **any OpenAI-compatible endpoint** (OpenAI, OpenRouter, vLLM, LM Studio, Ollama, Together, Groq, …).
 
 ## Features
 
@@ -117,9 +117,37 @@ source venv/bin/activate  # On Windows: venv\Scripts\activate
 # Install dependencies
 pip install -e .
 
-# Set up API key
+# Configure your LLM endpoint
 cp .env.example .env
-# Edit .env and add your ANTHROPIC_API_KEY
+# Edit .env and set OPENAI_API_KEY (and optionally OPENAI_BASE_URL / OPENAI_MODEL)
+```
+
+### LLM configuration
+
+Flashlight uses the OpenAI Chat Completions API with tool calling, so any OpenAI-compatible provider works as long as the chosen model supports tool calls. Set:
+
+| Variable | Required | Default | Notes |
+|----------|----------|---------|-------|
+| `OPENAI_API_KEY` | yes | — | Bearer token for the target endpoint |
+| `OPENAI_BASE_URL` | no | `https://api.openai.com/v1` | Point at OpenAI, OpenRouter, vLLM, LM Studio, Ollama, etc. |
+| `OPENAI_MODEL` | no | `gpt-4o-mini` | Any model served by your chosen endpoint |
+
+Example configurations:
+
+```bash
+# OpenAI
+OPENAI_API_KEY=sk-...
+OPENAI_MODEL=gpt-4o
+
+# OpenRouter (access to Claude, Llama, etc. via one API)
+OPENAI_API_KEY=sk-or-...
+OPENAI_BASE_URL=https://openrouter.ai/api/v1
+OPENAI_MODEL=anthropic/claude-sonnet-4
+
+# Local server (vLLM port shown; LM Studio uses :1234/v1, Ollama uses :11434/v1)
+OPENAI_API_KEY=not-needed-but-must-be-set
+OPENAI_BASE_URL=http://localhost:8000/v1
+OPENAI_MODEL=meta-llama/Llama-3.1-70B-Instruct
 ```
 
 ## Usage
@@ -150,12 +178,11 @@ AGENT_DEBUG=true python -m github_flashlight.agent
 ```
 
 When enabled, you'll see real-time information about:
-- 📤 API requests to Claude
-- 📥 API responses
-- 🚀 Subagent spawning and lifecycle
-- 🔧 Tool calls with parameters
-- ✅ Tool results and success/failure status
-- 📝 Agent context and model information
+- LLM API requests and responses
+- Subagent spawning and lifecycle
+- Tool calls with parameters
+- Tool results and success/failure status
+- Agent context and model information
 
 This is useful for:
 - Understanding what the agents are doing in real-time
@@ -232,7 +259,7 @@ The agent will:
 ## Requirements
 
 - Python 3.10+
-- Claude API key
+- An API key for an OpenAI-compatible endpoint (OpenAI, OpenRouter, a self-hosted vLLM/LM Studio/Ollama server, etc.)
 - Access to the codebase to analyze
 
 ## Development
@@ -287,7 +314,7 @@ The primary leader orchestrates a sophisticated multi-phase workflow:
 
 ## Contributing
 
-This project showcases the Claude Agent SDK's multi-agent composition capabilities. Feel free to extend it with:
+This project showcases dependency-aware multi-agent composition over the OpenAI Chat Completions protocol. Feel free to extend it with:
 - Additional language support (Java, C#, etc.)
 - Enhanced metrics collection (LOC, complexity, test coverage)
 - Incremental analysis for large repositories
diff --git a/agent/agent.py b/agent/agent.py
index 3034fd5..8f2e0de 100644
--- a/agent/agent.py
+++ b/agent/agent.py
@@ -55,10 +55,13 @@ def analyze(service_name: str):
         service_name: Name of the service (must have discovery files in /tmp/{service_name}/)
     """
     # Check API key first
-    if not os.environ.get("OPENROUTER_API_KEY"):
-        print("\nError: OPENROUTER_API_KEY not found.")
+    if not os.environ.get("OPENAI_API_KEY"):
+        print("\nError: OPENAI_API_KEY not found.")
         print("Set it in a .env file or export it in your shell.")
-        print("Get your key at: https://openrouter.ai/keys\n")
+        print(
+            "Flashlight works with any OpenAI-compatible endpoint; point "
+            "OPENAI_BASE_URL at your provider (defaults to https://api.openai.com/v1).\n"
+        )
         return
 
     # Setup session directory and transcript
diff --git a/agent/burr_app.py b/agent/burr_app.py
index a7b963c..0e34292 100644
--- a/agent/burr_app.py
+++ b/agent/burr_app.py
@@ -45,18 +45,35 @@
 
 # ---------------------------------------------------------------------------
 # Configuration
+#
+# Flashlight speaks the OpenAI Chat Completions protocol, so it works with
+# any OpenAI-compatible endpoint (OpenAI, OpenRouter, vLLM, LM Studio, Ollama,
+# Together, Groq, etc.).
+#
+# Required:
+#     OPENAI_API_KEY   - bearer token for the target endpoint
+#
+# Optional:
+#     OPENAI_BASE_URL  - defaults to https://api.openai.com/v1
+#     OPENAI_MODEL     - defaults to gpt-4o-mini
 # ---------------------------------------------------------------------------
 
-OPENROUTER_BASE_URL = "https://openrouter.ai/api/v1"
-DEFAULT_MODEL = os.environ.get("OPENROUTER_MODEL", "anthropic/claude-sonnet-4")
+DEFAULT_BASE_URL = "https://api.openai.com/v1"
+DEFAULT_MODEL = os.environ.get("OPENAI_MODEL", "gpt-4o-mini")
+
+
+def get_base_url() -> str:
+    """Return the OpenAI-compatible API base URL."""
+    return os.environ.get("OPENAI_BASE_URL", DEFAULT_BASE_URL).rstrip("/")
 
 
 def get_api_key() -> str:
-    """Get OpenRouter API key from environment."""
-    key = os.environ.get("OPENROUTER_API_KEY", "")
+    """Return the OpenAI-compatible API key."""
+    key = os.environ.get("OPENAI_API_KEY", "")
     if not key:
         raise RuntimeError(
-            "OPENROUTER_API_KEY not set. Get one at https://openrouter.ai/keys"
+            "OPENAI_API_KEY not set. Point OPENAI_BASE_URL at your provider "
+            "(OpenAI, OpenRouter, vLLM, LM Studio, ...) and set OPENAI_API_KEY."
         )
     return key
 
@@ -503,7 +520,7 @@ def subagent_call_llm(state: State) -> State:
 
         api_messages = [{"role": "system", "content": sys_prompt}] + messages
 
-        response = call_openrouter(
+        response = _chat_completion(
             messages=api_messages,
             tools=subagent_tools,
         )
@@ -687,7 +704,7 @@ def _run_subagent_loop(
         )
 
         try:
-            response = call_openrouter(
+            response = _chat_completion(
                 messages=api_messages,
                 tools=subagent_tools,
             )
@@ -773,11 +790,11 @@ def _run_subagent_loop(
 
 
 # ---------------------------------------------------------------------------
-# OpenRouter LLM client
+# OpenAI-compatible LLM client
 # ---------------------------------------------------------------------------
 
 
-def call_openrouter(
+def _chat_completion(
     messages: List[Dict[str, Any]],
     model: str = DEFAULT_MODEL,
     tools: Optional[List[Dict]] = None,
@@ -786,7 +803,11 @@ def call_openrouter(
     max_retries: int = 3,
     initial_retry_delay: float = 2.0,
 ) -> Dict[str, Any]:
-    """Call OpenRouter API and return the response with retry logic.
+    """Call an OpenAI-compatible Chat Completions endpoint with retry logic.
+
+    Works with any OpenAI-compatible API (OpenAI, OpenRouter, vLLM, LM Studio,
+    Ollama, Together, Groq, ...). Base URL is resolved from OPENAI_BASE_URL
+    and defaults to https://api.openai.com/v1.
 
     Implements exponential backoff for transient failures:
     - Timeouts (httpx.TimeoutException)
@@ -814,6 +835,7 @@ def call_openrouter(
         RuntimeError: On exhausted retries
     """
     api_key = get_api_key()
+    base_url = get_base_url()
 
     payload = {
         "model": model,
@@ -826,9 +848,12 @@ def call_openrouter(
     headers = {
         "Authorization": f"Bearer {api_key}",
         "Content-Type": "application/json",
-        "HTTP-Referer": "https://github.com/anthropics/flashlight",
-        "X-Title": "flashlight",
     }
+    # OpenRouter-specific attribution headers (harmless but only meaningful
+    # when actually routing through openrouter.ai).
+    if "openrouter.ai" in base_url:
+        headers["HTTP-Referer"] = "https://github.com/anthropics/flashlight"
+        headers["X-Title"] = "flashlight"
 
     last_exception: Optional[Exception] = None
     retry_delay = initial_retry_delay
@@ -837,7 +862,7 @@ def call_openrouter(
         try:
             with httpx.Client(timeout=timeout) as client:
                 response = client.post(
-                    f"{OPENROUTER_BASE_URL}/chat/completions",
+                    f"{base_url}/chat/completions",
                     json=payload,
                     headers=headers,
                 )
@@ -892,7 +917,7 @@ def call_openrouter(
                     "timeout" if is_timeout else f"HTTP {e.response.status_code}"
                 )
                 logger.warning(
-                    f"OpenRouter request failed ({error_type}), "
+                    f"LLM request failed ({error_type}), "
                     f"retrying in {retry_delay:.1f}s (attempt {attempt + 1}/{max_retries + 1})"
                 )
                 time.sleep(retry_delay)
@@ -906,7 +931,7 @@ def call_openrouter(
             last_exception = e
             if attempt < max_retries:
                 logger.warning(
-                    f"OpenRouter request failed (network error: {e}), "
+                    f"LLM request failed (network error: {e}), "
                     f"retrying in {retry_delay:.1f}s (attempt {attempt + 1}/{max_retries + 1})"
                 )
                 time.sleep(retry_delay)
@@ -917,7 +942,7 @@ def call_openrouter(
 
     # Should not reach here, but just in case
     raise RuntimeError(
-        f"OpenRouter request failed after {max_retries + 1} attempts: {last_exception}"
+        f"LLM request failed after {max_retries + 1} attempts: {last_exception}"
     )
 
 
@@ -940,8 +965,9 @@ def call_openrouter(
 def call_llm(state: State, __tracer: "TracerFactory") -> State:
     """Call the LLM with the current conversation history.
 
-    This is the core LLM action - it sends messages to OpenRouter and
-    processes the response, extracting any tool calls.
+    This is the core LLM action - it sends messages to the configured
+    OpenAI-compatible endpoint and processes the response, extracting
+    any tool calls.
 
     Sets has_pending_tools boolean for transition conditions.
     Source: agent/burr_app.py
@@ -949,7 +975,7 @@ def call_llm(state: State, __tracer: "TracerFactory") -> State:
 
     Uses __tracer for nested span visibility into:
     - Message preparation
-    - OpenRouter API call (with token/model details)
+    - LLM API call (with token/model details)
     - Response processing
     """
     messages = state.get("messages", [])
@@ -965,12 +991,12 @@ def call_llm(state: State, __tracer: "TracerFactory") -> State:
             has_tools=True,
         )
 
-    # Span: Call the LLM via OpenRouter
-    with __tracer("openrouter_api_call", span_dependencies=["prepare_messages"]) as t:
-        model = os.environ.get("OPENROUTER_MODEL", DEFAULT_MODEL)
+    # Span: Call the LLM
+    with __tracer("llm_api_call", span_dependencies=["prepare_messages"]) as t:
+        model = os.environ.get("OPENAI_MODEL", DEFAULT_MODEL)
         t.log_attributes(model=model, tool_count=len(AVAILABLE_TOOLS))
 
-        response = call_openrouter(
+        response = _chat_completion(
             messages=api_messages,
             tools=AVAILABLE_TOOLS,
         )
@@ -986,7 +1012,7 @@ def call_llm(state: State, __tracer: "TracerFactory") -> State:
         )
 
     # Span: Process response
-    with __tracer("process_response", span_dependencies=["openrouter_api_call"]) as t:
+    with __tracer("process_response", span_dependencies=["llm_api_call"]) as t:
         content = response["content"]
         tool_calls = response["tool_calls"]
 
@@ -1483,7 +1509,7 @@ def synthesize(state: State, __tracer: "TracerFactory") -> State:
     for iteration in range(max_iterations):
         with __tracer(f"synth_iteration_{iteration}") as iter_t:
             try:
-                response = call_openrouter(
+                response = _chat_completion(
                     messages=api_messages,
                     tools=subagent_tools,
                 )
diff --git a/agent/cli.py b/agent/cli.py
index 61cabea..96099c6 100644
--- a/agent/cli.py
+++ b/agent/cli.py
@@ -259,9 +259,13 @@ def analyze(
 
     load_dotenv()
 
-    if not os.environ.get("OPENROUTER_API_KEY"):
-        print("Error: OPENROUTER_API_KEY not found.", file=sys.stderr)
-        print("Get your key at: https://openrouter.ai/keys", file=sys.stderr)
+    if not os.environ.get("OPENAI_API_KEY"):
+        print("Error: OPENAI_API_KEY not found.", file=sys.stderr)
+        print(
+            "Flashlight uses any OpenAI-compatible endpoint. Set OPENAI_API_KEY "
+            "and (optionally) OPENAI_BASE_URL + OPENAI_MODEL.",
+            file=sys.stderr,
+        )
         sys.exit(1)
 
     repo = Path(repo_path).resolve()