From cd59f0e69c30433779ab2d8c03ec9c859c10d1eb Mon Sep 17 00:00:00 2001 From: ethenotethan <42627790+ethenotethan@users.noreply.github.com> Date: Mon, 20 Apr 2026 15:30:04 +0700 Subject: [PATCH] Make LLM backend any OpenAI-compatible endpoint Drops the hardcoded OpenRouter base URL and Claude Agent SDK framing in favor of generic OpenAI Chat Completions config (OPENAI_API_KEY, OPENAI_BASE_URL, OPENAI_MODEL), so Flashlight works against OpenAI, OpenRouter, vLLM, LM Studio, Ollama, and anything else that speaks the same protocol + tool-calling schema. - agent/burr_app.py: parametrize base URL, rename client helper to _chat_completion (avoid collision with the Burr call_llm action), only emit OpenRouter attribution headers when routing through openrouter.ai, default model -> gpt-4o-mini - agent/cli.py, agent/agent.py: swap API-key checks to OPENAI_API_KEY with provider-agnostic error messages - .env.example: document OPENAI_* vars + per-provider examples - README.md: drop Claude Agent SDK references and add an LLM configuration section with an env-var table and example configs --- .env.example | 41 ++++++++++++++++++------- README.md | 49 +++++++++++++++++++++++------- agent/agent.py | 9 ++++-- agent/burr_app.py | 76 +++++++++++++++++++++++++++++++---------------- agent/cli.py | 10 +++++-- 5 files changed, 132 insertions(+), 53 deletions(-) diff --git a/.env.example b/.env.example index e8ac797..51cec64 100644 --- a/.env.example +++ b/.env.example @@ -1,21 +1,40 @@ -# OpenRouter API Configuration -# Get your key at: https://openrouter.ai/keys -OPENROUTER_API_KEY=your_api_key_here +# Flashlight LLM configuration +# +# Flashlight speaks the OpenAI Chat Completions protocol, so it works with +# any OpenAI-compatible endpoint — OpenAI, OpenRouter, vLLM, LM Studio, +# Ollama, Together, Groq, etc. 
+# --------------------------------------------------------------------------- -# Model Configuration -# OpenRouter model identifier (see https://openrouter.ai/models) -# Examples: anthropic/claude-sonnet-4, anthropic/claude-3.5-sonnet, openai/gpt-4o -OPENROUTER_MODEL=anthropic/claude-sonnet-4 +# Required: bearer token for the target endpoint. +OPENAI_API_KEY=your_api_key_here -# Logging Configuration (optional) -# Enable verbose logging to see detailed interactions in the terminal +# Optional: OpenAI-compatible base URL. Defaults to https://api.openai.com/v1. +# Examples: +# OpenAI https://api.openai.com/v1 +# OpenRouter https://openrouter.ai/api/v1 +# vLLM (local) http://localhost:8000/v1 +# LM Studio http://localhost:1234/v1 +# Ollama http://localhost:11434/v1 +# OPENAI_BASE_URL=https://api.openai.com/v1 + +# Optional: model identifier. Defaults to gpt-4o-mini. +# Must be a model served by whichever endpoint you chose above. +# Examples: gpt-4o, gpt-4o-mini, anthropic/claude-sonnet-4 (OpenRouter), +# meta-llama/Llama-3.1-70B-Instruct (vLLM/Together), ... +# OPENAI_MODEL=gpt-4o-mini + +# --------------------------------------------------------------------------- +# Logging (optional) +# --------------------------------------------------------------------------- + +# Enable verbose logging to see detailed interactions in the terminal. # AGENT_VERBOSE=true -# Enable debug logging for full trace-level logging +# Enable debug logging for full trace-level logging. 
# AGENT_DEBUG=true # When verbose/debug mode is enabled, you'll see: -# - API requests via OpenRouter +# - LLM API requests and responses # - Subagent spawning and lifecycle # - Tool calls with parameters # - Tool results and success/failure status diff --git a/README.md b/README.md index 33fadca..680c6e9 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ # GitHub Flashlight -A sophisticated multi-agent processing pipeline using the Claude Agent SDK that performs dependency-aware codebase analysis and visualization through multi-agent composition. +A multi-agent processing pipeline that performs dependency-aware codebase analysis and visualization. Built on [Burr](https://github.com/apache/burr) for explicit state-machine orchestration, and speaks the OpenAI Chat Completions protocol — so it works with **any OpenAI-compatible endpoint** (OpenAI, OpenRouter, vLLM, LM Studio, Ollama, Together, Groq, …). ## Features @@ -117,9 +117,37 @@ source venv/bin/activate # On Windows: venv\Scripts\activate # Install dependencies pip install -e . -# Set up API key +# Configure your LLM endpoint cp .env.example .env -# Edit .env and add your ANTHROPIC_API_KEY +# Edit .env and set OPENAI_API_KEY (and optionally OPENAI_BASE_URL / OPENAI_MODEL) +``` + +### LLM configuration + +Flashlight uses the OpenAI Chat Completions API, so any OpenAI-compatible provider works. Set: + +| Variable | Required | Default | Notes | +|----------|----------|---------|-------| +| `OPENAI_API_KEY` | yes | — | Bearer token for the target endpoint | +| `OPENAI_BASE_URL` | no | `https://api.openai.com/v1` | Point at OpenAI, OpenRouter, vLLM, LM Studio, Ollama, etc. | +| `OPENAI_MODEL` | no | `gpt-4o-mini` | Any model served by your chosen endpoint that supports tool (function) calling | + +Example configurations: + +```bash +# OpenAI +OPENAI_API_KEY=sk-... +OPENAI_MODEL=gpt-4o + +# OpenRouter (access to Claude, Llama, etc. via one API) +OPENAI_API_KEY=sk-or-... 
+OPENAI_BASE_URL=https://openrouter.ai/api/v1 +OPENAI_MODEL=anthropic/claude-sonnet-4 + +# Local server (vLLM shown; LM Studio defaults to :1234, Ollama to :11434) +OPENAI_API_KEY=not-needed-but-must-be-set +OPENAI_BASE_URL=http://localhost:8000/v1 +OPENAI_MODEL=meta-llama/Llama-3.1-70B-Instruct ``` ## Usage @@ -150,12 +178,11 @@ AGENT_DEBUG=true python -m github_flashlight.agent ``` When enabled, you'll see real-time information about: -- 📤 API requests to Claude -- 📥 API responses -- 🚀 Subagent spawning and lifecycle -- 🔧 Tool calls with parameters -- ✅ Tool results and success/failure status -- 📝 Agent context and model information +- LLM API requests and responses +- Subagent spawning and lifecycle +- Tool calls with parameters +- Tool results and success/failure status +- Agent context and model information This is useful for: - Understanding what the agents are doing in real-time @@ -232,7 +259,7 @@ The agent will: ## Requirements - Python 3.10+ -- Claude API key +- An API key for an OpenAI-compatible endpoint (OpenAI, OpenRouter, a self-hosted vLLM/LM Studio/Ollama server, etc.) - Access to the codebase to analyze ## Development @@ -287,7 +314,7 @@ The primary leader orchestrates a sophisticated multi-phase workflow: ## Contributing -This project showcases the Claude Agent SDK's multi-agent composition capabilities. Feel free to extend it with: +This project showcases dependency-aware multi-agent composition over the OpenAI Chat Completions protocol. Feel free to extend it with: - Additional language support (Java, C#, etc.) 
- Enhanced metrics collection (LOC, complexity, test coverage) - Incremental analysis for large repositories diff --git a/agent/agent.py b/agent/agent.py index 3034fd5..8f2e0de 100644 --- a/agent/agent.py +++ b/agent/agent.py @@ -55,10 +55,13 @@ def analyze(service_name: str): service_name: Name of the service (must have discovery files in /tmp/{service_name}/) """ # Check API key first - if not os.environ.get("OPENROUTER_API_KEY"): - print("\nError: OPENROUTER_API_KEY not found.") + if not os.environ.get("OPENAI_API_KEY"): + print("\nError: OPENAI_API_KEY not found.") print("Set it in a .env file or export it in your shell.") - print("Get your key at: https://openrouter.ai/keys\n") + print( + "Flashlight works with any OpenAI-compatible endpoint; point " + "OPENAI_BASE_URL at your provider (defaults to https://api.openai.com/v1).\n" + ) return # Setup session directory and transcript diff --git a/agent/burr_app.py b/agent/burr_app.py index a7b963c..0e34292 100644 --- a/agent/burr_app.py +++ b/agent/burr_app.py @@ -45,18 +45,35 @@ # --------------------------------------------------------------------------- # Configuration +# +# Flashlight speaks the OpenAI Chat Completions protocol, so it works with +# any OpenAI-compatible endpoint (OpenAI, OpenRouter, vLLM, LM Studio, Ollama, +# Together, Groq, etc.). 
+# +# Required: +# OPENAI_API_KEY - bearer token for the target endpoint +# +# Optional: +# OPENAI_BASE_URL - defaults to https://api.openai.com/v1 +# OPENAI_MODEL - defaults to gpt-4o-mini # --------------------------------------------------------------------------- -OPENROUTER_BASE_URL = "https://openrouter.ai/api/v1" -DEFAULT_MODEL = os.environ.get("OPENROUTER_MODEL", "anthropic/claude-sonnet-4") +DEFAULT_BASE_URL = "https://api.openai.com/v1" +DEFAULT_MODEL = os.environ.get("OPENAI_MODEL", "gpt-4o-mini") + + +def get_base_url() -> str: + """Return the OpenAI-compatible API base URL.""" + return os.environ.get("OPENAI_BASE_URL", DEFAULT_BASE_URL).rstrip("/") def get_api_key() -> str: - """Get OpenRouter API key from environment.""" - key = os.environ.get("OPENROUTER_API_KEY", "") + """Return the OpenAI-compatible API key.""" + key = os.environ.get("OPENAI_API_KEY", "") if not key: raise RuntimeError( - "OPENROUTER_API_KEY not set. Get one at https://openrouter.ai/keys" + "OPENAI_API_KEY not set. Point OPENAI_BASE_URL at your provider " + "(OpenAI, OpenRouter, vLLM, LM Studio, ...) and set OPENAI_API_KEY." 
) return key @@ -503,7 +520,7 @@ def subagent_call_llm(state: State) -> State: api_messages = [{"role": "system", "content": sys_prompt}] + messages - response = call_openrouter( + response = _chat_completion( messages=api_messages, tools=subagent_tools, ) @@ -687,7 +704,7 @@ def _run_subagent_loop( ) try: - response = call_openrouter( + response = _chat_completion( messages=api_messages, tools=subagent_tools, ) @@ -773,11 +790,11 @@ def _run_subagent_loop( # --------------------------------------------------------------------------- -# OpenRouter LLM client +# OpenAI-compatible LLM client # --------------------------------------------------------------------------- -def call_openrouter( +def _chat_completion( messages: List[Dict[str, Any]], model: str = DEFAULT_MODEL, tools: Optional[List[Dict]] = None, @@ -786,7 +803,11 @@ def call_openrouter( max_retries: int = 3, initial_retry_delay: float = 2.0, ) -> Dict[str, Any]: - """Call OpenRouter API and return the response with retry logic. + """Call an OpenAI-compatible Chat Completions endpoint with retry logic. + + Works with any OpenAI-compatible API (OpenAI, OpenRouter, vLLM, LM Studio, + Ollama, Together, Groq, ...). Base URL is resolved from OPENAI_BASE_URL + and defaults to https://api.openai.com/v1. Implements exponential backoff for transient failures: - Timeouts (httpx.TimeoutException) @@ -814,6 +835,7 @@ def call_openrouter( RuntimeError: On exhausted retries """ api_key = get_api_key() + base_url = get_base_url() payload = { "model": model, @@ -826,9 +848,12 @@ def call_openrouter( headers = { "Authorization": f"Bearer {api_key}", "Content-Type": "application/json", - "HTTP-Referer": "https://github.com/anthropics/flashlight", - "X-Title": "flashlight", } + # OpenRouter-specific attribution headers (harmless but only meaningful + # when actually routing through openrouter.ai). 
+ if "openrouter.ai" in base_url: + headers["HTTP-Referer"] = "https://github.com/anthropics/flashlight" + headers["X-Title"] = "flashlight" last_exception: Optional[Exception] = None retry_delay = initial_retry_delay @@ -837,7 +862,7 @@ def call_openrouter( try: with httpx.Client(timeout=timeout) as client: response = client.post( - f"{OPENROUTER_BASE_URL}/chat/completions", + f"{base_url}/chat/completions", json=payload, headers=headers, ) @@ -892,7 +917,7 @@ def call_openrouter( "timeout" if is_timeout else f"HTTP {e.response.status_code}" ) logger.warning( - f"OpenRouter request failed ({error_type}), " + f"LLM request failed ({error_type}), " f"retrying in {retry_delay:.1f}s (attempt {attempt + 1}/{max_retries + 1})" ) time.sleep(retry_delay) @@ -906,7 +931,7 @@ def call_openrouter( last_exception = e if attempt < max_retries: logger.warning( - f"OpenRouter request failed (network error: {e}), " + f"LLM request failed (network error: {e}), " f"retrying in {retry_delay:.1f}s (attempt {attempt + 1}/{max_retries + 1})" ) time.sleep(retry_delay) @@ -917,7 +942,7 @@ def call_openrouter( # Should not reach here, but just in case raise RuntimeError( - f"OpenRouter request failed after {max_retries + 1} attempts: {last_exception}" + f"LLM request failed after {max_retries + 1} attempts: {last_exception}" ) @@ -940,8 +965,9 @@ def call_openrouter( def call_llm(state: State, __tracer: "TracerFactory") -> State: """Call the LLM with the current conversation history. - This is the core LLM action - it sends messages to OpenRouter and - processes the response, extracting any tool calls. + This is the core LLM action - it sends messages to the configured + OpenAI-compatible endpoint and processes the response, extracting + any tool calls. Sets has_pending_tools boolean for transition conditions. 
Source: agent/burr_app.py @@ -949,7 +975,7 @@ def call_llm(state: State, __tracer: "TracerFactory") -> State: Uses __tracer for nested span visibility into: - Message preparation - - OpenRouter API call (with token/model details) + - LLM API call (with token/model details) - Response processing """ messages = state.get("messages", []) @@ -965,12 +991,12 @@ def call_llm(state: State, __tracer: "TracerFactory") -> State: has_tools=True, ) - # Span: Call the LLM via OpenRouter - with __tracer("openrouter_api_call", span_dependencies=["prepare_messages"]) as t: - model = os.environ.get("OPENROUTER_MODEL", DEFAULT_MODEL) + # Span: Call the LLM + with __tracer("llm_api_call", span_dependencies=["prepare_messages"]) as t: + model = os.environ.get("OPENAI_MODEL", DEFAULT_MODEL) t.log_attributes(model=model, tool_count=len(AVAILABLE_TOOLS)) - response = call_openrouter( + response = _chat_completion( messages=api_messages, tools=AVAILABLE_TOOLS, ) @@ -986,7 +1012,7 @@ def call_llm(state: State, __tracer: "TracerFactory") -> State: ) # Span: Process response - with __tracer("process_response", span_dependencies=["openrouter_api_call"]) as t: + with __tracer("process_response", span_dependencies=["llm_api_call"]) as t: content = response["content"] tool_calls = response["tool_calls"] @@ -1483,7 +1509,7 @@ def synthesize(state: State, __tracer: "TracerFactory") -> State: for iteration in range(max_iterations): with __tracer(f"synth_iteration_{iteration}") as iter_t: try: - response = call_openrouter( + response = _chat_completion( messages=api_messages, tools=subagent_tools, ) diff --git a/agent/cli.py b/agent/cli.py index 61cabea..96099c6 100644 --- a/agent/cli.py +++ b/agent/cli.py @@ -259,9 +259,13 @@ def analyze( load_dotenv() - if not os.environ.get("OPENROUTER_API_KEY"): - print("Error: OPENROUTER_API_KEY not found.", file=sys.stderr) - print("Get your key at: https://openrouter.ai/keys", file=sys.stderr) + if not os.environ.get("OPENAI_API_KEY"): + print("Error: 
OPENAI_API_KEY not found.", file=sys.stderr) + print( + "Flashlight uses any OpenAI-compatible endpoint. Set OPENAI_API_KEY " + "and (optionally) OPENAI_BASE_URL + OPENAI_MODEL.", + file=sys.stderr, + ) sys.exit(1) repo = Path(repo_path).resolve()