diff --git a/.gitignore b/.gitignore index da918ac5..194c780d 100644 --- a/.gitignore +++ b/.gitignore @@ -188,6 +188,10 @@ logs/ # Docker volume configs (keep .env.example but ignore actual .env) volumes/env/.env +# Vendored proxy sources (kept locally for reference) +ai/proxy/bifrost/ +ai/proxy/litellm/ + # Test project databases and configurations test_projects/*/.fuzzforge/ test_projects/*/findings.db* @@ -304,4 +308,4 @@ test_projects/*/.npmrc test_projects/*/.git-credentials test_projects/*/credentials.* test_projects/*/api_keys.* -test_projects/*/ci-*.sh \ No newline at end of file +test_projects/*/ci-*.sh diff --git a/CHANGELOG.md b/CHANGELOG.md index 5fd0722c..52c751ca 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -51,6 +51,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### 🐛 Bug Fixes +- Fixed default parameters from metadata.yaml not being applied to workflows when no parameters provided - Fixed gitleaks workflow failing on uploaded directories without Git history - Fixed worker startup command suggestions (now uses `docker compose up -d` with service names) - Fixed missing `cognify_text` method in CogneeProjectIntegration diff --git a/README.md b/README.md index 9b8eaafb..abc2c14c 100644 --- a/README.md +++ b/README.md @@ -117,7 +117,9 @@ For AI-powered workflows, configure your LLM API keys: ```bash cp volumes/env/.env.example volumes/env/.env # Edit volumes/env/.env and add your API keys (OpenAI, Anthropic, Google, etc.) +# Add your key to LITELLM_GEMINI_API_KEY ``` +> Dont change the OPENAI_API_KEY default value, as it is used for the LLM proxy. This is required for: - `llm_secret_detection` workflow diff --git a/ai/agents/task_agent/.env.example b/ai/agents/task_agent/.env.example deleted file mode 100644 index c71d59a4..00000000 --- a/ai/agents/task_agent/.env.example +++ /dev/null @@ -1,10 +0,0 @@ -# Default LiteLLM configuration -LITELLM_MODEL=gemini/gemini-2.0-flash-001 -# LITELLM_PROVIDER=gemini - -# API keys (uncomment and fill as needed) -# GOOGLE_API_KEY= -# OPENAI_API_KEY= -# ANTHROPIC_API_KEY= -# OPENROUTER_API_KEY= -# MISTRAL_API_KEY= diff --git a/ai/agents/task_agent/Dockerfile b/ai/agents/task_agent/Dockerfile index eaf734bf..c2b66862 100644 --- a/ai/agents/task_agent/Dockerfile +++ b/ai/agents/task_agent/Dockerfile @@ -16,4 +16,9 @@ COPY . /app/agent_with_adk_format WORKDIR /app/agent_with_adk_format ENV PYTHONPATH=/app +# Copy and set up entrypoint +COPY docker-entrypoint.sh /docker-entrypoint.sh +RUN chmod +x /docker-entrypoint.sh + +ENTRYPOINT ["/docker-entrypoint.sh"] CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "8000"] diff --git a/ai/agents/task_agent/README.md b/ai/agents/task_agent/README.md index 769ce33e..692e4e6b 100644 --- a/ai/agents/task_agent/README.md +++ b/ai/agents/task_agent/README.md @@ -43,18 +43,34 @@ cd task_agent # cp .env.example .env ``` -Edit `.env` (or `.env.example`) and add your API keys. The agent must be restarted after changes so the values are picked up: +Edit `.env` (or `.env.example`) and add your proxy + API keys. 
The agent must be restarted after changes so the values are picked up: ```bash -# Set default model -LITELLM_MODEL=gemini/gemini-2.0-flash-001 - -# Add API keys for providers you want to use -GOOGLE_API_KEY=your_google_api_key -OPENAI_API_KEY=your_openai_api_key -ANTHROPIC_API_KEY=your_anthropic_api_key -OPENROUTER_API_KEY=your_openrouter_api_key +# Route every request through the proxy container (use http://localhost:10999 from the host) +FF_LLM_PROXY_BASE_URL=http://llm-proxy:4000 + +# Default model + provider the agent boots with +LITELLM_MODEL=openai/gpt-4o-mini +LITELLM_PROVIDER=openai + +# Virtual key issued by the proxy to the task agent (bootstrap replaces the placeholder) +OPENAI_API_KEY=sk-proxy-default + +# Upstream keys stay inside the proxy. Store real secrets under the LiteLLM +# aliases and the bootstrapper mirrors them into .env.litellm for the proxy container. +LITELLM_OPENAI_API_KEY=your_real_openai_api_key +LITELLM_ANTHROPIC_API_KEY=your_real_anthropic_key +LITELLM_GEMINI_API_KEY=your_real_gemini_key +LITELLM_MISTRAL_API_KEY=your_real_mistral_key +LITELLM_OPENROUTER_API_KEY=your_real_openrouter_key ``` +> When running the agent outside of Docker, swap `FF_LLM_PROXY_BASE_URL` to the host port (default `http://localhost:10999`). + +The bootstrap container provisions LiteLLM, copies provider secrets into +`volumes/env/.env.litellm`, and rewrites `volumes/env/.env` with the virtual key. +Populate the `LITELLM_*_API_KEY` values before the first launch so the proxy can +reach your upstream providers as soon as the bootstrap script runs. + ### 2. Install Dependencies ```bash diff --git a/ai/agents/task_agent/docker-entrypoint.sh b/ai/agents/task_agent/docker-entrypoint.sh new file mode 100644 index 00000000..88e37338 --- /dev/null +++ b/ai/agents/task_agent/docker-entrypoint.sh @@ -0,0 +1,31 @@ +#!/bin/bash +set -e + +# Wait for .env file to have keys (max 30 seconds) +echo "[task-agent] Waiting for virtual keys to be provisioned..." +for i in $(seq 1 30); do + if [ -f /app/config/.env ]; then + # Check if TASK_AGENT_API_KEY has a value (not empty) + KEY=$(grep -E '^TASK_AGENT_API_KEY=' /app/config/.env | cut -d'=' -f2) + if [ -n "$KEY" ] && [ "$KEY" != "" ]; then + echo "[task-agent] Virtual keys found, loading environment..." + # Export keys from .env file + export TASK_AGENT_API_KEY="$KEY" + export OPENAI_API_KEY=$(grep -E '^OPENAI_API_KEY=' /app/config/.env | cut -d'=' -f2) + export FF_LLM_PROXY_BASE_URL=$(grep -E '^FF_LLM_PROXY_BASE_URL=' /app/config/.env | cut -d'=' -f2) + echo "[task-agent] Loaded TASK_AGENT_API_KEY: ${TASK_AGENT_API_KEY:0:15}..." + echo "[task-agent] Loaded FF_LLM_PROXY_BASE_URL: $FF_LLM_PROXY_BASE_URL" + break + fi + fi + echo "[task-agent] Keys not ready yet, waiting... ($i/30)" + sleep 1 +done + +if [ -z "$TASK_AGENT_API_KEY" ]; then + echo "[task-agent] ERROR: Virtual keys were not provisioned within 30 seconds!" + exit 1 +fi + +echo "[task-agent] Starting uvicorn..." 
+exec "$@" diff --git a/ai/agents/task_agent/litellm_agent/config.py b/ai/agents/task_agent/litellm_agent/config.py index 9b404bfd..54ab609c 100644 --- a/ai/agents/task_agent/litellm_agent/config.py +++ b/ai/agents/task_agent/litellm_agent/config.py @@ -4,13 +4,28 @@ import os + +def _normalize_proxy_base_url(raw_value: str | None) -> str | None: + if not raw_value: + return None + cleaned = raw_value.strip() + if not cleaned: + return None + # Avoid double slashes in downstream requests + return cleaned.rstrip("/") + AGENT_NAME = "litellm_agent" AGENT_DESCRIPTION = ( "A LiteLLM-backed shell that exposes hot-swappable model and prompt controls." ) -DEFAULT_MODEL = os.getenv("LITELLM_MODEL", "gemini-2.0-flash-001") -DEFAULT_PROVIDER = os.getenv("LITELLM_PROVIDER") +DEFAULT_MODEL = os.getenv("LITELLM_MODEL", "openai/gpt-4o-mini") +DEFAULT_PROVIDER = os.getenv("LITELLM_PROVIDER") or None +PROXY_BASE_URL = _normalize_proxy_base_url( + os.getenv("FF_LLM_PROXY_BASE_URL") + or os.getenv("LITELLM_API_BASE") + or os.getenv("LITELLM_BASE_URL") +) STATE_PREFIX = "app:litellm_agent/" STATE_MODEL_KEY = f"{STATE_PREFIX}model" diff --git a/ai/agents/task_agent/litellm_agent/state.py b/ai/agents/task_agent/litellm_agent/state.py index 460d9611..54f13088 100644 --- a/ai/agents/task_agent/litellm_agent/state.py +++ b/ai/agents/task_agent/litellm_agent/state.py @@ -3,11 +3,15 @@ from __future__ import annotations from dataclasses import dataclass +import os from typing import Any, Mapping, MutableMapping, Optional +import httpx + from .config import ( DEFAULT_MODEL, DEFAULT_PROVIDER, + PROXY_BASE_URL, STATE_MODEL_KEY, STATE_PROMPT_KEY, STATE_PROVIDER_KEY, @@ -66,11 +70,109 @@ def instantiate_llm(self): """Create a LiteLlm instance for the current state.""" from google.adk.models.lite_llm import LiteLlm # Lazy import to avoid cycle + from google.adk.models.lite_llm import LiteLLMClient + from litellm.types.utils import Choices, Message, ModelResponse, Usage kwargs = {"model": self.model} if self.provider: kwargs["custom_llm_provider"] = self.provider - return LiteLlm(**kwargs) + if PROXY_BASE_URL: + provider = (self.provider or DEFAULT_PROVIDER or "").lower() + if provider and provider != "openai": + kwargs["api_base"] = f"{PROXY_BASE_URL.rstrip('/')}/{provider}" + else: + kwargs["api_base"] = PROXY_BASE_URL + kwargs.setdefault("api_key", os.environ.get("TASK_AGENT_API_KEY") or os.environ.get("OPENAI_API_KEY")) + + provider = (self.provider or DEFAULT_PROVIDER or "").lower() + model_suffix = self.model.split("/", 1)[-1] + use_responses = provider == "openai" and ( + model_suffix.startswith("gpt-5") or model_suffix.startswith("o1") + ) + if use_responses: + kwargs.setdefault("use_responses_api", True) + + llm = LiteLlm(**kwargs) + + if use_responses and PROXY_BASE_URL: + + class _ResponsesAwareClient(LiteLLMClient): + def __init__(self, base_client: LiteLLMClient, api_base: str, api_key: str): + self._base_client = base_client + self._api_base = api_base.rstrip("/") + self._api_key = api_key + + async def acompletion(self, model, messages, tools, **kwargs): # type: ignore[override] + use_responses_api = kwargs.pop("use_responses_api", False) + if not use_responses_api: + return await self._base_client.acompletion( + model=model, + messages=messages, + tools=tools, + **kwargs, + ) + + resolved_model = model + if "/" not in resolved_model: + resolved_model = f"openai/{resolved_model}" + + payload = { + "model": resolved_model, + "input": _messages_to_responses_input(messages), + } + + timeout = 
kwargs.get("timeout", 60) + headers = { + "Authorization": f"Bearer {self._api_key}", + "Content-Type": "application/json", + } + + async with httpx.AsyncClient(timeout=timeout) as client: + response = await client.post( + f"{self._api_base}/v1/responses", + json=payload, + headers=headers, + ) + try: + response.raise_for_status() + except httpx.HTTPStatusError as exc: + text = exc.response.text + raise RuntimeError( + f"LiteLLM responses request failed: {text}" + ) from exc + data = response.json() + + text_output = _extract_output_text(data) + usage = data.get("usage", {}) + + return ModelResponse( + id=data.get("id"), + model=model, + choices=[ + Choices( + finish_reason="stop", + index=0, + message=Message(role="assistant", content=text_output), + provider_specific_fields={"bifrost_response": data}, + ) + ], + usage=Usage( + prompt_tokens=usage.get("input_tokens"), + completion_tokens=usage.get("output_tokens"), + reasoning_tokens=usage.get("output_tokens_details", {}).get( + "reasoning_tokens" + ), + total_tokens=usage.get("total_tokens"), + ), + ) + + llm.llm_client = _ResponsesAwareClient( + llm.llm_client, + PROXY_BASE_URL, + os.environ.get("TASK_AGENT_API_KEY") or os.environ.get("OPENAI_API_KEY", ""), + ) + + return llm @property def display_model(self) -> str: @@ -84,3 +186,69 @@ def apply_state_to_agent(invocation_context, state: HotSwapState) -> None: agent = invocation_context.agent agent.model = state.instantiate_llm() + + +def _messages_to_responses_input(messages: list[dict[str, Any]]) -> list[dict[str, Any]]: + inputs: list[dict[str, Any]] = [] + for message in messages: + role = message.get("role", "user") + content = message.get("content", "") + text_segments: list[str] = [] + + if isinstance(content, list): + for item in content: + if isinstance(item, dict): + text = item.get("text") or item.get("content") + if text: + text_segments.append(str(text)) + elif isinstance(item, str): + text_segments.append(item) + elif isinstance(content, str): + text_segments.append(content) + + text = "\n".join(segment.strip() for segment in text_segments if segment) + if not text: + continue + + entry_type = "input_text" + if role == "assistant": + entry_type = "output_text" + + inputs.append( + { + "role": role, + "content": [ + { + "type": entry_type, + "text": text, + } + ], + } + ) + + if not inputs: + inputs.append( + { + "role": "user", + "content": [ + { + "type": "input_text", + "text": "", + } + ], + } + ) + return inputs + + +def _extract_output_text(response_json: dict[str, Any]) -> str: + outputs = response_json.get("output", []) + collected: list[str] = [] + for item in outputs: + if isinstance(item, dict) and item.get("type") == "message": + for part in item.get("content", []): + if isinstance(part, dict) and part.get("type") == "output_text": + text = part.get("text", "") + if text: + collected.append(str(text)) + return "\n\n".join(collected).strip() diff --git a/ai/proxy/README.md b/ai/proxy/README.md new file mode 100644 index 00000000..fc941eb8 --- /dev/null +++ b/ai/proxy/README.md @@ -0,0 +1,5 @@ +# LLM Proxy Integrations + +This directory contains vendor source trees that were vendored only for reference when integrating LLM gateways. The actual FuzzForge deployment uses the official Docker images for each project. + +See `docs/docs/how-to/llm-proxy.md` for up-to-date instructions on running the proxy services and issuing keys for the agents. 
diff --git a/ai/src/fuzzforge_ai/agent_executor.py b/ai/src/fuzzforge_ai/agent_executor.py index e27ee3d1..41613c07 100644 --- a/ai/src/fuzzforge_ai/agent_executor.py +++ b/ai/src/fuzzforge_ai/agent_executor.py @@ -1049,10 +1049,19 @@ async def get_task_list(task_list_id: str) -> str: FunctionTool(get_task_list) ]) - - # Create the agent + + # Create the agent with LiteLLM configuration + llm_kwargs = {} + api_key = os.getenv('OPENAI_API_KEY') or os.getenv('LLM_API_KEY') + api_base = os.getenv('LLM_ENDPOINT') or os.getenv('LLM_API_BASE') or os.getenv('OPENAI_API_BASE') + + if api_key: + llm_kwargs['api_key'] = api_key + if api_base: + llm_kwargs['api_base'] = api_base + self.agent = LlmAgent( - model=LiteLlm(model=self.model), + model=LiteLlm(model=self.model, **llm_kwargs), name="fuzzforge_executor", description="Intelligent A2A orchestrator with memory", instruction=self._build_instruction(), diff --git a/ai/src/fuzzforge_ai/cognee_service.py b/ai/src/fuzzforge_ai/cognee_service.py index 968e9567..ba14a30e 100644 --- a/ai/src/fuzzforge_ai/cognee_service.py +++ b/ai/src/fuzzforge_ai/cognee_service.py @@ -56,7 +56,7 @@ async def initialize(self): # Configure LLM with API key BEFORE any other cognee operations provider = os.getenv("LLM_PROVIDER", "openai") model = os.getenv("LLM_MODEL") or os.getenv("LITELLM_MODEL", "gpt-4o-mini") - api_key = os.getenv("LLM_API_KEY") or os.getenv("OPENAI_API_KEY") + api_key = os.getenv("COGNEE_API_KEY") or os.getenv("LLM_API_KEY") or os.getenv("OPENAI_API_KEY") endpoint = os.getenv("LLM_ENDPOINT") api_version = os.getenv("LLM_API_VERSION") max_tokens = os.getenv("LLM_MAX_TOKENS") @@ -78,48 +78,62 @@ async def initialize(self): os.environ.setdefault("OPENAI_API_KEY", api_key) if endpoint: os.environ["LLM_ENDPOINT"] = endpoint + os.environ.setdefault("LLM_API_BASE", endpoint) + os.environ.setdefault("OPENAI_API_BASE", endpoint) + os.environ.setdefault("LITELLM_PROXY_API_BASE", endpoint) + if api_key: + os.environ.setdefault("LITELLM_PROXY_API_KEY", api_key) if api_version: os.environ["LLM_API_VERSION"] = api_version if max_tokens: os.environ["LLM_MAX_TOKENS"] = str(max_tokens) # Configure Cognee's runtime using its configuration helpers when available + embedding_model = os.getenv("LLM_EMBEDDING_MODEL") + embedding_endpoint = os.getenv("LLM_EMBEDDING_ENDPOINT") + if embedding_endpoint: + os.environ.setdefault("LLM_EMBEDDING_API_BASE", embedding_endpoint) + if hasattr(cognee.config, "set_llm_provider"): cognee.config.set_llm_provider(provider) - if hasattr(cognee.config, "set_llm_model"): - cognee.config.set_llm_model(model) - if api_key and hasattr(cognee.config, "set_llm_api_key"): - cognee.config.set_llm_api_key(api_key) - if endpoint and hasattr(cognee.config, "set_llm_endpoint"): - cognee.config.set_llm_endpoint(endpoint) + if hasattr(cognee.config, "set_llm_model"): + cognee.config.set_llm_model(model) + if api_key and hasattr(cognee.config, "set_llm_api_key"): + cognee.config.set_llm_api_key(api_key) + if endpoint and hasattr(cognee.config, "set_llm_endpoint"): + cognee.config.set_llm_endpoint(endpoint) + if embedding_model and hasattr(cognee.config, "set_llm_embedding_model"): + cognee.config.set_llm_embedding_model(embedding_model) + if embedding_endpoint and hasattr(cognee.config, "set_llm_embedding_endpoint"): + cognee.config.set_llm_embedding_endpoint(embedding_endpoint) if api_version and hasattr(cognee.config, "set_llm_api_version"): cognee.config.set_llm_api_version(api_version) if max_tokens and hasattr(cognee.config, "set_llm_max_tokens"): 
cognee.config.set_llm_max_tokens(int(max_tokens)) - + # Configure graph database cognee.config.set_graph_db_config({ "graph_database_provider": self.cognee_config.get("graph_database_provider", "kuzu"), }) - + # Set data directories data_dir = self.cognee_config.get("data_directory") system_dir = self.cognee_config.get("system_directory") - + if data_dir: logger.debug("Setting cognee data root", extra={"path": data_dir}) cognee.config.data_root_directory(data_dir) if system_dir: logger.debug("Setting cognee system root", extra={"path": system_dir}) cognee.config.system_root_directory(system_dir) - + # Setup multi-tenant user context await self._setup_user_context() - + self._initialized = True logger.info(f"Cognee initialized for project {self.project_context['project_name']} " f"with Kuzu at {system_dir}") - + except ImportError: logger.error("Cognee not installed. Install with: pip install cognee") raise diff --git a/backend/src/api/workflows.py b/backend/src/api/workflows.py index 3ffda9db..a4d1b7cf 100644 --- a/backend/src/api/workflows.py +++ b/backend/src/api/workflows.py @@ -43,6 +43,42 @@ router = APIRouter(prefix="/workflows", tags=["workflows"]) +def extract_defaults_from_json_schema(metadata: Dict[str, Any]) -> Dict[str, Any]: + """ + Extract default parameter values from JSON Schema format. + + Converts from: + parameters: + properties: + param_name: + default: value + + To: + {param_name: value} + + Args: + metadata: Workflow metadata dictionary + + Returns: + Dictionary of parameter defaults + """ + defaults = {} + + # Check if there's a legacy default_parameters field + if "default_parameters" in metadata: + defaults.update(metadata["default_parameters"]) + + # Extract defaults from JSON Schema parameters + parameters = metadata.get("parameters", {}) + properties = parameters.get("properties", {}) + + for param_name, param_spec in properties.items(): + if "default" in param_spec: + defaults[param_name] = param_spec["default"] + + return defaults + + def create_structured_error_response( error_type: str, message: str, @@ -164,7 +200,7 @@ async def get_workflow_metadata( author=metadata.get("author"), tags=metadata.get("tags", []), parameters=metadata.get("parameters", {}), - default_parameters=metadata.get("default_parameters", {}), + default_parameters=extract_defaults_from_json_schema(metadata), required_modules=metadata.get("required_modules", []) ) @@ -221,7 +257,7 @@ async def submit_workflow( # Merge default parameters with user parameters workflow_info = temporal_mgr.workflows[workflow_name] metadata = workflow_info.metadata or {} - defaults = metadata.get("default_parameters", {}) + defaults = extract_defaults_from_json_schema(metadata) user_params = submission.parameters or {} workflow_params = {**defaults, **user_params} @@ -450,7 +486,7 @@ async def upload_and_submit_workflow( # Merge default parameters with user parameters workflow_info = temporal_mgr.workflows.get(workflow_name) metadata = workflow_info.metadata or {} - defaults = metadata.get("default_parameters", {}) + defaults = extract_defaults_from_json_schema(metadata) workflow_params = {**defaults, **workflow_params} # Start workflow execution @@ -617,11 +653,8 @@ async def get_workflow_parameters( else: param_definitions = parameters_schema - # Add default values to the schema - default_params = metadata.get("default_parameters", {}) - for param_name, param_schema in param_definitions.items(): - if isinstance(param_schema, dict) and param_name in default_params: - param_schema["default"] = 
default_params[param_name] + # Extract default values from JSON Schema + default_params = extract_defaults_from_json_schema(metadata) return { "workflow": workflow_name, diff --git a/backend/src/temporal/manager.py b/backend/src/temporal/manager.py index 9a44e8b9..96d9a842 100644 --- a/backend/src/temporal/manager.py +++ b/backend/src/temporal/manager.py @@ -187,12 +187,28 @@ async def run_workflow( # Add parameters in order based on metadata schema # This ensures parameters match the workflow signature order - if workflow_params and 'parameters' in workflow_info.metadata: + # Apply defaults from metadata.yaml if parameter not provided + if 'parameters' in workflow_info.metadata: param_schema = workflow_info.metadata['parameters'].get('properties', {}) + logger.debug(f"Found {len(param_schema)} parameters in schema") # Iterate parameters in schema order and add values for param_name in param_schema.keys(): - param_value = workflow_params.get(param_name) + param_spec = param_schema[param_name] + + # Use provided param, or fall back to default from metadata + if workflow_params and param_name in workflow_params: + param_value = workflow_params[param_name] + logger.debug(f"Using provided value for {param_name}: {param_value}") + elif 'default' in param_spec: + param_value = param_spec['default'] + logger.debug(f"Using default for {param_name}: {param_value}") + else: + param_value = None + logger.debug(f"No value or default for {param_name}, using None") + workflow_args.append(param_value) + else: + logger.debug("No 'parameters' section found in workflow metadata") # Determine task queue from workflow vertical vertical = workflow_info.metadata.get("vertical", "default") diff --git a/backend/toolbox/modules/secret_detection/llm_secret_detector.py b/backend/toolbox/modules/secret_detection/llm_secret_detector.py index 3ba96f81..1adf3416 100644 --- a/backend/toolbox/modules/secret_detection/llm_secret_detector.py +++ b/backend/toolbox/modules/secret_detection/llm_secret_detector.py @@ -107,7 +107,8 @@ def validate_config(self, config: Dict[str, Any]) -> bool: ) agent_url = config.get("agent_url") - if not agent_url or not isinstance(agent_url, str): + # agent_url is optional - will have default from metadata.yaml + if agent_url is not None and not isinstance(agent_url, str): raise ValueError("agent_url must be a valid URL string") max_files = config.get("max_files", 20) @@ -131,14 +132,14 @@ async def execute(self, config: Dict[str, Any], workspace: Path) -> ModuleResult logger.info(f"Starting LLM secret detection in workspace: {workspace}") - # Extract configuration - agent_url = config.get("agent_url", "http://fuzzforge-task-agent:8000/a2a/litellm_agent") - llm_model = config.get("llm_model", "gpt-4o-mini") - llm_provider = config.get("llm_provider", "openai") - file_patterns = config.get("file_patterns", ["*.py", "*.js", "*.ts", "*.java", "*.go", "*.env", "*.yaml", "*.yml", "*.json", "*.xml", "*.ini", "*.sql", "*.properties", "*.sh", "*.bat", "*.config", "*.conf", "*.toml", "*id_rsa*", "*.txt"]) - max_files = config.get("max_files", 20) - max_file_size = config.get("max_file_size", 30000) - timeout = config.get("timeout", 30) # Reduced from 45s + # Extract configuration (defaults come from metadata.yaml via API) + agent_url = config["agent_url"] + llm_model = config["llm_model"] + llm_provider = config["llm_provider"] + file_patterns = config["file_patterns"] + max_files = config["max_files"] + max_file_size = config["max_file_size"] + timeout = config["timeout"] # Find files to analyze # 
Skip files that are unlikely to contain secrets diff --git a/backend/toolbox/workflows/llm_secret_detection/metadata.yaml b/backend/toolbox/workflows/llm_secret_detection/metadata.yaml index 83852b32..a97b8597 100644 --- a/backend/toolbox/workflows/llm_secret_detection/metadata.yaml +++ b/backend/toolbox/workflows/llm_secret_detection/metadata.yaml @@ -30,5 +30,42 @@ parameters: type: integer default: 20 + max_file_size: + type: integer + default: 30000 + description: "Maximum file size in bytes" + + timeout: + type: integer + default: 30 + description: "Timeout per file in seconds" + + file_patterns: + type: array + items: + type: string + default: + - "*.py" + - "*.js" + - "*.ts" + - "*.java" + - "*.go" + - "*.env" + - "*.yaml" + - "*.yml" + - "*.json" + - "*.xml" + - "*.ini" + - "*.sql" + - "*.properties" + - "*.sh" + - "*.bat" + - "*.config" + - "*.conf" + - "*.toml" + - "*id_rsa*" + - "*.txt" + description: "File patterns to scan for secrets" + required_modules: - "llm_secret_detector" diff --git a/backend/toolbox/workflows/llm_secret_detection/workflow.py b/backend/toolbox/workflows/llm_secret_detection/workflow.py index 4f693d0f..a0c66d22 100644 --- a/backend/toolbox/workflows/llm_secret_detection/workflow.py +++ b/backend/toolbox/workflows/llm_secret_detection/workflow.py @@ -17,6 +17,7 @@ async def run( llm_model: Optional[str] = None, llm_provider: Optional[str] = None, max_files: Optional[int] = None, + max_file_size: Optional[int] = None, timeout: Optional[int] = None, file_patterns: Optional[list] = None ) -> Dict[str, Any]: @@ -67,6 +68,8 @@ async def run( config["llm_provider"] = llm_provider if max_files: config["max_files"] = max_files + if max_file_size: + config["max_file_size"] = max_file_size if timeout: config["timeout"] = timeout if file_patterns: diff --git a/cli/src/fuzzforge_cli/commands/init.py b/cli/src/fuzzforge_cli/commands/init.py index 9a9d30ab..4b94df5a 100644 --- a/cli/src/fuzzforge_cli/commands/init.py +++ b/cli/src/fuzzforge_cli/commands/init.py @@ -187,19 +187,40 @@ def _ensure_env_file(fuzzforge_dir: Path, force: bool) -> None: console.print("🧠 Configuring AI environment...") console.print(" • Default LLM provider: openai") - console.print(" • Default LLM model: gpt-5-mini") + console.print(" • Default LLM model: litellm_proxy/gpt-5-mini") console.print(" • To customise provider/model later, edit .fuzzforge/.env") llm_provider = "openai" - llm_model = "gpt-5-mini" + llm_model = "litellm_proxy/gpt-5-mini" + + # Check for global virtual keys from volumes/env/.env + global_env_key = None + for parent in fuzzforge_dir.parents: + global_env = parent / "volumes" / "env" / ".env" + if global_env.exists(): + try: + for line in global_env.read_text(encoding="utf-8").splitlines(): + if line.strip().startswith("OPENAI_API_KEY=") and "=" in line: + key_value = line.split("=", 1)[1].strip() + if key_value and not key_value.startswith("your-") and key_value.startswith("sk-"): + global_env_key = key_value + console.print(f" • Found virtual key in {global_env.relative_to(parent)}") + break + except Exception: + pass + break api_key = Prompt.ask( - "OpenAI API key (leave blank to fill manually)", + "OpenAI API key (leave blank to use global virtual key)" if global_env_key else "OpenAI API key (leave blank to fill manually)", default="", show_default=False, console=console, ) + # Use global key if user didn't provide one + if not api_key and global_env_key: + api_key = global_env_key + session_db_path = fuzzforge_dir / "fuzzforge_sessions.db" session_db_rel = 
session_db_path.relative_to(fuzzforge_dir.parent) @@ -210,14 +231,20 @@ def _ensure_env_file(fuzzforge_dir: Path, force: bool) -> None: f"LLM_PROVIDER={llm_provider}", f"LLM_MODEL={llm_model}", f"LITELLM_MODEL={llm_model}", + "LLM_ENDPOINT=http://localhost:10999", + "LLM_API_KEY=", + "LLM_EMBEDDING_MODEL=litellm_proxy/text-embedding-3-large", + "LLM_EMBEDDING_ENDPOINT=http://localhost:10999", f"OPENAI_API_KEY={api_key}", "FUZZFORGE_MCP_URL=http://localhost:8010/mcp", "", "# Cognee configuration mirrors the primary LLM by default", f"LLM_COGNEE_PROVIDER={llm_provider}", f"LLM_COGNEE_MODEL={llm_model}", - f"LLM_COGNEE_API_KEY={api_key}", - "LLM_COGNEE_ENDPOINT=", + "LLM_COGNEE_ENDPOINT=http://localhost:10999", + "LLM_COGNEE_API_KEY=", + "LLM_COGNEE_EMBEDDING_MODEL=litellm_proxy/text-embedding-3-large", + "LLM_COGNEE_EMBEDDING_ENDPOINT=http://localhost:10999", "COGNEE_MCP_URL=", "", "# Session persistence options: inmemory | sqlite", @@ -239,6 +266,8 @@ def _ensure_env_file(fuzzforge_dir: Path, force: bool) -> None: for line in env_lines: if line.startswith("OPENAI_API_KEY="): template_lines.append("OPENAI_API_KEY=") + elif line.startswith("LLM_API_KEY="): + template_lines.append("LLM_API_KEY=") elif line.startswith("LLM_COGNEE_API_KEY="): template_lines.append("LLM_COGNEE_API_KEY=") else: diff --git a/cli/src/fuzzforge_cli/config.py b/cli/src/fuzzforge_cli/config.py index f21b87d0..1a0ae283 100644 --- a/cli/src/fuzzforge_cli/config.py +++ b/cli/src/fuzzforge_cli/config.py @@ -28,6 +28,58 @@ except ImportError: # pragma: no cover - optional dependency load_dotenv = None + +def _load_env_file_if_exists(path: Path, override: bool = False) -> bool: + if not path.exists(): + return False + # Always use manual parsing to handle empty values correctly + try: + for line in path.read_text(encoding="utf-8").splitlines(): + stripped = line.strip() + if not stripped or stripped.startswith("#") or "=" not in stripped: + continue + key, value = stripped.split("=", 1) + key = key.strip() + value = value.strip() + if override: + # Only override if value is non-empty + if value: + os.environ[key] = value + else: + # Set if not already in environment and value is non-empty + if key not in os.environ and value: + os.environ[key] = value + return True + except Exception: # pragma: no cover - best effort fallback + return False + + +def _find_shared_env_file(project_dir: Path) -> Path | None: + for directory in [project_dir] + list(project_dir.parents): + candidate = directory / "volumes" / "env" / ".env" + if candidate.exists(): + return candidate + return None + + +def load_project_env(project_dir: Optional[Path] = None) -> Path | None: + """Load project-local env, falling back to shared volumes/env/.env.""" + + project_dir = Path(project_dir or Path.cwd()) + shared_env = _find_shared_env_file(project_dir) + loaded_shared = False + if shared_env: + loaded_shared = _load_env_file_if_exists(shared_env, override=False) + + project_env = project_dir / ".fuzzforge" / ".env" + if _load_env_file_if_exists(project_env, override=True): + return project_env + + if loaded_shared: + return shared_env + + return None + import yaml from pydantic import BaseModel, Field @@ -312,23 +364,7 @@ def setup_cognee_environment(self) -> None: if not cognee.get("enabled", True): return - # Load project-specific environment overrides from .fuzzforge/.env if available - env_file = self.project_dir / ".fuzzforge" / ".env" - if env_file.exists(): - if load_dotenv: - load_dotenv(env_file, override=False) - else: - try: - for line in 
env_file.read_text(encoding="utf-8").splitlines(): - stripped = line.strip() - if not stripped or stripped.startswith("#"): - continue - if "=" not in stripped: - continue - key, value = stripped.split("=", 1) - os.environ.setdefault(key.strip(), value.strip()) - except Exception: # pragma: no cover - best effort fallback - pass + load_project_env(self.project_dir) backend_access = "true" if cognee.get("backend_access_control", True) else "false" os.environ["ENABLE_BACKEND_ACCESS_CONTROL"] = backend_access @@ -374,6 +410,17 @@ def _env(*names: str, default: str | None = None) -> str | None: "OPENAI_API_KEY", ) endpoint = _env("LLM_COGNEE_ENDPOINT", "COGNEE_LLM_ENDPOINT", "LLM_ENDPOINT") + embedding_model = _env( + "LLM_COGNEE_EMBEDDING_MODEL", + "COGNEE_LLM_EMBEDDING_MODEL", + "LLM_EMBEDDING_MODEL", + ) + embedding_endpoint = _env( + "LLM_COGNEE_EMBEDDING_ENDPOINT", + "COGNEE_LLM_EMBEDDING_ENDPOINT", + "LLM_EMBEDDING_ENDPOINT", + "LLM_ENDPOINT", + ) api_version = _env( "LLM_COGNEE_API_VERSION", "COGNEE_LLM_API_VERSION", @@ -398,6 +445,20 @@ def _env(*names: str, default: str | None = None) -> str | None: os.environ.setdefault("OPENAI_API_KEY", api_key) if endpoint: os.environ["LLM_ENDPOINT"] = endpoint + os.environ.setdefault("LLM_API_BASE", endpoint) + os.environ.setdefault("LLM_EMBEDDING_ENDPOINT", endpoint) + os.environ.setdefault("LLM_EMBEDDING_API_BASE", endpoint) + os.environ.setdefault("OPENAI_API_BASE", endpoint) + # Set LiteLLM proxy environment variables for SDK usage + os.environ.setdefault("LITELLM_PROXY_API_BASE", endpoint) + if api_key: + # Set LiteLLM proxy API key from the virtual key + os.environ.setdefault("LITELLM_PROXY_API_KEY", api_key) + if embedding_model: + os.environ["LLM_EMBEDDING_MODEL"] = embedding_model + if embedding_endpoint: + os.environ["LLM_EMBEDDING_ENDPOINT"] = embedding_endpoint + os.environ.setdefault("LLM_EMBEDDING_API_BASE", embedding_endpoint) if api_version: os.environ["LLM_API_VERSION"] = api_version if max_tokens: diff --git a/cli/src/fuzzforge_cli/main.py b/cli/src/fuzzforge_cli/main.py index 93fc5d9e..0d3f9408 100644 --- a/cli/src/fuzzforge_cli/main.py +++ b/cli/src/fuzzforge_cli/main.py @@ -19,6 +19,8 @@ from typing import Optional, List import sys +from .config import load_project_env + from .commands import ( workflows, workflow_exec, @@ -33,6 +35,9 @@ # Install rich traceback handler install(show_locals=True) +# Ensure environment variables are available before command execution +load_project_env() + # Create console for rich output console = Console() diff --git a/docker-compose.yml b/docker-compose.yml index c364778d..22d0e170 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -144,6 +144,103 @@ services: networks: - fuzzforge-network + # ============================================================================ + # LLM Proxy - LiteLLM Gateway + # ============================================================================ + llm-proxy: + image: ghcr.io/berriai/litellm:main-stable + container_name: fuzzforge-llm-proxy + depends_on: + llm-proxy-db: + condition: service_healthy + otel-collector: + condition: service_started + env_file: + - ./volumes/env/.env + environment: + PORT: 4000 + DATABASE_URL: postgresql://litellm:litellm@llm-proxy-db:5432/litellm + STORE_MODEL_IN_DB: "True" + UI_USERNAME: ${UI_USERNAME:-fuzzforge} + UI_PASSWORD: ${UI_PASSWORD:-fuzzforge123} + OTEL_EXPORTER_OTLP_ENDPOINT: http://otel-collector:4317 + OTEL_EXPORTER_OTLP_PROTOCOL: grpc + ANTHROPIC_API_KEY: ${LITELLM_ANTHROPIC_API_KEY:-} + OPENAI_API_KEY: 
${LITELLM_OPENAI_API_KEY:-} + command: + - "--config" + - "/etc/litellm/proxy_config.yaml" + ports: + - "10999:4000" # Web UI + OpenAI-compatible API + volumes: + - litellm_proxy_data:/var/lib/litellm + - ./volumes/litellm/proxy_config.yaml:/etc/litellm/proxy_config.yaml:ro + networks: + - fuzzforge-network + healthcheck: + test: ["CMD-SHELL", "wget --no-verbose --tries=1 http://localhost:4000/health/liveliness || exit 1"] + interval: 30s + timeout: 10s + retries: 3 + start_period: 40s + restart: unless-stopped + + otel-collector: + image: otel/opentelemetry-collector:latest + container_name: fuzzforge-otel-collector + command: ["--config=/etc/otel-collector/config.yaml"] + volumes: + - ./volumes/otel/collector-config.yaml:/etc/otel-collector/config.yaml:ro + ports: + - "4317:4317" + - "4318:4318" + networks: + - fuzzforge-network + restart: unless-stopped + + llm-proxy-db: + image: postgres:16 + container_name: fuzzforge-llm-proxy-db + environment: + POSTGRES_DB: litellm + POSTGRES_USER: litellm + POSTGRES_PASSWORD: litellm + healthcheck: + test: ["CMD-SHELL", "pg_isready -d litellm -U litellm"] + interval: 5s + timeout: 5s + retries: 12 + volumes: + - litellm_proxy_db:/var/lib/postgresql/data + networks: + - fuzzforge-network + restart: unless-stopped + + # ============================================================================ + # LLM Proxy Bootstrap - Seed providers and virtual keys + # ============================================================================ + llm-proxy-bootstrap: + image: python:3.11-slim + container_name: fuzzforge-llm-proxy-bootstrap + depends_on: + llm-proxy: + condition: service_started + env_file: + - ./volumes/env/.env + environment: + PROXY_BASE_URL: http://llm-proxy:4000 + ENV_FILE_PATH: /bootstrap/env/.env + UI_USERNAME: ${UI_USERNAME:-fuzzforge} + UI_PASSWORD: ${UI_PASSWORD:-fuzzforge123} + volumes: + - ./docker/scripts/bootstrap_llm_proxy.py:/app/bootstrap.py:ro + - ./volumes/env:/bootstrap/env + - litellm_proxy_data:/bootstrap/data + networks: + - fuzzforge-network + command: ["python", "/app/bootstrap.py"] + restart: "no" + # ============================================================================ # Vertical Worker: Rust/Native Security # ============================================================================ @@ -455,10 +552,11 @@ services: context: ./ai/agents/task_agent dockerfile: Dockerfile container_name: fuzzforge-task-agent + depends_on: + llm-proxy-bootstrap: + condition: service_completed_successfully ports: - "10900:8000" - env_file: - - ./volumes/env/.env environment: - PORT=8000 - PYTHONUNBUFFERED=1 @@ -555,6 +653,10 @@ volumes: name: fuzzforge_worker_ossfuzz_cache worker_ossfuzz_build: name: fuzzforge_worker_ossfuzz_build + litellm_proxy_data: + name: fuzzforge_litellm_proxy_data + litellm_proxy_db: + name: fuzzforge_litellm_proxy_db # Add more worker caches as you add verticals: # worker_web_cache: # worker_ios_cache: @@ -588,6 +690,7 @@ networks: # 4. Web UIs: # - Temporal UI: http://localhost:8233 # - MinIO Console: http://localhost:9001 (user: fuzzforge, pass: fuzzforge123) +# - LiteLLM Proxy: http://localhost:10999 # # 5. Resource Usage (Baseline): # - Temporal: ~500MB diff --git a/docker/scripts/bootstrap_llm_proxy.py b/docker/scripts/bootstrap_llm_proxy.py new file mode 100644 index 00000000..feb7d219 --- /dev/null +++ b/docker/scripts/bootstrap_llm_proxy.py @@ -0,0 +1,636 @@ +"""Bootstrap the LiteLLM proxy with provider secrets and default virtual keys. 
+ +The bootstrapper runs as a one-shot container during docker-compose startup. +It performs the following actions: + + 1. Waits for the proxy health endpoint to respond. + 2. Collects upstream provider API keys from the shared .env file (plus any + legacy copies) and mirrors them into a proxy-specific env file + (volumes/env/.env.litellm) so only the proxy container can access them. + 3. Emits a default virtual key for the task agent by calling /key/generate, + persisting the generated token back into volumes/env/.env so the agent can + authenticate through the proxy instead of using raw provider secrets. + 4. Keeps the process idempotent: existing keys are reused and their allowed + model list is refreshed instead of issuing duplicates on every run. +""" + +from __future__ import annotations + +import json +import os +import sys +import time +import urllib.error +import urllib.parse +import urllib.request +from dataclasses import dataclass +from pathlib import Path +from typing import Iterable, Mapping + +PROXY_BASE_URL = os.getenv("PROXY_BASE_URL", "http://llm-proxy:4000").rstrip("/") +ENV_FILE_PATH = Path(os.getenv("ENV_FILE_PATH", "/bootstrap/env/.env")) +LITELLM_ENV_FILE_PATH = Path( + os.getenv("LITELLM_ENV_FILE_PATH", "/bootstrap/env/.env.litellm") +) +LEGACY_ENV_FILE_PATH = Path( + os.getenv("LEGACY_ENV_FILE_PATH", "/bootstrap/env/.env.bifrost") +) +MAX_WAIT_SECONDS = int(os.getenv("LITELLM_PROXY_WAIT_SECONDS", "120")) + + +@dataclass(frozen=True) +class VirtualKeySpec: + """Configuration for a virtual key to be provisioned.""" + env_var: str + alias: str + user_id: str + budget_env_var: str + duration_env_var: str + default_budget: float + default_duration: str + + +# Multiple virtual keys for different services +VIRTUAL_KEYS: tuple[VirtualKeySpec, ...] = ( + VirtualKeySpec( + env_var="OPENAI_API_KEY", + alias="fuzzforge-cli", + user_id="fuzzforge-cli", + budget_env_var="CLI_BUDGET", + duration_env_var="CLI_DURATION", + default_budget=100.0, + default_duration="30d", + ), + VirtualKeySpec( + env_var="TASK_AGENT_API_KEY", + alias="fuzzforge-task-agent", + user_id="fuzzforge-task-agent", + budget_env_var="TASK_AGENT_BUDGET", + duration_env_var="TASK_AGENT_DURATION", + default_budget=25.0, + default_duration="30d", + ), + VirtualKeySpec( + env_var="COGNEE_API_KEY", + alias="fuzzforge-cognee", + user_id="fuzzforge-cognee", + budget_env_var="COGNEE_BUDGET", + duration_env_var="COGNEE_DURATION", + default_budget=50.0, + default_duration="30d", + ), +) + + +@dataclass(frozen=True) +class ProviderSpec: + name: str + litellm_env_var: str + alias_env_var: str + source_env_vars: tuple[str, ...] + + +# Support fresh LiteLLM variables while gracefully migrating legacy env +# aliases on first boot. +PROVIDERS: tuple[ProviderSpec, ...] 
= ( + ProviderSpec( + "openai", + "OPENAI_API_KEY", + "LITELLM_OPENAI_API_KEY", + ("LITELLM_OPENAI_API_KEY", "BIFROST_OPENAI_KEY"), + ), + ProviderSpec( + "anthropic", + "ANTHROPIC_API_KEY", + "LITELLM_ANTHROPIC_API_KEY", + ("LITELLM_ANTHROPIC_API_KEY", "BIFROST_ANTHROPIC_KEY"), + ), + ProviderSpec( + "gemini", + "GEMINI_API_KEY", + "LITELLM_GEMINI_API_KEY", + ("LITELLM_GEMINI_API_KEY", "BIFROST_GEMINI_KEY"), + ), + ProviderSpec( + "mistral", + "MISTRAL_API_KEY", + "LITELLM_MISTRAL_API_KEY", + ("LITELLM_MISTRAL_API_KEY", "BIFROST_MISTRAL_KEY"), + ), + ProviderSpec( + "openrouter", + "OPENROUTER_API_KEY", + "LITELLM_OPENROUTER_API_KEY", + ("LITELLM_OPENROUTER_API_KEY", "BIFROST_OPENROUTER_KEY"), + ), +) + +PROVIDER_LOOKUP: dict[str, ProviderSpec] = {spec.name: spec for spec in PROVIDERS} + + +def log(message: str) -> None: + print(f"[litellm-bootstrap] {message}", flush=True) + + +def read_lines(path: Path) -> list[str]: + if not path.exists(): + return [] + return path.read_text().splitlines() + + +def write_lines(path: Path, lines: Iterable[str]) -> None: + material = "\n".join(lines) + if material and not material.endswith("\n"): + material += "\n" + path.parent.mkdir(parents=True, exist_ok=True) + path.write_text(material) + + +def read_env_file() -> list[str]: + if not ENV_FILE_PATH.exists(): + raise FileNotFoundError( + f"Expected env file at {ENV_FILE_PATH}. Copy volumes/env/.env.example first." + ) + return read_lines(ENV_FILE_PATH) + + +def write_env_file(lines: Iterable[str]) -> None: + write_lines(ENV_FILE_PATH, lines) + + +def read_litellm_env_file() -> list[str]: + return read_lines(LITELLM_ENV_FILE_PATH) + + +def write_litellm_env_file(lines: Iterable[str]) -> None: + write_lines(LITELLM_ENV_FILE_PATH, lines) + + +def read_legacy_env_file() -> Mapping[str, str]: + lines = read_lines(LEGACY_ENV_FILE_PATH) + return parse_env_lines(lines) + + +def set_env_value(lines: list[str], key: str, value: str) -> tuple[list[str], bool]: + prefix = f"{key}=" + new_line = f"{prefix}{value}" + for idx, line in enumerate(lines): + stripped = line.lstrip() + if not stripped or stripped.startswith("#"): + continue + if stripped.startswith(prefix): + if stripped == new_line: + return lines, False + indent = line[: len(line) - len(stripped)] + lines[idx] = f"{indent}{new_line}" + return lines, True + lines.append(new_line) + return lines, True + + +def parse_env_lines(lines: list[str]) -> dict[str, str]: + mapping: dict[str, str] = {} + for raw_line in lines: + stripped = raw_line.strip() + if not stripped or stripped.startswith("#") or "=" not in stripped: + continue + key, value = stripped.split("=", 1) + mapping[key] = value + return mapping + + +def wait_for_proxy() -> None: + health_paths = ("/health/liveliness", "/health", "/") + deadline = time.time() + MAX_WAIT_SECONDS + attempt = 0 + while time.time() < deadline: + attempt += 1 + for path in health_paths: + url = f"{PROXY_BASE_URL}{path}" + try: + with urllib.request.urlopen(url) as response: # noqa: S310 + if response.status < 400: + log(f"Proxy responded on {path} (attempt {attempt})") + return + except urllib.error.URLError as exc: + log(f"Proxy not ready yet ({path}): {exc}") + time.sleep(3) + raise TimeoutError(f"Timed out waiting for proxy at {PROXY_BASE_URL}") + + +def request_json( + path: str, + *, + method: str = "GET", + payload: Mapping[str, object] | None = None, + auth_token: str | None = None, +) -> tuple[int, str]: + url = f"{PROXY_BASE_URL}{path}" + data = None + headers = {"Accept": "application/json"} + if auth_token: + 
headers["Authorization"] = f"Bearer {auth_token}" + if payload is not None: + data = json.dumps(payload).encode("utf-8") + headers["Content-Type"] = "application/json" + request = urllib.request.Request(url, data=data, headers=headers, method=method) + try: + with urllib.request.urlopen(request) as response: # noqa: S310 + body = response.read().decode("utf-8") + return response.status, body + except urllib.error.HTTPError as exc: + body = exc.read().decode("utf-8") + return exc.code, body + + +def get_master_key(env_map: Mapping[str, str]) -> str: + candidate = os.getenv("LITELLM_MASTER_KEY") or env_map.get("LITELLM_MASTER_KEY") + if not candidate: + raise RuntimeError( + "LITELLM_MASTER_KEY is not set. Add it to volumes/env/.env before starting Docker." + ) + value = candidate.strip() + if not value: + raise RuntimeError( + "LITELLM_MASTER_KEY is blank. Provide a non-empty value in the env file." + ) + return value + + +def gather_provider_keys( + env_lines: list[str], + env_map: dict[str, str], + legacy_map: Mapping[str, str], +) -> tuple[dict[str, str], list[str], bool]: + updated_lines = list(env_lines) + discovered: dict[str, str] = {} + changed = False + + for spec in PROVIDERS: + value: str | None = None + for source_var in spec.source_env_vars: + candidate = env_map.get(source_var) or legacy_map.get(source_var) or os.getenv( + source_var + ) + if not candidate: + continue + stripped = candidate.strip() + if stripped: + value = stripped + break + if not value: + continue + + discovered[spec.litellm_env_var] = value + updated_lines, alias_changed = set_env_value( + updated_lines, spec.alias_env_var, value + ) + if alias_changed: + env_map[spec.alias_env_var] = value + changed = True + + return discovered, updated_lines, changed + + +def ensure_litellm_env(provider_values: Mapping[str, str]) -> None: + if not provider_values: + log("No provider secrets discovered; skipping LiteLLM env update") + return + lines = read_litellm_env_file() + updated_lines = list(lines) + changed = False + for env_var, value in provider_values.items(): + updated_lines, var_changed = set_env_value(updated_lines, env_var, value) + if var_changed: + changed = True + if changed or not lines: + write_litellm_env_file(updated_lines) + log(f"Wrote provider secrets to {LITELLM_ENV_FILE_PATH}") + + +def current_env_key(env_map: Mapping[str, str], env_var: str) -> str | None: + candidate = os.getenv(env_var) or env_map.get(env_var) + if not candidate: + return None + value = candidate.strip() + if not value or value.startswith("sk-proxy-"): + return None + return value + + +def collect_default_models(env_map: Mapping[str, str]) -> list[str]: + explicit = ( + os.getenv("LITELLM_DEFAULT_MODELS") + or env_map.get("LITELLM_DEFAULT_MODELS") + or "" + ) + models: list[str] = [] + if explicit: + models.extend( + model.strip() + for model in explicit.split(",") + if model.strip() + ) + if models: + return sorted(dict.fromkeys(models)) + + configured_model = ( + os.getenv("LITELLM_MODEL") or env_map.get("LITELLM_MODEL") or "" + ).strip() + configured_provider = ( + os.getenv("LITELLM_PROVIDER") or env_map.get("LITELLM_PROVIDER") or "" + ).strip() + + if configured_model: + if "/" in configured_model: + models.append(configured_model) + elif configured_provider: + models.append(f"{configured_provider}/{configured_model}") + else: + log( + "LITELLM_MODEL is set without a provider; configure LITELLM_PROVIDER or " + "use the provider/model format (e.g. openai/gpt-4o-mini)." 
+ ) + elif configured_provider: + log( + "LITELLM_PROVIDER configured without a default model. Bootstrap will issue an " + "unrestricted virtual key allowing any proxy-registered model." + ) + + return sorted(dict.fromkeys(models)) + + +def fetch_existing_key_record(master_key: str, key_value: str) -> Mapping[str, object] | None: + encoded = urllib.parse.quote_plus(key_value) + status, body = request_json(f"/key/info?key={encoded}", auth_token=master_key) + if status != 200: + log(f"Key lookup failed ({status}); treating OPENAI_API_KEY as new") + return None + try: + data = json.loads(body) + except json.JSONDecodeError: + log("Key info response was not valid JSON; ignoring") + return None + if isinstance(data, Mapping) and data.get("key"): + return data + return None + + +def fetch_key_by_alias(master_key: str, alias: str) -> str | None: + """Fetch existing key value by alias from LiteLLM proxy.""" + status, body = request_json("/key/info", auth_token=master_key) + if status != 200: + return None + try: + data = json.loads(body) + except json.JSONDecodeError: + return None + if isinstance(data, dict) and "keys" in data: + for key_info in data.get("keys", []): + if isinstance(key_info, dict) and key_info.get("key_alias") == alias: + return str(key_info.get("key", "")).strip() or None + return None + + +def generate_virtual_key( + master_key: str, + models: list[str], + spec: VirtualKeySpec, + env_map: Mapping[str, str], +) -> str: + budget_str = os.getenv(spec.budget_env_var) or env_map.get(spec.budget_env_var) or str(spec.default_budget) + try: + budget = float(budget_str) + except ValueError: + budget = spec.default_budget + + duration = os.getenv(spec.duration_env_var) or env_map.get(spec.duration_env_var) or spec.default_duration + + payload: dict[str, object] = { + "key_alias": spec.alias, + "user_id": spec.user_id, + "duration": duration, + "max_budget": budget, + "metadata": { + "provisioned_by": "bootstrap", + "service": spec.alias, + "default_models": models, + }, + "key_type": "llm_api", + } + if models: + payload["models"] = models + status, body = request_json( + "/key/generate", method="POST", payload=payload, auth_token=master_key + ) + if status == 400 and "already exists" in body.lower(): + # Key alias already exists but .env is out of sync (e.g., after docker prune) + # Delete the old key and regenerate + log(f"Key alias '{spec.alias}' already exists in database but not in .env; deleting and regenerating") + # Try to delete by alias using POST /key/delete with key_aliases array + delete_payload = {"key_aliases": [spec.alias]} + delete_status, delete_body = request_json( + "/key/delete", method="POST", payload=delete_payload, auth_token=master_key + ) + if delete_status not in {200, 201}: + log(f"Warning: Could not delete existing key alias {spec.alias} ({delete_status}): {delete_body}") + # Continue anyway and try to generate + else: + log(f"Deleted existing key alias {spec.alias}") + + # Retry generation + status, body = request_json( + "/key/generate", method="POST", payload=payload, auth_token=master_key + ) + if status not in {200, 201}: + raise RuntimeError(f"Failed to generate virtual key for {spec.alias} ({status}): {body}") + try: + data = json.loads(body) + except json.JSONDecodeError as exc: + raise RuntimeError(f"Virtual key response for {spec.alias} was not valid JSON") from exc + if isinstance(data, Mapping): + key_value = str(data.get("key") or data.get("token") or "").strip() + if key_value: + log(f"Generated new LiteLLM virtual key for {spec.alias} (budget: 
${budget}, duration: {duration})") + return key_value + raise RuntimeError(f"Virtual key response for {spec.alias} did not include a key field") + + +def update_virtual_key( + master_key: str, + key_value: str, + models: list[str], + spec: VirtualKeySpec, +) -> None: + if not models: + return + payload: dict[str, object] = { + "key": key_value, + "models": models, + } + status, body = request_json( + "/key/update", method="POST", payload=payload, auth_token=master_key + ) + if status != 200: + log(f"Virtual key update for {spec.alias} skipped ({status}): {body}") + else: + log(f"Refreshed allowed models for {spec.alias}") + + +def persist_key_to_env(new_key: str, env_var: str) -> None: + lines = read_env_file() + updated_lines, changed = set_env_value(lines, env_var, new_key) + # Always update the environment variable, even if file wasn't changed + os.environ[env_var] = new_key + if changed: + write_env_file(updated_lines) + log(f"Persisted {env_var} to {ENV_FILE_PATH}") + else: + log(f"{env_var} already up-to-date in env file") + + +def ensure_virtual_key( + master_key: str, + models: list[str], + env_map: Mapping[str, str], + spec: VirtualKeySpec, +) -> str: + allowed_models: list[str] = [] + sync_flag = os.getenv("LITELLM_SYNC_VIRTUAL_KEY_MODELS", "").strip().lower() + if models and (sync_flag in {"1", "true", "yes", "on"} or models == ["*"]): + allowed_models = models + existing_key = current_env_key(env_map, spec.env_var) + if existing_key: + record = fetch_existing_key_record(master_key, existing_key) + if record: + log(f"Reusing existing LiteLLM virtual key for {spec.alias}") + if allowed_models: + update_virtual_key(master_key, existing_key, allowed_models, spec) + return existing_key + log(f"Existing {spec.env_var} not registered with proxy; generating new key") + + new_key = generate_virtual_key(master_key, models, spec, env_map) + if allowed_models: + update_virtual_key(master_key, new_key, allowed_models, spec) + return new_key + + +def _split_model_identifier(model: str) -> tuple[str | None, str]: + if "/" in model: + provider, short_name = model.split("/", 1) + return provider.lower().strip() or None, short_name.strip() + return None, model.strip() + + +def ensure_models_registered( + master_key: str, + models: list[str], + env_map: Mapping[str, str], +) -> None: + if not models: + return + for model in models: + provider, short_name = _split_model_identifier(model) + if not provider or not short_name: + log(f"Skipping model '{model}' (no provider segment)") + continue + spec = PROVIDER_LOOKUP.get(provider) + if not spec: + log(f"No provider spec registered for '{provider}'; skipping model '{model}'") + continue + provider_secret = ( + env_map.get(spec.alias_env_var) + or env_map.get(spec.litellm_env_var) + or os.getenv(spec.alias_env_var) + or os.getenv(spec.litellm_env_var) + ) + if not provider_secret: + log( + f"Provider secret for '{provider}' not found; skipping model registration" + ) + continue + + api_key_reference = f"os.environ/{spec.alias_env_var}" + payload: dict[str, object] = { + "model_name": model, + "litellm_params": { + "model": short_name, + "custom_llm_provider": provider, + "api_key": api_key_reference, + }, + "model_info": { + "provider": provider, + "description": "Auto-registered during bootstrap", + }, + } + + status, body = request_json( + "/model/new", method="POST", payload=payload, auth_token=master_key + ) + if status in {200, 201}: + log(f"Registered LiteLLM model '{model}'") + continue + try: + data = json.loads(body) + except 
json.JSONDecodeError: + data = body + error_message = ( + data.get("error") if isinstance(data, Mapping) else str(data) + ) + if status == 409 or ( + isinstance(error_message, str) + and "already" in error_message.lower() + ): + log(f"Model '{model}' already present; skipping") + continue + log(f"Failed to register model '{model}' ({status}): {error_message}") + + +def main() -> int: + log("Bootstrapping LiteLLM proxy") + try: + wait_for_proxy() + env_lines = read_env_file() + env_map = parse_env_lines(env_lines) + legacy_map = read_legacy_env_file() + master_key = get_master_key(env_map) + + provider_values, updated_env_lines, env_changed = gather_provider_keys( + env_lines, env_map, legacy_map + ) + if env_changed: + write_env_file(updated_env_lines) + env_map = parse_env_lines(updated_env_lines) + log("Updated LiteLLM provider aliases in shared env file") + + ensure_litellm_env(provider_values) + + models = collect_default_models(env_map) + if models: + log("Default models for virtual keys: %s" % ", ".join(models)) + models_for_key = models + else: + log( + "No default models configured; provisioning virtual keys without model " + "restrictions (model-agnostic)." + ) + models_for_key = ["*"] + + # Generate virtual keys for each service + for spec in VIRTUAL_KEYS: + virtual_key = ensure_virtual_key(master_key, models_for_key, env_map, spec) + persist_key_to_env(virtual_key, spec.env_var) + + # Register models if any were specified + if models: + ensure_models_registered(master_key, models, env_map) + + log("Bootstrap complete") + return 0 + except Exception as exc: # pragma: no cover - startup failure reported to logs + log(f"Bootstrap failed: {exc}") + return 1 + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/docs/docs/how-to/litellm-hot-swap.md b/docs/docs/how-to/litellm-hot-swap.md new file mode 100644 index 00000000..8c1d138f --- /dev/null +++ b/docs/docs/how-to/litellm-hot-swap.md @@ -0,0 +1,179 @@ +--- +title: "Hot-Swap LiteLLM Models" +description: "Register OpenAI and Anthropic models with the bundled LiteLLM proxy and switch them on the task agent without downtime." +--- + +LiteLLM sits between the task agent and upstream providers, so every model change +is just an API call. This guide walks through registering OpenAI and Anthropic +models, updating the virtual key, and exercising the A2A hot-swap flow. + +## Prerequisites + +- `docker compose up llm-proxy llm-proxy-db task-agent` +- Provider secrets in `volumes/env/.env`: + - `LITELLM_OPENAI_API_KEY` + - `LITELLM_ANTHROPIC_API_KEY` +- Master key (`LITELLM_MASTER_KEY`) and task-agent virtual key (auto-generated + during bootstrap) + +> UI access uses `UI_USERNAME` / `UI_PASSWORD` (defaults: `fuzzforge` / +> `fuzzforge123`). Change them by exporting new values before running compose. + +## Register Provider Models + +Use the admin API to register the models the proxy should expose. The snippet +below creates aliases for OpenAI `gpt-5`, `gpt-5-mini`, and Anthropic +`claude-sonnet-4-5`. 
+
+```bash
+export MASTER_KEY=$(awk -F= '$1=="LITELLM_MASTER_KEY"{print $2}' volumes/env/.env)
+export OPENAI_API_KEY=$(awk -F= '$1=="OPENAI_API_KEY"{print $2}' volumes/env/.env)
+python - <<'PY'
+import os, requests
+master = os.environ['MASTER_KEY'].strip()
+base = 'http://localhost:10999'
+models = [
+    {
+        "model_name": "openai/gpt-5",
+        "litellm_params": {
+            "model": "gpt-5",
+            "custom_llm_provider": "openai",
+            "api_key": "os.environ/LITELLM_OPENAI_API_KEY"
+        },
+        "model_info": {
+            "provider": "openai",
+            "description": "OpenAI GPT-5"
+        }
+    },
+    {
+        "model_name": "openai/gpt-5-mini",
+        "litellm_params": {
+            "model": "gpt-5-mini",
+            "custom_llm_provider": "openai",
+            "api_key": "os.environ/LITELLM_OPENAI_API_KEY"
+        },
+        "model_info": {
+            "provider": "openai",
+            "description": "OpenAI GPT-5 mini"
+        }
+    },
+    {
+        "model_name": "anthropic/claude-sonnet-4-5",
+        "litellm_params": {
+            "model": "claude-sonnet-4-5",
+            "custom_llm_provider": "anthropic",
+            "api_key": "os.environ/LITELLM_ANTHROPIC_API_KEY"
+        },
+        "model_info": {
+            "provider": "anthropic",
+            "description": "Anthropic Claude Sonnet 4.5"
+        }
+    }
+]
+for payload in models:
+    resp = requests.post(
+        f"{base}/model/new",
+        headers={"Authorization": f"Bearer {master}", "Content-Type": "application/json"},
+        json=payload,
+        timeout=60,
+    )
+    if resp.status_code not in (200, 201, 409):
+        raise SystemExit(f"Failed to register {payload['model_name']}: {resp.status_code} {resp.text}")
+    print(payload['model_name'], '=>', resp.status_code)
+PY
+```
+
+Each entry stores the upstream secret by reference (`os.environ/...`) so the
+raw API key never leaves the container environment.
+
+## Relax Virtual Key Model Restrictions
+
+Let the agent key call every model on the proxy:
+
+```bash
+export MASTER_KEY=$(awk -F= '$1=="LITELLM_MASTER_KEY"{print $2}' volumes/env/.env)
+export VK=$(awk -F= '$1=="OPENAI_API_KEY"{print $2}' volumes/env/.env)
+python - <<'PY'
+import os, requests, json
+resp = requests.post(
+    'http://localhost:10999/key/update',
+    headers={
+        'Authorization': f"Bearer {os.environ['MASTER_KEY'].strip()}",
+        'Content-Type': 'application/json'
+    },
+    json={'key': os.environ['VK'].strip(), 'models': []},
+    timeout=60,
+)
+print(json.dumps(resp.json(), indent=2))
+PY
+```
+
+Restart the task agent so it sees the refreshed key:
+
+```bash
+docker compose restart task-agent
+```
+
+## Hot-Swap With The A2A Helper
+
+Switch models without restarting the service:
+
+```bash
+# Ensure the CLI reads the latest virtual key
+export OPENAI_API_KEY=$(awk -F= '$1=="OPENAI_API_KEY"{print $2}' volumes/env/.env)
+
+# OpenAI gpt-5 alias
+python ai/agents/task_agent/a2a_hot_swap.py \
+  --url http://localhost:10900/a2a/litellm_agent \
+  --model openai gpt-5 \
+  --context switch-demo
+
+# Confirm the response comes from the new model
+python ai/agents/task_agent/a2a_hot_swap.py \
+  --url http://localhost:10900/a2a/litellm_agent \
+  --message "Which model am I using?" \
+  --context switch-demo
+
+# Swap to gpt-5-mini
+python ai/agents/task_agent/a2a_hot_swap.py --url http://localhost:10900/a2a/litellm_agent --model openai gpt-5-mini --context switch-demo
+
+# Swap to Anthropic Claude Sonnet 4.5
+python ai/agents/task_agent/a2a_hot_swap.py --url http://localhost:10900/a2a/litellm_agent --model anthropic claude-sonnet-4-5 --context switch-demo
+```
+
+> Each invocation reuses the same conversation context (`switch-demo`) so you
+> can confirm the active provider by asking follow-up questions.
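+
+If a swap does not behave as expected, a quick sanity check is to list the
+models the virtual key can actually see. The sketch below is an illustration
+rather than a script shipped in the repository: it assumes the proxy is
+published on `http://localhost:10999`, that bootstrap has already written the
+agent's virtual key into `volumes/env/.env` as `OPENAI_API_KEY`, and that the
+OpenAI-compatible `/v1/models` route is reachable with that key.
+
+```python
+"""List the models visible to the task agent's virtual key (illustrative)."""
+import requests
+
+
+def read_env_value(path: str, name: str) -> str:
+    """Return the last plain `NAME=value` assignment found in a dotenv file."""
+    value = ""
+    with open(path, encoding="utf-8") as handle:
+        for raw in handle:
+            line = raw.strip()
+            if line.startswith(f"{name}="):
+                value = line.split("=", 1)[1]
+    return value
+
+
+if __name__ == "__main__":
+    virtual_key = read_env_value("volumes/env/.env", "OPENAI_API_KEY")
+    resp = requests.get(
+        "http://localhost:10999/v1/models",
+        headers={"Authorization": f"Bearer {virtual_key}"},
+        timeout=30,
+    )
+    resp.raise_for_status()
+    for model in resp.json().get("data", []):
+        print(model["id"])
+```
+
+If a freshly registered model is missing from the output, the key is usually
+still pinned to an older model list; rerun the `/key/update` step above.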
+
+## Resetting The Proxy (Optional)
+
+To wipe the LiteLLM state and rerun bootstrap:
+
+```bash
+docker compose down llm-proxy llm-proxy-db llm-proxy-bootstrap
+
+docker volume rm fuzzforge_litellm_proxy_data fuzzforge_litellm_proxy_db
+
+docker compose up -d llm-proxy-db llm-proxy
+```
+
+After the proxy is healthy, rerun the registration script and key update. The
+bootstrap container mirrors secrets into `.env.litellm` and reissues the task
+agent key automatically.
+
+## How The Pieces Fit Together
+
+1. **LiteLLM Proxy** exposes OpenAI-compatible routes and stores provider
+   metadata in Postgres.
+2. **Bootstrap Container** waits for `/health/liveliness`, mirrors secrets into
+   `.env.litellm`, registers any models you script, and keeps the virtual key in
+   sync with the discovered model list.
+3. **Task Agent** calls the proxy via `FF_LLM_PROXY_BASE_URL`. The hot-swap tool
+   updates the agent’s runtime configuration, so switching providers is just a
+   control message.
+4. **Virtual Keys** carry quotas and allowed models. Setting the `models` array
+   to `[]` lets the key use anything registered on the proxy.
+
+Keep the master key and generated virtual keys somewhere safe—they grant full
+admin and agent access respectively. When you add a new provider (e.g., Ollama),
+just register the model via `/model/new`, update the key if needed, and repeat
+the hot-swap steps.
diff --git a/docs/docs/how-to/llm-proxy.md b/docs/docs/how-to/llm-proxy.md
new file mode 100644
index 00000000..08925697
--- /dev/null
+++ b/docs/docs/how-to/llm-proxy.md
@@ -0,0 +1,194 @@
+---
+title: "Run the LLM Proxy"
+description: "Run the LiteLLM gateway that ships with FuzzForge and connect it to the task agent."
+---
+
+## Overview
+
+FuzzForge routes every LLM request through a LiteLLM proxy so that usage can be
+metered, priced, and rate limited per user. Docker Compose starts the proxy in a
+hardened container, while a bootstrap job seeds upstream provider secrets and
+issues a virtual key for the task agent automatically.
+
+LiteLLM exposes the OpenAI-compatible APIs (`/v1/*`) plus a rich admin UI. All
+traffic stays on your network and upstream credentials never leave the proxy
+container.
+
+## Before You Start
+
+1. Copy `volumes/env/.env.example` to `volumes/env/.env` and set the basics:
+   - `LITELLM_MASTER_KEY` — admin token used to manage the proxy
+   - `LITELLM_SALT_KEY` — random string used to encrypt provider credentials
+   - Provider secrets under `LITELLM_<PROVIDER>_API_KEY` (for example
+     `LITELLM_OPENAI_API_KEY`)
+   - Leave `OPENAI_API_KEY=sk-proxy-default`; the bootstrap job replaces it with a
+     LiteLLM-issued virtual key
+2. When running tools outside Docker, change `FF_LLM_PROXY_BASE_URL` to the
+   published host port (`http://localhost:10999`). Inside Docker the default
+   value `http://llm-proxy:4000` already resolves to the container.
+
+## Start the Proxy
+
+```bash
+docker compose up llm-proxy
+```
+
+The service publishes two things:
+
+- HTTP API + admin UI on `http://localhost:10999`
+- Persistent SQLite state inside the named volume
+  `fuzzforge_litellm_proxy_data`
+
+The UI login uses the `UI_USERNAME` / `UI_PASSWORD` pair (defaults to
+`fuzzforge` / `fuzzforge123`). To change them, set the environment variables
+before you run `docker compose up`:
+
+```bash
+export UI_USERNAME=myadmin
+export UI_PASSWORD=super-secret
+docker compose up llm-proxy
+```
+
+You can also edit the values directly in `docker-compose.yml` if you prefer to
+keep them in a different secrets manager.
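+
+If you script the startup (for example in CI), it helps to block until the
+proxy reports healthy before issuing any admin calls, which is roughly what the
+bootstrap container does. The following is a minimal sketch, not part of the
+repository, assuming the default published host port `10999`; point it at
+`http://llm-proxy:4000` instead when running inside the compose network.
+
+```python
+"""Wait for the LiteLLM proxy liveliness probe to answer (illustrative)."""
+import time
+
+import requests
+
+PROXY_URL = "http://localhost:10999"  # published host port of the llm-proxy service
+
+
+def wait_for_proxy(timeout_seconds: int = 60) -> None:
+    """Poll /health/liveliness until it returns 200 or the deadline passes."""
+    deadline = time.time() + timeout_seconds
+    while time.time() < deadline:
+        try:
+            resp = requests.get(f"{PROXY_URL}/health/liveliness", timeout=5)
+            if resp.status_code == 200:
+                print("llm-proxy is up")
+                return
+        except requests.RequestException:
+            pass  # proxy not reachable yet, keep polling
+        time.sleep(2)
+    raise TimeoutError("llm-proxy did not become healthy in time")
+
+
+if __name__ == "__main__":
+    wait_for_proxy()
+```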
+ +Proxy-wide settings now live in `volumes/litellm/proxy_config.yaml`. By +default it enables `store_model_in_db` and `store_prompts_in_spend_logs`, which +lets the UI display request/response payloads for new calls. Update this file +if you need additional LiteLLM options and restart the `llm-proxy` container. + +LiteLLM's health endpoint lives at `/health/liveliness`. You can verify it from +another terminal: + +```bash +curl http://localhost:10999/health/liveliness +``` + +## What the Bootstrapper Does + +During startup the `llm-proxy-bootstrap` container performs three actions: + +1. **Wait for the proxy** — Blocks until `/health/liveliness` becomes healthy. +2. **Mirror provider secrets** — Reads `volumes/env/.env` and writes any + `LITELLM_*_API_KEY` values into `volumes/env/.env.litellm`. The file is + created automatically on first boot; if you delete it, bootstrap will + recreate it and the proxy continues to read secrets from `.env`. +3. **Issue the default virtual key** — Calls `/key/generate` with the master key + and persists the generated token back into `volumes/env/.env` (replacing the + `sk-proxy-default` placeholder). The key is scoped to + `LITELLM_DEFAULT_MODELS` when that variable is set; otherwise it uses the + model from `LITELLM_MODEL`. + +The sequence is idempotent. Existing provider secrets and virtual keys are +reused on subsequent runs, and the allowed-model list is refreshed via +`/key/update` if you change the defaults. + +## Managing Virtual Keys + +LiteLLM keys act as per-user credentials. The default key, named +`task-agent default`, is created automatically for the task agent. You can issue +more keys for teammates or CI jobs with the same management API: + +```bash +curl http://localhost:10999/key/generate \ + -H "Authorization: Bearer $LITELLM_MASTER_KEY" \ + -H "Content-Type: application/json" \ + -d '{ + "key_alias": "demo-user", + "user_id": "demo", + "models": ["openai/gpt-4o-mini"], + "duration": "30d", + "max_budget": 50, + "metadata": {"team": "sandbox"} + }' +``` + +Use `/key/update` to adjust budgets or the allowed-model list on existing keys: + +```bash +curl http://localhost:10999/key/update \ + -H "Authorization: Bearer $LITELLM_MASTER_KEY" \ + -H "Content-Type: application/json" \ + -d '{ + "key": "sk-...", + "models": ["openai/*", "anthropic/*"], + "max_budget": 100 + }' +``` + +The admin UI (navigate to `http://localhost:10999/ui`) provides equivalent +controls for creating keys, routing models, auditing spend, and exporting logs. + +## Wiring the Task Agent + +The task agent already expects to talk to the proxy. Confirm these values in +`volumes/env/.env` before launching the stack: + +```bash +FF_LLM_PROXY_BASE_URL=http://llm-proxy:4000 # or http://localhost:10999 when outside Docker +OPENAI_API_KEY= +LITELLM_MODEL=openai/gpt-5 +LITELLM_PROVIDER=openai +``` + +Restart the agent container after changing environment variables so the process +picks up the updates. + +To validate the integration end to end, call the proxy directly: + +```bash +curl -X POST http://localhost:10999/v1/chat/completions \ + -H "Authorization: Bearer $OPENAI_API_KEY" \ + -H "Content-Type: application/json" \ + -d '{ + "model": "openai/gpt-4o-mini", + "messages": [{"role": "user", "content": "Proxy health check"}] + }' +``` + +A JSON response indicates the proxy can reach your upstream provider using the +mirrored secrets. + +## Local Runtimes (Ollama, etc.) + +LiteLLM supports non-hosted providers as well. 
To route requests to a local +runtime such as Ollama: + +1. Set the appropriate provider key in the env file + (for Ollama, point LiteLLM at `OLLAMA_API_BASE` inside the container). +2. Add the passthrough model either from the UI (**Models → Add Model**) or + by calling `/model/new` with the master key. +3. Update `LITELLM_DEFAULT_MODELS` (and regenerate the virtual key if you want +the default key to include it). + +The task agent keeps using the same OpenAI-compatible surface while LiteLLM +handles the translation to your runtime. + +## Next Steps + +- Explore [LiteLLM's documentation](https://docs.litellm.ai/docs/simple_proxy) + for advanced routing, cost controls, and observability hooks. +- Configure Slack/Prometheus integrations from the UI to monitor usage. +- Rotate the master key periodically and store it in your secrets manager, as it + grants full admin access to the proxy. + +## Observability + +LiteLLM ships with OpenTelemetry hooks for traces and metrics. This repository +already includes an OTLP collector (`otel-collector` service) and mounts a +default configuration that forwards traces to standard output. To wire it up: + +1. Edit `volumes/otel/collector-config.yaml` if you want to forward to Jaeger, + Datadog, etc. The initial config uses the logging exporter so you can see + spans immediately via `docker compose logs -f otel-collector`. +2. Customize `volumes/litellm/proxy_config.yaml` if you need additional + callbacks; `general_settings.otel: true` and `litellm_settings.callbacks: + ["otel"]` are already present so no extra code changes are required. +3. (Optional) Override `OTEL_EXPORTER_OTLP_*` environment variables in + `docker-compose.yml` or your shell to point at a remote collector. + +After updating the configs, run `docker compose up -d otel-collector llm-proxy` +and generate a request (for example, trigger `ff workflow run llm_analysis`). +New traces will show up in the collector logs or whichever backend you +configured. See the official LiteLLM guide for advanced exporter options: +https://docs.litellm.ai/docs/observability/opentelemetry_integration. diff --git a/volumes/env/.env.example b/volumes/env/.env.example index 4be30b94..ad5f1ed1 100644 --- a/volumes/env/.env.example +++ b/volumes/env/.env.example @@ -1,17 +1,40 @@ # FuzzForge Agent Configuration -# Copy this to .env and configure your API keys +# Copy this to .env and configure your API keys and proxy settings -# LiteLLM Model Configuration -LITELLM_MODEL=gemini/gemini-2.0-flash-001 -# LITELLM_PROVIDER=gemini +# LiteLLM Model Configuration (default routed through the proxy) +LITELLM_MODEL=openai/gpt-5 +LITELLM_PROVIDER=openai +# Leave empty to let bootstrap mirror the LiteLLM model list dynamically. +LITELLM_DEFAULT_MODELS= -# API Keys (uncomment and configure as needed) -# GOOGLE_API_KEY= -# OPENAI_API_KEY= -# ANTHROPIC_API_KEY= -# OPENROUTER_API_KEY= -# MISTRAL_API_KEY= +# Proxy configuration +# Base URL is used by the task agent to talk to the proxy container inside Docker. +# When running everything locally without Docker networking, replace with http://localhost:10999. +FF_LLM_PROXY_BASE_URL=http://llm-proxy:4000 -# Agent Configuration +# Virtual key placeholder. The bootstrap job replaces this with a LiteLLM +# proxy-issued key on startup so the task agent authenticates via the gateway. 
+OPENAI_API_KEY=sk-proxy-default + +# LiteLLM proxy configuration +LITELLM_MASTER_KEY=sk-master-key +LITELLM_SALT_KEY=choose-a-random-string +# LiteLLM UI login (defaults to admin/fuzzforge123 if not overridden) +UI_USERNAME=fuzzforge +UI_PASSWORD=fuzzforge123 +# Optional: override OTEL exporter endpoint if using a remote collector +# OTEL_EXPORTER_OTLP_ENDPOINT=http://otel-collector:4317 +# LITELLM_DEFAULT_KEY_BUDGET=25 +# LITELLM_DEFAULT_KEY_DURATION=7d + +# Upstream provider secrets (ingested by the proxy only). The bootstrapper copies +# these into volumes/env/.env.litellm so other containers never see the raw keys. +# LITELLM_OPENAI_API_KEY= +# LITELLM_ANTHROPIC_API_KEY= +# LITELLM_GEMINI_API_KEY= +# LITELLM_MISTRAL_API_KEY= +# LITELLM_OPENROUTER_API_KEY= + +# Agent behaviour # DEFAULT_TIMEOUT=120 # DEFAULT_CONTEXT_ID=default diff --git a/volumes/env/.env.template b/volumes/env/.env.template new file mode 100644 index 00000000..09ccca6e --- /dev/null +++ b/volumes/env/.env.template @@ -0,0 +1,65 @@ +# ============================================================================= +# FuzzForge LiteLLM Proxy Configuration +# ============================================================================= +# Copy this file to .env and fill in your API keys +# Bootstrap will automatically create virtual keys for each service +# ============================================================================= + +# LiteLLM Proxy Internal Configuration +# ----------------------------------------------------------------------------- +FF_LLM_PROXY_BASE_URL=http://llm-proxy:4000 +LITELLM_MASTER_KEY=sk-master-test +LITELLM_SALT_KEY=super-secret-salt + +# Default Models (comma-separated, leave empty for model-agnostic access) +# ----------------------------------------------------------------------------- +# Examples: +# openai/gpt-5-mini,openai/text-embedding-3-large +# anthropic/claude-sonnet-4-5-20250929,openai/gpt-5-mini +# (empty = unrestricted access to all registered models) +LITELLM_DEFAULT_MODELS= + +# Upstream Provider API Keys +# ----------------------------------------------------------------------------- +# Add your real provider keys here - these are used by the proxy to call LLM providers +LITELLM_OPENAI_API_KEY=your-openai-key-here +LITELLM_ANTHROPIC_API_KEY=your-anthropic-key-here +LITELLM_GEMINI_API_KEY= +LITELLM_MISTRAL_API_KEY= +LITELLM_OPENROUTER_API_KEY= + +# Virtual Keys Budget & Duration Configuration +# ----------------------------------------------------------------------------- +# These control the budget and duration for auto-generated virtual keys +# Task Agent Key - used by task-agent service for A2A LiteLLM calls +TASK_AGENT_BUDGET=25.0 +TASK_AGENT_DURATION=30d + +# Cognee Key - used by Cognee for knowledge graph ingestion and queries +COGNEE_BUDGET=50.0 +COGNEE_DURATION=30d + +# General CLI/SDK Key - used by ff CLI and fuzzforge-sdk +CLI_BUDGET=100.0 +CLI_DURATION=30d + +# Virtual Keys (auto-generated by bootstrap - leave blank) +# ----------------------------------------------------------------------------- +TASK_AGENT_API_KEY= +COGNEE_API_KEY= +OPENAI_API_KEY= + +# LiteLLM Proxy Client Configuration +# ----------------------------------------------------------------------------- +# For CLI and SDK usage (Cognee, ff ingest, etc.) 
+LITELLM_PROXY_API_BASE=http://localhost:10999 +LLM_ENDPOINT=http://localhost:10999 +LLM_PROVIDER=openai +LLM_MODEL=litellm_proxy/gpt-5-mini +LLM_API_BASE=http://localhost:10999 +LLM_EMBEDDING_MODEL=litellm_proxy/text-embedding-3-large + +# UI Access +# ----------------------------------------------------------------------------- +UI_USERNAME=fuzzforge +UI_PASSWORD=fuzzforge123 diff --git a/volumes/env/README.md b/volumes/env/README.md index c53f1842..7df65e8b 100644 --- a/volumes/env/README.md +++ b/volumes/env/README.md @@ -1,22 +1,89 @@ -# FuzzForge Environment Configuration +# FuzzForge LiteLLM Proxy Configuration -This directory contains environment files that are mounted into Docker containers. +This directory contains configuration for the LiteLLM proxy with model-agnostic virtual keys. + +## Quick Start (Fresh Clone) + +### 1. Create Your `.env` File + +```bash +cp .env.template .env +``` + +### 2. Add Your Provider API Keys + +Edit `.env` and add your **real** API keys: + +```bash +LITELLM_OPENAI_API_KEY=sk-proj-YOUR-OPENAI-KEY-HERE +LITELLM_ANTHROPIC_API_KEY=sk-ant-api03-YOUR-ANTHROPIC-KEY-HERE +``` + +### 3. Start Services + +```bash +cd ../.. # Back to repo root +COMPOSE_PROFILES=secrets docker compose up -d +``` + +Bootstrap will automatically: +- Generate 3 virtual keys with individual budgets +- Write them to your `.env` file +- No model restrictions (model-agnostic) ## Files -- `.env.example` - Template configuration file -- `.env` - Your actual configuration (create by copying .env.example) +- **`.env.template`** - Clean template (checked into git) +- **`.env`** - Your real keys (git ignored, you create this) +- **`.env.example`** - Legacy example + +## Virtual Keys (Auto-Generated) + +Bootstrap creates 3 keys with budget controls: + +| Key | Budget | Duration | Used By | +|-----|--------|----------|---------| +| `OPENAI_API_KEY` | $100 | 30 days | CLI, SDK | +| `TASK_AGENT_API_KEY` | $25 | 30 days | Task Agent | +| `COGNEE_API_KEY` | $50 | 30 days | Cognee | + +All keys are **model-agnostic** by default (no restrictions). + +## Using Models + +Registered models in `volumes/litellm/proxy_config.yaml`: +- `gpt-5-mini` → `openai/gpt-5-mini` +- `claude-sonnet-4-5` → `anthropic/claude-sonnet-4-5-20250929` +- `text-embedding-3-large` → `openai/text-embedding-3-large` + +### Use Registered Aliases: + +```bash +fuzzforge workflow run llm_secret_detection . -n llm_model=gpt-5-mini +fuzzforge workflow run llm_secret_detection . -n llm_model=claude-sonnet-4-5 +``` + +### Use Any Model (Direct): + +```bash +# Works without registering first! +fuzzforge workflow run llm_secret_detection . -n llm_model=openai/gpt-5-nano +``` + +## Proxy UI + +http://localhost:10999/ui +- User: `fuzzforge` / Pass: `fuzzforge123` -## Usage +## Troubleshooting -1. Copy the example file: - ```bash - cp .env.example .env - ``` +```bash +# Check bootstrap logs +docker compose logs llm-proxy-bootstrap -2. Edit `.env` and add your API keys +# Verify keys generated +grep "API_KEY=" .env | grep -v "^#" | grep -v "your-" -3. 
Restart Docker containers to apply changes: - ```bash - docker-compose restart - ``` +# Restart services +docker compose restart llm-proxy task-agent +``` diff --git a/volumes/litellm/proxy_config.yaml b/volumes/litellm/proxy_config.yaml new file mode 100644 index 00000000..297abfab --- /dev/null +++ b/volumes/litellm/proxy_config.yaml @@ -0,0 +1,26 @@ +general_settings: + master_key: os.environ/LITELLM_MASTER_KEY + database_url: os.environ/DATABASE_URL + store_model_in_db: true + store_prompts_in_spend_logs: true + otel: true + +litellm_settings: + callbacks: + - "otel" + +model_list: + - model_name: claude-sonnet-4-5 + litellm_params: + model: anthropic/claude-sonnet-4-5-20250929 + api_key: os.environ/ANTHROPIC_API_KEY + + - model_name: gpt-5-mini + litellm_params: + model: openai/gpt-5-mini + api_key: os.environ/LITELLM_OPENAI_API_KEY + + - model_name: text-embedding-3-large + litellm_params: + model: openai/text-embedding-3-large + api_key: os.environ/LITELLM_OPENAI_API_KEY diff --git a/volumes/otel/collector-config.yaml b/volumes/otel/collector-config.yaml new file mode 100644 index 00000000..29dfa0b1 --- /dev/null +++ b/volumes/otel/collector-config.yaml @@ -0,0 +1,25 @@ +receivers: + otlp: + protocols: + grpc: + endpoint: 0.0.0.0:4317 + http: + endpoint: 0.0.0.0:4318 + +processors: + batch: + +exporters: + debug: + verbosity: detailed + +service: + pipelines: + traces: + receivers: [otlp] + processors: [batch] + exporters: [debug] + metrics: + receivers: [otlp] + processors: [batch] + exporters: [debug]