diff --git a/.flocks/flocks.json.example b/.flocks/flocks.json.example index 3948f852..a8e31b91 100644 --- a/.flocks/flocks.json.example +++ b/.flocks/flocks.json.example @@ -63,6 +63,10 @@ "max_age_days": 3 } }, + "server": { + "cors": ["http://127.0.0.1:5173", "http://localhost:5173"] + }, + "allowReadPaths": [], "updater": { "enabled": true, "repo": "AgentFlocks/flocks", diff --git a/flocks/cli/service_manager.py b/flocks/cli/service_manager.py index fb6c2628..2d826657 100644 --- a/flocks/cli/service_manager.py +++ b/flocks/cli/service_manager.py @@ -704,11 +704,16 @@ def start_backend(config: ServiceConfig, console) -> None: str(config.backend_port), ] + backend_env = os.environ.copy() + backend_env["_FLOCKS_WEBUI_HOST"] = config.frontend_host + backend_env["_FLOCKS_WEBUI_PORT"] = str(config.frontend_port) + console.print("[flocks] 启动后端服务...") process = _spawn_process( command, cwd=root, log_path=paths.backend_log, + env=backend_env, ) write_runtime_record( paths.backend_pid, diff --git a/flocks/config/config.py b/flocks/config/config.py index 2e4fbd6c..98e1ebdd 100644 --- a/flocks/config/config.py +++ b/flocks/config/config.py @@ -533,7 +533,16 @@ class ConfigInfo(BaseModel): "workspace_access (none/ro/rw), workspace_root, docker, tools, prune." ), ) - + allow_read_paths: Optional[List[str]] = Field( + None, + alias="allowReadPaths", + description=( + "Extra absolute paths (directories or prefixes) allowed for HTTP " + "/api/file/content and /api/file/list. Does not replace project root, data, or " + "workspace; the Flocks config directory and ~/.ssh are never allowed." + ), + ) + # Channel configuration (IM platform integrations) channels: Optional[Dict[str, ChannelConfig]] = Field( None, diff --git a/flocks/server/app.py b/flocks/server/app.py index 4bf51d63..70e54b62 100644 --- a/flocks/server/app.py +++ b/flocks/server/app.py @@ -297,15 +297,100 @@ async def lifespan(app: FastAPI): # CORS Configuration -def configure_cors() -> None: - """Configure CORS middleware""" - app.add_middleware( - CORSMiddleware, - allow_origins=["*"], # TODO: Make configurable - allow_credentials=True, - allow_methods=["*"], - allow_headers=["*"], - ) +# +# Priority order: +# 1. Explicit ``server.cors`` in flocks.json → use those origins (plus +# the localhost fallback regex). +# 2. ``_FLOCKS_WEBUI_HOST`` / ``_FLOCKS_WEBUI_PORT`` env vars set by +# ``start_backend()`` for a concrete IP → auto-whitelist that single +# origin. We deliberately do NOT auto-whitelist when the WebUI binds +# to ``0.0.0.0``: matching ``[^/]+:`` would accept every host on +# that port, effectively disabling CORS. Remote deployments that run +# ``--webui-host 0.0.0.0`` must set ``server.cors`` explicitly in +# ``flocks.json``. +# 3. Fallback → only localhost (any port) via regex. +# +# Config is read lazily on the first request via +# :class:`_DeferredCORSMiddleware` so that importing ``app`` in an async +# context (e.g. pytest fixtures) does not call ``asyncio.run()`` inside a +# running event loop, and so that ``Config.get_global()`` is not invoked at +# import time — which would otherwise cache ``HOME`` before test harnesses +# can monkey-patch it. + +_LOCALHOST_ORIGIN_RE = r"^https?://(127\.0\.0\.1|localhost)(:\d+)?$" + +_LOCALHOST_HOSTS = {"127.0.0.1", "localhost", "::1"} + + +def _is_localhost(host: str) -> bool: + return host in _LOCALHOST_HOSTS + + +def _read_cors_config() -> tuple[list[str], Optional[str]]: + """Return (allow_origins, allow_origin_regex) for CORSMiddleware. + + Reads ``server.cors`` directly from ``flocks.json`` using synchronous + JSON I/O — this avoids ``asyncio.run()`` inside a running event loop + and keeps the hot path off the async ``Config.get()`` pipeline. + """ + import json + + try: + cfg_file = Config.get_config_file() + if cfg_file.exists(): + with cfg_file.open("r", encoding="utf-8") as f: + data = json.load(f) + server_cfg = data.get("server") or {} + cors = server_cfg.get("cors") + if isinstance(cors, list): + origins = [c for c in cors if isinstance(c, str) and c] + if origins: + return origins, _LOCALHOST_ORIGIN_RE + except Exception: + pass + + webui_host = os.environ.get("_FLOCKS_WEBUI_HOST", "") + webui_port = os.environ.get("_FLOCKS_WEBUI_PORT", "") + + if ( + webui_host + and webui_port + and not _is_localhost(webui_host) + and webui_host != "0.0.0.0" + ): + extra_origin = f"http://{webui_host}:{webui_port}" + return [extra_origin], _LOCALHOST_ORIGIN_RE + + return [], _LOCALHOST_ORIGIN_RE + + +class _DeferredCORSMiddleware: + """Lazy wrapper around :class:`CORSMiddleware`. + + Starlette builds the middleware stack on the first request, but the + inner middleware's constructor kwargs are evaluated at + ``add_middleware`` call time. We defer one step further: the wrapped + :class:`CORSMiddleware` is instantiated on the first incoming request, + after the test harness (or the real runtime) has finished setting up + ``HOME`` / config paths. + """ + + def __init__(self, app) -> None: + self.app = app + self._inner = None + + async def __call__(self, scope, receive, send): + if self._inner is None: + allow_origins, allow_origin_regex = _read_cors_config() + self._inner = CORSMiddleware( + self.app, + allow_origins=allow_origins, + allow_origin_regex=allow_origin_regex, + allow_credentials=True, + allow_methods=["*"], + allow_headers=["*"], + ) + await self._inner(scope, receive, send) # Instance Context Middleware @@ -453,8 +538,9 @@ async def general_exception_handler(request: Request, exc: Exception): ) -# Configure CORS -configure_cors() +# Configure CORS (config is read lazily on the first request; see +# _DeferredCORSMiddleware for rationale). +app.add_middleware(_DeferredCORSMiddleware) # Import and include routers diff --git a/flocks/server/routes/file.py b/flocks/server/routes/file.py index fbbbaa6e..2493cff6 100644 --- a/flocks/server/routes/file.py +++ b/flocks/server/routes/file.py @@ -8,7 +8,9 @@ from fastapi import APIRouter, HTTPException, Query from pydantic import BaseModel +from flocks.config.config import Config from flocks.utils.file import File, FileNode, FileContent, FileInfo +from flocks.utils.http_file_read_guard import resolve_path_for_http_file_access from flocks.utils.log import Log router = APIRouter() @@ -23,8 +25,13 @@ async def list_files(path: str = Query(..., description="Directory path")): List files and directories in a specified path. """ try: - nodes = await File.list(path) + cfg = await Config.get() + safe_path = await resolve_path_for_http_file_access(path, cfg) + nodes = await File.list(safe_path) return nodes + except PermissionError: + log.warning("http_file.list.denied", {"path": path}) + raise HTTPException(status_code=403, detail="Access denied") except Exception as e: log.error("file.list.error", {"error": str(e), "path": path}) raise HTTPException(status_code=500, detail=str(e)) @@ -38,10 +45,15 @@ async def read_file(path: str = Query(..., description="File path")): Read the content of a specified file. """ try: - content = await File.read(path) + cfg = await Config.get() + safe_path = await resolve_path_for_http_file_access(path, cfg) + content = await File.read(safe_path) return content except FileNotFoundError as e: raise HTTPException(status_code=404, detail=str(e)) + except PermissionError: + log.warning("http_file.read.denied", {"path": path}) + raise HTTPException(status_code=403, detail="Access denied") except Exception as e: log.error("file.read.error", {"error": str(e), "path": path}) raise HTTPException(status_code=500, detail=str(e)) @@ -59,6 +71,14 @@ async def search_files( Search for files or directories by name or pattern in the project directory. """ + # Only enforce basic resource limits here. The underlying ``File.search`` + # invokes ``subprocess.run`` with ``shell=False`` and an argv list, so the + # query cannot reach a shell — there is no injection vector that warrants + # rejecting legitimate filenames containing ``;`` ``|`` ``$`` ``` ``` etc. + # The null byte is still refused because many POSIX APIs treat it as a + # string terminator and will silently truncate the argument. + if not query or len(query) > 200 or "\x00" in query: + raise HTTPException(status_code=400, detail="Invalid search query") try: results = await File.search(query=query, limit=limit, dirs=dirs, type=type) return results @@ -103,14 +123,26 @@ async def find_text(pattern: str = Query(..., description="Search pattern")): Find text Search for text patterns across files in the project using grep. + Only searches within the Flocks project root directory. """ try: import subprocess import os - - cwd = os.getcwd() - - # Use grep for text search + from flocks.utils.paths import find_flocks_project_root + + project_root = find_flocks_project_root() + if project_root is None: + raise HTTPException(status_code=403, detail="No Flocks project root found") + cwd = str(project_root) + + if not pattern or len(pattern) > 500 or "\x00" in pattern: + raise HTTPException(status_code=400, detail="Invalid search pattern") + + # ``grep`` runs as its own argv (no shell); the ``--`` sentinel stops + # ``grep`` from interpreting a leading ``-`` in ``pattern`` as an + # option. We intentionally keep regex semantics (the historical + # contract) and rely on ``subprocess.run(shell=False)`` plus the + # bounded cwd (``project_root``) to contain the command. cmd = [ "grep", "-rn", # recursive, line numbers @@ -132,6 +164,7 @@ async def find_text(pattern: str = Query(..., description="Search pattern")): "--exclude-dir=__pycache__", "--exclude-dir=.venv", "--exclude-dir=venv", + "--", pattern, ".", ] @@ -147,7 +180,7 @@ async def find_text(pattern: str = Query(..., description="Search pattern")): matches = [] if result.returncode == 0: - for line in result.stdout.split("\n")[:100]: # Limit to 100 matches + for line in result.stdout.split("\n")[:100]: if not line.strip(): continue @@ -164,6 +197,8 @@ async def find_text(pattern: str = Query(..., description="Search pattern")): }) return matches + except HTTPException: + raise except Exception as e: log.error("text.search.error", {"error": str(e), "pattern": pattern}) raise HTTPException(status_code=500, detail=str(e)) diff --git a/flocks/utils/__init__.py b/flocks/utils/__init__.py index 06ad4b9a..b7379192 100644 --- a/flocks/utils/__init__.py +++ b/flocks/utils/__init__.py @@ -3,6 +3,13 @@ from flocks.utils.log import Log from flocks.utils.id import Identifier from flocks.utils.json_repair import parse_json_robust, repair_truncated_json -from flocks.utils.paths import find_project_root +from flocks.utils.paths import find_flocks_project_root, find_project_root -__all__ = ["Log", "Identifier", "parse_json_robust", "repair_truncated_json", "find_project_root"] +__all__ = [ + "Log", + "Identifier", + "parse_json_robust", + "repair_truncated_json", + "find_project_root", + "find_flocks_project_root", +] diff --git a/flocks/utils/http_file_read_guard.py b/flocks/utils/http_file_read_guard.py new file mode 100644 index 00000000..82a9e38d --- /dev/null +++ b/flocks/utils/http_file_read_guard.py @@ -0,0 +1,225 @@ +""" +Path validation for HTTP ``/api/file/*`` endpoints. + +Blocks arbitrary file read without changing :meth:`flocks.utils.file.File.read` +used by internal callers (agent tools, memory, etc.). + +- Used only from ``flocks.server.routes.file``. +- Reads ``allowReadPaths`` via :meth:`flocks.config.config.Config.get` (Pydantic alias). +- When no ``.flocks`` project root exists, does not fall back to cwd as a sandbox root. +""" + +from __future__ import annotations + +import os +from pathlib import Path +from typing import List, Optional, Set + +from flocks.config.config import Config, ConfigInfo +from flocks.utils.paths import find_flocks_project_root + + +def _safe_system_files_resolved() -> Set[str]: + """Small built-in allowlist of safe system files (resolved absolute paths).""" + out: Set[str] = set() + for p in ("/etc/hosts", "/etc/hostname", "/etc/resolv.conf"): + try: + rp = Path(p).resolve() + if rp.is_file(): + out.add(str(rp)) + except OSError: + continue + return out + + +_SAFE_SYSTEM_FILES: Set[str] = _safe_system_files_resolved() + + +def _normalize_allow_read_entries(entries: Optional[List[str]]) -> List[str]: + """Validate extra readable paths from config. + + Requirements: absolute, not FS root, not under config dir or ``~/.ssh``. + """ + if not entries: + return [] + + cfg_dir = Config.get_config_path().resolve() + try: + ssh_dir = (Path.home() / ".ssh").resolve() + except OSError: + ssh_dir = None + + out: List[str] = [] + seen: Set[str] = set() + + for raw in entries: + if not raw or not isinstance(raw, str): + continue + expanded = os.path.normpath(os.path.expanduser(raw.strip())) + if not expanded: + continue + if not os.path.isabs(expanded): + continue + try: + p = Path(expanded).resolve() + except OSError: + continue + + if p.parent == p: + continue + + ps = str(p) + + if ps == str(cfg_dir) or p.is_relative_to(cfg_dir): + continue + if ssh_dir is not None and (ps == str(ssh_dir) or p.is_relative_to(ssh_dir)): + continue + + if ps not in seen: + seen.add(ps) + out.append(ps) + + return out + + +def _blocked_for_http_read(resolved_str: str) -> bool: + """Always deny these sensitive locations, even if another allow-root would match. + + ``resolved_str`` must already be a :meth:`Path.resolve`-d absolute path. + """ + r = Path(resolved_str) + + cfg_dir = Config.get_config_path().resolve() + if r == cfg_dir or r.is_relative_to(cfg_dir): + return True + + try: + ssh = (Path.home() / ".ssh").resolve() + if r == ssh or r.is_relative_to(ssh): + return True + except OSError: + pass + + if os.name == "posix": + for prefix in ("/proc", "/sys", "/dev"): + if resolved_str == prefix or resolved_str.startswith(prefix + os.sep): + return True + + return False + + +def _is_filesystem_root(p: Path) -> bool: + try: + r = p.resolve() + except OSError: + return True + return r.parent == r + + +def _assert_path_contained(file_path: str, root: str) -> str: + """Check that *file_path* resolves to somewhere inside *root*. + + Follows symlinks to prevent escape via symlinked directories/files. + Raises ``ValueError`` when the resolved path is outside *root*. + + Returns the resolved absolute path. + """ + root_resolved = Path(root).resolve() + target_resolved = Path(file_path).resolve() + + if not (target_resolved == root_resolved or target_resolved.is_relative_to(root_resolved)): + raise ValueError( + f"Path {file_path} resolves to {target_resolved} which is outside {root_resolved}" + ) + + return str(target_resolved) + + +def _initial_abs_path(user_path: str, project_root: Optional[Path]) -> str: + """Turn the query path into an absolute path. + + Relative paths are only allowed when a project root exists and are + resolved relative to it. + """ + raw = user_path.strip() + if not raw: + raise PermissionError("Empty path") + if "\x00" in raw: + raise PermissionError("Invalid path") + if len(raw) > 4096: + raise PermissionError("Path too long") + + expanded = str(Path(raw).expanduser()) + if os.path.isabs(expanded): + return os.path.normpath(expanded) + + if project_root is None: + raise PermissionError("Relative paths require a Flocks project root (.flocks/)") + + return os.path.normpath(os.path.join(str(project_root.resolve()), expanded)) + + +async def resolve_path_for_http_file_access( + user_path: str, + config: ConfigInfo, +) -> str: + """Resolve and authorize ``user_path`` for HTTP file access. + + On success, returns an absolute path safe to open. + + Raises: + PermissionError: Path is not allowed for remote HTTP file access. + """ + project_root = find_flocks_project_root() + abs_guess = _initial_abs_path(user_path, project_root) + + extra_roots = _normalize_allow_read_entries(config.allow_read_paths) + + roots: List[str] = [] + if project_root is not None and not _is_filesystem_root(project_root): + roots.append(str(project_root.resolve())) + data_dir = Config.get_data_path().resolve() + if not _is_filesystem_root(data_dir): + roots.append(str(data_dir)) + + from flocks.workspace.manager import WorkspaceManager + + ws_dir = WorkspaceManager.get_instance().get_workspace_dir().resolve() + if not _is_filesystem_root(ws_dir): + roots.append(str(ws_dir)) + + for r in extra_roots: + if r not in roots: + roots.append(r) + + seen: Set[str] = set() + uniq: List[str] = [] + for r in roots: + if r not in seen: + seen.add(r) + uniq.append(r) + + for root in uniq: + try: + resolved = _assert_path_contained(abs_guess, root) + if not _blocked_for_http_read(resolved): + return resolved + except ValueError: + continue + + try: + safe_resolved = str(Path(abs_guess).resolve()) + except OSError as e: + raise PermissionError("Invalid path") from e + + if ( + _SAFE_SYSTEM_FILES + and safe_resolved in _SAFE_SYSTEM_FILES + and not _blocked_for_http_read(safe_resolved) + ): + return safe_resolved + + raise PermissionError("Path is not allowed for remote file access") + + +__all__ = ["resolve_path_for_http_file_access"] diff --git a/flocks/utils/paths.py b/flocks/utils/paths.py index 8867db17..1f56a4c2 100644 --- a/flocks/utils/paths.py +++ b/flocks/utils/paths.py @@ -5,6 +5,20 @@ from pathlib import Path +def find_flocks_project_root() -> Path | None: + """Walk upward from cwd and return the first directory that contains ``.flocks/``. + + Unlike :func:`find_project_root`, this returns ``None`` when none is found—no + fallback to cwd. Use this for security-sensitive checks (e.g. avoid treating + the whole filesystem as a project when cwd is ``/``). + """ + current = Path.cwd().resolve() + for directory in (current, *current.parents): + if (directory / ".flocks").is_dir(): + return directory + return None + + def find_project_root() -> Path: """Walk up from cwd to locate the Flocks project root. @@ -13,17 +27,15 @@ def find_project_root() -> Path: until the filesystem root is reached. Falls back to ``Path.cwd()`` when nothing is found (e.g. first-run before - ``.flocks/`` has been created). + ``.flocks/`` has been created). Prefer :func:`find_flocks_project_root` for + HTTP file access and other security-sensitive path checks. Returns: The nearest ancestor directory (inclusive of cwd) that contains a ``.flocks/`` sub-directory, or ``Path.cwd()`` as a fallback. """ - current = Path.cwd().resolve() - for directory in [current, *current.parents]: - if (directory / ".flocks").is_dir(): - return directory - return current + found = find_flocks_project_root() + return found if found is not None else Path.cwd().resolve() -__all__ = ["find_project_root"] +__all__ = ["find_project_root", "find_flocks_project_root"]