diff --git a/.flocks/plugins/skills/tool-builder/SKILL.md b/.flocks/plugins/skills/tool-builder/SKILL.md index 37059551..06f99ce8 100644 --- a/.flocks/plugins/skills/tool-builder/SKILL.md +++ b/.flocks/plugins/skills/tool-builder/SKILL.md @@ -428,8 +428,8 @@ Run the bundled validator before anything else. It is self-contained `_provider.yaml` and any script handler: ```bash -SKILL_DIR="$(realpath ~/.flocks/plugins/skills/tool-builder)" -uv run python "$SKILL_DIR/validator.py" "$TOOL_PATH" +SKILL_DIR="$(git rev-parse --show-toplevel)/.flocks/plugins/skills/tool-builder" +uv run python "$SKILL_DIR/scripts/validator.py" "$TOOL_PATH" ``` The validator checks (this list is enforced, not aspirational): @@ -487,7 +487,7 @@ them — note the reason when reporting back. For a CI-style check that fails on warnings too: ```bash -uv run python "$SKILL_DIR/validator.py" --strict "$TOOL_PATH" +uv run python "$SKILL_DIR/scripts/validator.py" --strict "$TOOL_PATH" ``` ### Step 1: Load Test @@ -546,69 +546,29 @@ If load fails: **read the error, fix the root cause**, and re-run. ### Step 2: Smoke Test -Execute the tool with **safe, minimal test parameters** to confirm end-to-end functionality: +Confirm the tool works in the real agent flow with **safe, minimal test parameters**: -**YAML-HTTP tools (Mode A):** -```bash -uv run python -c " -import asyncio -from pathlib import Path -from flocks.tool.tool_loader import yaml_to_tool, _read_yaml_raw -from flocks.tool.registry import ToolContext +1. Use `tool_search` to load the tool into the current callable set. +2. Then call the tool directly with the prepared smoke-test parameters. -yaml_path = Path('$TOOL_PATH').expanduser() -raw = _read_yaml_raw(yaml_path) -tool = yaml_to_tool(raw, yaml_path) -ctx = ToolContext(session_id='test', message_id='test') - -# Replace with actual safe test parameters -test_params = {$TEST_PARAMS} - -async def run(): - result = await tool.execute(ctx, **test_params) - print(f'Success: {result.success}') - if result.error: - print(f'Error: {result.error}') - if result.output: - output_str = str(result.output) - print(f'Output: {output_str[:500]}') - return result.success - -ok = asyncio.run(run()) -if not ok: - print('WARN: Smoke test returned success=False (may be expected for auth errors with unconfigured API keys)') -" -``` +Use exact-select form so the session loads the intended tool rather than a fuzzy match: -**Python tools (Mode B):** -```bash -uv run python -c " -import asyncio -from flocks.tool.registry import ToolRegistry, ToolContext -import importlib.util -from pathlib import Path +```text +tool_search(query="select:$TOOL_NAME") +``` -path = Path('$TOOL_PATH').expanduser() -spec = importlib.util.spec_from_file_location(f'_test_{path.stem}', str(path)) -mod = importlib.util.module_from_spec(spec) -spec.loader.exec_module(mod) +Then invoke the tool itself: -ctx = ToolContext(session_id='test', message_id='test') -test_params = {$TEST_PARAMS} +```text +$TOOL_NAME(...) +``` -async def run(): - result = await ToolRegistry.execute('$TOOL_NAME', ctx, **test_params) - print(f'Success: {result.success}') - if result.error: - print(f'Error: {result.error}') - if result.output: - output_str = str(result.output) - print(f'Output: {output_str[:500]}') - return result.success +Smoke-test rules: -asyncio.run(run()) -" -``` +- Do not stop at static validation or registry import success; the tool must be callable through the normal tool interface. +- Prefer the smallest harmless happy-path input that still exercises the real handler. +- If `tool_search` does not return the new tool, treat that as a failure in discovery/loading and fix it before proceeding. +- If the tool call returns an auth error because the API key is not configured yet, that is an acceptable `WARN` for API tools. Record it explicitly. ### Choosing test parameters diff --git a/.flocks/plugins/skills/tool-builder/validator.py b/.flocks/plugins/skills/tool-builder/scripts/validator.py similarity index 92% rename from .flocks/plugins/skills/tool-builder/validator.py rename to .flocks/plugins/skills/tool-builder/scripts/validator.py index c6ee7dc2..196f0d5e 100644 --- a/.flocks/plugins/skills/tool-builder/validator.py +++ b/.flocks/plugins/skills/tool-builder/scripts/validator.py @@ -26,11 +26,11 @@ that is not ``async def`` Usage: - uv run python validator.py + uv run python scripts/validator.py Exit codes: - 0 — no FAIL items (WARN allowed) - 1 — at least one FAIL item OR validator could not run + 0 - no FAIL items (WARN allowed) + 1 - at least one FAIL item OR validator could not run """ from __future__ import annotations @@ -73,7 +73,7 @@ "task", "schedule_task_center", "todo", "plan", "run_workflow", "run_workflow_node", "echo", "get_time", - "skill", "question", + "skill_load", "question", } PARAM_PATTERN = re.compile(r"\{([^}]+)\}") @@ -139,11 +139,11 @@ def render(self) -> str: f"Summary: {self.fail_count} FAIL, {self.warn_count} WARN" ) if self.fail_count == 0 and self.warn_count == 0: - lines[-1] += " — looks good." + lines[-1] += " - looks good." elif self.fail_count == 0: - lines[-1] += " — fix WARN items if you want a clean report." + lines[-1] += " - fix WARN items if you want a clean report." else: - lines[-1] += " — fix FAIL items before declaring the tool ready." + lines[-1] += " - fix FAIL items before declaring the tool ready." return "\n".join(lines) @@ -174,7 +174,7 @@ def _provider_dir(yaml_path: Path) -> Optional[Path]: """If the YAML is under ``api/{provider}/foo.yaml``, return the provider dir.""" parent = yaml_path.parent if parent.name == "api": - # Standalone tool directly under api/ — no provider dir. + # Standalone tool directly under api/ - no provider dir. return None grandparent = parent.parent if grandparent.name == "api": @@ -232,7 +232,7 @@ def _validate_yaml_metadata( if name in RESERVED_TOOL_NAMES: report.fail( section, - f"name '{name}' collides with a built-in tool — pick another", + f"name '{name}' collides with a built-in tool - pick another", ) stem = yaml_path.stem if stem != name: @@ -248,7 +248,7 @@ def _validate_yaml_metadata( elif len(str(description).strip()) < 20: report.warn( section, - f"description is only {len(str(description).strip())} chars — " + f"description is only {len(str(description).strip())} chars - " "the LLM uses this to decide when to invoke the tool", ) else: @@ -259,7 +259,7 @@ def _validate_yaml_metadata( # API tools commonly inherit category from _provider.yaml; only warn. report.warn( section, - "no 'category' set — loader will fall back to 'custom' " + "no 'category' set - loader will fall back to 'custom' " "(or provider defaults if present)", ) elif category not in VALID_CATEGORIES: @@ -275,19 +275,19 @@ def _validate_yaml_metadata( if enabled is None: report.warn( section, - "no 'enabled' field — defaults to true; set explicitly so " + "no 'enabled' field - defaults to true; set explicitly so " "the tool is unambiguously activated", ) elif enabled is not True: report.warn( section, - f"enabled = {enabled!r} — tool will NOT be active immediately. " + f"enabled = {enabled!r} - tool will NOT be active immediately. " "Set 'enabled: true' unless the user asked for it disabled.", ) else: report.ok(section, "enabled = true") - # provider field — required for API service card display + # provider field - required for API service card display if _tool_under_api(yaml_path): provider = data.get("provider") prov_dir = _provider_dir(yaml_path) @@ -296,7 +296,7 @@ def _validate_yaml_metadata( section, "tool is under api/ but neither 'provider' field nor " "a provider subdirectory with _provider.yaml is present " - "— it will not appear as an API service card", + "- it will not appear as an API service card", ) elif provider: report.ok(section, f"provider = {provider}") @@ -313,7 +313,7 @@ def _validate_yaml_parameters(data: Dict[str, Any], report: Report) -> Set[str]: if input_schema is None and params_list is None: report.warn( section, - "no inputSchema or parameters declared — " + "no inputSchema or parameters declared - " "the LLM will not be able to pass arguments", ) return declared @@ -322,7 +322,7 @@ def _validate_yaml_parameters(data: Dict[str, Any], report: Report) -> Set[str]: report.warn( section, "both 'inputSchema' and 'parameters' are present; " - "'inputSchema' wins — drop 'parameters' to avoid confusion", + "'inputSchema' wins - drop 'parameters' to avoid confusion", ) if isinstance(input_schema, dict): @@ -411,14 +411,14 @@ def _validate_param_entry( if not description or not str(description).strip(): report.warn( section, - f"parameter '{pname}' missing 'description' — " + f"parameter '{pname}' missing 'description' - " "the LLM cannot reliably fill it in", ) if is_required and "default" in pinfo: report.warn( section, - f"parameter '{pname}' is required but also has a default — " + f"parameter '{pname}' is required but also has a default - " "default is ignored when required=true", ) @@ -437,13 +437,13 @@ def _validate_yaml_handler( if isinstance(execution, dict): report.fail( section, - "uses inline 'execution' block — disabled for safety. " + "uses inline 'execution' block - disabled for safety. " "Use handler.type=script with a separate handler file.", ) else: report.fail( section, - "missing 'handler' section — loader will refuse to register " + "missing 'handler' section - loader will refuse to register " "the tool", ) return @@ -472,13 +472,13 @@ def _validate_http_handler( method = handler.get("method") if not method: - report.warn(section, "no 'method' set — loader defaults to GET") + report.warn(section, "no 'method' set - loader defaults to GET") elif str(method).upper() not in {"GET", "POST", "PUT", "PATCH", "DELETE", "HEAD", "OPTIONS"}: report.warn(section, f"unusual HTTP method: {method!r}") url = handler.get("url") if not url: - report.fail(section, "handler.url is empty — request would target ''") + report.fail(section, "handler.url is empty - request would target ''") else: report.ok(section, f"handler.url = {url}") prov_dir = _provider_dir(yaml_path) @@ -487,7 +487,7 @@ def _validate_http_handler( report.fail( section, "url uses {base_url} but the tool is not under " - "api/{provider}/ — there is no _provider.yaml to inject it", + "api/{provider}/ - there is no _provider.yaml to inject it", ) # Collect placeholders across url/headers/query_params/body @@ -517,7 +517,7 @@ def _scan(value: Any) -> None: report.fail( section, f"placeholder '{{{name}}}' is referenced in url/headers/" - f"query_params/body but not declared as a parameter — " + f"query_params/body but not declared as a parameter - " "loader will substitute an empty string", ) @@ -560,7 +560,7 @@ def _scan_secret(value: Any) -> None: for s in sorted(secret_refs): report.warn( "Secrets", - f"references {{secret:{s}}} — confirm it exists in " + f"references {{secret:{s}}} - confirm it exists in " "~/.flocks/config/.secret.json", ) @@ -641,7 +641,7 @@ def _validate_script_handler( "Add it to the signature or accept **kwargs.", ) - # Detect imports of ToolResult — warn if missing. + # Detect imports of ToolResult - warn if missing. has_toolresult_import = any( isinstance(node, ast.ImportFrom) and node.module == "flocks.tool.registry" @@ -669,7 +669,7 @@ def _validate_provider_yaml( if not provider_file.is_file(): report.fail( section, - f"_provider.yaml is missing at {provider_file} — required for " + f"_provider.yaml is missing at {provider_file} - required for " "the API service card to render", ) return @@ -691,7 +691,7 @@ def _validate_provider_yaml( if not prov_data.get("description_cn"): report.warn( section, - "_provider.yaml missing 'description_cn' — Chinese UI will fall " + "_provider.yaml missing 'description_cn' - Chinese UI will fall " "back to English", ) @@ -702,21 +702,21 @@ def _validate_provider_yaml( if not defaults.get("base_url"): report.fail( section, - "_provider.yaml.defaults.base_url is missing — handler urls " + "_provider.yaml.defaults.base_url is missing - handler urls " "using {base_url} will resolve to '/path'", ) if "category" not in defaults and not data.get("category"): report.warn( section, "_provider.yaml.defaults.category is missing and the tool also " - "has no category — loader falls back to 'custom'", + "has no category - loader falls back to 'custom'", ) auth = prov_data.get("auth") if auth is None: report.warn( section, - "_provider.yaml has no 'auth' block — that is fine for " + "_provider.yaml has no 'auth' block - that is fine for " "open APIs, but most providers need a credential", ) elif isinstance(auth, dict): @@ -732,7 +732,7 @@ def _validate_provider_yaml( if inject_as == "query_param" and not auth.get("param_name"): report.warn( section, - "_provider.yaml.auth.param_name missing — defaults to 'api_key'", + "_provider.yaml.auth.param_name missing - defaults to 'api_key'", ) @@ -777,7 +777,7 @@ def validate_python_tool(py_path: Path) -> Report: if not decorated: report.fail( "Decorator", - "no @ToolRegistry.register_function decorator found — " + "no @ToolRegistry.register_function decorator found - " "the tool will not be registered on import", ) return report @@ -841,7 +841,7 @@ def _validate_python_decorated_function( if name in RESERVED_TOOL_NAMES: report.fail( section, - f"name '{name}' collides with a built-in tool — pick another", + f"name '{name}' collides with a built-in tool - pick another", ) report.ok(section, f"name = {name}") @@ -850,7 +850,7 @@ def _validate_python_decorated_function( elif len(description.strip()) < 20: report.warn( section, - f"description is only {len(description.strip())} chars — " + f"description is only {len(description.strip())} chars - " "the LLM uses this to decide when to invoke the tool", ) else: @@ -859,10 +859,10 @@ def _validate_python_decorated_function( if category_node is None: report.warn( section, - "no 'category=' set — defaults to ToolCategory.CUSTOM", + "no 'category=' set - defaults to ToolCategory.CUSTOM", ) elif isinstance(category_node, ast.Attribute): - # ToolCategory.SOMETHING — accept; full validation requires runtime. + # ToolCategory.SOMETHING - accept; full validation requires runtime. report.ok(section, f"category = ToolCategory.{category_node.attr}") elif isinstance(category_node, ast.Constant) and isinstance(category_node.value, str): if category_node.value not in VALID_CATEGORIES: @@ -878,7 +878,7 @@ def _validate_python_decorated_function( if parameters_node is None: report.warn( section, - "no 'parameters=' provided — the tool exposes zero arguments", + "no 'parameters=' provided - the tool exposes zero arguments", ) elif isinstance(parameters_node, ast.List): if not parameters_node.elts: @@ -900,14 +900,14 @@ def _validate_python_decorated_function( report.warn( section, f"parameter '{pname}' missing 'type=' " - "(defaults will not work — type is required)", + "(defaults will not work - type is required)", ) pdesc = _const_str(_kwarg_value(elt, "description")) if not pdesc or not pdesc.strip(): report.warn( section, - f"parameter '{pname}' missing 'description=' — " + f"parameter '{pname}' missing 'description=' - " "the LLM cannot reliably fill it in", ) if declared_params: @@ -969,7 +969,7 @@ def _validate_python_decorated_function( if not returns_toolresult: report.warn( section, - "no 'return ToolResult(...)' detected — loader will wrap the " + "no 'return ToolResult(...)' detected - loader will wrap the " "return value, but explicit ToolResult is recommended", ) else: diff --git a/flocks/agent/agents/hephaestus/agent.yaml b/flocks/agent/agents/hephaestus/agent.yaml index be418d7c..5d081f3f 100644 --- a/flocks/agent/agents/hephaestus/agent.yaml +++ b/flocks/agent/agents/hephaestus/agent.yaml @@ -17,7 +17,7 @@ tools: - apply_patch - websearch - webfetch - - skill + - skill_load - delegate_task - task - todoread diff --git a/flocks/agent/agents/rex/prompt_builder.py b/flocks/agent/agents/rex/prompt_builder.py index 653d26d9..89e51578 100644 --- a/flocks/agent/agents/rex/prompt_builder.py +++ b/flocks/agent/agents/rex/prompt_builder.py @@ -193,7 +193,7 @@ def build_dynamic_rex_prompt( - Prefer existing libraries over new dependencies. - Prefer small, focused changes over large refactors. - When uncertain about scope, ask. -- If a user query matches a skill and the relevant tools, load the skill first and follow its guidance. +- If a user query matches a skill and the relevant tools, call `skill_load` first and follow its guidance. __COMMAND_GUIDANCE__ @@ -221,7 +221,7 @@ def _build_rex_skills_section(available_skills: List["AvailableSkill"]) -> str: lines = [ "### Available Skills", "", - "Load a skill when the task clearly matches its domain expertise.", + "Call `skill_load` when the task clearly matches a skill's domain expertise.", "", ] for skill in available_skills: diff --git a/flocks/agent/agents/rex_junior/agent.yaml b/flocks/agent/agents/rex_junior/agent.yaml index 04996590..c33afbe7 100644 --- a/flocks/agent/agents/rex_junior/agent.yaml +++ b/flocks/agent/agents/rex_junior/agent.yaml @@ -13,7 +13,7 @@ tools: - edit - write - bash - - skill + - skill_load - delegate_task - task - todoread diff --git a/flocks/agent/agents/self_enhance/agent.yaml b/flocks/agent/agents/self_enhance/agent.yaml index 28bde8b3..3f04cb1b 100644 --- a/flocks/agent/agents/self_enhance/agent.yaml +++ b/flocks/agent/agents/self_enhance/agent.yaml @@ -21,7 +21,7 @@ tools: - bash - websearch - webfetch - - skill + - skill_load - background_output - background_cancel prompt_metadata: diff --git a/flocks/agent/agents/self_enhance/prompt.md b/flocks/agent/agents/self_enhance/prompt.md index 271b2253..e7d4ae9a 100644 --- a/flocks/agent/agents/self_enhance/prompt.md +++ b/flocks/agent/agents/self_enhance/prompt.md @@ -14,7 +14,7 @@ You receive a description of a capability gap. Your job is to close that gap by: 3. Verifying it works 4. Reporting the result back clearly -You have strong capability-acquisition access through `bash`, `read`, `write`, `edit`, `apply_patch`, `websearch`, `webfetch`, and `skill`. Use them freely but safely. +You have strong capability-acquisition access through `bash`, `read`, `write`, `edit`, `apply_patch`, `websearch`, `webfetch`, and `skill_load`. Use them freely but safely. --- @@ -64,7 +64,7 @@ If a PyPI package is needed, install it via `bash` using the project virtualenv Once the solution is proven, use the `tool-builder` skill to create a permanent Flocks plugin: ``` -skill(name="tool-builder") +skill_load(name="tool-builder") ``` Follow the skill's instructions to create either: diff --git a/flocks/server/routes/misc.py b/flocks/server/routes/misc.py index eaa43bcf..3a8faf21 100644 --- a/flocks/server/routes/misc.py +++ b/flocks/server/routes/misc.py @@ -11,7 +11,7 @@ from flocks.utils.log import Log from flocks.provider.provider import Provider, ProviderConfig -from flocks.tool.system.skill import get_all_skills, get_skill +from flocks.tool.system.skill_load import get_all_skills, get_skill from flocks.command.command import Command diff --git a/flocks/session/lifecycle/compaction/models.py b/flocks/session/lifecycle/compaction/models.py index 359fd285..b251aed1 100644 --- a/flocks/session/lifecycle/compaction/models.py +++ b/flocks/session/lifecycle/compaction/models.py @@ -10,7 +10,7 @@ PRUNE_MINIMUM = 20_000 PRUNE_PROTECT = 40_000 -PRUNE_PROTECTED_TOOLS = ["skill"] +PRUNE_PROTECTED_TOOLS = ["skill_load"] PRESERVE_LAST_STEPS = 10 DEFAULT_COMPACTION_PROMPT = """\ diff --git a/flocks/session/runner.py b/flocks/session/runner.py index 4888d21a..c07ca4ba 100644 --- a/flocks/session/runner.py +++ b/flocks/session/runner.py @@ -1781,24 +1781,6 @@ async def _build_callable_tool_schema( tools = [] for tool_info in selected_tool_infos: description = tool_info.description - if tool_info.name == "skill": - # Import here to avoid circular dependency. - from flocks.tool.system.skill import build_description - from flocks.skill.skill import Skill - - # `list_enabled()` honors the user's per-skill disable toggle - # in ``~/.flocks/config/skill_settings.json``. We MUST rebuild - # the description at schema-build time (not just in the wrapper - # invoked when the LLM eventually calls the `skill` tool), - # because this description ships as part of the tool index in - # every system prompt — without this refresh, a disabled skill - # would still appear in the LLM's view of available skills. - skills = await Skill.list_enabled() - description = build_description(skills) - log.info("runner.build_tools.skill_description", { - "skill_count": len(skills), - "description_preview": description[:100], - }) # Surface provider/service version to the model so it can pick # version-appropriate parameters (e.g. SIP v9.2 vs older spec). diff --git a/flocks/tool/catalog.py b/flocks/tool/catalog.py index e974fdfb..5b8497b7 100644 --- a/flocks/tool/catalog.py +++ b/flocks/tool/catalog.py @@ -50,7 +50,7 @@ class ToolCatalogMetadata(BaseModel): "run_workflow_node": ["workflow", "execution"], "question": ["user-interaction", "clarification"], "flocks_skills": ["skill", "management"], - "skill": ["knowledge", "skill"], + "skill_load": ["knowledge", "skill"], "tool_search": ["tool-discovery", "capability-search"], "session_list": ["session", "history"], "session_get": ["session", "history"], diff --git a/flocks/tool/file/read.py b/flocks/tool/file/read.py index 0e44ad96..1e71d251 100644 --- a/flocks/tool/file/read.py +++ b/flocks/tool/file/read.py @@ -27,7 +27,7 @@ # Constants — keep file reads paginated while allowing larger local context. DEFAULT_READ_LIMIT = 2000 MAX_LINE_LENGTH = 2000 -MAX_BYTES = 20 * 1024 # 20 KB +MAX_BYTES = 50 * 1024 # 50 KB # Binary file extensions BINARY_EXTENSIONS = { @@ -45,6 +45,9 @@ DESCRIPTION = """Reads a file from the local filesystem. You can access any file directly by using this tool. Assume this tool is able to read all files on the machine. If the User provides a path to a file assume that path is valid. It is okay to read a file that does not exist; an error will be returned. +Do not use this tool when a dedicated tool is a better fit: +- Load SKILL.md for one specific skill -> `skill_load` + Usage: - filePath may be absolute, use `~`, or be relative to the current project directory - By default, it reads up to 2000 lines starting from the beginning of the file diff --git a/flocks/tool/registry.py b/flocks/tool/registry.py index 9cee6d63..17b1afe1 100644 --- a/flocks/tool/registry.py +++ b/flocks/tool/registry.py @@ -944,6 +944,16 @@ def _load_plugin_tools(cls) -> None: log.warn("tool_registry.plugin_load_failed", {"error": str(e)}) after = set(cls._tools.keys()) new_plugin_tools = sorted(after - before) + python_tool_sources: Dict[str, Path] = {} + try: + from flocks.tool.tool_loader import discover_python_tool_sources + + python_tool_sources = discover_python_tool_sources() + except Exception as e: + log.debug("tool_registry.python_source_discovery_failed", {"error": str(e)}) + + user_plugin_root = (Path.home() / ".flocks" / "plugins").resolve() + python_plugin_names: set[str] = set() for name in new_plugin_tools: tool = cls._tools.get(name) if tool is None: @@ -953,7 +963,27 @@ def _load_plugin_tools(cls) -> None: # consumer that would normally stamp source="plugin_py". if tool.info.source is None: tool.info.source = "plugin_py" - cls._plugin_tool_names = new_plugin_tools + # Some python plugin tools may be registered as a side effect before + # this method captures ``before`` (for example during import chains + # kicked off by built-in tool initialization). Do not rely solely on + # the ``after - before`` delta; reconcile against the actual plugin + # files on disk so refresh/restart keeps their source metadata stable. + for name, origin in python_tool_sources.items(): + tool = cls._tools.get(name) + if tool is None: + continue + existing_source = tool.info.source + if existing_source not in (None, "plugin_py"): + continue + tool.info.source = "plugin_py" + python_plugin_names.add(name) + origin_path = origin.resolve() + try: + origin_path.relative_to(user_plugin_root) + tool.info.native = False + except ValueError: + tool.info.native = True + cls._plugin_tool_names = sorted(set(new_plugin_tools) | python_plugin_names) cls._bootstrap_user_api_services() # Defence-in-depth: ``register()`` is the canonical writer for # ``_enabled_defaults`` but this catches any tool that landed in @@ -1297,8 +1327,8 @@ def _register_builtin_tools(cls) -> None: ("flocks.tool.task", ["task", "schedule_task_center", "todo", "plan", "run_workflow", "run_workflow_node"]), # security/ — SSH forensics + threat intelligence (optional: asyncssh) ("flocks.tool.security", ["ssh_host_cmd", "ssh_run_script"]), - # system/ — background tasks, questions, model config, memory, skill, MCP management, session management, slash commands - ("flocks.tool.system", ["background_output", "background_cancel", "question", "model_config", "memory", "skill", "flocks_mcp", "session_manage", "slash_command", "tool_search"]), + # system/ — background tasks, questions, model config, memory, skill_load, MCP management, session management, slash commands + ("flocks.tool.system", ["background_output", "background_cancel", "question", "model_config", "memory", "skill_load", "flocks_mcp", "session_manage", "slash_command", "tool_search"]), # skill/ — skill management (search, install, status, deps, remove) ("flocks.tool.skill", ["flocks_skills"]), # device/ — security device asset context diff --git a/flocks/tool/system/skill.py b/flocks/tool/system/skill_load.py similarity index 86% rename from flocks/tool/system/skill.py rename to flocks/tool/system/skill_load.py index dc241963..0f70e627 100644 --- a/flocks/tool/system/skill.py +++ b/flocks/tool/system/skill_load.py @@ -1,7 +1,7 @@ """ Skill Tool - Load and execute skills. -The `skill` tool is the load-on-demand half of the skill system: keep the +The `skill_load` tool is the load-on-demand half of the skill system: keep the tool schema short, and load the full SKILL.md only after the model has already decided which skill applies. """ @@ -16,7 +16,7 @@ from flocks.utils.log import Log -log = Log.create(service="tool.skill") +log = Log.create(service="tool.skill_load") MAX_SKILL_DESCRIPTION_PREVIEW_CHARS = 500 @@ -28,7 +28,7 @@ "prompt's available-skills guidance or another discovery step. " "If a skills listing tool is available, use that first when unsure which " "skill applies. Once you know the name, you must call " - "skill(name=\"\") before acting on the skill." + "skill_load(name=\"\") before acting on the skill." ) @@ -39,13 +39,13 @@ def _truncate_skill_description(description: str, name: str) -> str: Uses head + tail truncation so both the opening (scope/triggers) and the closing (hard constraints, "must load first") survive. Inserts a marker - that tells the model how to fetch the full content via the `skill` tool. + that tells the model how to fetch the full content via the `skill_load` tool. """ max_chars = MAX_SKILL_DESCRIPTION_PREVIEW_CHARS if len(description) <= max_chars: return description - marker = f' … [truncated; load full SKILL.md via skill(name="{name}") before acting] … ' + marker = f' … [truncated; load full SKILL.md via skill_load(name="{name}") before acting] … ' available = max_chars - len(marker) if available < 80: # Marker alone is unusually long (very long skill name); fall back to @@ -58,7 +58,7 @@ def _truncate_skill_description(description: str, name: str) -> str: def build_description(skills: List[SkillInfo]) -> str: - """Return the stable, token-light `skill` tool description.""" + """Return the stable, token-light `skill_load` tool description.""" _ = skills return SKILL_TOOL_DESCRIPTION @@ -138,7 +138,7 @@ async def skill_tool_impl( # ``truncated=True`` here is intentional: it tells ToolRegistry's # auto-truncate path (registry.py: "Auto-truncate output unless the tool - # already handled it") to leave our payload alone. The `skill` tool is the + # already handled it") to leave our payload alone. The `skill_load` tool is the # *load-on-demand* counterpart of the tiny preview that ships in the system # prompt -- if the model just decided to load this skill, it needs the # FULL SKILL.md to act on. Cropping it at 100 KB / 1000 lines (the @@ -215,7 +215,7 @@ async def get_skill(name: str) -> dict | None: @ToolRegistry.register_function( - name="skill", + name="skill_load", description=SKILL_TOOL_DESCRIPTION, category=ToolCategory.SYSTEM, native=True, @@ -232,18 +232,5 @@ async def skill_tool( ctx: ToolContext, name: str, ) -> ToolResult: - """Wrapper that refreshes the `skill` tool description on every call. - - Why we keep refreshing instead of relying on a one-shot registration: - toggling a skill off in the UI must immediately remove it from the LLM's - view, but the `skill` tool description is part of the tool index baked - into the system prompt. Re-building from `Skill.list_enabled()` here - mirrors the same call in `session/runner.py:build_tools` so a disabled - skill never re-appears on the next turn. - """ - tool = ToolRegistry.get("skill") - if tool: - skills = await Skill.list_enabled() - tool.info.description = build_description(skills) - + """Wrapper that keeps `skill_load` as a pure load-on-demand tool.""" return await skill_tool_impl(ctx, name) diff --git a/flocks/tool/task/run_workflow.py b/flocks/tool/task/run_workflow.py index 669f8b4f..2f266d1c 100644 --- a/flocks/tool/task/run_workflow.py +++ b/flocks/tool/task/run_workflow.py @@ -359,7 +359,7 @@ async def run_workflow_tool( Returns: ToolResult with workflow execution results """ - # Update tool description with available workflows on each call (like the skill tool) + # Update tool description with available workflows on each call (like the skill_load tool) tool = ToolRegistry.get("run_workflow") if tool: tool.info.description = await _build_description() diff --git a/flocks/tool/tool_loader.py b/flocks/tool/tool_loader.py index 48027ce8..64e4071f 100644 --- a/flocks/tool/tool_loader.py +++ b/flocks/tool/tool_loader.py @@ -914,6 +914,29 @@ def _iter_python_tool_files() -> List[Path]: return files +def discover_python_tool_sources() -> Dict[str, Path]: + """Map registered python tool names to the plugin file that defines them.""" + tool_sources: Dict[str, Path] = {} + for path in _iter_python_tool_files(): + try: + source = path.read_text(encoding="utf-8") + module = ast.parse(source) + except Exception as e: + log.warn("tool.python.parse_failed", {"path": str(path), "error": str(e)}) + continue + + for node in ast.walk(module): + if not isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef)): + continue + for decorator in node.decorator_list: + if not isinstance(decorator, ast.Call): + continue + registered_name = _register_function_name(decorator) + if registered_name and registered_name not in tool_sources: + tool_sources[registered_name] = path + return tool_sources + + def _register_function_name(call: ast.Call) -> Optional[str]: """Extract tool name from a ToolRegistry.register_function decorator call.""" func = call.func diff --git a/tests/agent/test_prompt_utils.py b/tests/agent/test_prompt_utils.py index 3e2fae84..e90c8c68 100644 --- a/tests/agent/test_prompt_utils.py +++ b/tests/agent/test_prompt_utils.py @@ -75,10 +75,10 @@ def test_unknown_tool_defaults_to_system(self): assert result[0].category == "system" def test_returns_avail_tool_objects(self): - mapping = {"skill": _make_tool_entry("skill", "system")} + mapping = {"skill_load": _make_tool_entry("skill_load", "system")} with patch(self._REGISTRY_GET, side_effect=_registry_side_effect(mapping)), \ patch(self._REGISTRY_INIT): - result = categorize_tools(["skill"]) + result = categorize_tools(["skill_load"]) assert all(isinstance(t, AvailableTool) for t in result) def test_preserves_tool_names(self): @@ -264,9 +264,9 @@ def test_empty_agents_still_returns_header(self): output = build_agent_selection_table([]) assert "Available Agents" in output - def test_default_flow_hint_present(self): + def test_default_flow_hint_absent(self): output = build_agent_selection_table([]) - assert "Default flow" in output + assert "Default flow" not in output def test_triggers_rendered_when_available(self): agent = self._make_agent("explore") diff --git a/tests/integration/test_capability_awareness.py b/tests/integration/test_capability_awareness.py index 975e7935..a043fb50 100644 --- a/tests/integration/test_capability_awareness.py +++ b/tests/integration/test_capability_awareness.py @@ -136,7 +136,7 @@ def test_mock_tool_set_all_categories_visible(self): "bash": _make_tool_entry("bash", "terminal"), "grep": _make_tool_entry("grep", "search"), "webfetch": _make_tool_entry("webfetch", "browser"), - "skill": _make_tool_entry("skill", "system"), + "skill_load": _make_tool_entry("skill_load", "system"), } with patch("flocks.tool.registry.ToolRegistry.get", side_effect=lambda n: tool_map.get(n)), \ patch("flocks.tool.registry.ToolRegistry.init"): diff --git a/tests/session/test_runner_step.py b/tests/session/test_runner_step.py index e7a72bf5..0ae8830e 100644 --- a/tests/session/test_runner_step.py +++ b/tests/session/test_runner_step.py @@ -538,7 +538,7 @@ async def test_build_tools_refreshes_skill_description_from_enabled_skills(self) runner = _make_runner() agent = _make_agent(name="rex") skill_tool = ToolInfo( - name="skill", + name="skill_load", description="Original skill description", category=ToolCategory.SYSTEM, native=True, @@ -558,8 +558,8 @@ async def test_build_tools_refreshes_skill_description_from_enabled_skills(self) ): tools = await runner._build_callable_tool_schema(agent, []) - assert tools[0]["function"]["name"] == "skill" - assert tools[0]["function"]["description"] == "Refreshed skill description" + assert tools[0]["function"]["name"] == "skill_load" + assert tools[0]["function"]["description"] == "Original skill description" class TestBuildSystemPrompts: diff --git a/tests/skills/test_tool_validator.py b/tests/skills/test_tool_validator.py index ed1b4f98..88a224cb 100644 --- a/tests/skills/test_tool_validator.py +++ b/tests/skills/test_tool_validator.py @@ -1,4 +1,4 @@ -"""Tests for .flocks/plugins/skills/tool-builder/validator.py.""" +"""Tests for .flocks/plugins/skills/tool-builder/scripts/validator.py.""" import sys import textwrap from pathlib import Path @@ -7,7 +7,7 @@ # Make the validator importable without installing it. SKILL_DIR = Path(__file__).parent.parent.parent / ".flocks" / "plugins" / "skills" / "tool-builder" -sys.path.insert(0, str(SKILL_DIR)) +sys.path.insert(0, str(SKILL_DIR / "scripts")) from validator import main, validate_yaml_tool, validate_python_tool # noqa: E402 diff --git a/tests/tool/test_agent_toolset.py b/tests/tool/test_agent_toolset.py index 6863e38c..e6df0309 100644 --- a/tests/tool/test_agent_toolset.py +++ b/tests/tool/test_agent_toolset.py @@ -108,7 +108,7 @@ def test_builtin_agent_yaml_tool_names_match_current_registry_surface() -> None: "run_workflow", "run_workflow_node", "session_list", - "skill", + "skill_load", "task", "todoread", "todowrite", diff --git a/tests/tool/test_builtin_management_tools.py b/tests/tool/test_builtin_management_tools.py index ac7b70ad..eb0a2406 100644 --- a/tests/tool/test_builtin_management_tools.py +++ b/tests/tool/test_builtin_management_tools.py @@ -11,10 +11,10 @@ def test_flocks_mcp_is_registered_as_builtin_tool() -> None: assert tool.info.source in {None, "builtin"} -def test_skill_remains_registered_as_builtin_tool() -> None: +def test_skill_load_remains_registered_as_builtin_tool() -> None: ToolRegistry.init() - tool = ToolRegistry.get("skill") + tool = ToolRegistry.get("skill_load") assert tool is not None assert tool.info.native is True diff --git a/tests/tool/test_read_tool_limits.py b/tests/tool/test_read_tool_limits.py index 9805d496..dfbd4a02 100644 --- a/tests/tool/test_read_tool_limits.py +++ b/tests/tool/test_read_tool_limits.py @@ -22,7 +22,7 @@ def tool_context() -> ToolContext: def test_read_tool_limit_constants(): assert read_tool_module.DEFAULT_READ_LIMIT == 2000 assert read_tool_module.MAX_LINE_LENGTH == 2000 - assert read_tool_module.MAX_BYTES == 20 * 1024 + assert read_tool_module.MAX_BYTES == 50 * 1024 @pytest.mark.asyncio @@ -57,7 +57,7 @@ async def test_long_lines_are_truncated_at_2000_characters(tool_context, tmp_pat async def test_byte_limit_truncation_prompts_offset_continue(tool_context, tmp_path): file_path = tmp_path / "wide-lines.txt" file_path.write_text( - "\n".join("x" * 100 for _ in range(300)), + "\n".join("x" * 100 for _ in range(600)), encoding="utf-8", ) diff --git a/tests/tool/test_skill_tool_description.py b/tests/tool/test_skill_tool_description.py index 41317adb..eab68a92 100644 --- a/tests/tool/test_skill_tool_description.py +++ b/tests/tool/test_skill_tool_description.py @@ -1,11 +1,11 @@ -"""Tests for flocks.tool.system.skill. +"""Tests for flocks.tool.system.skill_load. -Two complementary aspects of the skill tool's load-on-demand design are +Two complementary aspects of the skill_load tool's load-on-demand design are exercised here: 1. ``build_description`` — the tool schema must stay short and stable. It should tell the model to discover the right skill first, then call - ``skill(name=...)`` to load the full SKILL.md before acting. + ``skill_load(name=...)`` to load the full SKILL.md before acting. 2. ``skill_tool`` — when the model actually calls the tool, the FULL SKILL.md must come back unredacted. This mirrors hermes-agent's ``skill_view``. @@ -25,7 +25,7 @@ from flocks.skill.skill import Skill, SkillInfo from flocks.tool.registry import ToolContext, ToolRegistry -from flocks.tool.system.skill import ( +from flocks.tool.system.skill_load import ( MAX_SKILL_DESCRIPTION_PREVIEW_CHARS, _truncate_skill_description, build_description, @@ -82,7 +82,7 @@ def test_truncation_inserts_skill_load_hint(self): out = _truncate_skill_description(desc, "onesec-use") # The truncation marker must point the model at the right tool call, # otherwise progressive disclosure breaks. - assert 'skill(name="onesec-use")' in out + assert 'skill_load(name="onesec-use")' in out assert "truncated" in out def test_chinese_threat_intel_skill_keeps_trailing_constraint(self): @@ -125,7 +125,7 @@ def test_description_does_not_inline_skill_inventory(self): def test_includes_progressive_disclosure_instruction(self): out = build_description([_skill("alpha", "demo")]) # Must still direct the model at the load-on-demand call pattern. - assert "skill(name=" in out + assert "skill_load(name=" in out assert "load the full skill.md" in out.lower() assert "MUST" in out or "must" in out.lower() @@ -236,7 +236,7 @@ async def test_skill_tool_sets_truncated_flag_to_bypass_registry(self, fake_skil @pytest.mark.asyncio async def test_registry_execute_does_not_truncate_skill_output(self, fake_skill_dir): - """End-to-end: when the `skill` tool is executed via ToolRegistry + """End-to-end: when the `skill_load` tool is executed via ToolRegistry (which is what the real session loop does), the auto-truncate path must NOT fire and the tail must survive.""" skill_info = SkillInfo( @@ -245,8 +245,8 @@ async def test_registry_execute_does_not_truncate_skill_output(self, fake_skill_ location=str(fake_skill_dir / "SKILL.md"), ) - skill_tool = ToolRegistry.get("skill") - assert skill_tool is not None, "skill tool must be registered" + skill_tool = ToolRegistry.get("skill_load") + assert skill_tool is not None, "skill_load tool must be registered" with patch.object(Skill, "all", AsyncMock(return_value=[skill_info])), \ patch.object(Skill, "get", AsyncMock(return_value=skill_info)): diff --git a/tests/tool/test_slash_command_extended.py b/tests/tool/test_slash_command_extended.py index 82f3a53c..02572f94 100644 --- a/tests/tool/test_slash_command_extended.py +++ b/tests/tool/test_slash_command_extended.py @@ -120,7 +120,7 @@ def mock_tools(self): _make_tool_info("bash", "terminal"), _make_tool_info("grep", "search"), _make_tool_info("webfetch", "browser"), - _make_tool_info("skill", "system"), + _make_tool_info("skill_load", "system"), ] async def test_returns_success(self, mock_tools): diff --git a/tests/tool/test_tool_plugin.py b/tests/tool/test_tool_plugin.py index 02232bca..bf05d895 100644 --- a/tests/tool/test_tool_plugin.py +++ b/tests/tool/test_tool_plugin.py @@ -22,6 +22,7 @@ create_yaml_tool, delete_python_tool, delete_yaml_tool, + discover_python_tool_sources, find_yaml_tool, list_yaml_tools, read_yaml_tool, @@ -670,6 +671,41 @@ def test_delete_project_level_yaml_tool(self, tmp_path: Path, monkeypatch): assert not project_yaml.exists() assert find_yaml_tool("project_del_tool") is None + def test_discover_python_tool_sources_scans_user_and_project_dirs(self, tmp_path: Path, monkeypatch): + monkeypatch.setattr("flocks.tool.tool_loader._TOOLS_SUBDIR", tmp_path / "user_tools") + monkeypatch.chdir(tmp_path) + + user_python = tmp_path / "user_tools" / "python" / "user_tool.py" + user_python.parent.mkdir(parents=True, exist_ok=True) + user_python.write_text(textwrap.dedent("""\ + from flocks.tool.registry import ToolRegistry + + @ToolRegistry.register_function( + name="user_tool", + description="user tool", + ) + async def user_tool(ctx): + return None + """)) + + project_python = tmp_path / ".flocks" / "plugins" / "tools" / "python" / "project_tool.py" + project_python.parent.mkdir(parents=True, exist_ok=True) + project_python.write_text(textwrap.dedent("""\ + from flocks.tool.registry import ToolRegistry + + @ToolRegistry.register_function( + name="project_tool", + description="project tool", + ) + async def project_tool(ctx): + return None + """)) + + sources = discover_python_tool_sources() + + assert sources["user_tool"] == user_python + assert sources["project_tool"] == project_python + # --------------------------------------------------------------------------- # scan_directory recursive @@ -821,9 +857,11 @@ def test_load_plugin_tools_marks_decorator_registered_tools_as_plugin_py(self): old_tools = ToolRegistry._tools.copy() old_plugin_names = ToolRegistry._plugin_tool_names.copy() + old_enabled_defaults = ToolRegistry._enabled_defaults.copy() try: ToolRegistry._tools = {} ToolRegistry._plugin_tool_names = [] + ToolRegistry._enabled_defaults = {} def _fake_plugin_load() -> None: ToolRegistry.register(Tool( @@ -843,6 +881,159 @@ def _fake_plugin_load() -> None: finally: ToolRegistry._tools = old_tools ToolRegistry._plugin_tool_names = old_plugin_names + ToolRegistry._enabled_defaults = old_enabled_defaults + + def test_load_plugin_tools_marks_project_python_tools_native(self, tmp_path: Path): + from flocks.tool.registry import ToolRegistry, ToolInfo, ToolCategory, Tool + + old_tools = ToolRegistry._tools.copy() + old_plugin_names = ToolRegistry._plugin_tool_names.copy() + old_enabled_defaults = ToolRegistry._enabled_defaults.copy() + project_tool_path = tmp_path / ".flocks" / "plugins" / "tools" / "python" / "project_tool.py" + try: + ToolRegistry._tools = {} + ToolRegistry._plugin_tool_names = [] + ToolRegistry._enabled_defaults = {} + + def _fake_plugin_load() -> None: + ToolRegistry.register(Tool( + info=ToolInfo( + name="project_tool", + description="Project tool", + category=ToolCategory.CUSTOM, + ), + handler=lambda ctx, **kwargs: None, + )) + + with patch("flocks.plugin.PluginLoader.load_all", side_effect=_fake_plugin_load): + with patch( + "flocks.tool.tool_loader.discover_python_tool_sources", + return_value={"project_tool": project_tool_path}, + ): + ToolRegistry._load_plugin_tools() + + assert ToolRegistry._tools["project_tool"].info.source == "plugin_py" + assert ToolRegistry._tools["project_tool"].info.native is True + finally: + ToolRegistry._tools = old_tools + ToolRegistry._plugin_tool_names = old_plugin_names + ToolRegistry._enabled_defaults = old_enabled_defaults + + def test_load_plugin_tools_marks_user_python_tools_non_native(self, tmp_path: Path): + from flocks.tool.registry import ToolRegistry, ToolInfo, ToolCategory, Tool + + old_tools = ToolRegistry._tools.copy() + old_plugin_names = ToolRegistry._plugin_tool_names.copy() + old_enabled_defaults = ToolRegistry._enabled_defaults.copy() + user_tool_path = tmp_path / "user_flocks" / ".flocks" / "plugins" / "tools" / "python" / "user_tool.py" + try: + ToolRegistry._tools = {} + ToolRegistry._plugin_tool_names = [] + ToolRegistry._enabled_defaults = {} + + def _fake_plugin_load() -> None: + ToolRegistry.register(Tool( + info=ToolInfo( + name="user_tool", + description="User tool", + category=ToolCategory.CUSTOM, + native=True, + ), + handler=lambda ctx, **kwargs: None, + )) + + with patch("pathlib.Path.home", return_value=tmp_path / "user_flocks"): + with patch("flocks.plugin.PluginLoader.load_all", side_effect=_fake_plugin_load): + with patch( + "flocks.tool.tool_loader.discover_python_tool_sources", + return_value={"user_tool": user_tool_path}, + ): + ToolRegistry._load_plugin_tools() + + assert ToolRegistry._tools["user_tool"].info.source == "plugin_py" + assert ToolRegistry._tools["user_tool"].info.native is False + finally: + ToolRegistry._tools = old_tools + ToolRegistry._plugin_tool_names = old_plugin_names + ToolRegistry._enabled_defaults = old_enabled_defaults + + def test_load_plugin_tools_does_not_reclassify_builtin_name_collision(self, tmp_path: Path): + from flocks.tool.registry import ToolRegistry, ToolInfo, ToolCategory, Tool + + old_tools = ToolRegistry._tools.copy() + old_plugin_names = ToolRegistry._plugin_tool_names.copy() + old_enabled_defaults = ToolRegistry._enabled_defaults.copy() + colliding_tool_path = tmp_path / ".flocks" / "plugins" / "tools" / "python" / "webfetch.py" + try: + ToolRegistry._tools = { + "webfetch": Tool( + info=ToolInfo( + name="webfetch", + description="Builtin webfetch", + category=ToolCategory.SYSTEM, + source="builtin", + native=True, + ), + handler=lambda ctx, **kwargs: None, + ) + } + ToolRegistry._plugin_tool_names = [] + ToolRegistry._enabled_defaults = {} + + with patch("flocks.plugin.PluginLoader.load_all", return_value=None): + with patch( + "flocks.tool.tool_loader.discover_python_tool_sources", + return_value={"webfetch": colliding_tool_path}, + ): + ToolRegistry._load_plugin_tools() + + assert ToolRegistry._tools["webfetch"].info.source == "builtin" + assert ToolRegistry._tools["webfetch"].info.native is True + assert ToolRegistry._plugin_tool_names == [] + + ToolRegistry._unregister_plugin_tools() + assert "webfetch" in ToolRegistry._tools + finally: + ToolRegistry._tools = old_tools + ToolRegistry._plugin_tool_names = old_plugin_names + ToolRegistry._enabled_defaults = old_enabled_defaults + + def test_load_plugin_tools_reconciles_early_registered_python_plugin(self, tmp_path: Path): + from flocks.tool.registry import ToolRegistry, ToolInfo, ToolCategory, Tool + + old_tools = ToolRegistry._tools.copy() + old_plugin_names = ToolRegistry._plugin_tool_names.copy() + old_enabled_defaults = ToolRegistry._enabled_defaults.copy() + user_tool_path = tmp_path / "user_flocks" / ".flocks" / "plugins" / "tools" / "python" / "calculator.py" + try: + ToolRegistry._tools = { + "calculator": Tool( + info=ToolInfo( + name="calculator", + description="Calculator", + category=ToolCategory.CUSTOM, + ), + handler=lambda ctx, **kwargs: None, + ) + } + ToolRegistry._plugin_tool_names = [] + ToolRegistry._enabled_defaults = {} + + with patch("pathlib.Path.home", return_value=tmp_path / "user_flocks"): + with patch("flocks.plugin.PluginLoader.load_all", return_value=None): + with patch( + "flocks.tool.tool_loader.discover_python_tool_sources", + return_value={"calculator": user_tool_path}, + ): + ToolRegistry._load_plugin_tools() + + assert ToolRegistry._tools["calculator"].info.source == "plugin_py" + assert ToolRegistry._tools["calculator"].info.native is False + assert ToolRegistry._plugin_tool_names == ["calculator"] + finally: + ToolRegistry._tools = old_tools + ToolRegistry._plugin_tool_names = old_plugin_names + ToolRegistry._enabled_defaults = old_enabled_defaults # --------------------------------------------------------------------------- diff --git a/tests/tool/test_tools.py b/tests/tool/test_tools.py index ebfa86f1..e35e29cd 100644 --- a/tests/tool/test_tools.py +++ b/tests/tool/test_tools.py @@ -153,7 +153,7 @@ def test_expected_tools_registered(self): # P1 tools (6) "webfetch", "todoread", "todowrite", "question", "plan_enter", "plan_exit", # P2 tools (5) - "task", "lsp", "skill", + "task", "lsp", "skill_load", "background_output", "background_cancel", # P3 tools (2) "websearch", "apply_patch", @@ -910,13 +910,13 @@ async def test_lsp_exists(self): assert tool is not None -class TestSkillTool: - """Test the skill tool""" +class TestSkillLoadTool: + """Test the skill_load tool""" @pytest.mark.asyncio - async def test_skill_exists(self): - """Test that skill tool is registered""" - tool = ToolRegistry.get("skill") + async def test_skill_load_exists(self): + """Test that skill_load tool is registered""" + tool = ToolRegistry.get("skill_load") assert tool is not None diff --git a/tui/flocks/session/compaction.ts b/tui/flocks/session/compaction.ts index ae692212..65ebd25d 100644 --- a/tui/flocks/session/compaction.ts +++ b/tui/flocks/session/compaction.ts @@ -41,7 +41,7 @@ export namespace SessionCompaction { export const PRUNE_MINIMUM = 20_000 export const PRUNE_PROTECT = 40_000 - const PRUNE_PROTECTED_TOOLS = ["skill"] + const PRUNE_PROTECTED_TOOLS = ["skill_load"] // goes backwards through parts until there are 40_000 tokens worth of tool // calls. then erases output of previous tool calls. idea is to throw away old diff --git a/tui/flocks/tool/skill.ts b/tui/flocks/tool/skill.ts index 386abdae..9df44701 100644 --- a/tui/flocks/tool/skill.ts +++ b/tui/flocks/tool/skill.ts @@ -9,7 +9,7 @@ const parameters = z.object({ name: z.string().describe("The skill identifier from available_skills (e.g., 'code-review' or 'category/helper')"), }) -export const SkillTool = Tool.define("skill", async (ctx) => { +export const SkillTool = Tool.define("skill_load", async (ctx) => { const skills = await Skill.all() // Filter skills by agent permissions if agent provided