diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 96b22a8..1ecd993 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -14,10 +14,10 @@ jobs: python-version: ["3.10", "3.12"] steps: - - uses: actions/checkout@v4 + - uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4.3.1 - name: Install uv - uses: astral-sh/setup-uv@v4 + uses: astral-sh/setup-uv@cec208311dfd045dd5311c1add060b2062131d57 # v8.0.0 - name: Set up Python ${{ matrix.python-version }} run: uv python install ${{ matrix.python-version }} diff --git a/agent/agent.py b/agent/agent.py index b263cf1..3034fd5 100644 --- a/agent/agent.py +++ b/agent/agent.py @@ -129,13 +129,19 @@ def extract_service_name(input_str: str) -> str: my-service -> my-service """ import re + from urllib.parse import urlparse - # Handle GitHub URLs - if "github.com" in input_str: - # Extract repo name from URL - match = re.search(r"github\.com/[^/]+/([^/]+?)(?:\.git)?(?:/.*)?$", input_str) - if match: - return match.group(1) + # Handle GitHub URLs - use proper URL parsing to prevent substring attacks + # e.g., reject "https://github.com.attacker.com/..." or "https://attacker.com?q=github.com" + try: + parsed = urlparse(input_str) + if parsed.scheme in ("http", "https") and parsed.netloc == "github.com": + # Extract repo name from URL path + match = re.search(r"^/[^/]+/([^/]+?)(?:\.git)?(?:/.*)?$", parsed.path) + if match: + return match.group(1) + except Exception: + pass # Handle file paths if "/" in input_str: diff --git a/agent/cli.py b/agent/cli.py index 6f57c2f..61cabea 100644 --- a/agent/cli.py +++ b/agent/cli.py @@ -49,36 +49,19 @@ def load_manifest(artifacts_dir: Path) -> dict | None: def load_components(artifacts_dir: Path) -> list[dict]: - """Load components from service_discovery in an existing artifacts directory. + """Load components from service_discovery in an existing artifacts directory.""" + combined = artifacts_dir / "service_discovery" / "components.json" + if not combined.exists(): + return [] - Merges both 'libraries' and 'applications' arrays from components.json, - falling back to separate libraries.json / applications.json files. - """ - components = [] + with open(combined) as f: + data = json.load(f) - # Try components.json first (combined format) - combined = artifacts_dir / "service_discovery" / "components.json" - if combined.exists(): - with open(combined) as f: - data = json.load(f) - components.extend(data.get("libraries", [])) - components.extend(data.get("applications", [])) - return components - - # Fall back to separate files - for filename in ("libraries.json", "applications.json"): - path = artifacts_dir / "service_discovery" / filename - if path.exists(): - with open(path) as f: - data = json.load(f) - # Handle both array and object-with-key formats - if isinstance(data, list): - components.extend(data) - elif isinstance(data, dict): - for key in ("libraries", "applications"): - components.extend(data.get(key, [])) - - return components + if isinstance(data, dict): + return data.get("components", []) + if isinstance(data, list): + return data + return [] def git_diff_files(repo_path: Path, last_sha: str, head_sha: str) -> list[str]: diff --git a/agent/discovery/engine.py b/agent/discovery/engine.py index 074bff5..0d94bec 100644 --- a/agent/discovery/engine.py +++ b/agent/discovery/engine.py @@ -48,7 +48,9 @@ def discover_components( except Exception as e: logger.warning( "Failed to parse %s with %s plugin: %s", - manifest_path, plugin.name, e, + manifest_path, + plugin.name, + e, ) continue @@ -105,7 +107,8 @@ def _resolve_internal_deps(components: List[Component]) -> None: else: logger.debug( "Could not resolve internal dep '%s' for %s", - dep, comp.name, + dep, + comp.name, ) comp.internal_dependencies = resolved @@ -125,15 +128,11 @@ def _detect_repo_shape(components: List[Component]) -> str: def _write_output(components: List[Component], output_dir: Path) -> None: - """Write components.json in the standard format.""" + """Write components.json as a flat list of all components.""" output_dir.mkdir(parents=True, exist_ok=True) - libraries = [c for c in components if c.kind == ComponentKind.LIBRARY] - executables = [c for c in components if c.kind != ComponentKind.LIBRARY] - output = { - "libraries": [c.to_dict() for c in libraries], - "applications": [c.to_dict() for c in executables], + "components": [c.to_dict() for c in components], "metadata": { "total_components": len(components), "by_kind": { @@ -148,18 +147,7 @@ def _write_output(components: List[Component], output_dir: Path) -> None: }, } - # Write combined components.json with open(output_dir / "components.json", "w") as f: json.dump(output, f, indent=2) - # Also write separate files for backward compat - with open(output_dir / "libraries.json", "w") as f: - json.dump({"libraries": [c.to_dict() for c in libraries]}, f, indent=2) - - with open(output_dir / "applications.json", "w") as f: - json.dump({"applications": [c.to_dict() for c in executables]}, f, indent=2) - - logger.info( - "Wrote %d components to %s (%d libraries, %d executables)", - len(components), output_dir, len(libraries), len(executables), - ) + logger.info("Wrote %d components to %s", len(components), output_dir) diff --git a/agent/discovery/validator.py b/agent/discovery/validator.py index ee4f3ca..e794016 100644 --- a/agent/discovery/validator.py +++ b/agent/discovery/validator.py @@ -80,9 +80,8 @@ def validate_graph( """ errors: List[str] = [] name_set = {c.name for c in components} - library_names = {c.name for c in components if c.is_library} - # Every library should be in the depth order + # Every component should appear in exactly one depth level all_ordered = set() for level in depth_order: for name in level: @@ -90,9 +89,9 @@ def validate_graph( errors.append(f"Component '{name}' appears in multiple depth levels") all_ordered.add(name) - missing = library_names - all_ordered + missing = name_set - all_ordered if missing: - errors.append(f"Libraries missing from depth order: {missing}") + errors.append(f"Components missing from depth order: {missing}") # Validate topological property: for each component at depth N, # all its dependencies should be at depth < N diff --git a/agent/schemas/__init__.py b/agent/schemas/__init__.py index 1271931..63caf0c 100644 --- a/agent/schemas/__init__.py +++ b/agent/schemas/__init__.py @@ -4,28 +4,22 @@ Component, ComponentKind, LanguageType, - KnowledgeBasis, - Application, - Library, ExternalDependency, CodeCitation, component_from_dict, ) -from .dependency_graph import DependencyGraph, ApplicationEdge +from .dependency_graph import DependencyGraph, ComponentEdge from .manifest import ArtifactManifest, ArtifactFile, MANIFEST_SCHEMA_VERSION __all__ = [ "Component", "ComponentKind", "LanguageType", - "KnowledgeBasis", - "Application", - "Library", "ExternalDependency", "CodeCitation", "component_from_dict", "DependencyGraph", - "ApplicationEdge", + "ComponentEdge", "ArtifactManifest", "ArtifactFile", "MANIFEST_SCHEMA_VERSION", diff --git a/agent/schemas/core.py b/agent/schemas/core.py index 8d904c7..33a0d51 100644 --- a/agent/schemas/core.py +++ b/agent/schemas/core.py @@ -1,14 +1,11 @@ """Data structures for discovered components. -Provides a unified Component type with a pluggable ComponentKind taxonomy, -replacing the previous binary Library/Application split. The old types are -preserved as backward-compatible aliases. +Provides a unified Component type with a pluggable ComponentKind taxonomy. """ from dataclasses import dataclass, field from enum import Enum -from typing import List, Optional, Dict, Any -from pathlib import Path +from typing import List, Dict, Any # --------------------------------------------------------------------------- @@ -38,9 +35,6 @@ def from_str(cls, value: str) -> "ComponentKind": try: return cls(value.lower()) except ValueError: - # Backward compat: map old "application" to SERVICE - if value.lower() == "application": - return cls.SERVICE return cls.UNKNOWN @@ -203,9 +197,7 @@ def from_dict(cls, data) -> "ExternalDependency": class Component: """A single component in a codebase. - Replaces the old Library/Application split with a unified type and a - ComponentKind enum for classification. All discovery, graph building, - and analysis operate on Components. + All discovery, graph building, and analysis operate on Components. """ name: str @@ -220,17 +212,12 @@ class Component: metadata: Dict[str, Any] = field(default_factory=dict) citations: List[CodeCitation] = field(default_factory=list) - # Backward-compat fields (populated from old Library/Application data) - libraries_used: List[str] = field(default_factory=list) - internal_applications: List[str] = field(default_factory=list) - def to_dict(self) -> dict: - """Serialize to dictionary. Output is backward-compatible with old format.""" + """Serialize to dictionary.""" d: dict = { "name": self.name, "kind": self.kind.value, "type": self.type, - "classification": self._legacy_classification(), "root_path": self.root_path, "description": self.description, "internal_dependencies": self.internal_dependencies, @@ -246,27 +233,13 @@ def to_dict(self) -> dict: d["metadata"] = self.metadata if self.citations: d["citations"] = [c.to_dict() for c in self.citations] - # Legacy fields for backward compat - if self.libraries_used: - d["libraries_used"] = self.libraries_used - if self.internal_applications: - d["internal_applications"] = self.internal_applications return d - def _legacy_classification(self) -> str: - """Map ComponentKind to old 'library'/'application' for backward compat.""" - if self.kind == ComponentKind.LIBRARY: - return "library" - return "application" - @classmethod def from_dict(cls, data: dict) -> "Component": - """Deserialize from dictionary. Handles both new and old formats.""" - # Determine kind: prefer new 'kind' field, fall back to 'classification' + """Deserialize from dictionary.""" if "kind" in data: kind = ComponentKind.from_str(data["kind"]) - elif "classification" in data: - kind = ComponentKind.from_str(data["classification"]) else: kind = ComponentKind.UNKNOWN @@ -285,8 +258,6 @@ def from_dict(cls, data: dict) -> "Component": key_files=data.get("key_files", []), metadata=data.get("metadata", {}), citations=[CodeCitation.from_dict(c) for c in data.get("citations", [])], - libraries_used=data.get("libraries_used", []), - internal_applications=data.get("internal_applications", []), ) @property @@ -303,161 +274,6 @@ def is_executable(self) -> bool: ) -# --------------------------------------------------------------------------- -# Backward compatibility: Library and Application as thin wrappers -# --------------------------------------------------------------------------- - - -@dataclass -class BaseComponent: - """Base class for legacy applications and libraries.""" - - name: str - type: str - root_path: Path - manifest_path: Optional[Path] = None - description: str = "" - key_files: List[Path] = field(default_factory=list) - metadata: Dict[str, Any] = field(default_factory=dict) - - -@dataclass -class Library(BaseComponent): - """Legacy type. Use Component with kind=LIBRARY instead.""" - - external_dependencies: List[ExternalDependency] = field(default_factory=list) - internal_dependencies: List[str] = field(default_factory=list) - citations: List[CodeCitation] = field(default_factory=list) - - def to_dict(self) -> dict: - return { - "name": self.name, - "type": self.type, - "classification": "library", - "kind": "library", - "root_path": str(self.root_path), - "manifest_path": str(self.manifest_path) if self.manifest_path else None, - "description": self.description, - "external_dependencies": [ - d.to_dict() if isinstance(d, ExternalDependency) else d - for d in self.external_dependencies - ], - "internal_dependencies": self.internal_dependencies, - "key_files": [str(f) for f in self.key_files], - "metadata": self.metadata, - "citations": [c.to_dict() for c in self.citations], - } - - @classmethod - def from_dict(cls, data: dict) -> "Library": - return cls( - name=data["name"], - type=data["type"], - root_path=Path(data["root_path"]), - manifest_path=Path(data["manifest_path"]) - if data.get("manifest_path") - else None, - description=data.get("description", ""), - external_dependencies=[ - ExternalDependency.from_dict(d) - for d in data.get("external_dependencies", []) - ], - internal_dependencies=data.get("internal_dependencies", []), - key_files=[Path(f) for f in data.get("key_files", [])], - metadata=data.get("metadata", {}), - citations=[CodeCitation.from_dict(c) for c in data.get("citations", [])], - ) - - def to_component(self) -> Component: - """Convert to unified Component.""" - return Component( - name=self.name, - kind=ComponentKind.LIBRARY, - type=self.type, - root_path=str(self.root_path), - manifest_path=str(self.manifest_path) if self.manifest_path else "", - description=self.description, - internal_dependencies=self.internal_dependencies, - external_dependencies=self.external_dependencies, - key_files=[str(f) for f in self.key_files], - metadata=self.metadata, - citations=self.citations, - ) - - -@dataclass -class Application(BaseComponent): - """Legacy type. Use Component with kind=SERVICE/CLI/FRONTEND instead.""" - - external_dependencies: List[ExternalDependency] = field(default_factory=list) - libraries_used: List[str] = field(default_factory=list) - internal_applications: List[str] = field(default_factory=list) - citations: List[CodeCitation] = field(default_factory=list) - - def to_dict(self) -> dict: - return { - "name": self.name, - "type": self.type, - "classification": "application", - "kind": "service", - "root_path": str(self.root_path), - "manifest_path": str(self.manifest_path) if self.manifest_path else None, - "description": self.description, - "external_dependencies": [ - d.to_dict() if isinstance(d, ExternalDependency) else d - for d in self.external_dependencies - ], - "libraries_used": self.libraries_used, - "internal_applications": self.internal_applications, - "key_files": [str(f) for f in self.key_files], - "metadata": self.metadata, - "citations": [c.to_dict() for c in self.citations], - } - - @classmethod - def from_dict(cls, data: dict) -> "Application": - return cls( - name=data["name"], - type=data["type"], - root_path=Path(data["root_path"]), - manifest_path=Path(data["manifest_path"]) - if data.get("manifest_path") - else None, - description=data.get("description", ""), - external_dependencies=[ - ExternalDependency.from_dict(d) - for d in data.get("external_dependencies", []) - ], - libraries_used=data.get("libraries_used", []), - internal_applications=data.get("internal_applications", []), - key_files=[Path(f) for f in data.get("key_files", [])], - metadata=data.get("metadata", {}), - citations=[CodeCitation.from_dict(c) for c in data.get("citations", [])], - ) - - def to_component(self) -> Component: - """Convert to unified Component.""" - return Component( - name=self.name, - kind=ComponentKind.SERVICE, - type=self.type, - root_path=str(self.root_path), - manifest_path=str(self.manifest_path) if self.manifest_path else "", - description=self.description, - internal_dependencies=[], - external_dependencies=self.external_dependencies, - key_files=[str(f) for f in self.key_files], - metadata=self.metadata, - citations=self.citations, - libraries_used=self.libraries_used, - internal_applications=self.internal_applications, - ) - - -# Backward compatibility alias -KnowledgeBasis = Application | Library - - def component_from_dict(data: dict) -> Component: """Factory function to deserialize into a Component.""" return Component.from_dict(data) diff --git a/agent/schemas/dependency_graph.py b/agent/schemas/dependency_graph.py index 78e6ac2..b256810 100644 --- a/agent/schemas/dependency_graph.py +++ b/agent/schemas/dependency_graph.py @@ -1,40 +1,40 @@ """Dependency graph data structure with topological sorting.""" from dataclasses import dataclass, field -from typing import List, Dict, Tuple, Optional +from typing import List, Dict from collections import defaultdict, deque @dataclass -class ApplicationEdge: - """Represents an interaction edge between two internal applications. +class ComponentEdge: + """Represents an interaction edge between two components. - Populated during application analysis when code-analyzer subagents - discover internal application-to-application interactions (i.e. between - applications within the same codebase). External applications are tracked - on the Application dataclass itself. + Populated during analysis when code-analyzer subagents discover + internal component-to-component interactions within the same codebase. """ - from_app: str # Name of the calling application - to_app: str # Name of the callee application - communication_protocol: List[str] = field(default_factory=list) # e.g., ["HTTP", "HTTPS"], ["gRPC"], ["Message Queue"] + from_component: str # Name of the calling component + to_component: str # Name of the callee component + communication_protocol: List[str] = field( + default_factory=list + ) # e.g., ["HTTP", "HTTPS"], ["gRPC"], ["Message Queue"] description: str = "" # Few sentence summary of the interaction def to_dict(self) -> dict: """Serialize to dictionary for JSON output.""" return { - "from": self.from_app, - "to": self.to_app, + "from": self.from_component, + "to": self.to_component, "communication_protocol": self.communication_protocol, "description": self.description, } @classmethod - def from_dict(cls, data: dict) -> "ApplicationEdge": + def from_dict(cls, data: dict) -> "ComponentEdge": """Deserialize from dictionary.""" return cls( - from_app=data["from"], - to_app=data["to"], + from_component=data["from"], + to_component=data["to"], communication_protocol=data.get("communication_protocol", []), description=data.get("description", ""), ) @@ -42,7 +42,7 @@ def from_dict(cls, data: dict) -> "ApplicationEdge": @dataclass class DependencyGraph: - """Directed graph representing component dependencies (libraries and applications).""" + """Directed graph representing component dependencies.""" nodes: List[str] = field(default_factory=list) # Component names edges: Dict[str, List[str]] = field( @@ -69,20 +69,6 @@ def get_dependents(self, component_name: str) -> List[str]: """Get components that depend on this component.""" return [node for node in self.nodes if component_name in self.edges[node]] - def get_analysis_order(self) -> Tuple[List[str], List[str]]: - """ - Get the two-phase analysis order (legacy interface). - - Returns: - Tuple of (phase1_components, phase2_components_ordered) - """ - depth_order = self.get_depth_order() - phase1 = depth_order[0] if depth_order else [] - phase2 = [] - for level in depth_order[1:]: - phase2.extend(level) - return (phase1, phase2) - def get_depth_order(self) -> List[List[str]]: """ Get N-level depth-ordered analysis buckets. @@ -198,7 +184,9 @@ def strongconnect(node: str): return sccs - def _topological_sort(self, nodes: List[str], in_degree: Dict[str, int]) -> List[str]: + def _topological_sort( + self, nodes: List[str], in_degree: Dict[str, int] + ) -> List[str]: """ Perform topological sort on a subset of nodes using Kahn's algorithm. @@ -254,4 +242,4 @@ def from_dict(cls, data: dict) -> "DependencyGraph": graph = cls() graph.nodes = data["nodes"] graph.edges = defaultdict(list, data["edges"]) - return graph \ No newline at end of file + return graph diff --git a/agent/schemas/manifest.py b/agent/schemas/manifest.py index 6205681..c6c7b16 100644 --- a/agent/schemas/manifest.py +++ b/agent/schemas/manifest.py @@ -41,7 +41,9 @@ def from_dict(cls, data: dict) -> "ArtifactFile": ) @classmethod - def from_path(cls, file_path: Path, root_dir: Path, category: str = "") -> "ArtifactFile": + def from_path( + cls, file_path: Path, root_dir: Path, category: str = "" + ) -> "ArtifactFile": """Create an ArtifactFile by reading from disk. Args: @@ -86,8 +88,7 @@ class ArtifactManifest: source_commit: str = "" # Git commit hash of the analyzed source (if available) # Content summary - libraries_count: int = 0 # Number of libraries analyzed - applications_count: int = 0 # Number of applications analyzed + components_count: int = 0 # Number of components analyzed total_files: int = 0 # Total artifact files in this version # File inventory @@ -106,8 +107,7 @@ def to_dict(self) -> dict: "schema_version": self.schema_version, "source_repo": self.source_repo, "source_commit": self.source_commit, - "libraries_count": self.libraries_count, - "applications_count": self.applications_count, + "components_count": self.components_count, "total_files": self.total_files, "files": [f.to_dict() for f in self.files], "metadata": self.metadata, @@ -127,8 +127,7 @@ def from_dict(cls, data: dict) -> "ArtifactManifest": schema_version=data.get("schema_version", MANIFEST_SCHEMA_VERSION), source_repo=data.get("source_repo", ""), source_commit=data.get("source_commit", ""), - libraries_count=data.get("libraries_count", 0), - applications_count=data.get("applications_count", 0), + components_count=data.get("components_count", 0), total_files=data.get("total_files", 0), files=[ArtifactFile.from_dict(f) for f in data.get("files", [])], metadata=data.get("metadata", {}), diff --git a/agent/utils/dependency_graph.py b/agent/utils/dependency_graph.py index 24c9a88..0fd31ab 100644 --- a/agent/utils/dependency_graph.py +++ b/agent/utils/dependency_graph.py @@ -1,96 +1,62 @@ """Dependency graph builder utility.""" -from typing import List, Tuple, Dict +from typing import List, Dict from pathlib import Path -from agent.schemas.core import Library, Application, ExternalDependency +from agent.schemas.core import Component, ExternalDependency from agent.schemas.dependency_graph import DependencyGraph class DependencyGraphBuilder: - """Builds library dependency graphs from service definitions.""" + """Builds component dependency graphs from discovered components.""" - def __init__(self, libraries: List[Library]): - """Initialize with list of libraries.""" - self.services = {lib.name: lib for lib in libraries} + def __init__(self, components: List[Component]): + """Initialize with list of components.""" + self.components = {comp.name: comp for comp in components} self.graph = DependencyGraph() self._build_graph() def _build_graph(self): - """Build the library dependency graph.""" - # Add all library nodes - for lib_name in self.services: - self.graph.add_node(lib_name) + """Build the component dependency graph.""" + for name in self.components: + self.graph.add_node(name) - # Add edges for internal library dependencies - for lib_name, lib in self.services.items(): - for dep in lib.internal_dependencies: - # Only add edge if dependency is an internal library - if dep in self.services: - self.graph.add_edge(lib_name, dep) + for name, comp in self.components.items(): + for dep in comp.internal_dependencies: + if dep in self.components: + self.graph.add_edge(name, dep) def build(self) -> DependencyGraph: """Return the built dependency graph.""" return self.graph - def get_analysis_order(self) -> Tuple[List[str], List[str]]: - """ - Get two-phase analysis order. - - Returns: - Tuple of (phase1_libraries, phase2_libraries_ordered) - """ - return self.graph.get_analysis_order() + def get_depth_order(self) -> List[List[str]]: + """Get depth-ordered analysis buckets.""" + return self.graph.get_depth_order() def save_graph_visualization(self, output_path: Path): - """Save markdown visualization of the library graph.""" - phase1, phase2 = self.get_analysis_order() + """Save markdown visualization of the component graph.""" + depth_order = self.get_depth_order() with open(output_path, "w") as f: - f.write("# Library Dependency Graph\n\n") - f.write("## Phase 1: Foundation Libraries (No Dependencies)\n\n") - if phase1: - for lib_name in phase1: - lib = self.services[lib_name] - f.write(f"### `{lib_name}`\n\n") - f.write(f"- **Type**: {lib.type}\n") - f.write(f"- **Path**: `{lib.root_path}`\n") - if lib.description: - f.write(f"- **Description**: {lib.description}\n") - f.write(f"- **Dependencies**: None\n") - if lib.external_dependencies: - ext_parts = [] - for d in lib.external_dependencies[:5]: - if isinstance(d, ExternalDependency): - label = f"`{d.name}`" - if d.version: - label += f" ({d.version})" - else: - label = f"`{d}`" - ext_parts.append(label) - ext = ", ".join(ext_parts) - if len(lib.external_dependencies) > 5: - ext += f" (+{len(lib.external_dependencies) - 5} more)" - f.write(f"- **External Dependencies**: {ext}\n") - f.write("\n") - else: - f.write("*(None)*\n\n") - - f.write("## Phase 2: Dependent Libraries (Topological Order)\n\n") - if phase2: - for lib_name in phase2: - lib = self.services[lib_name] - deps = self.graph.get_direct_dependencies(lib_name) - f.write(f"### `{lib_name}`\n\n") - f.write(f"- **Type**: {lib.type}\n") - f.write(f"- **Path**: `{lib.root_path}`\n") - if lib.description: - f.write(f"- **Description**: {lib.description}\n") + f.write("# Component Dependency Graph\n\n") + for depth, level in enumerate(depth_order): + f.write(f"## Depth {depth}\n\n") + for comp_name in level: + comp = self.components[comp_name] + deps = self.graph.get_direct_dependencies(comp_name) + f.write(f"### `{comp_name}` ({comp.kind.value})\n\n") + f.write(f"- **Type**: {comp.type}\n") + f.write(f"- **Path**: `{comp.root_path}`\n") + if comp.description: + f.write(f"- **Description**: {comp.description}\n") if deps: dep_list = ", ".join(f"`{d}`" for d in deps) f.write(f"- **Depends On**: {dep_list}\n") - if lib.external_dependencies: + else: + f.write(f"- **Dependencies**: None\n") + if comp.external_dependencies: ext_parts = [] - for d in lib.external_dependencies[:5]: + for d in comp.external_dependencies[:5]: if isinstance(d, ExternalDependency): label = f"`{d.name}`" if d.version: @@ -99,9 +65,7 @@ def save_graph_visualization(self, output_path: Path): label = f"`{d}`" ext_parts.append(label) ext = ", ".join(ext_parts) - if len(lib.external_dependencies) > 5: - ext += f" (+{len(lib.external_dependencies) - 5} more)" + if len(comp.external_dependencies) > 5: + ext += f" (+{len(comp.external_dependencies) - 5} more)" f.write(f"- **External Dependencies**: {ext}\n") f.write("\n") - else: - f.write("*(None)*\n\n") diff --git a/scripts/build_dependency_graph.py b/scripts/build_dependency_graph.py index 0e58d6d..3e7e147 100755 --- a/scripts/build_dependency_graph.py +++ b/scripts/build_dependency_graph.py @@ -1,12 +1,12 @@ #!/usr/bin/env python3 """ -Build dependency graphs using existing graph utilities. +Build dependency graph from discovered components. This script: -1. Reads component discovery data (libraries.json and applications.json) -2. Builds library and application graphs using existing DependencyGraphBuilder -3. Performs topological sort for library analysis order -4. Outputs library_graph.json and application_graph.json +1. Reads component discovery data (components.json) +2. Builds a component dependency graph using DependencyGraphBuilder +3. Computes depth-ordered analysis buckets +4. Outputs graph.json and markdown visualization Usage: python scripts/build_dependency_graph.py @@ -21,249 +21,76 @@ # Add parent directory to path for imports sys.path.insert(0, str(Path(__file__).parent.parent)) -from agent.schemas.core import Library, Application, ExternalDependency +from agent.schemas.core import Component, ExternalDependency from agent.utils.dependency_graph import DependencyGraphBuilder -def load_components(discovery_dir: Path) -> tuple[list[Library], list[Application]]: - """ - Load components from discovery directory. +def load_components(discovery_dir: Path) -> list[Component]: + """Load components from discovery directory.""" + components_file = discovery_dir / "components.json" + if not components_file.exists(): + raise FileNotFoundError(f"components.json not found in {discovery_dir}") - Reads libraries.json and applications.json from the discovery directory. + with open(components_file) as f: + data = json.load(f) - Returns: - Tuple of (libraries, applications) as typed objects - """ - libraries = [] - applications = [] - - # Load libraries - libraries_file = discovery_dir / "libraries.json" - if libraries_file.exists(): - with open(libraries_file) as f: - data = json.load(f) - for lib in data.get("libraries", []): - libraries.append( - Library( - name=lib["name"], - type=lib.get("type", "unknown"), - root_path=Path(lib.get("root_path", lib["name"])), - description=lib.get("description", ""), - manifest_path=Path(lib["manifest_path"]) - if lib.get("manifest_path") - else None, - internal_dependencies=lib.get("internal_dependencies", []), - external_dependencies=[ - ExternalDependency.from_dict(d) - for d in lib.get("external_dependencies", []) - ], - key_files=[Path(f) for f in lib.get("key_files", [])], - ) - ) - - # Load applications - applications_file = discovery_dir / "applications.json" - if applications_file.exists(): - with open(applications_file) as f: - data = json.load(f) - for app in data.get("applications", []): - applications.append( - Application( - name=app["name"], - type=app.get("type", "unknown"), - root_path=Path(app.get("root_path", app["name"])), - description=app.get("description", ""), - manifest_path=Path(app["manifest_path"]) - if app.get("manifest_path") - else None, - external_dependencies=[ - ExternalDependency.from_dict(d) - for d in app.get("external_dependencies", []) - ], - libraries_used=app.get("libraries_used", []), - internal_applications=app.get("internal_applications", []), - key_files=[Path(f) for f in app.get("key_files", [])], - ) - ) - - # Fallback: try loading from a single components.json for backward compatibility - if not libraries and not applications: - components_file = discovery_dir / "components.json" - if components_file.exists(): - with open(components_file) as f: - data = json.load(f) - for lib in data.get("libraries", []): - libraries.append( - Library( - name=lib["name"], - type=lib.get("type", "unknown"), - root_path=Path(lib.get("root_path", lib["name"])), - description=lib.get("description", ""), - manifest_path=Path(lib["manifest_path"]) - if lib.get("manifest_path") - else None, - internal_dependencies=lib.get("internal_dependencies", []), - external_dependencies=[ - ExternalDependency.from_dict(d) - for d in lib.get("external_dependencies", []) - ], - key_files=[Path(f) for f in lib.get("key_files", [])], - ) - ) - for app in data.get("applications", []): - applications.append( - Application( - name=app["name"], - type=app.get("type", "unknown"), - root_path=Path(app.get("root_path", app["name"])), - description=app.get("description", ""), - manifest_path=Path(app["manifest_path"]) - if app.get("manifest_path") - else None, - external_dependencies=[ - ExternalDependency.from_dict(d) - for d in app.get("external_dependencies", []) - ], - libraries_used=app.get("libraries_used", []), - internal_applications=app.get("internal_applications", []), - key_files=[Path(f) for f in app.get("key_files", [])], - ) - ) - - return libraries, applications + if isinstance(data, dict): + comp_list = data.get("components", []) + elif isinstance(data, list): + comp_list = data + else: + raise ValueError(f"Unknown components.json format in {discovery_dir}") + return [Component.from_dict(c) for c in comp_list] -def build_library_graph_json(builder: DependencyGraphBuilder) -> dict: - """ - Build library_graph.json output structure. - Matches the data model specified in lead_agent.txt: - { - "graph_type": "library_dependencies", - "nodes": [{ - "id": "library-name", - "type": "rust-crate" | "nodejs-package" | "python-package", - "classification": "library", - "external_dependencies": [{"name": "...", "version": "...", "category": "...", "purpose": "..."}], - "phase": 1 | 2 - }], - "edges": [{"from": "library-a", "to": "library-b"}], - "analysis_order": { - "phase1": [...], // Libraries with no dependencies - "phase2": [...] // Libraries in topological order - } - } - """ +def build_graph_json(builder: DependencyGraphBuilder) -> dict: + """Build graph.json output structure.""" graph = builder.graph - phase1, phase2 = builder.get_analysis_order() + depth_order = builder.get_depth_order() + + # Build depth map for each component + depth_map: dict[str, int] = {} + for depth, level in enumerate(depth_order): + for name in level: + depth_map[name] = depth - # Build nodes with phase assignments nodes = [] - for lib_name in graph.nodes: - service = builder.services[lib_name] + for comp_name in graph.nodes: + comp = builder.components[comp_name] node = { - "id": lib_name, - "type": service.type, - "classification": "library", - "phase": 1 if lib_name in phase1 else 2, + "id": comp_name, + "kind": comp.kind.value, + "type": comp.type, + "depth": depth_map.get(comp_name, 0), } - if service.external_dependencies: + if comp.external_dependencies: node["external_dependencies"] = [ d.to_dict() if isinstance(d, ExternalDependency) else d - for d in service.external_dependencies + for d in comp.external_dependencies ] nodes.append(node) - # Build edges (from -> to where 'from' depends on 'to') edges = [] for from_node, to_nodes in graph.edges.items(): for to_node in to_nodes: - edges.append( - { - "from": from_node, - "to": to_node, - } - ) + edges.append({"from": from_node, "to": to_node}) return { - "graph_type": "library_dependencies", + "graph_type": "component_dependencies", "nodes": nodes, "edges": edges, - "analysis_order": { - "phase1": phase1, - "phase2": phase2, - }, - } - - -def build_application_graph_json( - applications: list[Application], libraries: list[Library] -) -> dict: - """ - Build initial application_graph.json (nodes only, edges added during analysis). - - Matches the data model specified in lead_agent.txt: - { - "graph_type": "application_interactions", - "nodes": [{ - "id": "application-name", - "type": "rust-crate" | "nodejs-package" | "python-package", - "classification": "application", - "libraries_used": ["internal library names"], - "external_dependencies": [{"name": "...", "version": "...", "category": "...", "purpose": "..."}], - "internal_applications": ["other apps this one calls"], - "key_files": ["important source files"] - }], - "edges": [ - { - "from": "calling-application", - "to": "callee-application", - "communication_protocol": ["HTTP", "HTTPS"], - "description": "Few sentence summary of interaction" - } - ] - } - - Note: Edges are populated during application analysis when code-analyzer - subagents discover application-to-application interactions. See ApplicationEdge - schema in agent/schemas/dependency_graph.py for edge structure. - """ - library_names = {lib.name for lib in libraries} - - nodes = [] - for app in applications: - # Filter libraries_used to only include actual internal libraries - libraries_used = [dep for dep in app.libraries_used if dep in library_names] - - nodes.append( - { - "id": app.name, - "type": app.type, - "classification": "application", - "libraries_used": libraries_used, - "external_dependencies": [ - d.to_dict() if isinstance(d, ExternalDependency) else d - for d in app.external_dependencies - ], - "internal_applications": app.internal_applications, - "key_files": [str(f) for f in app.key_files], - } - ) - - return { - "graph_type": "application_interactions", - "nodes": nodes, - "edges": [], # Edges discovered and added during application analysis + "depth_order": depth_order, } def main(): parser = argparse.ArgumentParser( - description="Build dependency graphs and compute analysis order" + description="Build dependency graph and compute analysis order" ) parser.add_argument( "discovery_dir", - help="Path to service_discovery directory (containing libraries.json and applications.json)", + help="Path to service_discovery directory (containing components.json)", ) parser.add_argument( "output_dir", @@ -281,102 +108,33 @@ def main(): # Load components print(f"Loading components from {discovery_dir}...") - libraries, applications = load_components(discovery_dir) - - print(f"Found {len(libraries)} libraries and {len(applications)} applications") + components = load_components(discovery_dir) + print(f"Found {len(components)} components") - # Build library dependency graph - print("\nBuilding library dependency graph...") - lib_builder = DependencyGraphBuilder(libraries) - lib_graph = lib_builder.build() - phase1, phase2 = lib_builder.get_analysis_order() + # Build dependency graph + print("\nBuilding component dependency graph...") + builder = DependencyGraphBuilder(components) + depth_order = builder.get_depth_order() - print(f" Phase 1: {len(phase1)} libraries (no dependencies)") - print(f" Phase 2: {len(phase2)} libraries (topological order)") + for i, level in enumerate(depth_order): + print(f" Depth {i}: {len(level)} components") - # Build JSON outputs - library_graph_json = build_library_graph_json(lib_builder) - application_graph_json = build_application_graph_json(applications, libraries) + # Build JSON output + graph_json = build_graph_json(builder) # Create output directory output_dir.mkdir(parents=True, exist_ok=True) - # Write library graph - lib_graph_path = output_dir / "library_graph.json" - with open(lib_graph_path, "w") as f: - json.dump(library_graph_json, f, indent=2) - print(f"\n✓ Wrote {lib_graph_path}") - - # Write application graph - app_graph_path = output_dir / "application_graph.json" - with open(app_graph_path, "w") as f: - json.dump(application_graph_json, f, indent=2) - print(f"✓ Wrote {app_graph_path}") + # Write graph + graph_path = output_dir / "graph.json" + with open(graph_path, "w") as f: + json.dump(graph_json, f, indent=2) + print(f"\nWrote {graph_path}") - # Write markdown visualizations using existing method - lib_md_path = output_dir / "library_graph.md" - lib_builder.save_graph_visualization(lib_md_path) - print(f"✓ Wrote {lib_md_path}") - - # Simple application graph markdown - app_md_path = output_dir / "application_graph.md" - with open(app_md_path, "w") as f: - f.write("# Application Interaction Graph\n\n") - f.write("## Applications\n\n") - for node in application_graph_json["nodes"]: - f.write(f"### `{node['id']}`\n\n") - f.write(f"- **Type**: {node['type']}\n") - if node["libraries_used"]: - libs = ", ".join(f"`{lib}`" for lib in node["libraries_used"]) - f.write(f"- **Uses Libraries**: {libs}\n") - if node["external_dependencies"]: - ext_deps_display = [] - for dep in node["external_dependencies"][:8]: - if isinstance(dep, dict): - name = dep.get("name", str(dep)) - version = dep.get("version", "") - ext_deps_display.append( - f"`{name}`" + (f" ({version})" if version else "") - ) - else: - ext_deps_display.append(f"`{dep}`") - ext_str = ", ".join(ext_deps_display) - remaining = len(node["external_dependencies"]) - 8 - if remaining > 0: - ext_str += f" (+{remaining} more)" - f.write(f"- **External Dependencies**: {ext_str}\n") - if node.get("internal_applications"): - apps = ", ".join(f"`{a}`" for a in node["internal_applications"]) - f.write(f"- **Interacts With**: {apps}\n") - if node.get("key_files"): - files = ", ".join(f"`{kf}`" for kf in node["key_files"][:3]) - if len(node["key_files"]) > 3: - files += f" (+{len(node['key_files']) - 3} more)" - f.write(f"- **Key Files**: {files}\n") - f.write("\n") - f.write("## Interactions\n\n") - if application_graph_json["edges"]: - for edge in application_graph_json["edges"]: - protocols = ( - ", ".join(edge["communication_protocol"]) - if edge.get("communication_protocol") - else "Unknown" - ) - f.write(f"- **`{edge['from']}`** → **`{edge['to']}`**\n") - f.write(f" - **Protocol**: {protocols}\n") - if edge.get("description"): - f.write(f" - **Description**: {edge['description']}\n") - f.write("\n") - else: - f.write("*(Edges will be populated during application analysis)*\n") - print(f"✓ Wrote {app_md_path}") - - print("\nAnalysis order:") - print(f" Phase 1 (parallel): {', '.join(phase1) if phase1 else 'none'}") - if phase2: - print(f" Phase 2 (sequential): {', '.join(phase2)}") - else: - print(" Phase 2: none") + # Write markdown visualization + md_path = output_dir / "graph.md" + builder.save_graph_visualization(md_path) + print(f"Wrote {md_path}") if __name__ == "__main__": diff --git a/scripts/build_knowledge_graph.py b/scripts/build_knowledge_graph.py index 5283354..09290c5 100644 --- a/scripts/build_knowledge_graph.py +++ b/scripts/build_knowledge_graph.py @@ -28,12 +28,7 @@ def load_components(discovery_dir: Path) -> list[Component]: - """Load components from discovery directory. - - Supports both new unified format and legacy format for backward compat. - """ - components: list[Component] = [] - + """Load components from discovery directory.""" components_file = discovery_dir / "components.json" if not components_file.exists(): raise FileNotFoundError(f"components.json not found in {discovery_dir}") @@ -41,22 +36,14 @@ def load_components(discovery_dir: Path) -> list[Component]: with open(components_file) as f: data = json.load(f) - # Check format: new unified format has "components" key with kinds as subkeys - if "components" in data and isinstance(data["components"], dict): - # New unified format - for kind_str, kind_components in data["components"].items(): - for comp_data in kind_components: - components.append(Component.from_dict(comp_data)) - elif "libraries" in data or "applications" in data: - # Legacy format - for lib_data in data.get("libraries", []): - components.append(Component.from_dict(lib_data)) - for app_data in data.get("applications", []): - components.append(Component.from_dict(app_data)) + if isinstance(data, dict): + comp_list = data.get("components", []) + elif isinstance(data, list): + comp_list = data else: raise ValueError(f"Unknown components.json format in {discovery_dir}") - return components + return [Component.from_dict(c) for c in comp_list] def main(): diff --git a/tests/discovery/test_dependency_graph.py b/tests/discovery/test_dependency_graph.py index cb53500..508cff4 100644 --- a/tests/discovery/test_dependency_graph.py +++ b/tests/discovery/test_dependency_graph.py @@ -134,25 +134,6 @@ def test_multiple_sccs(self): assert set(levels[1]) == {"c", "d"} -class TestLegacyInterface: - def test_get_analysis_order(self): - g = DependencyGraph() - for n in ["a", "b", "c"]: - g.add_node(n) - g.add_edge("b", "a") - g.add_edge("c", "b") - - phase1, phase2 = g.get_analysis_order() - assert phase1 == ["a"] - assert phase2 == ["b", "c"] - - def test_get_analysis_order_empty(self): - g = DependencyGraph() - phase1, phase2 = g.get_analysis_order() - assert phase1 == [] - assert phase2 == [] - - class TestGraphOperations: def test_get_direct_dependencies(self): g = DependencyGraph() diff --git a/tests/discovery/test_engine.py b/tests/discovery/test_engine.py index 866b61c..9400bdd 100644 --- a/tests/discovery/test_engine.py +++ b/tests/discovery/test_engine.py @@ -5,7 +5,11 @@ from pathlib import Path from agent.discovery.engine import discover_components, _detect_repo_shape -from agent.discovery.validator import validate_discovery, validate_graph, validate_analysis +from agent.discovery.validator import ( + validate_discovery, + validate_graph, + validate_analysis, +) from agent.schemas.core import Component, ComponentKind @@ -42,10 +46,15 @@ def test_single_go_module(self, repo): def test_polyglot_repo(self, repo): repo.write("go.mod", "module github.com/org/backend\n\ngo 1.21\n") repo.write("server.go", "package main\n\nfunc main() {}\n") - repo.write("web/package.json", json.dumps({ - "name": "frontend", - "dependencies": {"react": "^18"}, - })) + repo.write( + "web/package.json", + json.dumps( + { + "name": "frontend", + "dependencies": {"react": "^18"}, + } + ), + ) comps = discover_components(repo.root) types = {c.type for c in comps} @@ -73,14 +82,12 @@ def test_writes_output_files(self, repo): discover_components(repo.root, output_dir=output_dir) assert (output_dir / "components.json").exists() - assert (output_dir / "libraries.json").exists() - assert (output_dir / "applications.json").exists() with open(output_dir / "components.json") as f: data = json.load(f) - assert "libraries" in data - assert "applications" in data + assert "components" in data assert "metadata" in data + assert isinstance(data["components"], list) def test_output_metadata(self, repo): repo.write("pyproject.toml", '[project]\nname = "mylib"\n') @@ -100,20 +107,33 @@ def test_empty(self): assert _detect_repo_shape([]) == "empty" def test_single_package(self): - comp = Component(name="a", kind=ComponentKind.LIBRARY, type="go-module", root_path=".") + comp = Component( + name="a", kind=ComponentKind.LIBRARY, type="go-module", root_path="." + ) assert _detect_repo_shape([comp]) == "single-package" def test_monorepo(self): comps = [ - Component(name="a", kind=ComponentKind.LIBRARY, type="go-module", root_path="a"), - Component(name="b", kind=ComponentKind.SERVICE, type="go-module", root_path="b"), + Component( + name="a", kind=ComponentKind.LIBRARY, type="go-module", root_path="a" + ), + Component( + name="b", kind=ComponentKind.SERVICE, type="go-module", root_path="b" + ), ] assert _detect_repo_shape(comps) == "monorepo" def test_polyglot(self): comps = [ - Component(name="a", kind=ComponentKind.LIBRARY, type="go-module", root_path="a"), - Component(name="b", kind=ComponentKind.FRONTEND, type="typescript-package", root_path="web"), + Component( + name="a", kind=ComponentKind.LIBRARY, type="go-module", root_path="a" + ), + Component( + name="b", + kind=ComponentKind.FRONTEND, + type="typescript-package", + root_path="web", + ), ] assert _detect_repo_shape(comps) == "polyglot-monorepo" @@ -121,15 +141,21 @@ def test_polyglot(self): class TestValidateDiscovery: def test_valid(self, repo): comps = [ - Component(name="a", kind=ComponentKind.LIBRARY, type="go-module", root_path="."), + Component( + name="a", kind=ComponentKind.LIBRARY, type="go-module", root_path="." + ), ] errors = validate_discovery(comps, repo.root) assert errors == [] def test_duplicate_names(self, repo): comps = [ - Component(name="a", kind=ComponentKind.LIBRARY, type="go-module", root_path="a"), - Component(name="a", kind=ComponentKind.LIBRARY, type="go-module", root_path="b"), + Component( + name="a", kind=ComponentKind.LIBRARY, type="go-module", root_path="a" + ), + Component( + name="a", kind=ComponentKind.LIBRARY, type="go-module", root_path="b" + ), ] (repo.root / "a").mkdir() (repo.root / "b").mkdir() @@ -138,7 +164,12 @@ def test_duplicate_names(self, repo): def test_missing_root_path(self, repo): comps = [ - Component(name="a", kind=ComponentKind.LIBRARY, type="go-module", root_path="nonexistent"), + Component( + name="a", + kind=ComponentKind.LIBRARY, + type="go-module", + root_path="nonexistent", + ), ] errors = validate_discovery(comps, repo.root) assert any("does not exist" in e for e in errors) @@ -146,8 +177,11 @@ def test_missing_root_path(self, repo): def test_self_dependency(self, repo): comps = [ Component( - name="a", kind=ComponentKind.LIBRARY, type="go-module", - root_path=".", internal_dependencies=["a"], + name="a", + kind=ComponentKind.LIBRARY, + type="go-module", + root_path=".", + internal_dependencies=["a"], ), ] errors = validate_discovery(comps, repo.root) @@ -156,8 +190,11 @@ def test_self_dependency(self, repo): def test_unresolved_dependency(self, repo): comps = [ Component( - name="a", kind=ComponentKind.LIBRARY, type="go-module", - root_path=".", internal_dependencies=["nonexistent"], + name="a", + kind=ComponentKind.LIBRARY, + type="go-module", + root_path=".", + internal_dependencies=["nonexistent"], ), ] errors = validate_discovery(comps, repo.root) @@ -169,8 +206,11 @@ def test_valid_depth_order(self): comps = [ Component(name="a", kind=ComponentKind.LIBRARY, type="t", root_path="a"), Component( - name="b", kind=ComponentKind.LIBRARY, type="t", - root_path="b", internal_dependencies=["a"], + name="b", + kind=ComponentKind.LIBRARY, + type="t", + root_path="b", + internal_dependencies=["a"], ), ] depth_order = [["a"], ["b"]] @@ -181,8 +221,11 @@ def test_topological_violation(self): comps = [ Component(name="a", kind=ComponentKind.LIBRARY, type="t", root_path="a"), Component( - name="b", kind=ComponentKind.LIBRARY, type="t", - root_path="b", internal_dependencies=["a"], + name="b", + kind=ComponentKind.LIBRARY, + type="t", + root_path="b", + internal_dependencies=["a"], ), ] # b depends on a, but both at depth 0 → violation diff --git a/tests/test_cli_diff.py b/tests/test_cli_diff.py index 1c2e789..e099d52 100644 --- a/tests/test_cli_diff.py +++ b/tests/test_cli_diff.py @@ -35,54 +35,56 @@ def test_missing_manifest(self, tmp_path): assert load_manifest(tmp_path) is None def test_valid_manifest(self, repo): - repo.write_json("manifest.json", { - "service_name": "myapp", - "artifact_version": 3, - "source_commit": "abc123", - }) + repo.write_json( + "manifest.json", + { + "service_name": "myapp", + "artifact_version": 3, + "source_commit": "abc123", + }, + ) result = load_manifest(repo.root) assert result["service_name"] == "myapp" assert result["artifact_version"] == 3 assert result["source_commit"] == "abc123" def test_manifest_without_source_commit(self, repo): - repo.write_json("manifest.json", { - "service_name": "myapp", - "artifact_version": 1, - }) + repo.write_json( + "manifest.json", + { + "service_name": "myapp", + "artifact_version": 1, + }, + ) result = load_manifest(repo.root) assert result is not None assert result.get("source_commit", "") == "" class TestLoadComponents: - def test_combined_components_json(self, repo): - repo.write_json("service_discovery/components.json", { - "libraries": [ - {"name": "core", "root_path": "core"}, - {"name": "utils", "root_path": "utils"}, - ], - "applications": [ - {"name": "server", "root_path": "cmd/server"}, - ], - }) + def test_components_json(self, repo): + repo.write_json( + "service_discovery/components.json", + { + "components": [ + {"name": "core", "kind": "library", "root_path": "core"}, + {"name": "utils", "kind": "library", "root_path": "utils"}, + {"name": "server", "kind": "service", "root_path": "cmd/server"}, + ], + }, + ) comps = load_components(repo.root) assert len(comps) == 3 names = {c["name"] for c in comps} assert names == {"core", "utils", "server"} - def test_separate_files(self, repo): - repo.write_json("service_discovery/libraries.json", { - "libraries": [{"name": "core", "root_path": "core"}], - }) - repo.write_json("service_discovery/applications.json", { - "applications": [{"name": "server", "root_path": "cmd/server"}], - }) - comps = load_components(repo.root) - assert len(comps) == 2 - - def test_empty_directory(self, repo): - (repo.root / "service_discovery").mkdir(parents=True) + def test_empty_components(self, repo): + repo.write_json( + "service_discovery/components.json", + { + "components": [], + }, + ) comps = load_components(repo.root) assert comps == [] @@ -91,9 +93,12 @@ def test_no_service_discovery_dir(self, tmp_path): assert comps == [] def test_array_format_fallback(self, repo): - repo.write_json("service_discovery/libraries.json", [ - {"name": "core", "root_path": "core"}, - ]) + repo.write_json( + "service_discovery/components.json", + [ + {"name": "core", "root_path": "core"}, + ], + ) comps = load_components(repo.root) assert len(comps) == 1 @@ -181,13 +186,15 @@ def test_no_components(self, repo): @patch("agent.cli.git_diff_files") def test_incremental_with_changes(self, mock_diff, repo): mock_diff.return_value = ["core/types.go", "api/server.go"] - repo.write_json("service_discovery/components.json", { - "libraries": [ - {"name": "core", "root_path": "core"}, - {"name": "api", "root_path": "api"}, - ], - "applications": [], - }) + repo.write_json( + "service_discovery/components.json", + { + "components": [ + {"name": "core", "kind": "library", "root_path": "core"}, + {"name": "api", "kind": "service", "root_path": "api"}, + ], + }, + ) result = compute_diff_context(repo.root, repo.root, "old_sha", "new_sha") assert result["mode"] == "incremental" @@ -198,10 +205,14 @@ def test_incremental_with_changes(self, mock_diff, repo): @patch("agent.cli.git_diff_files") def test_incremental_with_unmapped(self, mock_diff, repo): mock_diff.return_value = ["core/types.go", "newpkg/foo.go"] - repo.write_json("service_discovery/components.json", { - "libraries": [{"name": "core", "root_path": "core"}], - "applications": [], - }) + repo.write_json( + "service_discovery/components.json", + { + "components": [ + {"name": "core", "kind": "library", "root_path": "core"}, + ], + }, + ) result = compute_diff_context(repo.root, repo.root, "old", "new") assert result["mode"] == "incremental" @@ -211,10 +222,14 @@ def test_incremental_with_unmapped(self, mock_diff, repo): @patch("agent.cli.git_diff_files") def test_no_changes(self, mock_diff, repo): mock_diff.return_value = [] - repo.write_json("service_discovery/components.json", { - "libraries": [{"name": "core", "root_path": "core"}], - "applications": [], - }) + repo.write_json( + "service_discovery/components.json", + { + "components": [ + {"name": "core", "kind": "library", "root_path": "core"}, + ], + }, + ) result = compute_diff_context(repo.root, repo.root, "old", "new") # Empty diff → full analysis (safety fallback) diff --git a/tests/test_component_schema.py b/tests/test_component_schema.py index 3da30cb..8563609 100644 --- a/tests/test_component_schema.py +++ b/tests/test_component_schema.py @@ -1,4 +1,4 @@ -"""Tests for Component schema, backward compatibility, and ComponentKind.""" +"""Tests for Component schema and ComponentKind.""" import pytest @@ -8,8 +8,6 @@ LanguageType, ExternalDependency, CodeCitation, - Library, - Application, component_from_dict, ) @@ -34,13 +32,10 @@ def test_from_str_case_insensitive(self): assert ComponentKind.from_str("LIBRARY") == ComponentKind.LIBRARY assert ComponentKind.from_str("Service") == ComponentKind.SERVICE - def test_from_str_application_migration(self): - """Old 'application' classification maps to SERVICE.""" - assert ComponentKind.from_str("application") == ComponentKind.SERVICE - def test_from_str_unknown_fallback(self): assert ComponentKind.from_str("garbage") == ComponentKind.UNKNOWN assert ComponentKind.from_str("") == ComponentKind.UNKNOWN + assert ComponentKind.from_str("application") == ComponentKind.UNKNOWN class TestComponent: @@ -77,34 +72,20 @@ def test_to_dict_includes_kind(self): d = c.to_dict() assert d["kind"] == "service" - def test_to_dict_legacy_classification(self): - """to_dict includes backward-compat 'classification' field.""" + def test_to_dict_no_classification(self): + """to_dict should not include legacy 'classification' field.""" lib = Component(name="a", kind=ComponentKind.LIBRARY, type="t", root_path=".") - assert lib.to_dict()["classification"] == "library" + assert "classification" not in lib.to_dict() svc = Component(name="b", kind=ComponentKind.SERVICE, type="t", root_path=".") - assert svc.to_dict()["classification"] == "application" - - cli = Component(name="c", kind=ComponentKind.CLI, type="t", root_path=".") - assert cli.to_dict()["classification"] == "application" + assert "classification" not in svc.to_dict() def test_from_dict_with_kind(self): d = {"name": "a", "kind": "cli", "type": "t", "root_path": "."} c = Component.from_dict(d) assert c.kind == ComponentKind.CLI - def test_from_dict_with_classification(self): - """Old format with 'classification' instead of 'kind'.""" - d = {"name": "a", "classification": "library", "type": "t", "root_path": "."} - c = Component.from_dict(d) - assert c.kind == ComponentKind.LIBRARY - - def test_from_dict_application_to_service(self): - d = {"name": "a", "classification": "application", "type": "t", "root_path": "."} - c = Component.from_dict(d) - assert c.kind == ComponentKind.SERVICE - - def test_from_dict_no_kind_or_classification(self): + def test_from_dict_no_kind(self): d = {"name": "a", "type": "t", "root_path": "."} c = Component.from_dict(d) assert c.kind == ComponentKind.UNKNOWN @@ -120,7 +101,9 @@ def test_is_executable(self): svc = Component(name="b", kind=ComponentKind.SERVICE, type="t", root_path=".") cli = Component(name="c", kind=ComponentKind.CLI, type="t", root_path=".") fe = Component(name="d", kind=ComponentKind.FRONTEND, type="t", root_path=".") - contract = Component(name="e", kind=ComponentKind.CONTRACT, type="t", root_path=".") + contract = Component( + name="e", kind=ComponentKind.CONTRACT, type="t", root_path="." + ) assert lib.is_executable is False assert svc.is_executable is True @@ -134,82 +117,6 @@ def test_optional_fields_omitted(self): assert "manifest_path" not in d # Empty string omitted assert "metadata" not in d # Empty dict omitted assert "citations" not in d # Empty list omitted - assert "libraries_used" not in d - assert "internal_applications" not in d - - -class TestBackwardCompatLibrary: - def test_library_roundtrip(self): - from pathlib import Path - lib = Library( - name="core", - type="go-module", - root_path=Path("core"), - internal_dependencies=["common"], - external_dependencies=[ExternalDependency(name="serde")], - ) - d = lib.to_dict() - lib2 = Library.from_dict(d) - assert lib2.name == "core" - assert lib2.internal_dependencies == ["common"] - - def test_library_to_component(self): - from pathlib import Path - lib = Library( - name="core", - type="go-module", - root_path=Path("core"), - internal_dependencies=["common"], - ) - c = lib.to_component() - assert isinstance(c, Component) - assert c.kind == ComponentKind.LIBRARY - assert c.name == "core" - assert c.internal_dependencies == ["common"] - - def test_library_dict_has_kind(self): - from pathlib import Path - lib = Library(name="a", type="t", root_path=Path(".")) - d = lib.to_dict() - assert d["kind"] == "library" - assert d["classification"] == "library" - - -class TestBackwardCompatApplication: - def test_application_roundtrip(self): - from pathlib import Path - app = Application( - name="server", - type="go-module", - root_path=Path("cmd/server"), - libraries_used=["core", "common"], - internal_applications=["worker"], - ) - d = app.to_dict() - app2 = Application.from_dict(d) - assert app2.name == "server" - assert app2.libraries_used == ["core", "common"] - assert app2.internal_applications == ["worker"] - - def test_application_to_component(self): - from pathlib import Path - app = Application( - name="server", - type="go-module", - root_path=Path("cmd/server"), - libraries_used=["core"], - ) - c = app.to_component() - assert isinstance(c, Component) - assert c.kind == ComponentKind.SERVICE - assert c.libraries_used == ["core"] - - def test_application_dict_has_kind(self): - from pathlib import Path - app = Application(name="a", type="t", root_path=Path(".")) - d = app.to_dict() - assert d["kind"] == "service" - assert d["classification"] == "application" class TestComponentFromDict: @@ -227,12 +134,14 @@ def test_from_string(self): assert d.version == "" def test_from_dict_full(self): - d = ExternalDependency.from_dict({ - "name": "tokio", - "version": "1.35", - "category": "async-runtime", - "purpose": "Async runtime", - }) + d = ExternalDependency.from_dict( + { + "name": "tokio", + "version": "1.35", + "category": "async-runtime", + "purpose": "Async runtime", + } + ) assert d.name == "tokio" assert d.version == "1.35" assert d.category == "async-runtime" diff --git a/tests/test_manifest.py b/tests/test_manifest.py index 033ebf5..e202c43 100644 --- a/tests/test_manifest.py +++ b/tests/test_manifest.py @@ -4,7 +4,11 @@ import pytest from pathlib import Path -from agent.schemas.manifest import ArtifactManifest, ArtifactFile, MANIFEST_SCHEMA_VERSION +from agent.schemas.manifest import ( + ArtifactManifest, + ArtifactFile, + MANIFEST_SCHEMA_VERSION, +) class TestArtifactFile: @@ -48,8 +52,7 @@ def test_roundtrip(self): model="claude-sonnet-4-20250514", source_repo="github.com/Layr-Labs/eigenda", source_commit="abc123", - libraries_count=12, - applications_count=7, + components_count=19, total_files=25, files=[ ArtifactFile(path="core.md", size_bytes=100, sha256="aaa"), @@ -61,8 +64,7 @@ def test_roundtrip(self): assert m2.service_name == "eigenda" assert m2.artifact_version == 3 assert m2.source_commit == "abc123" - assert m2.libraries_count == 12 - assert m2.applications_count == 7 + assert m2.components_count == 19 assert len(m2.files) == 1 assert m2.files[0].path == "core.md" assert m2.metadata == {"custom": "value"} @@ -157,10 +159,12 @@ def test_scan_directory(self, tmp_path): def test_from_dict_defaults(self): """Minimal dict should deserialize with sensible defaults.""" - m = ArtifactManifest.from_dict({ - "service_name": "test", - "artifact_version": 1, - }) + m = ArtifactManifest.from_dict( + { + "service_name": "test", + "artifact_version": 1, + } + ) assert m.generated_at == "" assert m.model == "" assert m.files == [] diff --git a/tests/test_prompt.py b/tests/test_prompt.py index 326e94a..d09f679 100644 --- a/tests/test_prompt.py +++ b/tests/test_prompt.py @@ -25,19 +25,27 @@ def work_dir(tmp_path, monkeypatch): class TestFullAnalysis: def test_basic_prompt(self): prompt = build_analysis_prompt( - Path("/repo/myapp"), "myapp", {"mode": "full"}, + Path("/repo/myapp"), + "myapp", + {"mode": "full"}, ) assert "Analyze the codebase at /repo/myapp" in prompt def test_includes_source_commit(self): prompt = build_analysis_prompt( - Path("/repo/myapp"), "myapp", {"mode": "full"}, head_sha="abc123", + Path("/repo/myapp"), + "myapp", + {"mode": "full"}, + head_sha="abc123", ) assert "SOURCE_COMMIT: abc123" in prompt def test_no_source_commit_when_empty(self): prompt = build_analysis_prompt( - Path("/repo/myapp"), "myapp", {"mode": "full"}, head_sha="", + Path("/repo/myapp"), + "myapp", + {"mode": "full"}, + head_sha="", ) assert "SOURCE_COMMIT" not in prompt @@ -51,13 +59,16 @@ def test_discovery_complete_when_files_exist(self, tmp_path): try: prompt = build_analysis_prompt( - Path("/repo"), svc, {"mode": "full"}, + Path("/repo"), + svc, + {"mode": "full"}, ) assert "DISCOVERY_COMPLETE" in prompt - assert "SKIP Phase 0.2" in prompt + assert "SKIP discovery phases" in prompt finally: # Clean up import shutil + shutil.rmtree(work, ignore_errors=True) def test_discovery_complete_with_graph(self, tmp_path): @@ -67,20 +78,25 @@ def test_discovery_complete_with_graph(self, tmp_path): (work / "service_discovery").mkdir(exist_ok=True) (work / "service_discovery" / "components.json").write_text("{}") (work / "dependency_graphs").mkdir(exist_ok=True) - (work / "dependency_graphs" / "library_graph.json").write_text("{}") + (work / "dependency_graphs" / "graph.json").write_text("{}") try: prompt = build_analysis_prompt( - Path("/repo"), svc, {"mode": "full"}, + Path("/repo"), + svc, + {"mode": "full"}, ) - assert "library_graph.json" in prompt + assert "graph.json" in prompt finally: import shutil + shutil.rmtree(work, ignore_errors=True) def test_no_discovery_complete_without_files(self): prompt = build_analysis_prompt( - Path("/repo"), "nonexistent_svc_12345", {"mode": "full"}, + Path("/repo"), + "nonexistent_svc_12345", + {"mode": "full"}, ) assert "DISCOVERY_COMPLETE" not in prompt @@ -90,13 +106,14 @@ def test_changed_components_section(self, tmp_path): artifacts = tmp_path / "artifacts" (artifacts / "service_discovery").mkdir(parents=True) (artifacts / "service_discovery" / "components.json").write_text( - json.dumps({ - "libraries": [ - {"name": "core", "root_path": "core"}, - {"name": "api", "root_path": "api"}, - ], - "applications": [], - }) + json.dumps( + { + "components": [ + {"name": "core", "kind": "library", "root_path": "core"}, + {"name": "api", "kind": "service", "root_path": "api"}, + ], + } + ) ) diff_context = { @@ -107,7 +124,10 @@ def test_changed_components_section(self, tmp_path): } prompt = build_analysis_prompt( - Path("/repo"), "myapp", diff_context, artifacts_dir=artifacts, + Path("/repo"), + "myapp", + diff_context, + artifacts_dir=artifacts, ) assert "CHANGED_COMPONENTS:" in prompt assert "core" in prompt @@ -117,7 +137,7 @@ def test_unmapped_files_section(self, tmp_path): artifacts = tmp_path / "artifacts" (artifacts / "service_discovery").mkdir(parents=True) (artifacts / "service_discovery" / "components.json").write_text( - json.dumps({"libraries": [], "applications": []}) + json.dumps({"components": []}) ) diff_context = { @@ -128,7 +148,10 @@ def test_unmapped_files_section(self, tmp_path): } prompt = build_analysis_prompt( - Path("/repo"), "myapp", diff_context, artifacts_dir=artifacts, + Path("/repo"), + "myapp", + diff_context, + artifacts_dir=artifacts, ) assert "NEW_FILES_OUTSIDE_KNOWN_COMPONENTS:" in prompt assert "newpkg/foo.go" in prompt @@ -137,10 +160,13 @@ def test_existing_artifacts_reference(self, tmp_path): artifacts = tmp_path / "artifacts" (artifacts / "service_discovery").mkdir(parents=True) (artifacts / "service_discovery" / "components.json").write_text( - json.dumps({ - "libraries": [{"name": "core", "root_path": "core"}], - "applications": [], - }) + json.dumps( + { + "components": [ + {"name": "core", "kind": "library", "root_path": "core"}, + ], + } + ) ) diff_context = { @@ -151,14 +177,19 @@ def test_existing_artifacts_reference(self, tmp_path): } prompt = build_analysis_prompt( - Path("/repo"), "myapp", diff_context, artifacts_dir=artifacts, + Path("/repo"), + "myapp", + diff_context, + artifacts_dir=artifacts, ) assert "EXISTING_ARTIFACTS:" in prompt assert str(artifacts) in prompt def test_full_mode_no_changed_components(self): prompt = build_analysis_prompt( - Path("/repo"), "myapp", {"mode": "full"}, + Path("/repo"), + "myapp", + {"mode": "full"}, ) assert "CHANGED_COMPONENTS" not in prompt assert "EXISTING_ARTIFACTS" not in prompt