Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
36 changes: 19 additions & 17 deletions src/semble/index/file_walker.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,24 +61,18 @@ def walk_files(root: Path, extensions: Sequence[str], ignore: Sequence[str] | No
:yield: Path to each file under root matching the criteria.
:ytype: Path
"""
# This should be a list. Traversal is done in order, so the order matters.
ignored = []
extensions_as_patterns = [f"!*{ext}" for ext in extensions]
ignored.extend(extensions_as_patterns)
ignored.extend(sorted(_DEFAULT_IGNORED_DIRS))
# Always give user patterns preference
ignored.extend(ignore or [])
base_spec = GitIgnoreSpec.from_lines(ignored, backend="simple")
extensions_set = frozenset(extensions)
dir_patterns = list(sorted(_DEFAULT_IGNORED_DIRS)) + list(ignore or [])
base_spec = GitIgnoreSpec.from_lines(dir_patterns, backend="simple")
s = IgnoreSpec(base=root, spec=base_spec)
yield from _walk(root, [s])
yield from _walk(root, [s], extensions_set)


def _is_ignored(path: Path, specs: list[IgnoreSpec]) -> bool:
def _is_ignored(path: Path, specs: list[IgnoreSpec]) -> tuple[bool, bool]:
"""Check if a path is ignored by any of the provided ignore specs."""
is_dir = path.is_dir()
# Everything starts off as unignored
ignored = not is_dir

ignored = False
found = False
for ignore_spec in specs:
try:
# If there is no relative path, this is invalid.
Expand All @@ -100,13 +94,20 @@ def _is_ignored(path: Path, specs: list[IgnoreSpec]) -> bool:

if pattern.match_file(relative_str) is not None:
ignored = pattern.include
# Bypass extension filter only for negation patterns with a file
# extension suffix (e.g. !special.kjs, !*.py). Patterns without
# a suffix (e.g. !vendor/, !.github/*) target directories or
# broad globs and should not bypass extension filtering.
pat = pattern.pattern or ""
found = not ignored and bool(Path(pat.rstrip("/")).suffix)

return ignored
return ignored, found


def _walk(
directory: Path,
inherited_specs: list[IgnoreSpec],
extensions: frozenset[str],
) -> Iterator[Path]:
"""Recursive function for walking files under a directory."""
spec = _load_ignore_for_dir(directory)
Expand All @@ -120,10 +121,11 @@ def _walk(
# Don't follow symlinks
if item.is_symlink():
continue
if _is_ignored(item, inherited_specs):
is_ignored, found = _is_ignored(item, inherited_specs)
if is_ignored:
continue

if item.is_dir():
yield from _walk(item, inherited_specs)
elif item.is_file():
yield from _walk(item, inherited_specs, extensions)
elif item.is_file() and (found or item.suffix.lower() in extensions):
yield item
2 changes: 1 addition & 1 deletion src/semble/version.py
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
__version_triple__ = (0, 1, 7)
__version_triple__ = (0, 1, 8)
__version__ = ".".join(map(str, __version_triple__))
41 changes: 32 additions & 9 deletions tests/test_file_walker.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,27 @@ def _touch(path: Path, content: str = "x = 1\n") -> None:
"out/*\n!out/deep/keep.py\n",
set(),
),
# Explicit file negation bypasses extension filter: !special.kjs is yielded even if .kjs is not in extensions.
(
["special.kjs", "other.kjs", "main.py"],
None,
"*.kjs\n!special.kjs\n",
{"main.py", "special.kjs"},
),
# Glob negation without suffix does NOT bypass extension filter.
(
[".github/workflows/ci.yaml", "src/main.py"],
None,
"!.github/*\n",
{"src/main.py"},
),
# Directory negation does NOT bypass extension filter: files inside vendor/ still need a matching extension.
(
["vendor/special.kjs", "vendor/main.py"],
None,
"*\n!vendor/\n",
{"vendor/main.py"},
),
],
)
def test_walk_files_filtering(
Expand Down Expand Up @@ -85,9 +106,9 @@ def test_walk_files_prunes_ignored_dirs(tmp_path: Path) -> None:
def test_is_ignored_skips_spec_with_unrelated_base(tmp_path: Path) -> None:
"""An IgnoreSpec whose base is not an ancestor of the path is silently skipped.

Files default to ignored in _is_ignored. When the first spec has an
unrelated base, the ValueError is caught and the spec is skipped without
crashing. A second spec with the correct base can still un-ignore the file.
When the first spec has an unrelated base, the ValueError is caught and the
spec is skipped without crashing. A second spec with the correct base can
still ignore the file.
"""
from pathspec import GitIgnoreSpec

Expand All @@ -108,18 +129,20 @@ def test_is_ignored_skips_spec_with_unrelated_base(tmp_path: Path) -> None:
spec=GitIgnoreSpec.from_lines(["*.py"]),
)

# With only the unrelated spec the file stays in its default state (ignored)
# With only the unrelated spec the file is not ignored (spec is skipped),
# and, crucially, no exception is raised.
assert _is_ignored(target_file, [unrelated_spec]) is True
ignored, _ = _is_ignored(target_file, [unrelated_spec])
assert ignored is False

# Spec rooted at project_a that un-ignores .py files
# Spec rooted at project_a that ignores .py files
matching_spec = IgnoreSpec(
base=project_a,
spec=GitIgnoreSpec.from_lines(["!*.py"]),
spec=GitIgnoreSpec.from_lines(["*.py"]),
)

# The unrelated spec is safely skipped; the matching spec un-ignores the file.
assert _is_ignored(target_file, [unrelated_spec, matching_spec]) is False
# The unrelated spec is safely skipped; the matching spec ignores the file.
ignored, _ = _is_ignored(target_file, [unrelated_spec, matching_spec])
assert ignored is True


def test_walk_files_skips_symlinks(tmp_path: Path) -> None:
Expand Down
Loading