Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
38 changes: 29 additions & 9 deletions fastcode/loader.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,10 +10,11 @@
from typing import Dict, List, Optional, Any
import logging
from git import Repo, GitCommandError
from pathspec import PathSpec
from pathspec.patterns import GitWildMatchPattern

from .utils import (
is_supported_file,
should_ignore_path,
get_repo_name_from_url,
normalize_path,
ensure_dir,
Expand Down Expand Up @@ -228,19 +229,39 @@ def scan_files(self) -> List[Dict[str, Any]]:
files = []
total_size = 0
max_file_size_bytes = self.max_file_size_mb * 1024 * 1024

ignore_spec = PathSpec.from_lines(GitWildMatchPattern, effective_ignore)

def is_ignored_repo_relative(rel_path: str, *, is_dir: bool = False) -> bool:
    """Report whether a repo-relative path is matched by the ignore spec.

    Args:
        rel_path: Path relative to the repository root; it is passed
            through ``normalize_path`` before matching.
        is_dir: When true, the path is additionally tested with a
            trailing slash appended.

    Returns:
        True if any tested form of the path matches ``ignore_spec``,
        otherwise False.
    """
    candidate = normalize_path(rel_path)
    # The bare path is always tried first. For directories a second probe
    # with a trailing slash follows, so directory-style patterns such as
    # "output/" or ".venv/" get a candidate they can match.
    probes = (candidate, f"{candidate}/") if is_dir else (candidate,)
    return any(ignore_spec.match_file(probe) for probe in probes)

for root, dirs, filenames in os.walk(self.repo_path):
# Filter out ignored directories
dirs[:] = [d for d in dirs if not should_ignore_path(
os.path.join(root, d), effective_ignore
)]
# Filter ignored directories using repo-relative paths so gitwildmatch
# patterns like "output/" or ".venv/" match consistently.
filtered_dirs = []
for d in dirs:
abs_dir_path = os.path.join(root, d)
rel_dir_path = normalize_path(
os.path.relpath(abs_dir_path, self.repo_path)
)
if is_ignored_repo_relative(rel_dir_path, is_dir=True):
continue
filtered_dirs.append(d)
dirs[:] = filtered_dirs
Comment on lines 245 to +257
Copy link

Copilot AI Feb 27, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

should_ignore_path() recompiles a PathSpec on every call (see fastcode/utils.py). In this loop it’s now invoked multiple times per directory and once per file, which can be a significant CPU cost on large repos. Consider compiling the ignore spec once in scan_files() (e.g., build a PathSpec from effective_ignore) and then calling spec.match_file(...) for both directory and file checks.

Copilot uses AI. Check for mistakes.

for filename in filenames:
file_path = os.path.join(root, filename)
relative_path = os.path.relpath(file_path, self.repo_path)
relative_path = normalize_path(os.path.relpath(file_path, self.repo_path))

# Check if should ignore
if should_ignore_path(relative_path, effective_ignore):
if is_ignored_repo_relative(relative_path):
continue

# Check if supported extension
Expand Down Expand Up @@ -347,4 +368,3 @@ def cleanup(self):
def __del__(self):
    """Run ``cleanup()`` when the object is being finalized."""
    # Finalizer hook: simply delegates to the instance's own cleanup().
    self.cleanup()