diff --git a/Cargo.lock b/Cargo.lock
index 626d0c0..dfb2d87 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -163,6 +163,7 @@ version = "0.0.0"
 dependencies = [
  "assert_cmd",
  "clap",
+ "globset",
  "predicates",
  "serde",
  "serde_json",
@@ -254,6 +255,19 @@ dependencies = [
  "wasip3",
 ]
 
+[[package]]
+name = "globset"
+version = "0.4.18"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "52dfc19153a48bde0cbd630453615c8151bce3a5adfac7a0aebfbf0a1e1f57e3"
+dependencies = [
+ "aho-corasick",
+ "bstr",
+ "log",
+ "regex-automata",
+ "regex-syntax",
+]
+
 [[package]]
 name = "hashbrown"
 version = "0.15.5"
diff --git a/Cargo.toml b/Cargo.toml
index 23ae411..25cbd0c 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -12,6 +12,7 @@ tracing-subscriber = { version = "0.3", default-features = false, features = ["f
 tracing-appender = "0.2"
 serde_yaml = "0.9"
 walkdir = "2"
+globset = "0.4"
 
 [dev-dependencies]
 tempfile = "3"
diff --git a/src/parser/ignore.rs b/src/parser/ignore.rs
new file mode 100644
index 0000000..f3d011b
--- /dev/null
+++ b/src/parser/ignore.rs
@@ -0,0 +1,132 @@
+//! Ignore rules for the indexer.
+//!
+//! Patterns are globs, one per line. They come from a `.cmindexignore` file
+//! when one exists and from a built-in default list otherwise.
+
+use globset::{Glob, GlobSet, GlobSetBuilder};
+use std::fmt;
+use std::io;
+use std::path::Path;
+
+/// Patterns used when no ignore file is present.
+const DEFAULT_PATTERNS: &[&str] = &[
+    "node_modules/**",
+    "target/**",
+    "dist/**",
+    ".git/**",
+    ".commandindex/**",
+    "*.min.js",
+    "*.lock",
+];
+
+/// Errors raised while loading an ignore file.
+#[derive(Debug)]
+pub enum IgnoreError {
+    /// The ignore file could not be read.
+    Io(io::Error),
+}
+
+impl fmt::Display for IgnoreError {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        match self {
+            IgnoreError::Io(e) => write!(f, "IO error: {e}"),
+        }
+    }
+}
+
+impl std::error::Error for IgnoreError {
+    fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
+        match self {
+            IgnoreError::Io(e) => Some(e),
+        }
+    }
+}
+
+impl From<io::Error> for IgnoreError {
+    fn from(e: io::Error) -> Self {
+        IgnoreError::Io(e)
+    }
+}
+
+/// A compiled set of glob patterns that decides which paths are skipped.
+#[derive(Debug, Clone)]
+pub struct IgnoreFilter {
+    glob_set: GlobSet,
+}
+
+impl Default for IgnoreFilter {
+    /// Builds the filter from the built-in default patterns.
+    fn default() -> Self {
+        // Go through `from_content` so the defaults are compiled exactly like
+        // user-supplied patterns and a bad entry is logged rather than
+        // silently dropped.
+        Self::from_content(&DEFAULT_PATTERNS.join("\n"))
+    }
+}
+
+impl IgnoreFilter {
+    /// Builds a filter from a `.cmindexignore` file.
+    ///
+    /// Falls back to the default rules when the file does not exist.
+    ///
+    /// # Errors
+    ///
+    /// Returns [`IgnoreError::Io`] when the file cannot be read for any
+    /// reason other than not existing.
+    pub fn from_file(path: &Path) -> Result<Self, IgnoreError> {
+        // Read first and inspect the error instead of calling `exists()`:
+        // this avoids a check-then-read race and stops failures such as
+        // "permission denied" from being mistaken for a missing file.
+        match std::fs::read_to_string(path) {
+            Ok(content) => Ok(Self::from_content(&content)),
+            Err(e) if e.kind() == io::ErrorKind::NotFound => Ok(Self::default()),
+            Err(e) => Err(IgnoreError::Io(e)),
+        }
+    }
+
+    /// Builds a filter from pattern text, one glob per line.
+    ///
+    /// Blank lines and lines starting with `#` are skipped. A pattern ending
+    /// in `/` is rewritten to `<pattern>**` so it covers the directory's
+    /// contents. Invalid patterns are logged and skipped.
+    pub fn from_content(content: &str) -> Self {
+        let mut builder = GlobSetBuilder::new();
+
+        for line in content.lines() {
+            let trimmed = line.trim();
+
+            // Skip empty lines and comments
+            if trimmed.is_empty() || trimmed.starts_with('#') {
+                continue;
+            }
+
+            // Normalize directory patterns: "dir/" -> "dir/**"
+            let pattern = if trimmed.ends_with('/') {
+                format!("{trimmed}**")
+            } else {
+                trimmed.to_string()
+            };
+
+            match Glob::new(&pattern) {
+                Ok(glob) => {
+                    builder.add(glob);
+                }
+                Err(e) => {
+                    tracing::warn!("Invalid glob pattern '{}': {}", trimmed, e);
+                }
+            }
+        }
+
+        let glob_set = builder.build().unwrap_or_else(|e| {
+            tracing::warn!("Failed to build glob set: {}", e);
+            GlobSetBuilder::new().build().expect("an empty glob set always builds")
+        });
+
+        Self { glob_set }
+    }
+
+    /// Returns `true` when `path` matches any ignore pattern.
+    pub fn is_ignored(&self, path: &Path) -> bool {
+        self.glob_set.is_match(path)
+    }
+}
diff --git a/src/parser/mod.rs b/src/parser/mod.rs
index b769775..02ed070 100644
--- a/src/parser/mod.rs
+++ b/src/parser/mod.rs
@@ -1,4 +1,5 @@
 pub mod frontmatter;
+pub mod ignore;
 pub mod link;
 pub mod markdown;
 
diff --git a/tests/ignore_filter.rs b/tests/ignore_filter.rs
new file mode 100644
index 0000000..df0985b
--- /dev/null
+++ b/tests/ignore_filter.rs
@@ -0,0 +1,142 @@
+use commandindex::parser::ignore::IgnoreFilter;
+use std::fs;
+use std::path::Path;
+use tempfile::TempDir;
+
+// === Default rules tests ===
+
+#[test]
+fn test_default_ignores_node_modules() {
+    let filter = IgnoreFilter::default();
+    assert!(filter.is_ignored(Path::new("node_modules/foo/bar.js")));
+}
+
+#[test]
+fn test_default_ignores_target() {
+    let filter = IgnoreFilter::default();
+    assert!(filter.is_ignored(Path::new("target/debug/build")));
+}
+
+#[test]
+fn test_default_ignores_git() {
+    let filter = IgnoreFilter::default();
+    assert!(filter.is_ignored(Path::new(".git/objects/abc")));
+}
+
+#[test]
+fn test_default_ignores_commandindex() {
+    let filter = IgnoreFilter::default();
+    assert!(filter.is_ignored(Path::new(".commandindex/tantivy/index")));
+}
+
+#[test]
+fn test_default_ignores_min_js() {
+    let filter = IgnoreFilter::default();
+    assert!(filter.is_ignored(Path::new("vendor/jquery.min.js")));
+}
+
+#[test]
+fn test_default_ignores_lock_files() {
+    let filter = IgnoreFilter::default();
+    assert!(filter.is_ignored(Path::new("Cargo.lock")));
+    assert!(filter.is_ignored(Path::new("yarn.lock")));
+}
+
+#[test]
+fn test_default_allows_normal_files() {
+    let filter = IgnoreFilter::default();
+    assert!(!filter.is_ignored(Path::new("src/main.rs")));
+    assert!(!filter.is_ignored(Path::new("docs/README.md")));
+    assert!(!filter.is_ignored(Path::new("app.js")));
+}
+
+// === Custom rules tests ===
+
+#[test]
+fn test_custom_patterns() {
+    let content = "*.log\nbuild/\nsecrets.json";
+    let filter = IgnoreFilter::from_content(content);
+
+    assert!(filter.is_ignored(Path::new("app.log")));
+    assert!(filter.is_ignored(Path::new("build/output.bin")));
+    assert!(filter.is_ignored(Path::new("secrets.json")));
+    assert!(!filter.is_ignored(Path::new("src/main.rs")));
+}
+
+#[test]
+fn test_comment_lines_ignored() {
+    let content = "# This is a comment\n*.log\n# Another comment";
+    let filter = IgnoreFilter::from_content(content);
+
+    assert!(filter.is_ignored(Path::new("debug.log")));
+    assert!(!filter.is_ignored(Path::new("src/main.rs")));
+}
+
+#[test]
+fn test_empty_lines_ignored() {
+    let content = "\n*.log\n\n*.tmp\n\n";
+    let filter = IgnoreFilter::from_content(content);
+
+    assert!(filter.is_ignored(Path::new("debug.log")));
+    assert!(filter.is_ignored(Path::new("temp.tmp")));
+}
+
+#[test]
+fn test_directory_pattern_with_trailing_slash() {
+    let content = "vendor/";
+    let filter = IgnoreFilter::from_content(content);
+
+    assert!(filter.is_ignored(Path::new("vendor/lib/foo.js")));
+    assert!(!filter.is_ignored(Path::new("src/vendor.rs")));
+}
+
+#[test]
+fn test_invalid_pattern_skipped() {
+    let content = "*.log\n[invalid\n*.tmp";
+    let filter = IgnoreFilter::from_content(content);
+
+    assert!(filter.is_ignored(Path::new("debug.log")));
+    assert!(filter.is_ignored(Path::new("temp.tmp")));
+    // Invalid pattern should be skipped, not cause an error
+}
+
+#[test]
+fn test_empty_content() {
+    let content = "";
+    let filter = IgnoreFilter::from_content(content);
+    assert!(!filter.is_ignored(Path::new("anything.rs")));
+}
+
+// === File-based tests ===
+
+#[test]
+fn test_from_file_exists() {
+    let tmp = TempDir::new().unwrap();
+    let ignore_path = tmp.path().join(".cmindexignore");
+    fs::write(&ignore_path, "*.log\nbuild/").unwrap();
+
+    let filter = IgnoreFilter::from_file(&ignore_path).unwrap();
+    assert!(filter.is_ignored(Path::new("app.log")));
+    assert!(filter.is_ignored(Path::new("build/output.bin")));
+    assert!(!filter.is_ignored(Path::new("src/main.rs")));
+}
+
+#[test]
+fn test_from_file_not_exists_uses_defaults() {
+    let tmp = TempDir::new().unwrap();
+    let ignore_path = tmp.path().join(".cmindexignore");
+    // File does not exist
+
+    let filter = IgnoreFilter::from_file(&ignore_path).unwrap();
+    // Should use default rules
+    assert!(filter.is_ignored(Path::new("node_modules/foo.js")));
+    assert!(filter.is_ignored(Path::new("target/debug/build")));
+    assert!(!filter.is_ignored(Path::new("src/main.rs")));
+}
+
+#[test]
+fn test_only_comments_and_blanks() {
+    let content = "# comment\n\n# another comment\n \n";
+    let filter = IgnoreFilter::from_content(content);
+    assert!(!filter.is_ignored(Path::new("anything.txt")));
+}