Skip to content

Commit

Permalink
refactor; roughly sort regex by simplicity (#301)
Browse files Browse the repository at this point in the history
Make sure that many of these can be short-circuited so that they do not
invoke an actual wildcard search at all.
  • Loading branch information
Byron committed Apr 9, 2022
1 parent 4178a63 commit a7c3a63
Show file tree
Hide file tree
Showing 3 changed files with 172 additions and 171 deletions.
172 changes: 86 additions & 86 deletions git-glob/tests/fixtures/make_baseline.sh
Original file line number Diff line number Diff line change
Expand Up @@ -12,9 +12,35 @@ while read -r pattern nomatch; do
echo "$pattern" > .gitignore
git check-ignore -vn "$nomatch" 2>&1 || :
done <<EOF >>git-baseline.nmatch
*hello.txt hello.txt-and-then-some
*hello.txt goodbye.txt
*some/path/to/hello.txt some/path/to/hello.txt-and-then-some
*some/path/to/hello.txt some/other/path/to/hello.txt
*some/path/to/hello.txt a/bigger/some/path/to/hello.txt
abc?def abc/def
a*b*c abcd
abc*abc*abc abcabcabcabcabcabcabca
a[0-9]b a_b
a[!0-9]b a0b
a[!0-9]b a9b
[!-] -
a[^0-9]b a0b
a[^0-9]b a9b
[^-] -
{a,b} a
{a,b} b
{[}],foo} }
{foo} foo
{*.foo,*.bar,*.wat} test.foo
{*.foo,*.bar,*.wat} test.bar
{*.foo,*.bar,*.wat} test.wat
abc*def abc/def
\[a-z] \a
\? \a
\* \\
aBcDeFg abcdefg
aBcDeFg ABCDEFG
aBcDeFg AbCdEfG
some/**/needle.txt some/other/notthis.txt
some/**/**/needle.txt some/other/notthis.txt
/**/test one/notthis
Expand All @@ -23,45 +49,19 @@ some/**/**/needle.txt some/other/notthis.txt
**/.* abc/ab.c
.*/** a.bc
.*/** abc/a.bc
a[0-9]b a_b
a[!0-9]b a0b
a[!0-9]b a9b
[!-] -
*hello.txt hello.txt-and-then-some
*hello.txt goodbye.txt
*some/path/to/hello.txt some/path/to/hello.txt-and-then-some
*some/path/to/hello.txt some/other/path/to/hello.txt
./foo foo
**/foo foofoo
**/foo/bar foofoo/bar
/*.c mozilla-sha1/sha1.c
**/m4/ltoptions.m4 csharp/src/packages/repositories.config
a[^0-9]b a0b
a[^0-9]b a9b
[^-] -
some/*/needle.txt some/needle.txt
some/*/needle.txt some/one/two/needle.txt
some/*/needle.txt some/one/two/three/needle.txt
.*/** .abc
foo/** foo
*some/path/to/hello.txt a/bigger/some/path/to/hello.txt
{a,b} a
{a,b} b
{**/src/**,foo} abc/src/bar
{**/src/**,foo} foo
{[}],foo} }
{foo} foo
{*.foo,*.bar,*.wat} test.foo
{*.foo,*.bar,*.wat} test.bar
{*.foo,*.bar,*.wat} test.wat
abc*def abc/def
abc[/]def abc/def
\\[a-z] \\a
\\? \\a
\\* \\\\
aBcDeFg abcdefg
aBcDeFg ABCDEFG
aBcDeFg AbCdEfG
{**/src/**,foo} abc/src/bar
{**/src/**,foo} foo
abc[/]def abc/def
EOF

while read -r pattern match; do
Expand All @@ -70,67 +70,67 @@ while read -r pattern match; do
git check-ignore -vn "$match" 2>&1 || :
done <<EOF >>git-baseline.match
*.c mozilla-sha1/sha1.c
*.rs .rs
*hello.txt hello.txt
*hello.txt gareth_says_hello.txt
*hello.txt some/path/to/hello.txt
*hello.txt some\path\to\hello.txt
*hello.txt an/absolute/path/to/hello.txt
*some/path/to/hello.txt some/path/to/hello.txt
a foo/a
a a
a*b a_b
a*b*c abc
a*b*c a_b_c
a*b*c a___b___c
abc*abc*abc abcabcabcabcabcabcabc
a*a*a*a*a*a*a*a*a aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
a*b[xyz]c*d abxcdbxcddd
*.rs .rs
☃ ☃
some/**/needle.txt some/needle.txt
some/**/needle.txt some/one/needle.txt
some/**/needle.txt some/one/two/needle.txt
some/**/needle.txt some/other/needle.txt
** abcde
a*b a_b
a*b*c abc
a*b*c a_b_c
a*b*c a___b___c
abc*abc*abc abcabcabcabcabcabcabc
a*a*a*a*a*a*a*a*a aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
a*b[xyz]c*d abxcdbxcddd
☃ ☃
** abcde
** .asdf
** x/.asdf
some/**/**/needle.txt some/needle.txt
some/**/**/needle.txt some/one/needle.txt
some/**/**/needle.txt some/one/two/needle.txt
some/**/**/needle.txt some/other/needle.txt
**/test one/two/test
**/test one/test
**/test test
/**/test one/two/test
/**/test one/test
/**/test test
**/.* .abc
**/.* abc/.abc
**/foo/bar foo/bar
.*/** .abc/abc
test/** test/
test/** test/one
test/** test/one/two
some/*/needle.txt some/one/needle.txt
a[0-9]b a0b
a[0-9]b a9b
a[!0-9]b a_b
[a-z123] 1
[1a-z23] 1
[123a-z] 1
[abc-] -
[-abc] -
[-a-c] b
[a-c-] b
[-] -
a[^0-9]b a_b
*hello.txt hello.txt
*hello.txt gareth_says_hello.txt
*hello.txt some/path/to/hello.txt
*hello.txt some\\path\\to\\hello.txt
*hello.txt an/absolute/path/to/hello.txt
*some/path/to/hello.txt some/path/to/hello.txt
_[[]_[]]_[?]_[*]_!_ _[_]_?_*_!_
a,b a,b
abc/def abc/def
\\[ [
\\? ?
\\* *
aBcDeFg aBcDeFg
a[0-9]b a0b
a[0-9]b a9b
a[!0-9]b a_b
[a-z123] 1
[1a-z23] 1
[123a-z] 1
[abc-] -
[-abc] -
[-a-c] b
[a-c-] b
[-] -
a[^0-9]b a_b
_[[]_[]]_[?]_[*]_!_ _[_]_?_*_!_
a,b a,b
\[ [
\? ?
\* *
aBcDeFg aBcDeFg
some/**/needle.txt some/needle.txt
some/**/needle.txt some/one/needle.txt
some/**/needle.txt some/one/two/needle.txt
some/**/needle.txt some/other/needle.txt
some/**/**/needle.txt some/needle.txt
some/**/**/needle.txt some/one/needle.txt
some/**/**/needle.txt some/one/two/needle.txt
some/**/**/needle.txt some/other/needle.txt
**/test one/two/test
**/test one/test
**/test test
/**/test one/two/test
/**/test one/test
/**/test test
**/.* .abc
**/.* abc/.abc
**/foo/bar foo/bar
.*/** .abc/abc
test/** test/
test/** test/one
test/** test/one/two
some/*/needle.txt some/one/needle.txt
abc/def abc/def
EOF

git config core.ignorecase true
Expand Down
86 changes: 1 addition & 85 deletions git-glob/tests/glob.rs
Original file line number Diff line number Diff line change
@@ -1,86 +1,2 @@
mod matching;
mod parse;
// Inline test module comparing `git_glob::Pattern` against a baseline produced by git
// via the `make_baseline.sh` fixture script.
mod matching {
use bstr::{BStr, ByteSlice};
use std::collections::BTreeSet;

/// One baseline entry: a glob `pattern`, the `value` it was tested against, and git's verdict.
///
/// Note that the derived ordering compares fields in declaration order.
#[derive(Debug, Ord, PartialOrd, Eq, PartialEq, Copy, Clone)]
pub struct GitMatch<'a> {
// The glob pattern, i.e. everything before the first space of an entry line.
pattern: &'a BStr,
// The value the pattern was tested against, with leading whitespace removed.
value: &'a BStr,
/// True if git could match `value` with `pattern`
is_match: bool,
}

/// Iterator over the entries of a baseline file; each entry spans two input lines.
pub struct Baseline<'a> {
// Line-by-line view of the raw baseline bytes.
inner: bstr::Lines<'a>,
}

impl<'a> Iterator for Baseline<'a> {
type Item = GitMatch<'a>;

// Yields the next entry, or `None` once the input runs out of lines.
// Panics if an entry line does not contain a space separating pattern and value.
fn next(&mut self) -> Option<Self::Item> {
// First line of an entry: `<pattern> <value>`, split at the first space only.
let mut tokens = self.inner.next()?.splitn(2, |b| *b == b' ');
let pattern = tokens.next().expect("pattern").as_bstr();
let value = tokens.next().expect("value").as_bstr().trim_start().as_bstr();

// Second line of an entry: the recorded git output for that pair.
let git_match = self.inner.next()?;
// A line starting with `::<TAB>` is taken to mean that git found no match.
let is_match = !git_match.starts_with(b"::\t");
Some(GitMatch {
pattern,
value,
is_match,
})
}
}

impl<'a> Baseline<'a> {
// Wraps the raw bytes of a baseline file for iteration.
fn new(input: &'a [u8]) -> Self {
Baseline {
inner: input.as_bstr().lines(),
}
}
}

// Verifies that our pattern matching agrees with git for every baseline entry.
#[test]
#[ignore]
fn compare_baseline_with_ours() {
let dir = git_testtools::scripted_fixture_repo_read_only("make_baseline.sh").unwrap();
// Entries git reported as matching must match with our implementation as well.
{
let input = std::fs::read(dir.join("git-baseline.match")).unwrap();
// Detects the same entry being listed more than once.
let mut seen = BTreeSet::default();
for git_match in Baseline::new(&input) {
assert!(seen.insert(git_match), "duplicate match entry: {:?}", git_match);
assert!(
git_match.is_match,
"baseline for matches must indeed be matches - check baseline and git version: {:?}",
git_match
);
let pattern = git_glob::Pattern::from_bytes(git_match.pattern).expect("parsing works");
assert!(pattern.matches(git_match.value))
}
}

// Entries git reported as not matching must not match with our implementation either.
{
let input = std::fs::read(dir.join("git-baseline.nmatch")).unwrap();
// Detects the same entry being listed more than once.
let mut seen = BTreeSet::default();
for git_match in Baseline::new(&input) {
assert!(seen.insert(git_match), "duplicate match entry: {:?}", git_match);
assert!(
!git_match.is_match,
"baseline for no-matches must indeed not be matches - check baseline and git version: {:?}",
git_match
);
let pattern = git_glob::Pattern::from_bytes(git_match.pattern).expect("parsing works");
assert!(!pattern.matches(git_match.value))
}
}
}

// Placeholder: empty and ignored, no case-insensitivity checks are implemented here yet.
#[test]
#[ignore]
fn check_case_insensitive() {}

// Placeholder: empty and ignored, no negated-pattern checks are implemented here yet.
#[test]
#[ignore]
fn negated_patterns() {}
}
85 changes: 85 additions & 0 deletions git-glob/tests/matching/mod.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,85 @@
use bstr::{BStr, ByteSlice};
use std::collections::BTreeSet;

/// One entry of the baseline: a glob `pattern`, the `value` it was tested against,
/// and the verdict git reached for that pair.
///
/// The derived ordering compares fields in declaration order, so keep the order stable.
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)]
pub struct GitMatch<'a> {
    /// The glob pattern as recorded in the baseline.
    pattern: &'a BStr,
    /// The value the pattern was tested against.
    value: &'a BStr,
    /// True if git could match `value` with `pattern`
    is_match: bool,
}

/// Iterator over the entries of a baseline file, yielding one [`GitMatch`] per entry.
pub struct Baseline<'a> {
// Line-by-line view of the raw baseline bytes; two lines are consumed per yielded entry.
inner: bstr::Lines<'a>,
}

impl<'a> Iterator for Baseline<'a> {
    type Item = GitMatch<'a>;

    /// Reads the next two-line entry from the baseline.
    ///
    /// The first line holds `<pattern> <value>` separated by the first space, the second
    /// line holds the recorded git output. Returns `None` once either line is missing.
    ///
    /// # Panics
    ///
    /// If the first line of an entry contains no space, as there is no value then.
    fn next(&mut self) -> Option<Self::Item> {
        let entry_line = self.inner.next()?;
        // Split once only: everything after the first space belongs to the value.
        let mut parts = entry_line.splitn(2, |byte| *byte == b' ');
        let pattern = parts.next().expect("pattern").as_bstr();
        let value = parts.next().expect("value").as_bstr().trim_start().as_bstr();

        let verdict_line = self.inner.next()?;
        Some(GitMatch {
            pattern,
            value,
            // A verdict line starting with `::<TAB>` is taken to mean "no match".
            is_match: !verdict_line.starts_with(b"::\t"),
        })
    }
}

impl<'a> Baseline<'a> {
    /// Wraps the raw bytes of a baseline file so they can be iterated entry by entry.
    fn new(input: &'a [u8]) -> Self {
        let inner = input.as_bstr().lines();
        Self { inner }
    }
}

/// Checks that `git_glob::Pattern` reaches the same verdict as git for every entry of the
/// baseline files found in the directory produced by the `make_baseline.sh` fixture.
///
/// `git-baseline.match` must only contain entries git matched, `git-baseline.nmatch` only
/// entries git did not match. Each entry is additionally required to be unique per file.
#[test]
#[ignore]
fn compare_baseline_with_ours() {
    let dir = git_testtools::scripted_fixture_repo_read_only("make_baseline.sh").unwrap();
    {
        let input = std::fs::read(dir.join("git-baseline.match")).unwrap();
        // Detects the same entry being listed more than once in the fixture.
        let mut seen = BTreeSet::default();

        for git_match in Baseline::new(&input) {
            assert!(seen.insert(git_match), "duplicate match entry: {:?}", git_match);
            assert!(
                git_match.is_match,
                "baseline for matches must indeed be matches - check baseline and git version: {:?}",
                git_match
            );
            let pattern = git_glob::Pattern::from_bytes(git_match.pattern).expect("parsing works");
            // Name the offending entry, otherwise a failure does not reveal which pair diverged.
            assert!(
                pattern.matches(git_match.value),
                "our implementation must match where git matches: {:?}",
                git_match
            );
        }
    }

    {
        let input = std::fs::read(dir.join("git-baseline.nmatch")).unwrap();
        // Detects the same entry being listed more than once in the fixture.
        let mut seen = BTreeSet::default();

        for git_match in Baseline::new(&input) {
            assert!(seen.insert(git_match), "duplicate match entry: {:?}", git_match);
            assert!(
                !git_match.is_match,
                "baseline for no-matches must indeed not be matches - check baseline and git version: {:?}",
                git_match
            );
            let pattern = git_glob::Pattern::from_bytes(git_match.pattern).expect("parsing works");
            // Name the offending entry, otherwise a failure does not reveal which pair diverged.
            assert!(
                !pattern.matches(git_match.value),
                "our implementation must not match where git does not match: {:?}",
                git_match
            );
        }
    }
}

// Placeholder: empty and ignored, no case-insensitivity checks are implemented here yet.
#[test]
#[ignore]
fn check_case_insensitive() {}

// Placeholder: empty and ignored, no negated-pattern checks are implemented here yet.
#[test]
#[ignore]
fn negated_patterns() {}

0 comments on commit a7c3a63

Please sign in to comment.