From a30a5da60d67a4e52ba69727318edb9832b7cae2 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Sat, 26 Mar 2022 08:32:23 +0800 Subject: [PATCH 01/31] empty git-mailmap crate (#366) --- Cargo.lock | 4 ++++ Cargo.toml | 1 + README.md | 1 + crate-status.md | 5 +++++ git-mailmap/CHANGELOG.md | 10 ++++++++++ git-mailmap/Cargo.toml | 15 +++++++++++++++ git-mailmap/src/lib.rs | 1 + 7 files changed, 37 insertions(+) create mode 100644 git-mailmap/CHANGELOG.md create mode 100644 git-mailmap/Cargo.toml create mode 100644 git-mailmap/src/lib.rs diff --git a/Cargo.lock b/Cargo.lock index 2e0f5c7719c..a44cad98fe8 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1216,6 +1216,10 @@ dependencies = [ "tempfile", ] +[[package]] +name = "git-mailmap" +version = "0.0.0" + [[package]] name = "git-object" version = "0.17.1" diff --git a/Cargo.toml b/Cargo.toml index 58511e809a4..8837929ff9f 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -142,6 +142,7 @@ members = [ "git-worktree", "git-revision", "git-packetline", + "git-mailmap", "git-submodule", "git-transport", "git-protocol", diff --git a/README.md b/README.md index a5d5ec4095f..c70d4bb249e 100644 --- a/README.md +++ b/README.md @@ -108,6 +108,7 @@ Follow linked crate name for detailed status. Please note that all crates follow * [git-attributes](https://github.com/Byron/gitoxide/blob/main/crate-status.md#git-attributes) * [git-quote](https://github.com/Byron/gitoxide/blob/main/crate-status.md#git-quote) * **idea** + * [git-mailmap](https://github.com/Byron/gitoxide/blob/main/crate-status.md#git-mailmap) * [git-pathspec](https://github.com/Byron/gitoxide/blob/main/crate-status.md#git-pathspec) * [git-subomdule](https://github.com/Byron/gitoxide/blob/main/crate-status.md#git-submodule) * [git-tui](https://github.com/Byron/gitoxide/blob/main/crate-status.md#git-tui) diff --git a/crate-status.md b/crate-status.md index 5ad7c1cfd35..2746849a72a 100644 --- a/crate-status.md +++ b/crate-status.md @@ -220,6 +220,11 @@ Check out the [performance discussion][git-traverse-performance] as well. * **ansi-c** * [x] quote * [ ] unquote + * +### git-mailmap + +* [ ] parsing +* [ ] lookup and mapping of author names ### git-pathspec diff --git a/git-mailmap/CHANGELOG.md b/git-mailmap/CHANGELOG.md new file mode 100644 index 00000000000..ea8f0d10c8a --- /dev/null +++ b/git-mailmap/CHANGELOG.md @@ -0,0 +1,10 @@ +# Changelog + +All notable changes to this project will be documented in this file. + +The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), +and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). + +## Unreleased + +An empty crate without any content to reserve the name for the gitoxide project. diff --git a/git-mailmap/Cargo.toml b/git-mailmap/Cargo.toml new file mode 100644 index 00000000000..407d5e4f8c5 --- /dev/null +++ b/git-mailmap/Cargo.toml @@ -0,0 +1,15 @@ +[package] +name = "git-mailmap" +version = "0.0.0" +repository = "https://github.com/Byron/gitoxide" +license = "MIT/Apache-2.0" +description = "A WIP crate of the gitoxide project for parsing mailmap files" +authors = ["Sebastian Thiel "] +edition = "2018" + +[lib] +doctest = false + +# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html + +[dependencies] diff --git a/git-mailmap/src/lib.rs b/git-mailmap/src/lib.rs new file mode 100644 index 00000000000..d7a83e4f525 --- /dev/null +++ b/git-mailmap/src/lib.rs @@ -0,0 +1 @@ +#![forbid(unsafe_code, rust_2018_idioms)] From c43af35c92e5093349cdabd89f655b26070e6f84 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Sat, 26 Mar 2022 08:32:58 +0800 Subject: [PATCH 02/31] Release git-mailmap v0.0.0 --- git-mailmap/CHANGELOG.md | 21 ++++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) diff --git a/git-mailmap/CHANGELOG.md b/git-mailmap/CHANGELOG.md index ea8f0d10c8a..92b8fba9ea4 100644 --- a/git-mailmap/CHANGELOG.md +++ b/git-mailmap/CHANGELOG.md @@ -5,6 +5,25 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). -## Unreleased +## 0.0.0 (2022-03-26) An empty crate without any content to reserve the name for the gitoxide project. + +### Commit Statistics + + + + - 1 commit contributed to the release. + - 0 commits where understood as [conventional](https://www.conventionalcommits.org). + - 1 unique issue was worked on: [#366](https://github.com/Byron/gitoxide/issues/366) + +### Commit Details + + + +
view details + + * **[#366](https://github.com/Byron/gitoxide/issues/366)** + - empty git-mailmap crate ([`a30a5da`](https://github.com/Byron/gitoxide/commit/a30a5da60d67a4e52ba69727318edb9832b7cae2)) +
+ From fc47c497f992e0f3fbef2f55d0e3b0909cac8290 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Sat, 26 Mar 2022 10:46:44 +0800 Subject: [PATCH 03/31] the first empty test (#366) --- git-mailmap/Cargo.toml | 2 ++ git-mailmap/src/lib.rs | 23 +++++++++++++++++++++++ git-mailmap/tests/mailmap.rs | 1 + git-mailmap/tests/parse/mod.rs | 3 +++ 4 files changed, 29 insertions(+) create mode 100644 git-mailmap/tests/mailmap.rs create mode 100644 git-mailmap/tests/parse/mod.rs diff --git a/git-mailmap/Cargo.toml b/git-mailmap/Cargo.toml index 407d5e4f8c5..7ab0d8d2e47 100644 --- a/git-mailmap/Cargo.toml +++ b/git-mailmap/Cargo.toml @@ -13,3 +13,5 @@ doctest = false # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html [dependencies] +bstr = { version = "0.2.13", default-features = false, features = ["std"]} +quick-error = "2.0.0" diff --git a/git-mailmap/src/lib.rs b/git-mailmap/src/lib.rs index d7a83e4f525..824fc7b3e1c 100644 --- a/git-mailmap/src/lib.rs +++ b/git-mailmap/src/lib.rs @@ -1 +1,24 @@ #![forbid(unsafe_code, rust_2018_idioms)] + +pub mod parse { + mod error { + use bstr::BString; + use quick_error::quick_error; + + quick_error! { + #[derive(Debug)] + pub enum Error { + UnconsumedInput { line_number: usize, line: BString } { + display("Line {} has too many names or emails: {}", line_number, line) + } + Malformed { line_number: usize, line: BString } { + display("Line {} is malformed, an email address lacks the closing '>' bracket: {}", line_number, line) + } + } + } + } + pub struct Lines<'a> { + lines: bstr::Lines<'a>, + line_no: usize, + } +} diff --git a/git-mailmap/tests/mailmap.rs b/git-mailmap/tests/mailmap.rs new file mode 100644 index 00000000000..06f1a3c69d4 --- /dev/null +++ b/git-mailmap/tests/mailmap.rs @@ -0,0 +1 @@ +mod parse; diff --git a/git-mailmap/tests/parse/mod.rs b/git-mailmap/tests/parse/mod.rs new file mode 100644 index 00000000000..3b21e98ae91 --- /dev/null +++ b/git-mailmap/tests/parse/mod.rs @@ -0,0 +1,3 @@ +#[test] +#[ignore] +fn empty_lines_and_comments_are_ignored() {} From 3e78ff53125be2a75142534b6fd6f356b6bc8c5f Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Sat, 26 Mar 2022 14:30:26 +0800 Subject: [PATCH 04/31] refactor --- git-attributes/src/parse/ignore.rs | 3 +++ git-attributes/tests/parse/attribute.rs | 7 ++++--- git-attributes/tests/parse/ignore.rs | 7 ++++--- git-mailmap/src/parse.rs | 0 git-mailmap/tests/fixtures/invalid.txt | 0 5 files changed, 11 insertions(+), 6 deletions(-) create mode 100644 git-mailmap/src/parse.rs create mode 100644 git-mailmap/tests/fixtures/invalid.txt diff --git a/git-attributes/src/parse/ignore.rs b/git-attributes/src/parse/ignore.rs index fde352d7342..95c51497f81 100644 --- a/git-attributes/src/parse/ignore.rs +++ b/git-attributes/src/parse/ignore.rs @@ -49,6 +49,9 @@ pub(crate) fn parse_line(mut line: &[u8]) -> Option<(BString, ignore::pattern::M line = &line[1..]; } } + if line.iter().all(|b| b.is_ascii_whitespace()) { + return None; + } let mut line = truncate_non_escaped_trailing_spaces(line); if line.last() == Some(&b'/') { mode |= ignore::pattern::Mode::MUST_BE_DIR; diff --git a/git-attributes/tests/parse/attribute.rs b/git-attributes/tests/parse/attribute.rs index ad1cf66c926..c9097ca152d 100644 --- a/git-attributes/tests/parse/attribute.rs +++ b/git-attributes/tests/parse/attribute.rs @@ -14,9 +14,9 @@ fn byte_order_marks_are_no_patterns() { #[test] fn line_numbers_are_counted_correctly() { - let ignore = std::fs::read(fixture_path("attributes/various.txt")).unwrap(); + let input = std::fs::read(fixture_path("attributes/various.txt")).unwrap(); assert_eq!( - try_lines(&String::from_utf8(ignore).unwrap()).unwrap(), + try_lines(&String::from_utf8(input).unwrap()).unwrap(), vec![ (pattern(r"*.[oa]", Mode::NO_SUB_DIR), vec![set("c")], 2), ( @@ -44,13 +44,14 @@ fn line_endings_can_be_windows_or_unix() { } #[test] -fn comment_lines_are_ignored() { +fn comment_lines_are_ignored_as_well_as_empty_ones() { assert!(git_attributes::parse(b"# hello world").next().is_none()); assert!(git_attributes::parse(b"# \"hello world\"").next().is_none()); assert!( git_attributes::parse(b" \t\r# \"hello world\"").next().is_none(), "also behind leading whitespace" ); + assert!(git_attributes::parse(b"\n\r\n\t\t \n").next().is_none()); } #[test] diff --git a/git-attributes/tests/parse/ignore.rs b/git-attributes/tests/parse/ignore.rs index 7ad310a54e7..07aa56371ac 100644 --- a/git-attributes/tests/parse/ignore.rs +++ b/git-attributes/tests/parse/ignore.rs @@ -11,8 +11,8 @@ fn byte_order_marks_are_no_patterns() { #[test] fn line_numbers_are_counted_correctly() { - let ignore = std::fs::read(fixture_path("ignore/various.txt")).unwrap(); - let actual: Vec<_> = git_attributes::parse::ignore(&ignore).collect(); + let input = std::fs::read(fixture_path("ignore/various.txt")).unwrap(); + let actual: Vec<_> = git_attributes::parse::ignore(&input).collect(); assert_eq!( actual, vec![ @@ -66,8 +66,9 @@ fn mark_ends_with_pattern_specifically() { } #[test] -fn comments_are_ignored() { +fn comments_are_ignored_as_well_as_empty_ones() { assert!(git_attributes::parse::ignore(b"# hello world").next().is_none()); + assert!(git_attributes::parse::ignore(b"\n\r\n\t\t \n").next().is_none()); } #[test] diff --git a/git-mailmap/src/parse.rs b/git-mailmap/src/parse.rs new file mode 100644 index 00000000000..e69de29bb2d diff --git a/git-mailmap/tests/fixtures/invalid.txt b/git-mailmap/tests/fixtures/invalid.txt new file mode 100644 index 00000000000..e69de29bb2d From 903c5263a26d9a57d9fa9dc6649ef4ad0a6e2a94 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Sat, 26 Mar 2022 14:30:43 +0800 Subject: [PATCH 05/31] basic testing of common cases for mailmap (#366) error handling is tested only by integration-level test, and more error handling tests would have to be added. --- Cargo.lock | 6 ++ Makefile | 2 + git-mailmap/Cargo.toml | 8 +++ git-mailmap/src/lib.rs | 79 ++++++++++++++++++++------ git-mailmap/src/parse.rs | 42 ++++++++++++++ git-mailmap/tests/fixtures/invalid.txt | 5 ++ git-mailmap/tests/parse/mod.rs | 54 +++++++++++++++++- 7 files changed, 178 insertions(+), 18 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index a44cad98fe8..8c7402ab148 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1219,6 +1219,12 @@ dependencies = [ [[package]] name = "git-mailmap" version = "0.0.0" +dependencies = [ + "bstr", + "git-testtools", + "quick-error", + "serde", +] [[package]] name = "git-object" diff --git a/Makefile b/Makefile index d004845649e..0efed8f5df0 100644 --- a/Makefile +++ b/Makefile @@ -89,6 +89,8 @@ check: ## Build all code in suitable configurations cd git-object && cargo check --all-features \ && cargo check --features verbose-object-parsing-errors cd git-index && cargo check --features serde1 + cd git-attributes && cargo check --features serde1 + cd git-mailmap && cargo check --features serde1 cd git-worktree && cargo check --features serde1 cd git-actor && cargo check --features serde1 cd git-pack && cargo check --features serde1 \ diff --git a/git-mailmap/Cargo.toml b/git-mailmap/Cargo.toml index 7ab0d8d2e47..aa9e990251f 100644 --- a/git-mailmap/Cargo.toml +++ b/git-mailmap/Cargo.toml @@ -10,8 +10,16 @@ edition = "2018" [lib] doctest = false +[features] +## Data structures implement `serde::Serialize` and `serde::Deserialize`. +serde1 = ["serde", "bstr/serde1"] + # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html [dependencies] bstr = { version = "0.2.13", default-features = false, features = ["std"]} quick-error = "2.0.0" +serde = { version = "1.0.114", optional = true, default-features = false, features = ["derive"]} + +[dev-dependencies] +git-testtools = { path = "../tests/tools"} diff --git a/git-mailmap/src/lib.rs b/git-mailmap/src/lib.rs index 824fc7b3e1c..ce3d9414ac9 100644 --- a/git-mailmap/src/lib.rs +++ b/git-mailmap/src/lib.rs @@ -1,24 +1,69 @@ #![forbid(unsafe_code, rust_2018_idioms)] -pub mod parse { - mod error { - use bstr::BString; - use quick_error::quick_error; +use bstr::BStr; - quick_error! { - #[derive(Debug)] - pub enum Error { - UnconsumedInput { line_number: usize, line: BString } { - display("Line {} has too many names or emails: {}", line_number, line) - } - Malformed { line_number: usize, line: BString } { - display("Line {} is malformed, an email address lacks the closing '>' bracket: {}", line_number, line) - } +pub mod parse; +pub fn parse(buf: &[u8]) -> parse::Lines<'_> { + parse::Lines::new(buf) +} + +#[derive(PartialEq, Eq, Debug, Hash, Ord, PartialOrd, Clone, Default)] +#[cfg_attr(feature = "serde1", derive(serde::Serialize, serde::Deserialize))] +pub struct Entry<'a> { + /// The name to map to. + canonical_name: Option<&'a BStr>, + /// The email map to. + canonical_email: Option<&'a BStr>, + /// The name to look for and replace. + commit_name: Option<&'a BStr>, + /// The email to look for and replace. + commit_email: Option<&'a BStr>, +} + +mod entry { + use crate::Entry; + use bstr::BStr; + + impl<'a> Entry<'a> { + pub fn change_name_by_email(proper_name: impl Into<&'a BStr>, commit_email: impl Into<&'a BStr>) -> Self { + Entry { + canonical_name: Some(proper_name.into()), + commit_email: Some(commit_email.into()), + ..Default::default() + } + } + pub fn change_email_by_email(proper_email: impl Into<&'a BStr>, commit_email: impl Into<&'a BStr>) -> Self { + Entry { + canonical_email: Some(proper_email.into()), + commit_email: Some(commit_email.into()), + ..Default::default() + } + } + pub fn change_name_and_email_by_email( + proper_name: impl Into<&'a BStr>, + proper_email: impl Into<&'a BStr>, + commit_email: impl Into<&'a BStr>, + ) -> Self { + Entry { + canonical_name: Some(proper_name.into()), + canonical_email: Some(proper_email.into()), + commit_email: Some(commit_email.into()), + ..Default::default() + } + } + + pub fn change_name_and_email_by_name_and_email( + proper_name: impl Into<&'a BStr>, + proper_email: impl Into<&'a BStr>, + commit_name: impl Into<&'a BStr>, + commit_email: impl Into<&'a BStr>, + ) -> Self { + Entry { + canonical_name: Some(proper_name.into()), + canonical_email: Some(proper_email.into()), + commit_name: Some(commit_name.into()), + commit_email: Some(commit_email.into()), } } - } - pub struct Lines<'a> { - lines: bstr::Lines<'a>, - line_no: usize, } } diff --git a/git-mailmap/src/parse.rs b/git-mailmap/src/parse.rs index e69de29bb2d..ccdc4eb8818 100644 --- a/git-mailmap/src/parse.rs +++ b/git-mailmap/src/parse.rs @@ -0,0 +1,42 @@ +mod error { + use bstr::BString; + use quick_error::quick_error; + + quick_error! { + #[derive(Debug)] + pub enum Error { + UnconsumedInput { line_number: usize, line: BString } { + display("Line {} has too many names or emails: {}", line_number, line) + } + Malformed { line_number: usize, line: BString } { + display("Line {} is malformed, an email address lacks the closing '>' bracket: {}", line_number, line) + } + } + } +} + +use crate::Entry; +use bstr::ByteSlice; +pub use error::Error; + +pub struct Lines<'a> { + lines: bstr::Lines<'a>, + line_no: usize, +} + +impl<'a> Lines<'a> { + pub(crate) fn new(input: &'a [u8]) -> Self { + Lines { + lines: input.as_bstr().lines(), + line_no: 0, + } + } +} + +impl<'a> Iterator for Lines<'a> { + type Item = Result, Error>; + + fn next(&mut self) -> Option { + todo!() + } +} diff --git a/git-mailmap/tests/fixtures/invalid.txt b/git-mailmap/tests/fixtures/invalid.txt index e69de29bb2d..c579c5bd580 100644 --- a/git-mailmap/tests/fixtures/invalid.txt +++ b/git-mailmap/tests/fixtures/invalid.txt @@ -0,0 +1,5 @@ +# comment + + = git_mailmap::parse(&input).collect(); + assert_eq!(actual.len(), 2); + + let err = actual.pop().expect("two items left").unwrap_err(); + assert!(matches!(err, parse::Error::Malformed { line_number: 3, .. })); + + let err = actual.pop().expect("one item left").unwrap_err(); + assert!(matches!(err, parse::Error::Malformed { line_number: 5, .. })); +} + +#[test] +#[ignore] +fn empty_lines_and_comments_are_ignored() { + assert!(git_mailmap::parse(b"# comment").next().is_none()); + assert!(git_mailmap::parse(b"\n\r\n\t\t \n").next().is_none()); + assert_eq!( + line(" # this is a name "), + Entry::change_name_by_email(" # this is a name", "email"), + "whitespace before hashes counts as name though" + ); +} + +#[test] +#[ignore] +fn valid_entries() { + assert_eq!( + line("proper name "), + Entry::change_name_by_email("proper name", "commit-email") + ); + assert_eq!( + line(" "), + Entry::change_email_by_email("proper email", "commit-email") + ); + assert_eq!( + line("proper name "), + Entry::change_name_and_email_by_email("proper name", "proper email", "commit-email") + ); + assert_eq!( + line("proper name commit name "), + Entry::change_name_and_email_by_name_and_email("proper name", "proper email", "commit name", "commit-email") + ); +} + +fn line(input: &str) -> Entry<'_> { + let mut lines = git_mailmap::parse(&input.as_bytes()); + let res = lines.next().expect("single line").unwrap(); + res +} From 9449bc7243c15b3cba88f02a3742784d6fe6b363 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Sat, 26 Mar 2022 14:49:04 +0800 Subject: [PATCH 06/31] thanks clippy --- git-mailmap/src/entry.rs | 45 ++++++++++++++++++++++++++++++ git-mailmap/src/lib.rs | 50 ++-------------------------------- git-mailmap/src/parse.rs | 10 ++++++- git-mailmap/tests/parse/mod.rs | 13 ++++----- 4 files changed, 62 insertions(+), 56 deletions(-) create mode 100644 git-mailmap/src/entry.rs diff --git a/git-mailmap/src/entry.rs b/git-mailmap/src/entry.rs new file mode 100644 index 00000000000..5b2804b3e35 --- /dev/null +++ b/git-mailmap/src/entry.rs @@ -0,0 +1,45 @@ +use crate::Entry; +use bstr::BStr; + +impl<'a> Entry<'a> { + pub fn change_name_by_email(proper_name: impl Into<&'a BStr>, commit_email: impl Into<&'a BStr>) -> Self { + Entry { + canonical_name: Some(proper_name.into()), + commit_email: Some(commit_email.into()), + ..Default::default() + } + } + pub fn change_email_by_email(proper_email: impl Into<&'a BStr>, commit_email: impl Into<&'a BStr>) -> Self { + Entry { + canonical_email: Some(proper_email.into()), + commit_email: Some(commit_email.into()), + ..Default::default() + } + } + pub fn change_name_and_email_by_email( + proper_name: impl Into<&'a BStr>, + proper_email: impl Into<&'a BStr>, + commit_email: impl Into<&'a BStr>, + ) -> Self { + Entry { + canonical_name: Some(proper_name.into()), + canonical_email: Some(proper_email.into()), + commit_email: Some(commit_email.into()), + ..Default::default() + } + } + + pub fn change_name_and_email_by_name_and_email( + proper_name: impl Into<&'a BStr>, + proper_email: impl Into<&'a BStr>, + commit_name: impl Into<&'a BStr>, + commit_email: impl Into<&'a BStr>, + ) -> Self { + Entry { + canonical_name: Some(proper_name.into()), + canonical_email: Some(proper_email.into()), + commit_name: Some(commit_name.into()), + commit_email: Some(commit_email.into()), + } + } +} diff --git a/git-mailmap/src/lib.rs b/git-mailmap/src/lib.rs index ce3d9414ac9..e6b8a0fccb3 100644 --- a/git-mailmap/src/lib.rs +++ b/git-mailmap/src/lib.rs @@ -7,6 +7,8 @@ pub fn parse(buf: &[u8]) -> parse::Lines<'_> { parse::Lines::new(buf) } +mod entry; + #[derive(PartialEq, Eq, Debug, Hash, Ord, PartialOrd, Clone, Default)] #[cfg_attr(feature = "serde1", derive(serde::Serialize, serde::Deserialize))] pub struct Entry<'a> { @@ -19,51 +21,3 @@ pub struct Entry<'a> { /// The email to look for and replace. commit_email: Option<&'a BStr>, } - -mod entry { - use crate::Entry; - use bstr::BStr; - - impl<'a> Entry<'a> { - pub fn change_name_by_email(proper_name: impl Into<&'a BStr>, commit_email: impl Into<&'a BStr>) -> Self { - Entry { - canonical_name: Some(proper_name.into()), - commit_email: Some(commit_email.into()), - ..Default::default() - } - } - pub fn change_email_by_email(proper_email: impl Into<&'a BStr>, commit_email: impl Into<&'a BStr>) -> Self { - Entry { - canonical_email: Some(proper_email.into()), - commit_email: Some(commit_email.into()), - ..Default::default() - } - } - pub fn change_name_and_email_by_email( - proper_name: impl Into<&'a BStr>, - proper_email: impl Into<&'a BStr>, - commit_email: impl Into<&'a BStr>, - ) -> Self { - Entry { - canonical_name: Some(proper_name.into()), - canonical_email: Some(proper_email.into()), - commit_email: Some(commit_email.into()), - ..Default::default() - } - } - - pub fn change_name_and_email_by_name_and_email( - proper_name: impl Into<&'a BStr>, - proper_email: impl Into<&'a BStr>, - commit_name: impl Into<&'a BStr>, - commit_email: impl Into<&'a BStr>, - ) -> Self { - Entry { - canonical_name: Some(proper_name.into()), - canonical_email: Some(proper_email.into()), - commit_name: Some(commit_name.into()), - commit_email: Some(commit_email.into()), - } - } - } -} diff --git a/git-mailmap/src/parse.rs b/git-mailmap/src/parse.rs index ccdc4eb8818..97689391e55 100644 --- a/git-mailmap/src/parse.rs +++ b/git-mailmap/src/parse.rs @@ -37,6 +37,14 @@ impl<'a> Iterator for Lines<'a> { type Item = Result, Error>; fn next(&mut self) -> Option { - todo!() + for line in self.lines.by_ref() { + self.line_no += 1; + match line.first() { + None => continue, + Some(b) if *b == b'#' => continue, + Some(_) => {} + } + } + None } } diff --git a/git-mailmap/tests/parse/mod.rs b/git-mailmap/tests/parse/mod.rs index 49d72e4cab0..8bec992d1f5 100644 --- a/git-mailmap/tests/parse/mod.rs +++ b/git-mailmap/tests/parse/mod.rs @@ -31,25 +31,24 @@ fn empty_lines_and_comments_are_ignored() { #[ignore] fn valid_entries() { assert_eq!( - line("proper name "), + line(" \t proper name "), Entry::change_name_by_email("proper name", "commit-email") ); assert_eq!( - line(" "), + line(" \t "), Entry::change_email_by_email("proper email", "commit-email") ); assert_eq!( - line("proper name "), + line(" proper name \t \t "), Entry::change_name_and_email_by_email("proper name", "proper email", "commit-email") ); assert_eq!( - line("proper name commit name "), + line(" proper name \tcommit name\t\t"), Entry::change_name_and_email_by_name_and_email("proper name", "proper email", "commit name", "commit-email") ); } fn line(input: &str) -> Entry<'_> { - let mut lines = git_mailmap::parse(&input.as_bytes()); - let res = lines.next().expect("single line").unwrap(); - res + let mut lines = git_mailmap::parse(input.as_bytes()); + lines.next().expect("single line").unwrap() } From 2fb43102cf8bbfa9c26877d81d8fd3208fc5e183 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Sat, 26 Mar 2022 14:52:39 +0800 Subject: [PATCH 07/31] fix serde support (#366) --- git-attributes/src/ignore.rs | 1 + git-attributes/src/lib.rs | 4 +++- git-mailmap/src/lib.rs | 4 +++- 3 files changed, 7 insertions(+), 2 deletions(-) diff --git a/git-attributes/src/ignore.rs b/git-attributes/src/ignore.rs index 1bde72dc586..77a48989d78 100644 --- a/git-attributes/src/ignore.rs +++ b/git-attributes/src/ignore.rs @@ -2,6 +2,7 @@ pub mod pattern { use bitflags::bitflags; bitflags! { + #[cfg_attr(feature = "serde1", derive(serde::Serialize, serde::Deserialize))] pub struct Mode: u32 { /// The pattern does not contain a sub-directory and - it doesn't contain slashes after removing the trailing one. const NO_SUB_DIR = 1 << 0; diff --git a/git-attributes/src/lib.rs b/git-attributes/src/lib.rs index 1e429b1a1d1..0d08f1e1963 100644 --- a/git-attributes/src/lib.rs +++ b/git-attributes/src/lib.rs @@ -1,4 +1,5 @@ -#![forbid(unsafe_code, rust_2018_idioms)] +#![forbid(unsafe_code)] +#![deny(rust_2018_idioms)] use bstr::BStr; @@ -11,6 +12,7 @@ pub enum State<'a> { Unset, /// The attribute is set to the given value, which followed the `=` sign. /// Note that values can be empty. + #[cfg_attr(feature = "serde1", serde(borrow))] Value(&'a BStr), /// The attribute isn't mentioned with a given path or is explicitly set to `Unspecified` using the `!` sign. Unspecified, diff --git a/git-mailmap/src/lib.rs b/git-mailmap/src/lib.rs index e6b8a0fccb3..5ebd02df096 100644 --- a/git-mailmap/src/lib.rs +++ b/git-mailmap/src/lib.rs @@ -1,4 +1,5 @@ -#![forbid(unsafe_code, rust_2018_idioms)] +#![forbid(unsafe_code)] +#![deny(rust_2018_idioms)] use bstr::BStr; @@ -12,6 +13,7 @@ mod entry; #[derive(PartialEq, Eq, Debug, Hash, Ord, PartialOrd, Clone, Default)] #[cfg_attr(feature = "serde1", derive(serde::Serialize, serde::Deserialize))] pub struct Entry<'a> { + #[cfg_attr(feature = "serde1", serde(borrow))] /// The name to map to. canonical_name: Option<&'a BStr>, /// The email map to. From f458817005b884e966bcc894a0cf7c9958882ba4 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Sat, 26 Mar 2022 18:00:41 +0800 Subject: [PATCH 08/31] high-level parsing to deal with allowed mailmap lines (#366) --- git-mailmap/src/parse.rs | 39 ++++++++++++++++++++++++++++++++++++--- 1 file changed, 36 insertions(+), 3 deletions(-) diff --git a/git-mailmap/src/parse.rs b/git-mailmap/src/parse.rs index 97689391e55..2a828125932 100644 --- a/git-mailmap/src/parse.rs +++ b/git-mailmap/src/parse.rs @@ -16,7 +16,7 @@ mod error { } use crate::Entry; -use bstr::ByteSlice; +use bstr::{BStr, ByteSlice}; pub use error::Error; pub struct Lines<'a> { @@ -40,11 +40,44 @@ impl<'a> Iterator for Lines<'a> { for line in self.lines.by_ref() { self.line_no += 1; match line.first() { - None => continue, - Some(b) if *b == b'#' => continue, + None => return None, + Some(b) if *b == b'#' => return None, Some(_) => {} } + + return parse_line(line.into(), self.line_no).into(); } None } } + +fn parse_line(line: &BStr, line_number: usize) -> Result, Error> { + let (name1, email1, rest) = parse_name_and_email(line, line_number)?; + let (name2, email2, rest) = parse_name_and_email(rest, line_number)?; + if !rest.trim().is_empty() { + return Err(Error::UnconsumedInput { + line_number, + line: line.into(), + }); + } + Ok(match (name1, email1, name2, email2) { + (Some(proper_name), Some(commit_email), None, None) => Entry::change_name_by_email(proper_name, commit_email), + (None, Some(proper_email), None, Some(commit_email)) => { + Entry::change_email_by_email(proper_email, commit_email) + } + (Some(proper_name), Some(proper_email), None, Some(commit_email)) => { + Entry::change_name_and_email_by_email(proper_name, proper_email, commit_email) + } + (Some(proper_name), Some(proper_email), Some(commit_name), Some(commit_email)) => { + Entry::change_name_and_email_by_name_and_email(proper_name, proper_email, commit_name, commit_email) + } + _ => todo!(), + }) +} + +fn parse_name_and_email( + line: &BStr, + line_number: usize, +) -> Result<(Option<&'_ BStr>, Option<&'_ BStr>, &'_ BStr), Error> { + todo!() +} From 67a2050156cc809767ca026f467f35b552bea043 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Sat, 26 Mar 2022 21:46:34 +0800 Subject: [PATCH 09/31] all tests (so far) green (#366) --- git-attributes/tests/parse/attribute.rs | 5 +-- git-mailmap/src/parse.rs | 40 +++++++++++++++++----- git-mailmap/tests/parse/mod.rs | 44 ++++++++++++++++++++----- 3 files changed, 68 insertions(+), 21 deletions(-) diff --git a/git-attributes/tests/parse/attribute.rs b/git-attributes/tests/parse/attribute.rs index c9097ca152d..773830985e1 100644 --- a/git-attributes/tests/parse/attribute.rs +++ b/git-attributes/tests/parse/attribute.rs @@ -267,10 +267,7 @@ fn try_line(input: &str) -> Result { } fn line(input: &str) -> ExpandedAttribute { - let mut lines = git_attributes::parse(input.as_bytes()); - let res = expand(lines.next().expect("single line")).unwrap(); - assert!(lines.next().is_none(), "expected only one line"); - res + try_line(input).unwrap() } fn try_lines(input: &str) -> Result, parse::Error> { diff --git a/git-mailmap/src/parse.rs b/git-mailmap/src/parse.rs index 2a828125932..e90968d346c 100644 --- a/git-mailmap/src/parse.rs +++ b/git-mailmap/src/parse.rs @@ -6,10 +6,10 @@ mod error { #[derive(Debug)] pub enum Error { UnconsumedInput { line_number: usize, line: BString } { - display("Line {} has too many names or emails: {}", line_number, line) + display("Line {} has too many names or emails, or none at all: {}", line_number, line) } - Malformed { line_number: usize, line: BString } { - display("Line {} is malformed, an email address lacks the closing '>' bracket: {}", line_number, line) + Malformed { line_number: usize, line: BString, message: String } { + display("{}: {:?}: {}", line_number, line, message) } } } @@ -40,11 +40,14 @@ impl<'a> Iterator for Lines<'a> { for line in self.lines.by_ref() { self.line_no += 1; match line.first() { - None => return None, - Some(b) if *b == b'#' => return None, + None => continue, + Some(b) if *b == b'#' => continue, Some(_) => {} } - + let line = line.trim(); + if line.is_empty() { + continue; + } return parse_line(line.into(), self.line_no).into(); } None @@ -71,7 +74,7 @@ fn parse_line(line: &BStr, line_number: usize) -> Result, Error> { (Some(proper_name), Some(proper_email), Some(commit_name), Some(commit_email)) => { Entry::change_name_and_email_by_name_and_email(proper_name, proper_email, commit_name, commit_email) } - _ => todo!(), + _ => unreachable!("{:?}", line), }) } @@ -79,5 +82,26 @@ fn parse_name_and_email( line: &BStr, line_number: usize, ) -> Result<(Option<&'_ BStr>, Option<&'_ BStr>, &'_ BStr), Error> { - todo!() + match line.find_byte(b'<') { + Some(start_bracket) => { + let email = &line[start_bracket + 1..]; + let closing_bracket = email.find_byte(b'>').ok_or_else(|| Error::Malformed { + line_number, + line: line.into(), + message: "Missing closing bracket '>' in email".into(), + })?; + let email = email[..closing_bracket].trim().as_bstr(); + if email.is_empty() { + return Err(Error::Malformed { + line_number, + line: line.into(), + message: "Email must not be empty".into(), + }); + } + let name = line[..start_bracket].trim().as_bstr(); + let rest = line[start_bracket + closing_bracket + 2..].as_bstr(); + Ok(((!name.is_empty()).then(|| name), Some(email), rest)) + } + None => Ok((None, None, line)), + } } diff --git a/git-mailmap/tests/parse/mod.rs b/git-mailmap/tests/parse/mod.rs index 8bec992d1f5..9a02bd7b562 100644 --- a/git-mailmap/tests/parse/mod.rs +++ b/git-mailmap/tests/parse/mod.rs @@ -2,33 +2,30 @@ use git_mailmap::{parse, Entry}; use git_testtools::fixture_path; #[test] -#[ignore] fn line_numbers_are_counted_correctly_in_errors() { let input = std::fs::read(fixture_path("invalid.txt")).unwrap(); - let mut actual: Vec<_> = git_mailmap::parse(&input).collect(); + let mut actual = git_mailmap::parse(&input).collect::>().into_iter(); assert_eq!(actual.len(), 2); - let err = actual.pop().expect("two items left").unwrap_err(); + let err = actual.next().expect("two items left").unwrap_err(); assert!(matches!(err, parse::Error::Malformed { line_number: 3, .. })); - let err = actual.pop().expect("one item left").unwrap_err(); - assert!(matches!(err, parse::Error::Malformed { line_number: 5, .. })); + let err = actual.next().expect("one item left").unwrap_err(); + assert!(matches!(err, parse::Error::UnconsumedInput { line_number: 5, .. })); } #[test] -#[ignore] fn empty_lines_and_comments_are_ignored() { assert!(git_mailmap::parse(b"# comment").next().is_none()); assert!(git_mailmap::parse(b"\n\r\n\t\t \n").next().is_none()); assert_eq!( line(" # this is a name "), - Entry::change_name_by_email(" # this is a name", "email"), + Entry::change_name_by_email("# this is a name", "email"), "whitespace before hashes counts as name though" ); } #[test] -#[ignore] fn valid_entries() { assert_eq!( line(" \t proper name "), @@ -47,8 +44,37 @@ fn valid_entries() { Entry::change_name_and_email_by_name_and_email("proper name", "proper email", "commit name", "commit-email") ); } +#[test] +fn error_if_there_is_just_a_name() { + assert!(matches!( + try_line("just a name"), + Err(parse::Error::UnconsumedInput { line_number: 1, .. }) + )); +} + +#[test] +fn error_if_email_is_empty() { + assert!(matches!( + try_line("hello <"), + Err(parse::Error::Malformed { line_number: 1, .. }) + )); + assert!(matches!( + try_line("hello < \t"), + Err(parse::Error::Malformed { line_number: 1, .. }) + )); + assert!(matches!( + try_line("hello < \t\r >"), + Err(parse::Error::Malformed { line_number: 1, .. }) + )); +} fn line(input: &str) -> Entry<'_> { + try_line(input).unwrap() +} + +fn try_line(input: &str) -> Result, parse::Error> { let mut lines = git_mailmap::parse(input.as_bytes()); - lines.next().expect("single line").unwrap() + let res = lines.next().expect("single line"); + assert!(lines.next().is_none(), "only one line provided"); + res } From fb5593a7272498ae042b6c8c7605faa3d253fa10 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Sat, 26 Mar 2022 21:55:05 +0800 Subject: [PATCH 10/31] quickfix for unintentionally using 'unicode' feature of bytecode (#366) It pulls in a regex engine or parts of it, and for now we avoid that. --- git-mailmap/src/parse.rs | 21 ++++++++++++++------- tests/tools/Cargo.toml | 2 +- 2 files changed, 15 insertions(+), 8 deletions(-) diff --git a/git-mailmap/src/parse.rs b/git-mailmap/src/parse.rs index e90968d346c..864c2356d8f 100644 --- a/git-mailmap/src/parse.rs +++ b/git-mailmap/src/parse.rs @@ -44,20 +44,22 @@ impl<'a> Iterator for Lines<'a> { Some(b) if *b == b'#' => continue, Some(_) => {} } - let line = line.trim(); - if line.is_empty() { - continue; - } + let line = match line.as_bstr().find_not_byteset(BLANKS) { + None => continue, + Some(pos) => &line[pos..], + }; return parse_line(line.into(), self.line_no).into(); } None } } +const BLANKS: &[u8] = b" \t\r"; + fn parse_line(line: &BStr, line_number: usize) -> Result, Error> { let (name1, email1, rest) = parse_name_and_email(line, line_number)?; let (name2, email2, rest) = parse_name_and_email(rest, line_number)?; - if !rest.trim().is_empty() { + if !trim(rest).is_empty() { return Err(Error::UnconsumedInput { line_number, line: line.into(), @@ -90,7 +92,7 @@ fn parse_name_and_email( line: line.into(), message: "Missing closing bracket '>' in email".into(), })?; - let email = email[..closing_bracket].trim().as_bstr(); + let email = trim(&email[..closing_bracket]); if email.is_empty() { return Err(Error::Malformed { line_number, @@ -98,10 +100,15 @@ fn parse_name_and_email( message: "Email must not be empty".into(), }); } - let name = line[..start_bracket].trim().as_bstr(); + let name = trim(&line[..start_bracket]); let rest = line[start_bracket + closing_bracket + 2..].as_bstr(); Ok(((!name.is_empty()).then(|| name), Some(email), rest)) } None => Ok((None, None, line)), } } + +fn trim(line: &[u8]) -> &BStr { + // TODO: without str, use byteset find + std::str::from_utf8(line).unwrap().trim().as_bytes().as_bstr() +} diff --git a/tests/tools/Cargo.toml b/tests/tools/Cargo.toml index 1a9094fe71e..a1e3bcef9b7 100644 --- a/tests/tools/Cargo.toml +++ b/tests/tools/Cargo.toml @@ -15,7 +15,7 @@ path = "src/main.rs" [dependencies] git-hash = { version = "^0.9.2", path = "../../git-hash" } nom = { version = "7", default-features = false, features = ["std"]} -bstr = "0.2.15" +bstr = { version = "0.2.15", default-features = false } crc = "2.0.0" once_cell = "1.8.0" tempfile = "3.2.0" From 8ff53af9876a5e35bcfd076124ad776e1b6ff331 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Sat, 26 Mar 2022 21:59:38 +0800 Subject: [PATCH 11/31] cleanup bstr usage to not accidentally pull in unicode (#366) --- git-features/Cargo.toml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/git-features/Cargo.toml b/git-features/Cargo.toml index ca76445e17c..44cab7d999c 100644 --- a/git-features/Cargo.toml +++ b/git-features/Cargo.toml @@ -125,12 +125,12 @@ time = { version = "0.3.2", optional = true, default-features = false, features # path ## make bstr utilities available in the `path` modules, which itself is gated by the `path` feature. -bstr = { version = "0.2.17", optional = true } +bstr = { version = "0.2.17", optional = true, default-features = false } document-features = { version = "0.2.0", optional = true } [dev-dependencies] -bstr = "0.2.15" +bstr = { version = "0.2.15", default-features = false } [package.metadata.docs.rs] features = ["document-features"] From 471fa62b142ba744541d7472464d62826f5c6b93 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Sat, 26 Mar 2022 22:00:19 +0800 Subject: [PATCH 12/31] Commit to using 'unicode' feature of bstr as git-object wants it too --- git-features/Cargo.toml | 2 +- git-mailmap/Cargo.toml | 2 +- git-mailmap/src/parse.rs | 21 +++++++-------------- tests/tools/Cargo.toml | 2 +- 4 files changed, 10 insertions(+), 17 deletions(-) diff --git a/git-features/Cargo.toml b/git-features/Cargo.toml index 44cab7d999c..f25bccaf46b 100644 --- a/git-features/Cargo.toml +++ b/git-features/Cargo.toml @@ -125,7 +125,7 @@ time = { version = "0.3.2", optional = true, default-features = false, features # path ## make bstr utilities available in the `path` modules, which itself is gated by the `path` feature. -bstr = { version = "0.2.17", optional = true, default-features = false } +bstr = { version = "0.2.17", optional = true, default-features = false, features = ["std"] } document-features = { version = "0.2.0", optional = true } diff --git a/git-mailmap/Cargo.toml b/git-mailmap/Cargo.toml index aa9e990251f..cc8c54ea73f 100644 --- a/git-mailmap/Cargo.toml +++ b/git-mailmap/Cargo.toml @@ -17,7 +17,7 @@ serde1 = ["serde", "bstr/serde1"] # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html [dependencies] -bstr = { version = "0.2.13", default-features = false, features = ["std"]} +bstr = { version = "0.2.13", default-features = false, features = ["std", "unicode"]} quick-error = "2.0.0" serde = { version = "1.0.114", optional = true, default-features = false, features = ["derive"]} diff --git a/git-mailmap/src/parse.rs b/git-mailmap/src/parse.rs index 864c2356d8f..e90968d346c 100644 --- a/git-mailmap/src/parse.rs +++ b/git-mailmap/src/parse.rs @@ -44,22 +44,20 @@ impl<'a> Iterator for Lines<'a> { Some(b) if *b == b'#' => continue, Some(_) => {} } - let line = match line.as_bstr().find_not_byteset(BLANKS) { - None => continue, - Some(pos) => &line[pos..], - }; + let line = line.trim(); + if line.is_empty() { + continue; + } return parse_line(line.into(), self.line_no).into(); } None } } -const BLANKS: &[u8] = b" \t\r"; - fn parse_line(line: &BStr, line_number: usize) -> Result, Error> { let (name1, email1, rest) = parse_name_and_email(line, line_number)?; let (name2, email2, rest) = parse_name_and_email(rest, line_number)?; - if !trim(rest).is_empty() { + if !rest.trim().is_empty() { return Err(Error::UnconsumedInput { line_number, line: line.into(), @@ -92,7 +90,7 @@ fn parse_name_and_email( line: line.into(), message: "Missing closing bracket '>' in email".into(), })?; - let email = trim(&email[..closing_bracket]); + let email = email[..closing_bracket].trim().as_bstr(); if email.is_empty() { return Err(Error::Malformed { line_number, @@ -100,15 +98,10 @@ fn parse_name_and_email( message: "Email must not be empty".into(), }); } - let name = trim(&line[..start_bracket]); + let name = line[..start_bracket].trim().as_bstr(); let rest = line[start_bracket + closing_bracket + 2..].as_bstr(); Ok(((!name.is_empty()).then(|| name), Some(email), rest)) } None => Ok((None, None, line)), } } - -fn trim(line: &[u8]) -> &BStr { - // TODO: without str, use byteset find - std::str::from_utf8(line).unwrap().trim().as_bytes().as_bstr() -} diff --git a/tests/tools/Cargo.toml b/tests/tools/Cargo.toml index a1e3bcef9b7..1a9094fe71e 100644 --- a/tests/tools/Cargo.toml +++ b/tests/tools/Cargo.toml @@ -15,7 +15,7 @@ path = "src/main.rs" [dependencies] git-hash = { version = "^0.9.2", path = "../../git-hash" } nom = { version = "7", default-features = false, features = ["std"]} -bstr = { version = "0.2.15", default-features = false } +bstr = "0.2.15" crc = "2.0.0" once_cell = "1.8.0" tempfile = "3.2.0" From b67b0f90dd947508ab9ab0b7d68ce0093ae415ae Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Sun, 27 Mar 2022 21:39:12 +0800 Subject: [PATCH 13/31] Add typical malmap example (#366) --- git-mailmap/tests/fixtures/typical.txt | 8 ++++++++ git-mailmap/tests/parse/mod.rs | 21 +++++++++++++++++++++ 2 files changed, 29 insertions(+) create mode 100644 git-mailmap/tests/fixtures/typical.txt diff --git a/git-mailmap/tests/fixtures/typical.txt b/git-mailmap/tests/fixtures/typical.txt new file mode 100644 index 00000000000..563b9ddd90a --- /dev/null +++ b/git-mailmap/tests/fixtures/typical.txt @@ -0,0 +1,8 @@ +# from the mailmap docs + +Joe R. Developer +Joe R. Developer Joe + +Jane Doe +Jane Doe +Jane Doe Jane diff --git a/git-mailmap/tests/parse/mod.rs b/git-mailmap/tests/parse/mod.rs index 9a02bd7b562..627456dbdfb 100644 --- a/git-mailmap/tests/parse/mod.rs +++ b/git-mailmap/tests/parse/mod.rs @@ -14,6 +14,27 @@ fn line_numbers_are_counted_correctly_in_errors() { assert!(matches!(err, parse::Error::UnconsumedInput { line_number: 5, .. })); } +#[test] +fn a_typical_mailmap() { + let input = std::fs::read(fixture_path("typical.txt")).unwrap(); + let actual = git_mailmap::parse(&input).map(Result::unwrap).collect::>(); + assert_eq!( + actual, + vec![ + Entry::change_name_by_email("Joe R. Developer", "joe@example.com"), + Entry::change_name_and_email_by_name_and_email( + "Joe R. Developer", + "joe@example.com", + "Joe", + "bugs@example.com" + ), + Entry::change_name_and_email_by_email("Jane Doe", "jane@example.com", "jane@laptop.(none)"), + Entry::change_name_and_email_by_email("Jane Doe", "jane@example.com", "jane@desktop.(none)"), + Entry::change_name_and_email_by_name_and_email("Jane Doe", "jane@example.com", "Jane", "bugs@example.com"), + ] + ); +} + #[test] fn empty_lines_and_comments_are_ignored() { assert!(git_mailmap::parse(b"# comment").next().is_none()); From d71d0670cd89a2dac2ea84cbc538f67fef3ee451 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Mon, 28 Mar 2022 12:16:32 +0800 Subject: [PATCH 14/31] sketch of mailmap snapshot for lookups (#366) --- git-mailmap/src/entry.rs | 22 +++++----- git-mailmap/src/lib.rs | 18 +++++--- git-mailmap/src/snapshot.rs | 84 +++++++++++++++++++++++++++++++++++++ 3 files changed, 108 insertions(+), 16 deletions(-) create mode 100644 git-mailmap/src/snapshot.rs diff --git a/git-mailmap/src/entry.rs b/git-mailmap/src/entry.rs index 5b2804b3e35..cb83d510483 100644 --- a/git-mailmap/src/entry.rs +++ b/git-mailmap/src/entry.rs @@ -4,15 +4,15 @@ use bstr::BStr; impl<'a> Entry<'a> { pub fn change_name_by_email(proper_name: impl Into<&'a BStr>, commit_email: impl Into<&'a BStr>) -> Self { Entry { - canonical_name: Some(proper_name.into()), - commit_email: Some(commit_email.into()), + new_name: Some(proper_name.into()), + old_email: Some(commit_email.into()), ..Default::default() } } pub fn change_email_by_email(proper_email: impl Into<&'a BStr>, commit_email: impl Into<&'a BStr>) -> Self { Entry { - canonical_email: Some(proper_email.into()), - commit_email: Some(commit_email.into()), + new_email: Some(proper_email.into()), + old_email: Some(commit_email.into()), ..Default::default() } } @@ -22,9 +22,9 @@ impl<'a> Entry<'a> { commit_email: impl Into<&'a BStr>, ) -> Self { Entry { - canonical_name: Some(proper_name.into()), - canonical_email: Some(proper_email.into()), - commit_email: Some(commit_email.into()), + new_name: Some(proper_name.into()), + new_email: Some(proper_email.into()), + old_email: Some(commit_email.into()), ..Default::default() } } @@ -36,10 +36,10 @@ impl<'a> Entry<'a> { commit_email: impl Into<&'a BStr>, ) -> Self { Entry { - canonical_name: Some(proper_name.into()), - canonical_email: Some(proper_email.into()), - commit_name: Some(commit_name.into()), - commit_email: Some(commit_email.into()), + new_name: Some(proper_name.into()), + new_email: Some(proper_email.into()), + old_name: Some(commit_name.into()), + old_email: Some(commit_email.into()), } } } diff --git a/git-mailmap/src/lib.rs b/git-mailmap/src/lib.rs index 5ebd02df096..c077cb7bcd5 100644 --- a/git-mailmap/src/lib.rs +++ b/git-mailmap/src/lib.rs @@ -10,16 +10,24 @@ pub fn parse(buf: &[u8]) -> parse::Lines<'_> { mod entry; -#[derive(PartialEq, Eq, Debug, Hash, Ord, PartialOrd, Clone, Default)] +mod snapshot; + +#[derive(Default, Clone)] +pub struct Snapshot { + /// Sorted by `old_email` + entries_by_old_email: Vec, +} + +#[derive(PartialEq, Eq, Debug, Hash, Ord, PartialOrd, Clone, Copy, Default)] #[cfg_attr(feature = "serde1", derive(serde::Serialize, serde::Deserialize))] pub struct Entry<'a> { #[cfg_attr(feature = "serde1", serde(borrow))] /// The name to map to. - canonical_name: Option<&'a BStr>, + new_name: Option<&'a BStr>, /// The email map to. - canonical_email: Option<&'a BStr>, + new_email: Option<&'a BStr>, /// The name to look for and replace. - commit_name: Option<&'a BStr>, + old_name: Option<&'a BStr>, /// The email to look for and replace. - commit_email: Option<&'a BStr>, + old_email: Option<&'a BStr>, } diff --git a/git-mailmap/src/snapshot.rs b/git-mailmap/src/snapshot.rs new file mode 100644 index 00000000000..87dae649206 --- /dev/null +++ b/git-mailmap/src/snapshot.rs @@ -0,0 +1,84 @@ +use bstr::{BString, ByteSlice}; +use std::cmp::Ordering; +use std::ops::Deref; + +#[cfg_attr(test, derive(Debug))] +#[derive(Clone)] +enum EncodedString { + Utf8(String), + Unknown(BString), +} + +impl Eq for EncodedString {} + +impl PartialEq for EncodedString { + fn eq(&self, other: &Self) -> bool { + self.cmp(other) == Ordering::Equal + } +} + +impl PartialOrd for EncodedString { + fn partial_cmp(&self, other: &Self) -> Option { + self.cmp(other).into() + } +} + +impl Ord for EncodedString { + fn cmp(&self, other: &Self) -> Ordering { + use EncodedString::*; + match (self, other) { + (Utf8(a), Utf8(b)) => { + let a = a.chars().map(|c| c.to_ascii_lowercase()); + let b = b.chars().map(|c| c.to_ascii_lowercase()); + a.cmp(b) + } + (Unknown(a), Unknown(b)) => a.cmp(b), + (Utf8(a), Unknown(b)) => a.as_bytes().cmp(b.as_ref()), + (Unknown(a), Utf8(b)) => a.deref().as_bytes().cmp(b.as_bytes()), + } + } +} + +#[derive(Clone)] +pub(crate) struct NameEntry { + new_name: Option, + new_email: Option, + old_name: EncodedString, +} + +#[derive(Clone)] +pub(crate) struct EmailEntry { + new_name: Option, + new_email: Option, + old_email: EncodedString, + + entries_by_old_name: Vec, +} + +#[cfg(test)] +mod encoded_string { + use crate::snapshot::EncodedString; + + #[test] + fn basic_ascii_case_folding() { + assert_eq!( + EncodedString::Utf8("FooBar".into()), + EncodedString::Utf8("foobar".into()) + ) + } + + #[test] + fn no_advanced_unicode_folding() { + assert_ne!(EncodedString::Utf8("Masse".into()), EncodedString::Utf8("Maße".into())) + } + + #[test] + fn unknown_encoding_pairs_do_not_try_to_ignore_cases() { + assert_ne!(EncodedString::Utf8("Foo".into()), EncodedString::Unknown("foo".into())); + assert_ne!(EncodedString::Unknown("Foo".into()), EncodedString::Utf8("foo".into())); + assert_ne!( + EncodedString::Unknown("Foo".into()), + EncodedString::Unknown("foo".into()) + ); + } +} From 77ef2cb819f21ddc5d1ee9e94b5961e3ca5b3139 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Mon, 28 Mar 2022 16:18:06 +0800 Subject: [PATCH 15/31] feat: `Time::default()` (#366) --- git-actor/src/lib.rs | 5 ++--- git-actor/src/signature/mod.rs | 18 ++---------------- git-actor/src/time.rs | 10 ++++++++++ git-mailmap/tests/snapshot/mod.rs | 0 4 files changed, 14 insertions(+), 19 deletions(-) create mode 100644 git-mailmap/tests/snapshot/mod.rs diff --git a/git-actor/src/lib.rs b/git-actor/src/lib.rs index 4ab2f728475..b40cd9feacb 100644 --- a/git-actor/src/lib.rs +++ b/git-actor/src/lib.rs @@ -11,13 +11,14 @@ use bstr::{BStr, BString}; /// pub mod signature; +mod time; const SPACE: &[u8; 1] = b" "; /// A mutable signature is created by an actor at a certain time. /// /// Note that this is not a cryptographical signature. -#[derive(PartialEq, Eq, Debug, Hash, Ord, PartialOrd, Clone)] +#[derive(Default, PartialEq, Eq, Debug, Hash, Ord, PartialOrd, Clone)] #[cfg_attr(feature = "serde1", derive(serde::Serialize, serde::Deserialize))] pub struct Signature { /// The actors name. @@ -63,5 +64,3 @@ pub struct Time { /// the sign of `offset`, used to encode `-0000` which would otherwise loose sign information. pub sign: Sign, } - -mod time; diff --git a/git-actor/src/signature/mod.rs b/git-actor/src/signature/mod.rs index 0ad157ddda0..52dbb3a2dde 100644 --- a/git-actor/src/signature/mod.rs +++ b/git-actor/src/signature/mod.rs @@ -22,26 +22,12 @@ mod _ref { } mod convert { - use crate::{Sign, Signature, SignatureRef, Time}; - - impl Default for Signature { - fn default() -> Self { - Signature::empty() - } - } + use crate::{Signature, SignatureRef}; impl Signature { /// An empty signature, similar to 'null'. pub fn empty() -> Self { - Signature { - name: Default::default(), - email: Default::default(), - time: Time { - time: 0, - offset: 0, - sign: Sign::Plus, - }, - } + Signature::default() } /// Borrow this instance as immutable diff --git a/git-actor/src/time.rs b/git-actor/src/time.rs index d0ddb64d1f4..65d8ba409db 100644 --- a/git-actor/src/time.rs +++ b/git-actor/src/time.rs @@ -12,6 +12,16 @@ impl From for Sign { } } +impl Default for Time { + fn default() -> Self { + Time { + time: 0, + offset: 0, + sign: Sign::Plus, + } + } +} + impl Time { /// Serialize this instance to `out` in a format suitable for use in header fields of serialized git commits or tags. pub fn write_to(&self, mut out: impl io::Write) -> io::Result<()> { diff --git a/git-mailmap/tests/snapshot/mod.rs b/git-mailmap/tests/snapshot/mod.rs new file mode 100644 index 00000000000..e69de29bb2d From 85e3820caa106a32c3406fd1e9e4c67fb0033bc5 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Mon, 28 Mar 2022 16:18:47 +0800 Subject: [PATCH 16/31] add `fixture_bytes` to test tools --- git-attributes/tests/parse/attribute.rs | 4 ++-- git-attributes/tests/parse/ignore.rs | 4 ++-- tests/tools/src/lib.rs | 3 +++ 3 files changed, 7 insertions(+), 4 deletions(-) diff --git a/git-attributes/tests/parse/attribute.rs b/git-attributes/tests/parse/attribute.rs index 773830985e1..ae1463da537 100644 --- a/git-attributes/tests/parse/attribute.rs +++ b/git-attributes/tests/parse/attribute.rs @@ -1,7 +1,7 @@ use bstr::{BStr, ByteSlice}; use git_attributes::ignore::pattern::Mode; use git_attributes::{parse, State}; -use git_testtools::fixture_path; +use git_testtools::fixture_bytes; #[test] fn byte_order_marks_are_no_patterns() { @@ -14,7 +14,7 @@ fn byte_order_marks_are_no_patterns() { #[test] fn line_numbers_are_counted_correctly() { - let input = std::fs::read(fixture_path("attributes/various.txt")).unwrap(); + let input = fixture_bytes("attributes/various.txt"); assert_eq!( try_lines(&String::from_utf8(input).unwrap()).unwrap(), vec![ diff --git a/git-attributes/tests/parse/ignore.rs b/git-attributes/tests/parse/ignore.rs index 07aa56371ac..e39a194e493 100644 --- a/git-attributes/tests/parse/ignore.rs +++ b/git-attributes/tests/parse/ignore.rs @@ -1,5 +1,5 @@ use git_attributes::ignore::pattern::Mode; -use git_testtools::fixture_path; +use git_testtools::fixture_bytes; #[test] fn byte_order_marks_are_no_patterns() { @@ -11,7 +11,7 @@ fn byte_order_marks_are_no_patterns() { #[test] fn line_numbers_are_counted_correctly() { - let input = std::fs::read(fixture_path("ignore/various.txt")).unwrap(); + let input = fixture_bytes("ignore/various.txt"); let actual: Vec<_> = git_attributes::parse::ignore(&input).collect(); assert_eq!( actual, diff --git a/tests/tools/src/lib.rs b/tests/tools/src/lib.rs index c3123ce2faa..a880da99cc6 100644 --- a/tests/tools/src/lib.rs +++ b/tests/tools/src/lib.rs @@ -26,6 +26,9 @@ pub fn hex_to_id(hex: &str) -> git_hash::ObjectId { pub fn fixture_path(path: impl AsRef) -> PathBuf { PathBuf::from("tests").join("fixtures").join(path.as_ref()) } +pub fn fixture_bytes(path: impl AsRef) -> Vec { + std::fs::read(fixture_path(path.as_ref())).expect(&format!("File at '{}' not found", path.as_ref().display())) +} pub fn scripted_fixture_repo_read_only( script_name: impl AsRef, ) -> std::result::Result> { From 1890db73b5fd66467c66efd9ddc365871041c7c3 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Mon, 28 Mar 2022 16:18:59 +0800 Subject: [PATCH 17/31] sketch `Snapshot` API to implement map building and signature resolution (#366) --- Cargo.lock | 1 + git-mailmap/Cargo.toml | 3 +- git-mailmap/src/lib.rs | 4 ++ git-mailmap/src/snapshot.rs | 71 +++++++++++++++++++++++++------ git-mailmap/tests/mailmap.rs | 1 + git-mailmap/tests/parse/mod.rs | 6 +-- git-mailmap/tests/snapshot/mod.rs | 31 ++++++++++++++ 7 files changed, 100 insertions(+), 17 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 8c7402ab148..63d6278527c 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1221,6 +1221,7 @@ name = "git-mailmap" version = "0.0.0" dependencies = [ "bstr", + "git-actor", "git-testtools", "quick-error", "serde", diff --git a/git-mailmap/Cargo.toml b/git-mailmap/Cargo.toml index cc8c54ea73f..41eb8930128 100644 --- a/git-mailmap/Cargo.toml +++ b/git-mailmap/Cargo.toml @@ -12,11 +12,12 @@ doctest = false [features] ## Data structures implement `serde::Serialize` and `serde::Deserialize`. -serde1 = ["serde", "bstr/serde1"] +serde1 = ["serde", "bstr/serde1", "git-actor/serde1"] # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html [dependencies] +git-actor = { version = "^0.8.1", path = "../git-actor" } bstr = { version = "0.2.13", default-features = false, features = ["std", "unicode"]} quick-error = "2.0.0" serde = { version = "1.0.114", optional = true, default-features = false, features = ["derive"]} diff --git a/git-mailmap/src/lib.rs b/git-mailmap/src/lib.rs index c077cb7bcd5..addbacfc32b 100644 --- a/git-mailmap/src/lib.rs +++ b/git-mailmap/src/lib.rs @@ -8,6 +8,10 @@ pub fn parse(buf: &[u8]) -> parse::Lines<'_> { parse::Lines::new(buf) } +pub fn parse_ignore_errors(buf: &[u8]) -> impl Iterator> { + parse(buf).filter_map(Result::ok) +} + mod entry; mod snapshot; diff --git a/git-mailmap/src/snapshot.rs b/git-mailmap/src/snapshot.rs index 87dae649206..2ffe45603a4 100644 --- a/git-mailmap/src/snapshot.rs +++ b/git-mailmap/src/snapshot.rs @@ -1,4 +1,5 @@ -use bstr::{BString, ByteSlice}; +use crate::Snapshot; +use bstr::{BStr, BString, ByteSlice}; use std::cmp::Ordering; use std::ops::Deref; @@ -9,6 +10,36 @@ enum EncodedString { Unknown(BString), } +impl EncodedString { + fn cmp_ref(&self, other: EncodedStringRef<'_>) -> Ordering { + match (self, other) { + (EncodedString::Utf8(a), EncodedStringRef::Utf8(b)) => { + let a = a.chars().map(|c| c.to_ascii_lowercase()); + let b = b.chars().map(|c| c.to_ascii_lowercase()); + a.cmp(b) + } + (EncodedString::Unknown(a), EncodedStringRef::Unknown(b)) => a.deref().as_bstr().cmp(b), + (EncodedString::Utf8(a), EncodedStringRef::Unknown(b)) => a.as_bytes().cmp(b.as_ref()), + (EncodedString::Unknown(a), EncodedStringRef::Utf8(b)) => a.deref().as_bytes().cmp(b.as_bytes()), + } + } +} + +#[cfg_attr(test, derive(Debug))] +enum EncodedStringRef<'a> { + Utf8(&'a str), + Unknown(&'a BStr), +} + +impl EncodedString { + fn to_ref(&self) -> EncodedStringRef<'_> { + match self { + EncodedString::Unknown(v) => EncodedStringRef::Unknown(v.deref().as_bstr()), + EncodedString::Utf8(v) => EncodedStringRef::Utf8(v), + } + } +} + impl Eq for EncodedString {} impl PartialEq for EncodedString { @@ -25,22 +56,12 @@ impl PartialOrd for EncodedString { impl Ord for EncodedString { fn cmp(&self, other: &Self) -> Ordering { - use EncodedString::*; - match (self, other) { - (Utf8(a), Utf8(b)) => { - let a = a.chars().map(|c| c.to_ascii_lowercase()); - let b = b.chars().map(|c| c.to_ascii_lowercase()); - a.cmp(b) - } - (Unknown(a), Unknown(b)) => a.cmp(b), - (Utf8(a), Unknown(b)) => a.as_bytes().cmp(b.as_ref()), - (Unknown(a), Utf8(b)) => a.deref().as_bytes().cmp(b.as_bytes()), - } + self.cmp_ref(other.to_ref()) } } #[derive(Clone)] -pub(crate) struct NameEntry { +struct NameEntry { new_name: Option, new_email: Option, old_name: EncodedString, @@ -55,6 +76,30 @@ pub(crate) struct EmailEntry { entries_by_old_name: Vec, } +impl Snapshot { + pub fn from_bytes(buf: &[u8]) -> Self { + Self::new(crate::parse_ignore_errors(buf)) + } + + pub fn new<'a>(entries: impl IntoIterator>) -> Self { + let mut snapshot = Self::default(); + snapshot.extend(entries); + snapshot + } + + pub fn extend<'a>(&mut self, entries: impl IntoIterator>) -> &mut Self { + todo!() + } + + pub fn try_resolve(&self, signature: &git_actor::SignatureRef<'_>) -> Option { + todo!() + } + + pub fn resolve(&self, signature: &git_actor::SignatureRef<'_>) -> git_actor::Signature { + self.try_resolve(signature).unwrap_or_else(|| signature.to_owned()) + } +} + #[cfg(test)] mod encoded_string { use crate::snapshot::EncodedString; diff --git a/git-mailmap/tests/mailmap.rs b/git-mailmap/tests/mailmap.rs index 06f1a3c69d4..58111c7695e 100644 --- a/git-mailmap/tests/mailmap.rs +++ b/git-mailmap/tests/mailmap.rs @@ -1 +1,2 @@ mod parse; +mod snapshot; diff --git a/git-mailmap/tests/parse/mod.rs b/git-mailmap/tests/parse/mod.rs index 627456dbdfb..c911de76a4b 100644 --- a/git-mailmap/tests/parse/mod.rs +++ b/git-mailmap/tests/parse/mod.rs @@ -1,9 +1,9 @@ use git_mailmap::{parse, Entry}; -use git_testtools::fixture_path; +use git_testtools::fixture_bytes; #[test] fn line_numbers_are_counted_correctly_in_errors() { - let input = std::fs::read(fixture_path("invalid.txt")).unwrap(); + let input = fixture_bytes("invalid.txt"); let mut actual = git_mailmap::parse(&input).collect::>().into_iter(); assert_eq!(actual.len(), 2); @@ -16,7 +16,7 @@ fn line_numbers_are_counted_correctly_in_errors() { #[test] fn a_typical_mailmap() { - let input = std::fs::read(fixture_path("typical.txt")).unwrap(); + let input = fixture_bytes("typical.txt"); let actual = git_mailmap::parse(&input).map(Result::unwrap).collect::>(); assert_eq!( actual, diff --git a/git-mailmap/tests/snapshot/mod.rs b/git-mailmap/tests/snapshot/mod.rs index e69de29bb2d..243a271e92a 100644 --- a/git-mailmap/tests/snapshot/mod.rs +++ b/git-mailmap/tests/snapshot/mod.rs @@ -0,0 +1,31 @@ +use git_mailmap::Snapshot; +use git_testtools::fixture_bytes; + +#[test] +#[ignore] +fn try_resolve() { + let snapshot = snapshot(); + let sig = signature("Foo", "Joe@example.com"); + assert_eq!( + snapshot.try_resolve(&sig.to_ref()), + Some(signature("Joe R. Developer", "Joe@example.com")), + "resolved signatures contain all original fields, but normalizes only what's in the mapping, lookup is case-insensitive" + ); +} + +fn snapshot() -> Snapshot { + Snapshot::from_bytes(&fixture_bytes("typical.txt")) +} + +fn signature(name: &str, email: &str) -> git_actor::Signature { + git_actor::Signature { + name: name.into(), + email: email.into(), + time: git_actor::Time { + // marker + time: 42, + offset: 53, + sign: git_actor::Sign::Minus, + }, + } +} From 81166646e3ecc9ab7383de62a0a216d0229c90bd Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Mon, 28 Mar 2022 17:53:17 +0800 Subject: [PATCH 18/31] actual lookup and tests, all seems to be working (#366) --- git-mailmap/src/entry.rs | 8 +- git-mailmap/src/lib.rs | 2 +- git-mailmap/src/snapshot.rs | 201 +++++++++++++++++++++++++----- git-mailmap/tests/snapshot/mod.rs | 54 +++++++- 4 files changed, 227 insertions(+), 38 deletions(-) diff --git a/git-mailmap/src/entry.rs b/git-mailmap/src/entry.rs index cb83d510483..f956160a2fe 100644 --- a/git-mailmap/src/entry.rs +++ b/git-mailmap/src/entry.rs @@ -5,14 +5,14 @@ impl<'a> Entry<'a> { pub fn change_name_by_email(proper_name: impl Into<&'a BStr>, commit_email: impl Into<&'a BStr>) -> Self { Entry { new_name: Some(proper_name.into()), - old_email: Some(commit_email.into()), + old_email: commit_email.into(), ..Default::default() } } pub fn change_email_by_email(proper_email: impl Into<&'a BStr>, commit_email: impl Into<&'a BStr>) -> Self { Entry { new_email: Some(proper_email.into()), - old_email: Some(commit_email.into()), + old_email: commit_email.into(), ..Default::default() } } @@ -24,7 +24,7 @@ impl<'a> Entry<'a> { Entry { new_name: Some(proper_name.into()), new_email: Some(proper_email.into()), - old_email: Some(commit_email.into()), + old_email: commit_email.into(), ..Default::default() } } @@ -39,7 +39,7 @@ impl<'a> Entry<'a> { new_name: Some(proper_name.into()), new_email: Some(proper_email.into()), old_name: Some(commit_name.into()), - old_email: Some(commit_email.into()), + old_email: commit_email.into(), } } } diff --git a/git-mailmap/src/lib.rs b/git-mailmap/src/lib.rs index addbacfc32b..fad7f22a5de 100644 --- a/git-mailmap/src/lib.rs +++ b/git-mailmap/src/lib.rs @@ -33,5 +33,5 @@ pub struct Entry<'a> { /// The name to look for and replace. old_name: Option<&'a BStr>, /// The email to look for and replace. - old_email: Option<&'a BStr>, + old_email: &'a BStr, } diff --git a/git-mailmap/src/snapshot.rs b/git-mailmap/src/snapshot.rs index 2ffe45603a4..ab543cd7be9 100644 --- a/git-mailmap/src/snapshot.rs +++ b/git-mailmap/src/snapshot.rs @@ -1,5 +1,6 @@ use crate::Snapshot; use bstr::{BStr, BString, ByteSlice}; +use git_actor::{Signature, SignatureRef}; use std::cmp::Ordering; use std::ops::Deref; @@ -26,37 +27,36 @@ impl EncodedString { } #[cfg_attr(test, derive(Debug))] +#[derive(Clone, Copy)] enum EncodedStringRef<'a> { Utf8(&'a str), Unknown(&'a BStr), } -impl EncodedString { - fn to_ref(&self) -> EncodedStringRef<'_> { - match self { - EncodedString::Unknown(v) => EncodedStringRef::Unknown(v.deref().as_bstr()), - EncodedString::Utf8(v) => EncodedStringRef::Utf8(v), +impl<'a> From<&'a BStr> for EncodedStringRef<'a> { + fn from(v: &'a BStr) -> Self { + match v.to_str() { + Ok(v) => EncodedStringRef::Utf8(v), + Err(_) => EncodedStringRef::Unknown(v), } } } -impl Eq for EncodedString {} - -impl PartialEq for EncodedString { - fn eq(&self, other: &Self) -> bool { - self.cmp(other) == Ordering::Equal - } -} - -impl PartialOrd for EncodedString { - fn partial_cmp(&self, other: &Self) -> Option { - self.cmp(other).into() +impl<'a> From> for EncodedString { + fn from(v: EncodedStringRef<'a>) -> Self { + match v { + EncodedStringRef::Utf8(v) => EncodedString::Utf8(v.to_owned()), + EncodedStringRef::Unknown(v) => EncodedString::Unknown(v.to_owned()), + } } } -impl Ord for EncodedString { - fn cmp(&self, other: &Self) -> Ordering { - self.cmp_ref(other.to_ref()) +impl<'a> From<&'a BStr> for EncodedString { + fn from(v: &'a BStr) -> Self { + match v.to_str() { + Ok(v) => EncodedString::Utf8(v.to_owned()), + Err(_) => EncodedString::Unknown(v.to_owned()), + } } } @@ -76,6 +76,77 @@ pub(crate) struct EmailEntry { entries_by_old_name: Vec, } +impl EmailEntry { + fn merge( + &mut self, + crate::Entry { + new_name, + new_email, + old_name, + old_email: _, + }: crate::Entry<'_>, + ) { + let new_email = new_email.map(ToOwned::to_owned); + let new_name = new_name.map(ToOwned::to_owned); + match old_name { + None => { + self.new_email = new_email; + self.new_name = new_name; + } + Some(old_name) => { + let old_name: EncodedStringRef<'_> = old_name.into(); + match self + .entries_by_old_name + .binary_search_by(|e| e.old_name.cmp_ref(old_name)) + { + Ok(pos) => { + let entry = &mut self.entries_by_old_name[pos]; + entry.new_name = new_name; + entry.new_email = new_email; + } + Err(insert_pos) => self.entries_by_old_name.insert( + insert_pos, + NameEntry { + new_name, + new_email, + old_name: old_name.into(), + }, + ), + } + } + } + } +} + +impl<'a> From> for EmailEntry { + fn from( + crate::Entry { + new_name, + new_email, + old_name, + old_email, + }: crate::Entry<'a>, + ) -> Self { + let mut new_name = new_name.map(ToOwned::to_owned); + let mut new_email = new_email.map(ToOwned::to_owned); + let entries_by_old_name = old_name + .map(|name| { + vec![NameEntry { + new_name: new_name.take(), + new_email: new_email.take(), + old_name: name.into(), + }] + }) + .unwrap_or_default(); + EmailEntry { + new_name, + new_email, + old_email: old_email.into(), + entries_by_old_name, + } + } +} + impl Snapshot { pub fn from_bytes(buf: &[u8]) -> Self { Self::new(crate::parse_ignore_errors(buf)) @@ -88,11 +159,42 @@ impl Snapshot { } pub fn extend<'a>(&mut self, entries: impl IntoIterator>) -> &mut Self { - todo!() + for entry in entries { + let old_email: EncodedStringRef<'_> = entry.old_email.into(); + assert!( + entry.new_name.is_some() || entry.new_email.is_some(), + "BUG: encountered entry without any mapped/new name or email." + ); + match self + .entries_by_old_email + .binary_search_by(|e| e.old_email.cmp_ref(old_email)) + { + Ok(pos) => self.entries_by_old_email[pos].merge(entry), + Err(insert_pos) => { + self.entries_by_old_email.insert(insert_pos, entry.into()); + } + }; + } + self } pub fn try_resolve(&self, signature: &git_actor::SignatureRef<'_>) -> Option { - todo!() + let email: EncodedStringRef<'_> = signature.email.into(); + let pos = self + .entries_by_old_email + .binary_search_by(|e| e.old_email.cmp_ref(email)) + .ok()?; + let entry = &self.entries_by_old_email[pos]; + + let name: EncodedStringRef<'_> = signature.name.into(); + + match entry.entries_by_old_name.binary_search_by(|e| e.old_name.cmp_ref(name)) { + Ok(pos) => { + let entry = &entry.entries_by_old_name[pos]; + enriched_signature(signature, entry.new_email.as_ref(), entry.new_name.as_ref()) + } + Err(_) => enriched_signature(signature, entry.new_email.as_ref(), entry.new_name.as_ref()), + } } pub fn resolve(&self, signature: &git_actor::SignatureRef<'_>) -> git_actor::Signature { @@ -100,30 +202,69 @@ impl Snapshot { } } +fn enriched_signature( + SignatureRef { name, email, time }: &SignatureRef<'_>, + new_email: Option<&BString>, + new_name: Option<&BString>, +) -> Option { + let time = time.clone(); + match (new_email, new_name) { + (Some(new_email), Some(new_name)) => git_actor::Signature { + email: new_email.to_owned(), + name: new_name.to_owned(), + time, + } + .into(), + (Some(new_email), None) => git_actor::Signature { + email: new_email.to_owned(), + name: (*name).to_owned(), + time, + } + .into(), + (None, Some(new_name)) => git_actor::Signature { + email: (*email).to_owned(), + name: new_name.to_owned(), + time, + } + .into(), + (None, None) => None, + } +} + #[cfg(test)] mod encoded_string { - use crate::snapshot::EncodedString; + use crate::snapshot::{EncodedString, EncodedStringRef}; + use std::cmp::Ordering; #[test] fn basic_ascii_case_folding() { assert_eq!( - EncodedString::Utf8("FooBar".into()), - EncodedString::Utf8("foobar".into()) - ) + EncodedString::Utf8("FooBar".into()).cmp_ref(EncodedStringRef::Utf8("foobar".into())), + Ordering::Equal + ); } #[test] fn no_advanced_unicode_folding() { - assert_ne!(EncodedString::Utf8("Masse".into()), EncodedString::Utf8("Maße".into())) + assert_ne!( + EncodedString::Utf8("Masse".into()).cmp_ref(EncodedStringRef::Utf8("Maße".into())), + Ordering::Equal + ); } #[test] fn unknown_encoding_pairs_do_not_try_to_ignore_cases() { - assert_ne!(EncodedString::Utf8("Foo".into()), EncodedString::Unknown("foo".into())); - assert_ne!(EncodedString::Unknown("Foo".into()), EncodedString::Utf8("foo".into())); assert_ne!( - EncodedString::Unknown("Foo".into()), - EncodedString::Unknown("foo".into()) + EncodedString::Utf8("Foo".into()).cmp_ref(EncodedStringRef::Unknown("foo".into())), + Ordering::Equal + ); + assert_ne!( + EncodedString::Unknown("Foo".into()).cmp_ref(EncodedStringRef::Utf8("foo".into())), + Ordering::Equal + ); + assert_ne!( + EncodedString::Unknown("Foo".into()).cmp_ref(EncodedStringRef::Unknown("foo".into())), + Ordering::Equal ); } } diff --git a/git-mailmap/tests/snapshot/mod.rs b/git-mailmap/tests/snapshot/mod.rs index 243a271e92a..fb96f9ce5d3 100644 --- a/git-mailmap/tests/snapshot/mod.rs +++ b/git-mailmap/tests/snapshot/mod.rs @@ -2,15 +2,63 @@ use git_mailmap::Snapshot; use git_testtools::fixture_bytes; #[test] -#[ignore] fn try_resolve() { let snapshot = snapshot(); - let sig = signature("Foo", "Joe@example.com"); assert_eq!( - snapshot.try_resolve(&sig.to_ref()), + snapshot.try_resolve(&signature("Foo", "Joe@example.com").to_ref()), Some(signature("Joe R. Developer", "Joe@example.com")), "resolved signatures contain all original fields, but normalizes only what's in the mapping, lookup is case-insensitive" ); + assert_eq!( + snapshot.try_resolve(&signature("Joe", "bugs@example.com").to_ref()), + Some(signature("Joe R. Developer", "joe@example.com")), + "name and email can be mapped specifically" + ); + + assert_eq!( + snapshot.try_resolve(&signature("Jane", "jane@laptop.(none)").to_ref()), + Some(signature("Jane Doe", "jane@example.com")), + "fix name and email by email" + ); + assert_eq!( + snapshot.try_resolve(&signature("Jane", "jane@desktop.(none)").to_ref()), + Some(signature("Jane Doe", "jane@example.com")), + "fix name and email by email" + ); + + assert_eq!( + snapshot.try_resolve(&signature("janE", "Bugs@example.com").to_ref()), + Some(signature("Jane Doe", "jane@example.com")), + "name and email can be mapped specifically, case insensitive matching of name" + ); + + let sig = signature("Jane", "other@example.com"); + assert_eq!(snapshot.try_resolve(&sig.to_ref()), None, "unmatched email"); + + assert_eq!( + snapshot.resolve(&sig.to_ref()), + sig, + "resolution always works here, returning a copy of the original" + ); + + let sig = signature("Jean", "bugs@example.com"); + assert_eq!( + snapshot.try_resolve(&sig.to_ref()), + None, + "matched email, unmatched name" + ); + assert_eq!(snapshot.resolve(&sig.to_ref()), sig); +} + +#[test] +#[ignore] +fn non_name_and_name_mappings_will_not_clash() { + // add mapping from email + // add mapping from name and email + // both should be accessible + + // the same the other way around + todo!() } fn snapshot() -> Snapshot { From 1038dab842b32ec1359a53236b241a91427ccb65 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Mon, 28 Mar 2022 17:56:23 +0800 Subject: [PATCH 19/31] thanks clippy --- git-mailmap/src/snapshot.rs | 12 ++++++------ tests/tools/src/lib.rs | 5 ++++- 2 files changed, 10 insertions(+), 7 deletions(-) diff --git a/git-mailmap/src/snapshot.rs b/git-mailmap/src/snapshot.rs index ab543cd7be9..e19dfba59d5 100644 --- a/git-mailmap/src/snapshot.rs +++ b/git-mailmap/src/snapshot.rs @@ -154,11 +154,11 @@ impl Snapshot { pub fn new<'a>(entries: impl IntoIterator>) -> Self { let mut snapshot = Self::default(); - snapshot.extend(entries); + snapshot.merge(entries); snapshot } - pub fn extend<'a>(&mut self, entries: impl IntoIterator>) -> &mut Self { + pub fn merge<'a>(&mut self, entries: impl IntoIterator>) -> &mut Self { for entry in entries { let old_email: EncodedStringRef<'_> = entry.old_email.into(); assert!( @@ -207,7 +207,7 @@ fn enriched_signature( new_email: Option<&BString>, new_name: Option<&BString>, ) -> Option { - let time = time.clone(); + let time = *time; match (new_email, new_name) { (Some(new_email), Some(new_name)) => git_actor::Signature { email: new_email.to_owned(), @@ -239,7 +239,7 @@ mod encoded_string { #[test] fn basic_ascii_case_folding() { assert_eq!( - EncodedString::Utf8("FooBar".into()).cmp_ref(EncodedStringRef::Utf8("foobar".into())), + EncodedString::Utf8("FooBar".into()).cmp_ref(EncodedStringRef::Utf8("foobar")), Ordering::Equal ); } @@ -247,7 +247,7 @@ mod encoded_string { #[test] fn no_advanced_unicode_folding() { assert_ne!( - EncodedString::Utf8("Masse".into()).cmp_ref(EncodedStringRef::Utf8("Maße".into())), + EncodedString::Utf8("Masse".into()).cmp_ref(EncodedStringRef::Utf8("Maße")), Ordering::Equal ); } @@ -259,7 +259,7 @@ mod encoded_string { Ordering::Equal ); assert_ne!( - EncodedString::Unknown("Foo".into()).cmp_ref(EncodedStringRef::Utf8("foo".into())), + EncodedString::Unknown("Foo".into()).cmp_ref(EncodedStringRef::Utf8("foo")), Ordering::Equal ); assert_ne!( diff --git a/tests/tools/src/lib.rs b/tests/tools/src/lib.rs index a880da99cc6..f39132cacad 100644 --- a/tests/tools/src/lib.rs +++ b/tests/tools/src/lib.rs @@ -27,7 +27,10 @@ pub fn fixture_path(path: impl AsRef) -> PathBuf { PathBuf::from("tests").join("fixtures").join(path.as_ref()) } pub fn fixture_bytes(path: impl AsRef) -> Vec { - std::fs::read(fixture_path(path.as_ref())).expect(&format!("File at '{}' not found", path.as_ref().display())) + match std::fs::read(fixture_path(path.as_ref())) { + Ok(res) => res, + Err(_) => panic!("File at '{}' not found", path.as_ref().display()), + } } pub fn scripted_fixture_repo_read_only( script_name: impl AsRef, From 87fb932239e5f5a55046f8edb073373cd4957422 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Mon, 28 Mar 2022 19:57:45 +0800 Subject: [PATCH 20/31] Add method to return borrowed values for new name/email (#366) That way, one _can_ optimize for performance and be zero copy after the mail map has been created. Maybe this can be useful in the estimate-hours program. --- git-mailmap/src/lib.rs | 2 +- git-mailmap/src/snapshot.rs | 39 ++++++++++++++++++++++++------- git-mailmap/tests/snapshot/mod.rs | 2 +- 3 files changed, 32 insertions(+), 11 deletions(-) diff --git a/git-mailmap/src/lib.rs b/git-mailmap/src/lib.rs index fad7f22a5de..d97c8c3d925 100644 --- a/git-mailmap/src/lib.rs +++ b/git-mailmap/src/lib.rs @@ -14,7 +14,7 @@ pub fn parse_ignore_errors(buf: &[u8]) -> impl Iterator> { mod entry; -mod snapshot; +pub mod snapshot; #[derive(Default, Clone)] pub struct Snapshot { diff --git a/git-mailmap/src/snapshot.rs b/git-mailmap/src/snapshot.rs index e19dfba59d5..57472962421 100644 --- a/git-mailmap/src/snapshot.rs +++ b/git-mailmap/src/snapshot.rs @@ -1,9 +1,26 @@ use crate::Snapshot; use bstr::{BStr, BString, ByteSlice}; -use git_actor::{Signature, SignatureRef}; +use git_actor::SignatureRef; use std::cmp::Ordering; use std::ops::Deref; +pub struct ResolvedSignature<'a> { + pub name: Option<&'a BStr>, + pub email: Option<&'a BStr>, +} + +impl<'a> ResolvedSignature<'a> { + fn try_new(new_email: Option<&'a BString>, new_name: Option<&'a BString>) -> Option { + match (new_email, new_name) { + (None, None) => None, + (new_email, new_name) => Some(ResolvedSignature { + email: new_email.map(|v| v.as_bstr()), + name: new_name.map(|v| v.as_bstr()), + }), + } + } +} + #[cfg_attr(test, derive(Debug))] #[derive(Clone)] enum EncodedString { @@ -178,7 +195,7 @@ impl Snapshot { self } - pub fn try_resolve(&self, signature: &git_actor::SignatureRef<'_>) -> Option { + pub fn try_resolve_ref<'a>(&'a self, signature: &git_actor::SignatureRef<'_>) -> Option> { let email: EncodedStringRef<'_> = signature.email.into(); let pos = self .entries_by_old_email @@ -191,12 +208,17 @@ impl Snapshot { match entry.entries_by_old_name.binary_search_by(|e| e.old_name.cmp_ref(name)) { Ok(pos) => { let entry = &entry.entries_by_old_name[pos]; - enriched_signature(signature, entry.new_email.as_ref(), entry.new_name.as_ref()) + ResolvedSignature::try_new(entry.new_email.as_ref(), entry.new_name.as_ref()) } - Err(_) => enriched_signature(signature, entry.new_email.as_ref(), entry.new_name.as_ref()), + Err(_) => ResolvedSignature::try_new(entry.new_email.as_ref(), entry.new_name.as_ref()), } } + pub fn try_resolve(&self, signature: &git_actor::SignatureRef<'_>) -> Option { + let new = self.try_resolve_ref(signature)?; + enriched_signature(signature, new) + } + pub fn resolve(&self, signature: &git_actor::SignatureRef<'_>) -> git_actor::Signature { self.try_resolve(signature).unwrap_or_else(|| signature.to_owned()) } @@ -204,11 +226,10 @@ impl Snapshot { fn enriched_signature( SignatureRef { name, email, time }: &SignatureRef<'_>, - new_email: Option<&BString>, - new_name: Option<&BString>, -) -> Option { + new: ResolvedSignature<'_>, +) -> Option { let time = *time; - match (new_email, new_name) { + match (new.email, new.name) { (Some(new_email), Some(new_name)) => git_actor::Signature { email: new_email.to_owned(), name: new_name.to_owned(), @@ -227,7 +248,7 @@ fn enriched_signature( time, } .into(), - (None, None) => None, + (None, None) => unreachable!("BUG: ResolvedSignatures don't exist here when nothing is set"), } } diff --git a/git-mailmap/tests/snapshot/mod.rs b/git-mailmap/tests/snapshot/mod.rs index fb96f9ce5d3..57f4454bd06 100644 --- a/git-mailmap/tests/snapshot/mod.rs +++ b/git-mailmap/tests/snapshot/mod.rs @@ -23,7 +23,7 @@ fn try_resolve() { assert_eq!( snapshot.try_resolve(&signature("Jane", "jane@desktop.(none)").to_ref()), Some(signature("Jane Doe", "jane@example.com")), - "fix name and email by email" + "fix name and email by other email" ); assert_eq!( From deaeb7d1730d90d06789c47adad0e25f9b74fd13 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Mon, 28 Mar 2022 20:14:11 +0800 Subject: [PATCH 21/31] Another test to validate correct merging and overwriting (#366) --- git-mailmap/src/lib.rs | 8 ++-- git-mailmap/tests/fixtures/overwrite.txt | 11 +++++ git-mailmap/tests/snapshot/mod.rs | 57 +++++++++++++++++++++--- 3 files changed, 66 insertions(+), 10 deletions(-) create mode 100644 git-mailmap/tests/fixtures/overwrite.txt diff --git a/git-mailmap/src/lib.rs b/git-mailmap/src/lib.rs index d97c8c3d925..694869e94fe 100644 --- a/git-mailmap/src/lib.rs +++ b/git-mailmap/src/lib.rs @@ -27,11 +27,11 @@ pub struct Snapshot { pub struct Entry<'a> { #[cfg_attr(feature = "serde1", serde(borrow))] /// The name to map to. - new_name: Option<&'a BStr>, + pub new_name: Option<&'a BStr>, /// The email map to. - new_email: Option<&'a BStr>, + pub new_email: Option<&'a BStr>, /// The name to look for and replace. - old_name: Option<&'a BStr>, + pub old_name: Option<&'a BStr>, /// The email to look for and replace. - old_email: &'a BStr, + pub old_email: &'a BStr, } diff --git a/git-mailmap/tests/fixtures/overwrite.txt b/git-mailmap/tests/fixtures/overwrite.txt new file mode 100644 index 00000000000..aaaf9ffc0d9 --- /dev/null +++ b/git-mailmap/tests/fixtures/overwrite.txt @@ -0,0 +1,11 @@ +# overwrite previously seen values + +A +A-overwritten + +B +B-overwritten + +C old-C +C-overwritten old-C + diff --git a/git-mailmap/tests/snapshot/mod.rs b/git-mailmap/tests/snapshot/mod.rs index 57f4454bd06..42a3838d45d 100644 --- a/git-mailmap/tests/snapshot/mod.rs +++ b/git-mailmap/tests/snapshot/mod.rs @@ -51,14 +51,59 @@ fn try_resolve() { } #[test] -#[ignore] fn non_name_and_name_mappings_will_not_clash() { - // add mapping from email - // add mapping from name and email - // both should be accessible + let entries = vec![ + // add mapping from email + git_mailmap::Entry { + new_name: Some("new-name".into()), + new_email: Some("new-email".into()), + old_name: None, + old_email: "old-email".into(), + }, + // add mapping from name and email + git_mailmap::Entry { + new_name: Some("other-new-name".into()), + new_email: Some("other-new-email".into()), + old_name: Some("old-name".into()), + old_email: "old-email".into(), + }, + ]; + for entries in vec![entries.clone().into_iter().rev().collect::>(), entries] { + let snapshot = Snapshot::new(entries); + + assert_eq!( + snapshot.try_resolve(&signature("replace-by-email", "old-email").to_ref()), + Some(signature("new-name", "new-email")), + "it can match by email only" + ); + assert_eq!( + snapshot.try_resolve(&signature("old-name", "old-email").to_ref()), + Some(signature("other-new-name", "other-new-email")), + "it can match by email and name as well" + ); + } +} + +#[test] +fn overwrite_entries() { + let snapshot = Snapshot::from_bytes(&fixture_bytes("overwrite.txt")); + assert_eq!( + snapshot.try_resolve(&signature("does not matter", "old-a-email").to_ref()), + Some(signature("A-overwritten", "old-a-email")), + "email only by email" + ); - // the same the other way around - todo!() + assert_eq!( + snapshot.try_resolve(&signature("to be replaced", "old-b-EMAIL").to_ref()), + Some(signature("B-overwritten", "new-b-email-overwritten")), + "name and email by email" + ); + + assert_eq!( + snapshot.try_resolve(&signature("old-c", "old-C-email").to_ref()), + Some(signature("C-overwritten", "new-c-email-overwritten")), + "name and email by name and email" + ); } fn snapshot() -> Snapshot { From 1c768a5511ba4e2f7f860b48429ddd3930a6b501 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Mon, 28 Mar 2022 21:01:32 +0800 Subject: [PATCH 22/31] add all docs (#366) --- git-mailmap/src/entry.rs | 4 ++++ git-mailmap/src/lib.rs | 30 ++++++++++++++++++++---- git-mailmap/src/parse.rs | 3 +++ git-mailmap/src/snapshot.rs | 23 ++++++++++++++++++ git-mailmap/tests/fixtures/overwrite.txt | 2 ++ git-mailmap/tests/snapshot/mod.rs | 25 ++++++++++---------- 6 files changed, 70 insertions(+), 17 deletions(-) diff --git a/git-mailmap/src/entry.rs b/git-mailmap/src/entry.rs index f956160a2fe..667cd3c79f6 100644 --- a/git-mailmap/src/entry.rs +++ b/git-mailmap/src/entry.rs @@ -1,6 +1,10 @@ use crate::Entry; use bstr::BStr; +/// Constructors indicating what kind of mapping is created. +/// +/// Only these combinations of values are valid. +#[allow(missing_docs)] impl<'a> Entry<'a> { pub fn change_name_by_email(proper_name: impl Into<&'a BStr>, commit_email: impl Into<&'a BStr>) -> Self { Entry { diff --git a/git-mailmap/src/lib.rs b/git-mailmap/src/lib.rs index 694869e94fe..c471b5b10bf 100644 --- a/git-mailmap/src/lib.rs +++ b/git-mailmap/src/lib.rs @@ -1,37 +1,57 @@ #![forbid(unsafe_code)] -#![deny(rust_2018_idioms)] +#![deny(rust_2018_idioms, missing_docs)] +//! [Parse][parse()] .mailmap files as used in git repositories and remap names and emails +//! using an [accelerated data-structure][Snapshot]. use bstr::BStr; +/// pub mod parse; + +/// Parse the given `buf` of bytes line by line into mapping [Entries][Entry]. +/// +/// Errors may occour per line, but it's up to the caller to stop iteration when +/// one is encountered. pub fn parse(buf: &[u8]) -> parse::Lines<'_> { parse::Lines::new(buf) } +/// Similar to [parse()], but will skip all lines that didn't parse correctly, silently squelching all errors. pub fn parse_ignore_errors(buf: &[u8]) -> impl Iterator> { parse(buf).filter_map(Result::ok) } mod entry; +/// pub mod snapshot; +/// A data-structure to efficiently store a list of entries for optimal, case-insensitive lookup by email and +/// optionally name to find mappings to new names and/or emails. +/// +/// The memory layout is efficient, even though lots of small allocations are performed to store strings of emails and names. #[derive(Default, Clone)] pub struct Snapshot { /// Sorted by `old_email` entries_by_old_email: Vec, } +/// An typical entry of a mailmap, which always contains an `old_email` by which +/// the mapping is performed to replace the given `new_name` and `new_email`. +/// +/// Optionally, `old_name` is also used for lookup. +/// +/// Typically created by [parse()]. #[derive(PartialEq, Eq, Debug, Hash, Ord, PartialOrd, Clone, Copy, Default)] #[cfg_attr(feature = "serde1", derive(serde::Serialize, serde::Deserialize))] pub struct Entry<'a> { #[cfg_attr(feature = "serde1", serde(borrow))] /// The name to map to. - pub new_name: Option<&'a BStr>, + pub(crate) new_name: Option<&'a BStr>, /// The email map to. - pub new_email: Option<&'a BStr>, + pub(crate) new_email: Option<&'a BStr>, /// The name to look for and replace. - pub old_name: Option<&'a BStr>, + pub(crate) old_name: Option<&'a BStr>, /// The email to look for and replace. - pub old_email: &'a BStr, + pub(crate) old_email: &'a BStr, } diff --git a/git-mailmap/src/parse.rs b/git-mailmap/src/parse.rs index e90968d346c..a0ba7d0258d 100644 --- a/git-mailmap/src/parse.rs +++ b/git-mailmap/src/parse.rs @@ -3,7 +3,9 @@ mod error { use quick_error::quick_error; quick_error! { + /// The error returned by [`parse()`][crate::parse()]. #[derive(Debug)] + #[allow(missing_docs)] pub enum Error { UnconsumedInput { line_number: usize, line: BString } { display("Line {} has too many names or emails, or none at all: {}", line_number, line) @@ -19,6 +21,7 @@ use crate::Entry; use bstr::{BStr, ByteSlice}; pub use error::Error; +/// An iterator to parse mailmap lines on-demand. pub struct Lines<'a> { lines: bstr::Lines<'a>, line_no: usize, diff --git a/git-mailmap/src/snapshot.rs b/git-mailmap/src/snapshot.rs index 57472962421..f885d4e98a0 100644 --- a/git-mailmap/src/snapshot.rs +++ b/git-mailmap/src/snapshot.rs @@ -4,8 +4,11 @@ use git_actor::SignatureRef; use std::cmp::Ordering; use std::ops::Deref; +/// A resolved signature with borrowed fields for a mapped `name` and/or `email`. pub struct ResolvedSignature<'a> { + /// The mapped name. pub name: Option<&'a BStr>, + /// The mapped email. pub email: Option<&'a BStr>, } @@ -165,16 +168,24 @@ impl<'a> From> for EmailEntry { } impl Snapshot { + /// Create a new snapshot from the given bytes buffer, ignoring all parse errors that may occour on a line-by-line basis. + /// + /// This is similar to what git does. pub fn from_bytes(buf: &[u8]) -> Self { Self::new(crate::parse_ignore_errors(buf)) } + /// Create a new instance from `entries`. + /// + /// These can be obtained using [crate::parse()]. pub fn new<'a>(entries: impl IntoIterator>) -> Self { let mut snapshot = Self::default(); snapshot.merge(entries); snapshot } + /// Merge the given `entries` into this instance, possibly overwriting existing mappings with + /// new ones should they collide. pub fn merge<'a>(&mut self, entries: impl IntoIterator>) -> &mut Self { for entry in entries { let old_email: EncodedStringRef<'_> = entry.old_email.into(); @@ -195,6 +206,10 @@ impl Snapshot { self } + /// Try to resolve `signature` by its contained email and name and provide resolved/mapped names as reference. + /// Return `None` if no such mapping was found. + /// + /// This is the fastest possible lookup as there is no allocation. pub fn try_resolve_ref<'a>(&'a self, signature: &git_actor::SignatureRef<'_>) -> Option> { let email: EncodedStringRef<'_> = signature.email.into(); let pos = self @@ -214,11 +229,19 @@ impl Snapshot { } } + /// Try to resolve `signature` by its contained email and name and provide resolved/mapped names as owned signature, + /// with the mapped name and/or email replaced accordingly. + /// + /// Return `None` if no such mapping was found. pub fn try_resolve(&self, signature: &git_actor::SignatureRef<'_>) -> Option { let new = self.try_resolve_ref(signature)?; enriched_signature(signature, new) } + /// Like [`try_resolve()`][Snapshot::try_resolve()], but always returns an owned signature, which might be a copy + /// of `signature` if no mapping was found. + /// + /// Note that this method will always allocate. pub fn resolve(&self, signature: &git_actor::SignatureRef<'_>) -> git_actor::Signature { self.try_resolve(signature).unwrap_or_else(|| signature.to_owned()) } diff --git a/git-mailmap/tests/fixtures/overwrite.txt b/git-mailmap/tests/fixtures/overwrite.txt index aaaf9ffc0d9..5ab9312f2be 100644 --- a/git-mailmap/tests/fixtures/overwrite.txt +++ b/git-mailmap/tests/fixtures/overwrite.txt @@ -9,3 +9,5 @@ B-overwritten C old-C C-overwritten old-C + + diff --git a/git-mailmap/tests/snapshot/mod.rs b/git-mailmap/tests/snapshot/mod.rs index 42a3838d45d..bf814838c34 100644 --- a/git-mailmap/tests/snapshot/mod.rs +++ b/git-mailmap/tests/snapshot/mod.rs @@ -54,19 +54,14 @@ fn try_resolve() { fn non_name_and_name_mappings_will_not_clash() { let entries = vec![ // add mapping from email - git_mailmap::Entry { - new_name: Some("new-name".into()), - new_email: Some("new-email".into()), - old_name: None, - old_email: "old-email".into(), - }, + git_mailmap::Entry::change_name_and_email_by_email("new-name", "new-email", "old-email"), // add mapping from name and email - git_mailmap::Entry { - new_name: Some("other-new-name".into()), - new_email: Some("other-new-email".into()), - old_name: Some("old-name".into()), - old_email: "old-email".into(), - }, + git_mailmap::Entry::change_name_and_email_by_name_and_email( + "other-new-name", + "other-new-email", + "old-name", + "old-email", + ), ]; for entries in vec![entries.clone().into_iter().rev().collect::>(), entries] { let snapshot = Snapshot::new(entries); @@ -104,6 +99,12 @@ fn overwrite_entries() { Some(signature("C-overwritten", "new-c-email-overwritten")), "name and email by name and email" ); + + assert_eq!( + snapshot.try_resolve(&signature("unchanged", "old-d-email").to_ref()), + Some(signature("unchanged", "new-d-email-overwritten")), + "email by email" + ); } fn snapshot() -> Snapshot { From e31754d3d9c58f82ff4fad11ed7985dffc99b586 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Tue, 29 Mar 2022 10:11:17 +0800 Subject: [PATCH 23/31] fix email-only case (#366) --- git-mailmap/src/parse.rs | 8 +++++++- git-mailmap/tests/parse/mod.rs | 14 ++++++++++++++ 2 files changed, 21 insertions(+), 1 deletion(-) diff --git a/git-mailmap/src/parse.rs b/git-mailmap/src/parse.rs index a0ba7d0258d..15209256b8a 100644 --- a/git-mailmap/src/parse.rs +++ b/git-mailmap/src/parse.rs @@ -77,7 +77,13 @@ fn parse_line(line: &BStr, line_number: usize) -> Result, Error> { (Some(proper_name), Some(proper_email), Some(commit_name), Some(commit_email)) => { Entry::change_name_and_email_by_name_and_email(proper_name, proper_email, commit_name, commit_email) } - _ => unreachable!("{:?}", line), + _ => { + return Err(Error::Malformed { + line_number, + line: line.into(), + message: "Emails without a name or email to map to are invalid".into(), + }) + } }) } diff --git a/git-mailmap/tests/parse/mod.rs b/git-mailmap/tests/parse/mod.rs index c911de76a4b..43b85030f38 100644 --- a/git-mailmap/tests/parse/mod.rs +++ b/git-mailmap/tests/parse/mod.rs @@ -65,6 +65,7 @@ fn valid_entries() { Entry::change_name_and_email_by_name_and_email("proper name", "proper email", "commit name", "commit-email") ); } + #[test] fn error_if_there_is_just_a_name() { assert!(matches!( @@ -73,6 +74,19 @@ fn error_if_there_is_just_a_name() { )); } +#[test] +fn error_if_there_is_just_an_email() { + assert!(matches!( + try_line(""), + Err(parse::Error::Malformed { line_number: 1, .. }) + )); + + assert!(matches!( + try_line(" \t "), + Err(parse::Error::Malformed { line_number: 1, .. }) + )); +} + #[test] fn error_if_email_is_empty() { assert!(matches!( From f498dacfc34c3d0b7c3bdbf100e456ad3ade419e Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Tue, 29 Mar 2022 10:17:26 +0800 Subject: [PATCH 24/31] verify universal line-endings are supported (#366) --- git-mailmap/tests/parse/mod.rs | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/git-mailmap/tests/parse/mod.rs b/git-mailmap/tests/parse/mod.rs index 43b85030f38..12166a2d615 100644 --- a/git-mailmap/tests/parse/mod.rs +++ b/git-mailmap/tests/parse/mod.rs @@ -46,6 +46,21 @@ fn empty_lines_and_comments_are_ignored() { ); } +#[test] +fn windows_and_unix_line_endings_are_supported() { + let actual = git_mailmap::parse(b"a \n\r\nc ") + .map(Result::unwrap) + .collect::>(); + assert_eq!( + actual, + vec![ + Entry::change_name_by_email("a", "a@example.com"), + Entry::change_email_by_email("b-new", "b-old"), + Entry::change_name_by_email("c", "c@example.com") + ] + ); +} + #[test] fn valid_entries() { assert_eq!( From e3bc1b410409a9e27894a5cac48b06d8c3295e36 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Tue, 29 Mar 2022 11:01:48 +0800 Subject: [PATCH 25/31] feat: unstable mailmap module (#366) --- Cargo.lock | 1 + git-repository/Cargo.toml | 3 ++- git-repository/src/lib.rs | 3 +++ gitoxide-core/src/mailmap.rs | 0 4 files changed, 6 insertions(+), 1 deletion(-) create mode 100644 gitoxide-core/src/mailmap.rs diff --git a/Cargo.lock b/Cargo.lock index f9564ce5837..ca37122dca9 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1386,6 +1386,7 @@ dependencies = [ "git-hash", "git-index", "git-lock", + "git-mailmap", "git-object", "git-odb", "git-pack", diff --git a/git-repository/Cargo.toml b/git-repository/Cargo.toml index 58a924766f8..4453240d713 100644 --- a/git-repository/Cargo.toml +++ b/git-repository/Cargo.toml @@ -49,7 +49,7 @@ max-performance = ["git-features/parallel", "git-features/zlib-ng-compat", "git- local-time-support = ["git-actor/local-time-support"] ## Re-export stability tier 2 crates for convenience and make `Repository` struct fields with types from these crates publicly accessible. ## Doing so is less stable than the stability tier 1 that `git-repository` is a member of. -unstable = ["git-index", "git-worktree"] +unstable = ["git-index", "git-worktree", "git-mailmap"] ## Print debugging information about usage of object database caches, useful for tuning cache sizes. cache-efficiency-debug = ["git-features/cache-efficiency-debug"] @@ -73,6 +73,7 @@ git-traverse = { version = "^0.12.0", path = "../git-traverse" } git-protocol = { version = "^0.14.0", path = "../git-protocol", optional = true } git-transport = { version = "^0.15.0", path = "../git-transport", optional = true } git-diff = { version = "^0.13.0", path = "../git-diff", optional = true } +git-mailmap = { version = "0.0.0", path = "../git-mailmap", optional = true } git-features = { version = "^0.19.1", path = "../git-features", features = ["progress"] } # unstable only diff --git a/git-repository/src/lib.rs b/git-repository/src/lib.rs index 5fc2ef6fb51..37d68c22119 100644 --- a/git-repository/src/lib.rs +++ b/git-repository/src/lib.rs @@ -89,6 +89,7 @@ //! * [`bstr`][bstr] //! * [`index`] //! * [`worktree`] +//! * [`mailmap`] //! * [`objs`] //! * [`odb`] //! * [`pack`][odb::pack] @@ -129,6 +130,8 @@ pub use git_hash as hash; #[cfg(all(feature = "unstable", feature = "git-index"))] pub use git_index as index; pub use git_lock as lock; +#[cfg(all(feature = "unstable", feature = "git-worktree"))] +pub use git_mailmap as mailmap; pub use git_object as objs; pub use git_object::bstr; #[cfg(feature = "unstable")] diff --git a/gitoxide-core/src/mailmap.rs b/gitoxide-core/src/mailmap.rs new file mode 100644 index 00000000000..e69de29bb2d From 384ed665c7423feca1b1ee1f81db10867fa813a8 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Tue, 29 Mar 2022 11:02:10 +0800 Subject: [PATCH 26/31] feat: `gix mailmap verify` command (#366) --- gitoxide-core/src/lib.rs | 1 + gitoxide-core/src/mailmap.rs | 26 ++++++++++++++++++++++++++ src/plumbing/main.rs | 11 +++++++++++ src/plumbing/options.rs | 24 ++++++++++++++++++++++++ 4 files changed, 62 insertions(+) diff --git a/gitoxide-core/src/lib.rs b/gitoxide-core/src/lib.rs index ed27b601f77..30af836bfce 100644 --- a/gitoxide-core/src/lib.rs +++ b/gitoxide-core/src/lib.rs @@ -46,6 +46,7 @@ pub mod commitgraph; #[cfg(feature = "estimate-hours")] pub mod hours; pub mod index; +pub mod mailmap; #[cfg(feature = "organize")] pub mod organize; pub mod pack; diff --git a/gitoxide-core/src/mailmap.rs b/gitoxide-core/src/mailmap.rs index e69de29bb2d..04d985853c8 100644 --- a/gitoxide-core/src/mailmap.rs +++ b/gitoxide-core/src/mailmap.rs @@ -0,0 +1,26 @@ +use crate::OutputFormat; +use anyhow::{bail, Context}; +use git_repository as git; +use std::io::Write; +use std::path::Path; + +pub const PROGRESS_RANGE: std::ops::RangeInclusive = 1..=2; + +pub fn verify(path: impl AsRef, format: OutputFormat, mut out: impl Write) -> anyhow::Result<()> { + if format != OutputFormat::Human { + bail!("Only 'human' format is currently supported"); + } + let path = path.as_ref(); + let buf = std::fs::read(path).with_context(|| format!("Failed to read mailmap file at '{}'", path.display()))?; + let mut err_count = 0; + for err in git::mailmap::parse(&buf).filter_map(Result::err) { + err_count += 1; + writeln!(out, "{}", err)?; + } + if err_count == 0 { + writeln!(out, "{} lines OK", git::mailmap::parse(&buf).count())?; + Ok(()) + } else { + bail!("{} lines in '{}' could not be parsed", err_count, path.display()); + } +} diff --git a/src/plumbing/main.rs b/src/plumbing/main.rs index dd72bd6f48c..11b47ccf4e0 100644 --- a/src/plumbing/main.rs +++ b/src/plumbing/main.rs @@ -12,6 +12,7 @@ use clap::Parser; use gitoxide_core as core; use gitoxide_core::pack::verify; +use crate::plumbing::options::mailmap; use crate::plumbing::options::pack::multi_index; #[cfg(any(feature = "gitoxide-core-async-client", feature = "gitoxide-core-blocking-client"))] use crate::plumbing::options::remote; @@ -74,6 +75,16 @@ pub fn main() -> Result<()> { })?; match cmd { + Subcommands::Mailmap(mailmap::Platform { path, cmd }) => match cmd { + mailmap::Subcommands::Verify => prepare_and_run( + "mailmap-verify", + verbose, + progress, + progress_keep_open, + core::mailmap::PROGRESS_RANGE, + move |_progress, out, _err| core::mailmap::verify(path, format, out), + ), + }, Subcommands::Index(index::Platform { object_hash, index_path, diff --git a/src/plumbing/options.rs b/src/plumbing/options.rs index 4a75745dcf6..37290700461 100644 --- a/src/plumbing/options.rs +++ b/src/plumbing/options.rs @@ -60,6 +60,8 @@ pub enum Subcommands { Index(index::Platform), /// Subcommands for interacting with entire git repositories Repository(repo::Platform), + /// Subcommands for interacting with mailmaps + Mailmap(mailmap::Platform), } /// @@ -448,6 +450,28 @@ pub mod index { } } +/// +pub mod mailmap { + use std::path::PathBuf; + + #[derive(Debug, clap::Parser)] + pub struct Platform { + /// The path to the mailmap file. + #[clap(short = 'p', long, default_value = ".mailmap")] + pub path: PathBuf, + + /// Subcommands + #[clap(subcommand)] + pub cmd: Subcommands, + } + + #[derive(Debug, clap::Subcommand)] + pub enum Subcommands { + /// Parse all entries in the mailmap and report malformed lines. + Verify, + } +} + /// pub mod commitgraph { use std::path::PathBuf; From f89fe2f867fa792db5d9e003ce342a337a6ac973 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Tue, 29 Mar 2022 11:10:22 +0800 Subject: [PATCH 27/31] gix mailmap verify can now detect collisions (#366) --- git-mailmap/src/entry.rs | 20 ++++++++++++++++++++ gitoxide-core/src/mailmap.rs | 16 ++++++++++++++++ src/plumbing/options.rs | 2 +- 3 files changed, 37 insertions(+), 1 deletion(-) diff --git a/git-mailmap/src/entry.rs b/git-mailmap/src/entry.rs index 667cd3c79f6..7b2d43963f1 100644 --- a/git-mailmap/src/entry.rs +++ b/git-mailmap/src/entry.rs @@ -1,6 +1,26 @@ use crate::Entry; use bstr::BStr; +/// Acccess +impl<'a> Entry<'a> { + /// The name to map to. + pub fn new_name(&self) -> Option<&'a BStr> { + self.new_name + } + /// The email map to. + pub fn new_email(&self) -> Option<&'a BStr> { + self.new_email + } + /// The name to look for and replace. + pub fn old_name(&self) -> Option<&'a BStr> { + self.old_name + } + /// The email to look for and replace. + pub fn old_email(&self) -> &'a BStr { + self.old_email + } +} + /// Constructors indicating what kind of mapping is created. /// /// Only these combinations of values are valid. diff --git a/gitoxide-core/src/mailmap.rs b/gitoxide-core/src/mailmap.rs index 04d985853c8..95c6abccbb7 100644 --- a/gitoxide-core/src/mailmap.rs +++ b/gitoxide-core/src/mailmap.rs @@ -1,6 +1,7 @@ use crate::OutputFormat; use anyhow::{bail, Context}; use git_repository as git; +use std::collections::HashSet; use std::io::Write; use std::path::Path; @@ -17,6 +18,21 @@ pub fn verify(path: impl AsRef, format: OutputFormat, mut out: impl Write) err_count += 1; writeln!(out, "{}", err)?; } + + let mut seen = HashSet::<(_, _)>::default(); + for entry in git::mailmap::parse(&buf).filter_map(Result::ok) { + if !seen.insert((entry.old_email(), entry.old_name())) { + writeln!( + out, + "NOTE: entry ({:?}, {:?}) -> ({:?}, {:?}) is being overwritten", + entry.old_email(), + entry.old_name(), + entry.new_email(), + entry.new_name() + )?; + } + } + if err_count == 0 { writeln!(out, "{} lines OK", git::mailmap::parse(&buf).count())?; Ok(()) diff --git a/src/plumbing/options.rs b/src/plumbing/options.rs index 37290700461..f345bfcc5ca 100644 --- a/src/plumbing/options.rs +++ b/src/plumbing/options.rs @@ -467,7 +467,7 @@ pub mod mailmap { #[derive(Debug, clap::Subcommand)] pub enum Subcommands { - /// Parse all entries in the mailmap and report malformed lines. + /// Parse all entries in the mailmap and report malformed lines or collisions. Verify, } } From c8c87ec12f7ff5061132f9e67828d59ac51a8043 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Tue, 29 Mar 2022 12:07:45 +0800 Subject: [PATCH 28/31] frame for `Repository::load_mailmap_into()` (#366) What's new is that it is made to be very resilient to errors and does as much as possible. --- git-repository/src/lib.rs | 23 ++++++- git-repository/src/repository/location.rs | 24 ++----- git-repository/src/repository/mod.rs | 2 + git-repository/src/repository/snapshots.rs | 77 ++++++++++++++++++++++ 4 files changed, 105 insertions(+), 21 deletions(-) create mode 100644 git-repository/src/repository/snapshots.rs diff --git a/git-repository/src/lib.rs b/git-repository/src/lib.rs index 37d68c22119..106c2ce79fc 100644 --- a/git-repository/src/lib.rs +++ b/git-repository/src/lib.rs @@ -130,8 +130,6 @@ pub use git_hash as hash; #[cfg(all(feature = "unstable", feature = "git-index"))] pub use git_index as index; pub use git_lock as lock; -#[cfg(all(feature = "unstable", feature = "git-worktree"))] -pub use git_mailmap as mailmap; pub use git_object as objs; pub use git_object::bstr; #[cfg(feature = "unstable")] @@ -242,6 +240,27 @@ mod config { } } +/// +pub mod mailmap { + #[cfg(all(feature = "unstable", feature = "git-worktree"))] + pub use git_mailmap::*; + + /// + pub mod load { + /// The error returned by [`crate::Repository::load_mailmap()`]. + #[derive(Debug, thiserror::Error)] + #[allow(missing_docs)] + pub enum Error { + #[error("A mailmap file could not be loaded from disk")] + Io(#[from] std::io::Error), + #[error("The configured mailmap.blob could not be parsed")] + BlobSpec(#[from] git_hash::decode::Error), + #[error(transparent)] + PathInterpolate(#[from] git_config::values::path::interpolate::Error), + } + } +} + /// pub mod init { use std::{convert::TryInto, path::Path}; diff --git a/git-repository/src/repository/location.rs b/git-repository/src/repository/location.rs index d00c2051ef5..0d05f779773 100644 --- a/git-repository/src/repository/location.rs +++ b/git-repository/src/repository/location.rs @@ -9,6 +9,11 @@ impl crate::Repository { self.work_tree.as_deref() } + /// The directory of the binary path of the current process. + pub fn install_directory(&self) -> std::io::Result { + std::env::current_exe() + } + /// Return the kind of repository, either bare or one with a work tree. pub fn kind(&self) -> crate::Kind { match self.work_tree { @@ -23,23 +28,4 @@ impl crate::Repository { pub fn git_dir(&self) -> &std::path::Path { self.refs.base() } - - // TODO: tests - /// Load the index file of this repository's workspace, if present. - /// - /// Note that it is loaded into memory each time this method is called, but also is independent of the workspace. - #[cfg(feature = "git-index")] - pub fn load_index(&self) -> Option> { - // TODO: choose better/correct options - let opts = git_index::decode::Options { - object_hash: self.object_hash, - thread_limit: None, - min_extension_block_in_bytes_for_threading: 1024 * 256, - }; - match git_index::File::at(self.git_dir().join("index"), opts) { - Ok(index) => Some(Ok(index)), - Err(git_index::file::init::Error::Io(err)) if err.kind() == std::io::ErrorKind::NotFound => None, - Err(err) => Some(Err(err)), - } - } } diff --git a/git-repository/src/repository/mod.rs b/git-repository/src/repository/mod.rs index 7a2253d912a..e13f2483387 100644 --- a/git-repository/src/repository/mod.rs +++ b/git-repository/src/repository/mod.rs @@ -78,6 +78,8 @@ mod init { mod location; +mod snapshots; + mod trait_impls; mod cache; diff --git a/git-repository/src/repository/snapshots.rs b/git-repository/src/repository/snapshots.rs new file mode 100644 index 00000000000..2aa5782051b --- /dev/null +++ b/git-repository/src/repository/snapshots.rs @@ -0,0 +1,77 @@ +impl crate::Repository { + // TODO: tests + /// Load the index file of this repository's workspace, if present. + /// + /// Note that it is loaded into memory each time this method is called, but also is independent of the workspace. + #[cfg(feature = "git-index")] + pub fn load_index(&self) -> Option> { + // TODO: choose better/correct options + let opts = git_index::decode::Options { + object_hash: self.object_hash, + thread_limit: None, + min_extension_block_in_bytes_for_threading: 1024 * 256, + }; + match git_index::File::at(self.git_dir().join("index"), opts) { + Ok(index) => Some(Ok(index)), + Err(git_index::file::init::Error::Io(err)) if err.kind() == std::io::ErrorKind::NotFound => None, + Err(err) => Some(Err(err)), + } + } + + // TODO: tests + /// Try to merge mailmaps from the following locations into `target`: + /// + /// - read `HEAD:.mailmap` if this repository is bare (i.e. has no working tree), if the blob is not configured. + /// - OR read the `.mailmap` file without following symlinks from the working tree, if present + /// - AND read the mailmap as configured in `mailmap.blob`, if set. + /// - AND read the file as configured by `mailmap.file` if set. + /// + /// Only the first error will be reported, and as many source mailmaps will be merged into `target` as possible. + /// Parsing errors will be ignored. + #[cfg(feature = "git-mailmap")] + pub fn load_mailmap_into(&self, target: &mut git_mailmap::Snapshot) -> Result<(), crate::mailmap::load::Error> { + let mut err = None::; + self.config + .get_raw_value("mailmap", None, "blob") + .ok() + .and_then(|spec| { + // TODO: actually resolve the spec when available + match git_hash::ObjectId::from_hex(spec.as_ref()) { + Ok(id) => Some(id), + Err(e) => { + err.get_or_insert(e.into()); + None + } + } + }); + match self.work_tree() { + None => { + todo!("read blob") + } + Some(root) => { + todo!("read mailmap file in tree") + } + } + + let configured_path = self + .config + .value::>("mailmap", None, "file") + .ok() + .and_then(|path| { + let install_dir = self.install_directory().ok()?; + match path.interpolate(Some(install_dir.as_path())) { + Ok(path) => Some(path), + Err(e) => { + err.get_or_insert(e.into()); + None + } + } + }); + + if let Some(path) = configured_path { + todo!("read mailmap file as configured") + } + + err.map(Err).unwrap_or(Ok(())) + } +} From 98d745e8080975a91cff1ce75e187258c851d3f4 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Tue, 29 Mar 2022 14:03:39 +0800 Subject: [PATCH 29/31] the first possibly working version of loading a mailmap with multiple sources (#366) --- Cargo.lock | 2 +- git-features/Cargo.toml | 5 ++ git-features/src/fs.rs | 16 ++++++ git-repository/src/lib.rs | 2 + git-repository/src/repository/snapshots.rs | 57 +++++++++++++++------- git-worktree/Cargo.toml | 3 -- git-worktree/src/index/entry.rs | 10 +--- 7 files changed, 65 insertions(+), 30 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index ca37122dca9..8cb67f5c273 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1167,6 +1167,7 @@ dependencies = [ "flate2", "git-hash", "jwalk", + "libc", "num_cpus", "parking_lot 0.12.0", "prodash", @@ -1523,7 +1524,6 @@ dependencies = [ "git-odb", "git-testtools", "io-close", - "libc", "serde", "symlink", "tempfile", diff --git a/git-features/Cargo.toml b/git-features/Cargo.toml index f25bccaf46b..8715ad6766b 100644 --- a/git-features/Cargo.toml +++ b/git-features/Cargo.toml @@ -129,6 +129,11 @@ bstr = { version = "0.2.17", optional = true, default-features = false, features document-features = { version = "0.2.0", optional = true } +[target.'cfg(unix)'.dependencies] +## for fs::open_options_no_follow() +# TODO: ideally we conditionally import it only if a plumbing crate wants to use the underlying feature. Wanted to avoid to add to feature-toggle complexity. +libc = { version = "0.2.119" } + [dev-dependencies] bstr = { version = "0.2.15", default-features = false } diff --git a/git-features/src/fs.rs b/git-features/src/fs.rs index c969d7fd051..7e60d246c99 100644 --- a/git-features/src/fs.rs +++ b/git-features/src/fs.rs @@ -53,3 +53,19 @@ pub mod walkdir { #[cfg(any(feature = "walkdir", feature = "jwalk"))] pub use self::walkdir::{walkdir_new, walkdir_sorted_new, WalkDir}; + +/// Prepare open options which won't follow symlinks when the file is opened. +/// +/// Note: only effective on unix currently. +pub fn open_options_no_follow() -> std::fs::OpenOptions { + let mut options = std::fs::OpenOptions::new(); + #[cfg(unix)] + { + /// Make sure that it's impossible to follow through to the target of symlinks. + /// Note that this will still follow symlinks in the path, which is what we assume + /// has been checked separately. + use std::os::unix::fs::OpenOptionsExt; + options.custom_flags(libc::O_NOFOLLOW); + } + options +} diff --git a/git-repository/src/lib.rs b/git-repository/src/lib.rs index 106c2ce79fc..14a427d049a 100644 --- a/git-repository/src/lib.rs +++ b/git-repository/src/lib.rs @@ -257,6 +257,8 @@ pub mod mailmap { BlobSpec(#[from] git_hash::decode::Error), #[error(transparent)] PathInterpolate(#[from] git_config::values::path::interpolate::Error), + #[error("Could not find object configured in `mailmap.blob`")] + FindExisting(#[from] crate::object::find::existing::OdbError), } } } diff --git a/git-repository/src/repository/snapshots.rs b/git-repository/src/repository/snapshots.rs index 2aa5782051b..0d23d0c294a 100644 --- a/git-repository/src/repository/snapshots.rs +++ b/git-repository/src/repository/snapshots.rs @@ -21,38 +21,57 @@ impl crate::Repository { // TODO: tests /// Try to merge mailmaps from the following locations into `target`: /// - /// - read `HEAD:.mailmap` if this repository is bare (i.e. has no working tree), if the blob is not configured. - /// - OR read the `.mailmap` file without following symlinks from the working tree, if present - /// - AND read the mailmap as configured in `mailmap.blob`, if set. - /// - AND read the file as configured by `mailmap.file` if set. + /// - read the `.mailmap` file without following symlinks from the working tree, if present + /// - OR read `HEAD:.mailmap` if this repository is bare (i.e. has no working tree), if the `mailmap.blob` is not set. + /// - read the mailmap as configured in `mailmap.blob`, if set. + /// - read the file as configured by `mailmap.file`, following symlinks, if set. /// /// Only the first error will be reported, and as many source mailmaps will be merged into `target` as possible. /// Parsing errors will be ignored. #[cfg(feature = "git-mailmap")] pub fn load_mailmap_into(&self, target: &mut git_mailmap::Snapshot) -> Result<(), crate::mailmap::load::Error> { let mut err = None::; - self.config + let mut buf = Vec::new(); + let mut blob_id = self + .config .get_raw_value("mailmap", None, "blob") .ok() .and_then(|spec| { - // TODO: actually resolve the spec when available - match git_hash::ObjectId::from_hex(spec.as_ref()) { - Ok(id) => Some(id), - Err(e) => { - err.get_or_insert(e.into()); - None - } - } + // TODO: actually resolve this as spec (once we can do that) + git_hash::ObjectId::from_hex(spec.as_ref()) + .map_err(|e| err.get_or_insert(e.into())) + .ok() }); match self.work_tree() { None => { - todo!("read blob") + // TODO: replace with ref-spec `HEAD:.mailmap` for less verbose way of getting the blob id + blob_id = blob_id.or_else(|| { + self.head().ok().and_then(|mut head| { + let commit = head.peel_to_commit_in_place().ok()?; + let tree = commit.tree().ok()?; + tree.lookup_path(".mailmap").ok()?.map(|e| e.oid) + }) + }); } Some(root) => { - todo!("read mailmap file in tree") + if let Ok(mut file) = git_features::fs::open_options_no_follow() + .read(true) + .open(root.join(".mailmap")) + .map_err(|e| { + err.get_or_insert(e.into()); + }) + { + buf.clear(); + std::io::copy(&mut file, &mut buf).map_err(|e| err.get_or_insert(e.into())); + target.merge(git_mailmap::parse_ignore_errors(&buf)); + } } } + if let Some(blob) = blob_id.and_then(|id| self.find_object(id).map_err(|e| err.get_or_insert(e.into())).ok()) { + target.merge(git_mailmap::parse_ignore_errors(&blob.data)); + } + let configured_path = self .config .value::>("mailmap", None, "file") @@ -68,8 +87,12 @@ impl crate::Repository { } }); - if let Some(path) = configured_path { - todo!("read mailmap file as configured") + if let Some(mut file) = + configured_path.and_then(|path| std::fs::File::open(path).map_err(|e| err.get_or_insert(e.into())).ok()) + { + buf.clear(); + std::io::copy(&mut file, &mut buf).map_err(|e| err.get_or_insert(e.into())); + target.merge(git_mailmap::parse_ignore_errors(&buf)); } err.map(Err).unwrap_or(Ok(())) diff --git a/git-worktree/Cargo.toml b/git-worktree/Cargo.toml index fb1d7268b65..688ec6dc470 100644 --- a/git-worktree/Cargo.toml +++ b/git-worktree/Cargo.toml @@ -42,9 +42,6 @@ bstr = { version = "0.2.13", default-features = false } document-features = { version = "0.2.0", optional = true } io-close = "0.3.7" -[target.'cfg(unix)'.dependencies] -libc = "0.2.119" - [dev-dependencies] git-testtools = { path = "../tests/tools" } git-odb = { path = "../git-odb" } diff --git a/git-worktree/src/index/entry.rs b/git-worktree/src/index/entry.rs index bc55df893b5..2cdb507c914 100644 --- a/git-worktree/src/index/entry.rs +++ b/git-worktree/src/index/entry.rs @@ -155,19 +155,11 @@ fn open_options(path: &Path, destination_is_initially_empty: bool, overwrite_exi if overwrite_existing || !destination_is_initially_empty { debug_assert_dest_is_no_symlink(path); } - let mut options = OpenOptions::new(); + let mut options = git_features::fs::open_options_no_follow(); options .create_new(destination_is_initially_empty && !overwrite_existing) .create(!destination_is_initially_empty || overwrite_existing) .write(true); - #[cfg(unix)] - { - /// Make sure that it's impossible to follow through to the target of symlinks. - /// Note that this will still follow symlinks in the path, which is what we assume - /// has been checked separately. - use std::os::unix::fs::OpenOptionsExt; - options.custom_flags(libc::O_NOFOLLOW); - } options } From 2a01f4728ae858b47280b587501d343fdb86655d Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Tue, 29 Mar 2022 14:20:07 +0800 Subject: [PATCH 30/31] frame for printing mailmap entries using git-repository (#366) --- git-repository/src/repository/snapshots.rs | 18 +++++++++++++---- gitoxide-core/src/repository/mailmap.rs | 23 ++++++++++++++++++++++ gitoxide-core/src/repository/mod.rs | 2 ++ src/plumbing/main.rs | 10 ++++++++++ src/plumbing/options.rs | 13 ++++++++++++ 5 files changed, 62 insertions(+), 4 deletions(-) create mode 100644 gitoxide-core/src/repository/mailmap.rs diff --git a/git-repository/src/repository/snapshots.rs b/git-repository/src/repository/snapshots.rs index 0d23d0c294a..e166a81d164 100644 --- a/git-repository/src/repository/snapshots.rs +++ b/git-repository/src/repository/snapshots.rs @@ -49,7 +49,7 @@ impl crate::Repository { self.head().ok().and_then(|mut head| { let commit = head.peel_to_commit_in_place().ok()?; let tree = commit.tree().ok()?; - tree.lookup_path(".mailmap").ok()?.map(|e| e.oid) + tree.lookup_path(std::iter::once(".mailmap")).ok()?.map(|e| e.oid) }) }); } @@ -58,11 +58,15 @@ impl crate::Repository { .read(true) .open(root.join(".mailmap")) .map_err(|e| { - err.get_or_insert(e.into()); + if e.kind() != std::io::ErrorKind::NotFound { + err.get_or_insert(e.into()); + } }) { buf.clear(); - std::io::copy(&mut file, &mut buf).map_err(|e| err.get_or_insert(e.into())); + std::io::copy(&mut file, &mut buf) + .map_err(|e| err.get_or_insert(e.into())) + .ok(); target.merge(git_mailmap::parse_ignore_errors(&buf)); } } @@ -91,7 +95,13 @@ impl crate::Repository { configured_path.and_then(|path| std::fs::File::open(path).map_err(|e| err.get_or_insert(e.into())).ok()) { buf.clear(); - std::io::copy(&mut file, &mut buf).map_err(|e| err.get_or_insert(e.into())); + std::io::copy(&mut file, &mut buf) + .map_err(|e| { + if e.kind() != std::io::ErrorKind::NotFound { + err.get_or_insert(e.into()); + } + }) + .ok(); target.merge(git_mailmap::parse_ignore_errors(&buf)); } diff --git a/gitoxide-core/src/repository/mailmap.rs b/gitoxide-core/src/repository/mailmap.rs new file mode 100644 index 00000000000..a3f10a872e9 --- /dev/null +++ b/gitoxide-core/src/repository/mailmap.rs @@ -0,0 +1,23 @@ +use crate::OutputFormat; +use git_repository as git; +use std::io; +use std::path::PathBuf; + +pub fn entries( + repository: PathBuf, + format: OutputFormat, + mut out: impl io::Write, + mut err: impl io::Write, +) -> anyhow::Result<()> { + if format == OutputFormat::Human { + writeln!(err, "Defaulting to JSON as human format isn't implemented").ok(); + } + + let repo = git::open(repository)?.apply_environment(); + let mut mailmap = git::mailmap::Snapshot::default(); + if let Err(e) = repo.load_mailmap_into(&mut mailmap) { + writeln!(err, "Error while loading mailmap: {}", e).ok(); + } + + Ok(()) +} diff --git a/gitoxide-core/src/repository/mod.rs b/gitoxide-core/src/repository/mod.rs index 0ebbe799e36..afd9e668a42 100644 --- a/gitoxide-core/src/repository/mod.rs +++ b/gitoxide-core/src/repository/mod.rs @@ -12,3 +12,5 @@ pub mod tree; pub mod verify; pub mod odb; + +pub mod mailmap; diff --git a/src/plumbing/main.rs b/src/plumbing/main.rs index 11b47ccf4e0..56735c157e7 100644 --- a/src/plumbing/main.rs +++ b/src/plumbing/main.rs @@ -158,6 +158,16 @@ pub fn main() -> Result<()> { ), }, Subcommands::Repository(repo::Platform { repository, cmd }) => match cmd { + repo::Subcommands::Mailmap { cmd } => match cmd { + repo::mailmap::Subcommands::Entries => prepare_and_run( + "repository-mailmap-entries", + verbose, + progress, + progress_keep_open, + None, + move |_progress, out, err| core::repository::mailmap::entries(repository, format, out, err), + ), + }, repo::Subcommands::Odb { cmd } => match cmd { repo::odb::Subcommands::Entries => prepare_and_run( "repository-odb-entries", diff --git a/src/plumbing/options.rs b/src/plumbing/options.rs index f345bfcc5ca..c329fafd8d9 100644 --- a/src/plumbing/options.rs +++ b/src/plumbing/options.rs @@ -360,6 +360,19 @@ pub mod repo { #[clap(subcommand)] cmd: odb::Subcommands, }, + /// Interact with the mailmap. + Mailmap { + #[clap(subcommand)] + cmd: mailmap::Subcommands, + }, + } + + pub mod mailmap { + #[derive(Debug, clap::Subcommand)] + pub enum Subcommands { + /// Print all entries in configured mailmaps, inform about errors as well. + Entries, + } } pub mod odb { From d2388d8d80f379eccc9ee84ebe07acd67d154630 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Tue, 29 Mar 2022 14:50:04 +0800 Subject: [PATCH 31/31] feat: `gix repository mailmap entries` (#366) --- git-mailmap/src/snapshot.rs | 34 ++++++++++++++++++++++ git-mailmap/tests/snapshot/mod.rs | 10 ++++--- git-repository/Cargo.toml | 2 +- git-repository/src/lib.rs | 4 +-- git-repository/src/repository/snapshots.rs | 6 +--- gitoxide-core/src/repository/mailmap.rs | 33 +++++++++++++++++++-- 6 files changed, 75 insertions(+), 14 deletions(-) diff --git a/git-mailmap/src/snapshot.rs b/git-mailmap/src/snapshot.rs index f885d4e98a0..06de41e9b3f 100644 --- a/git-mailmap/src/snapshot.rs +++ b/git-mailmap/src/snapshot.rs @@ -32,6 +32,12 @@ enum EncodedString { } impl EncodedString { + fn as_bstr(&self) -> &BStr { + match self { + EncodedString::Utf8(v) => v.as_str().into(), + EncodedString::Unknown(v) => v.as_bstr(), + } + } fn cmp_ref(&self, other: EncodedStringRef<'_>) -> Ordering { match (self, other) { (EncodedString::Utf8(a), EncodedStringRef::Utf8(b)) => { @@ -206,6 +212,34 @@ impl Snapshot { self } + /// Transform our acceleration structure into a list of entries. + /// + /// Note that the order is different from how they were obtained initially, and are explicitly ordered by + /// (old_email, old_name). + pub fn entries(&self) -> Vec> { + let mut out = Vec::with_capacity(self.entries_by_old_email.len()); + for entry in &self.entries_by_old_email { + if entry.new_email.is_some() || entry.new_name.is_some() { + out.push(crate::Entry { + new_name: entry.new_name.as_ref().map(|b| b.as_bstr()), + new_email: entry.new_email.as_ref().map(|b| b.as_bstr()), + old_name: None, + old_email: entry.old_email.as_bstr(), + }); + } + + for name_entry in &entry.entries_by_old_name { + out.push(crate::Entry { + new_name: name_entry.new_name.as_ref().map(|b| b.as_bstr()), + new_email: name_entry.new_email.as_ref().map(|b| b.as_bstr()), + old_name: name_entry.old_name.as_bstr().into(), + old_email: entry.old_email.as_bstr(), + }); + } + } + out + } + /// Try to resolve `signature` by its contained email and name and provide resolved/mapped names as reference. /// Return `None` if no such mapping was found. /// diff --git a/git-mailmap/tests/snapshot/mod.rs b/git-mailmap/tests/snapshot/mod.rs index 33c662f4ec1..db52a924707 100644 --- a/git-mailmap/tests/snapshot/mod.rs +++ b/git-mailmap/tests/snapshot/mod.rs @@ -3,7 +3,7 @@ use git_testtools::fixture_bytes; #[test] fn try_resolve() { - let snapshot = snapshot(); + let snapshot = Snapshot::from_bytes(&fixture_bytes("typical.txt")); assert_eq!( snapshot.try_resolve(&signature("Foo", "Joe@example.com").to_ref()), Some(signature("Joe R. Developer", "Joe@example.com")), @@ -48,6 +48,8 @@ fn try_resolve() { "matched email, unmatched name" ); assert_eq!(snapshot.resolve(&sig.to_ref()), sig); + + assert_eq!(snapshot.entries().len(), 5); } #[test] @@ -76,6 +78,8 @@ fn non_name_and_name_mappings_will_not_clash() { Some(signature("other-new-name", "other-new-email")), "it can match by email and name as well" ); + + assert_eq!(snapshot.entries().len(), 2); } } @@ -105,10 +109,8 @@ fn overwrite_entries() { Some(signature("unchanged", "new-d-email-overwritten")), "email by email" ); -} -fn snapshot() -> Snapshot { - Snapshot::from_bytes(&fixture_bytes("typical.txt")) + assert_eq!(snapshot.entries().len(), 4); } fn signature(name: &str, email: &str) -> git_actor::Signature { diff --git a/git-repository/Cargo.toml b/git-repository/Cargo.toml index 4453240d713..49d6350de29 100644 --- a/git-repository/Cargo.toml +++ b/git-repository/Cargo.toml @@ -41,7 +41,7 @@ one-stop-shop = [ "local", "local-time-support" ] #! ### Other ## Data structures implement `serde::Serialize` and `serde::Deserialize`. -serde1 = ["git-pack/serde1", "git-object/serde1", "git-protocol/serde1", "git-transport/serde1", "git-ref/serde1", "git-odb/serde1", "git-index/serde1"] +serde1 = ["git-pack/serde1", "git-object/serde1", "git-protocol/serde1", "git-transport/serde1", "git-ref/serde1", "git-odb/serde1", "git-index/serde1", "git-mailmap/serde1"] ## Activate other features that maximize performance, like usage of threads, `zlib-ng` and access to caching in object databases. ## **Note** that max-performance = ["git-features/parallel", "git-features/zlib-ng-compat", "git-pack/pack-cache-lru-static", "git-pack/pack-cache-lru-dynamic"] diff --git a/git-repository/src/lib.rs b/git-repository/src/lib.rs index 14a427d049a..eb0a52d9227 100644 --- a/git-repository/src/lib.rs +++ b/git-repository/src/lib.rs @@ -247,11 +247,11 @@ pub mod mailmap { /// pub mod load { - /// The error returned by [`crate::Repository::load_mailmap()`]. + /// The error returned by [`crate::Repository::load_mailmap_into()`]. #[derive(Debug, thiserror::Error)] #[allow(missing_docs)] pub enum Error { - #[error("A mailmap file could not be loaded from disk")] + #[error("The mailmap file declared in `mailmap.file` could not be read")] Io(#[from] std::io::Error), #[error("The configured mailmap.blob could not be parsed")] BlobSpec(#[from] git_hash::decode::Error), diff --git a/git-repository/src/repository/snapshots.rs b/git-repository/src/repository/snapshots.rs index e166a81d164..f6d6778331e 100644 --- a/git-repository/src/repository/snapshots.rs +++ b/git-repository/src/repository/snapshots.rs @@ -96,11 +96,7 @@ impl crate::Repository { { buf.clear(); std::io::copy(&mut file, &mut buf) - .map_err(|e| { - if e.kind() != std::io::ErrorKind::NotFound { - err.get_or_insert(e.into()); - } - }) + .map_err(|e| err.get_or_insert(e.into())) .ok(); target.merge(git_mailmap::parse_ignore_errors(&buf)); } diff --git a/gitoxide-core/src/repository/mailmap.rs b/gitoxide-core/src/repository/mailmap.rs index a3f10a872e9..96ddb1a9c0e 100644 --- a/gitoxide-core/src/repository/mailmap.rs +++ b/gitoxide-core/src/repository/mailmap.rs @@ -1,12 +1,35 @@ use crate::OutputFormat; use git_repository as git; +use git_repository::bstr::ByteSlice; +use git_repository::mailmap::Entry; use std::io; use std::path::PathBuf; +#[cfg(feature = "serde1")] +#[cfg_attr(feature = "serde1", derive(serde::Serialize))] +struct JsonEntry { + new_name: Option, + new_email: Option, + old_name: Option, + old_email: String, +} + +#[cfg(feature = "serde1")] +impl<'a> From> for JsonEntry { + fn from(v: Entry<'a>) -> Self { + JsonEntry { + new_name: v.new_name().map(|s| s.to_str_lossy().into_owned()), + new_email: v.new_email().map(|s| s.to_str_lossy().into_owned()), + old_name: v.old_name().map(|s| s.to_str_lossy().into_owned()), + old_email: v.old_email().to_str_lossy().into_owned(), + } + } +} + pub fn entries( repository: PathBuf, format: OutputFormat, - mut out: impl io::Write, + out: impl io::Write, mut err: impl io::Write, ) -> anyhow::Result<()> { if format == OutputFormat::Human { @@ -16,8 +39,14 @@ pub fn entries( let repo = git::open(repository)?.apply_environment(); let mut mailmap = git::mailmap::Snapshot::default(); if let Err(e) = repo.load_mailmap_into(&mut mailmap) { - writeln!(err, "Error while loading mailmap: {}", e).ok(); + writeln!(err, "Error while loading mailmap, the first error is: {}", e).ok(); } + #[cfg(feature = "serde1")] + serde_json::to_writer_pretty( + out, + &mailmap.entries().into_iter().map(JsonEntry::from).collect::>(), + )?; + Ok(()) }