diff --git a/Cargo.lock b/Cargo.lock index 0ce2d6277e2..8cb67f5c273 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1167,6 +1167,7 @@ dependencies = [ "flate2", "git-hash", "jwalk", + "libc", "num_cpus", "parking_lot 0.12.0", "prodash", @@ -1217,6 +1218,17 @@ dependencies = [ "tempfile", ] +[[package]] +name = "git-mailmap" +version = "0.0.0" +dependencies = [ + "bstr", + "git-actor", + "git-testtools", + "quick-error", + "serde", +] + [[package]] name = "git-object" version = "0.17.1" @@ -1375,6 +1387,7 @@ dependencies = [ "git-hash", "git-index", "git-lock", + "git-mailmap", "git-object", "git-odb", "git-pack", @@ -1511,7 +1524,6 @@ dependencies = [ "git-odb", "git-testtools", "io-close", - "libc", "serde", "symlink", "tempfile", diff --git a/Cargo.toml b/Cargo.toml index 58511e809a4..8837929ff9f 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -142,6 +142,7 @@ members = [ "git-worktree", "git-revision", "git-packetline", + "git-mailmap", "git-submodule", "git-transport", "git-protocol", diff --git a/Makefile b/Makefile index 9f5c9ebff41..08877c8d273 100644 --- a/Makefile +++ b/Makefile @@ -90,6 +90,8 @@ check: ## Build all code in suitable configurations && cargo check --features verbose-object-parsing-errors cd git-index && cargo check --features serde1 cd git-revision && cargo check --features serde1 + cd git-attributes && cargo check --features serde1 + cd git-mailmap && cargo check --features serde1 cd git-worktree && cargo check --features serde1 cd git-actor && cargo check --features serde1 cd git-pack && cargo check --features serde1 \ diff --git a/README.md b/README.md index a5d5ec4095f..c70d4bb249e 100644 --- a/README.md +++ b/README.md @@ -108,6 +108,7 @@ Follow linked crate name for detailed status. Please note that all crates follow * [git-attributes](https://github.com/Byron/gitoxide/blob/main/crate-status.md#git-attributes) * [git-quote](https://github.com/Byron/gitoxide/blob/main/crate-status.md#git-quote) * **idea** + * [git-mailmap](https://github.com/Byron/gitoxide/blob/main/crate-status.md#git-mailmap) * [git-pathspec](https://github.com/Byron/gitoxide/blob/main/crate-status.md#git-pathspec) * [git-subomdule](https://github.com/Byron/gitoxide/blob/main/crate-status.md#git-submodule) * [git-tui](https://github.com/Byron/gitoxide/blob/main/crate-status.md#git-tui) diff --git a/crate-status.md b/crate-status.md index 5ad7c1cfd35..2746849a72a 100644 --- a/crate-status.md +++ b/crate-status.md @@ -220,6 +220,11 @@ Check out the [performance discussion][git-traverse-performance] as well. * **ansi-c** * [x] quote * [ ] unquote + * +### git-mailmap + +* [ ] parsing +* [ ] lookup and mapping of author names ### git-pathspec diff --git a/git-actor/src/lib.rs b/git-actor/src/lib.rs index 5304d81a8ce..884c8f988b0 100644 --- a/git-actor/src/lib.rs +++ b/git-actor/src/lib.rs @@ -11,13 +11,14 @@ use bstr::{BStr, BString}; /// pub mod signature; +mod time; const SPACE: &[u8; 1] = b" "; /// A mutable signature is created by an actor at a certain time. /// /// Note that this is not a cryptographical signature. -#[derive(PartialEq, Eq, Debug, Hash, Ord, PartialOrd, Clone)] +#[derive(Default, PartialEq, Eq, Debug, Hash, Ord, PartialOrd, Clone)] #[cfg_attr(feature = "serde1", derive(serde::Serialize, serde::Deserialize))] pub struct Signature { /// The actors name. @@ -63,5 +64,3 @@ pub struct Time { /// the sign of `offset`, used to encode `-0000` which would otherwise loose sign information. pub sign: Sign, } - -mod time; diff --git a/git-actor/src/signature/mod.rs b/git-actor/src/signature/mod.rs index 7141188b166..08e3c8e05d4 100644 --- a/git-actor/src/signature/mod.rs +++ b/git-actor/src/signature/mod.rs @@ -22,26 +22,12 @@ mod _ref { } mod convert { - use crate::{Sign, Signature, SignatureRef, Time}; - - impl Default for Signature { - fn default() -> Self { - Signature::empty() - } - } + use crate::{Signature, SignatureRef}; impl Signature { /// An empty signature, similar to 'null'. pub fn empty() -> Self { - Signature { - name: Default::default(), - email: Default::default(), - time: Time { - seconds_since_unix_epoch: 0, - offset_in_seconds: 0, - sign: Sign::Plus, - }, - } + Signature::default() } /// Borrow this instance as immutable diff --git a/git-actor/src/time.rs b/git-actor/src/time.rs index 61dca1839fc..47c924a24fd 100644 --- a/git-actor/src/time.rs +++ b/git-actor/src/time.rs @@ -12,6 +12,16 @@ impl From for Sign { } } +impl Default for Time { + fn default() -> Self { + Time { + seconds_since_unix_epoch: 0, + offset_in_seconds: 0, + sign: Sign::Plus, + } + } +} + impl Time { /// Serialize this instance to `out` in a format suitable for use in header fields of serialized git commits or tags. pub fn write_to(&self, mut out: impl io::Write) -> io::Result<()> { diff --git a/git-attributes/src/ignore.rs b/git-attributes/src/ignore.rs index 1bde72dc586..77a48989d78 100644 --- a/git-attributes/src/ignore.rs +++ b/git-attributes/src/ignore.rs @@ -2,6 +2,7 @@ pub mod pattern { use bitflags::bitflags; bitflags! { + #[cfg_attr(feature = "serde1", derive(serde::Serialize, serde::Deserialize))] pub struct Mode: u32 { /// The pattern does not contain a sub-directory and - it doesn't contain slashes after removing the trailing one. const NO_SUB_DIR = 1 << 0; diff --git a/git-attributes/src/lib.rs b/git-attributes/src/lib.rs index 1e429b1a1d1..0d08f1e1963 100644 --- a/git-attributes/src/lib.rs +++ b/git-attributes/src/lib.rs @@ -1,4 +1,5 @@ -#![forbid(unsafe_code, rust_2018_idioms)] +#![forbid(unsafe_code)] +#![deny(rust_2018_idioms)] use bstr::BStr; @@ -11,6 +12,7 @@ pub enum State<'a> { Unset, /// The attribute is set to the given value, which followed the `=` sign. /// Note that values can be empty. + #[cfg_attr(feature = "serde1", serde(borrow))] Value(&'a BStr), /// The attribute isn't mentioned with a given path or is explicitly set to `Unspecified` using the `!` sign. Unspecified, diff --git a/git-attributes/src/parse/ignore.rs b/git-attributes/src/parse/ignore.rs index fde352d7342..95c51497f81 100644 --- a/git-attributes/src/parse/ignore.rs +++ b/git-attributes/src/parse/ignore.rs @@ -49,6 +49,9 @@ pub(crate) fn parse_line(mut line: &[u8]) -> Option<(BString, ignore::pattern::M line = &line[1..]; } } + if line.iter().all(|b| b.is_ascii_whitespace()) { + return None; + } let mut line = truncate_non_escaped_trailing_spaces(line); if line.last() == Some(&b'/') { mode |= ignore::pattern::Mode::MUST_BE_DIR; diff --git a/git-attributes/tests/parse/attribute.rs b/git-attributes/tests/parse/attribute.rs index ad1cf66c926..ae1463da537 100644 --- a/git-attributes/tests/parse/attribute.rs +++ b/git-attributes/tests/parse/attribute.rs @@ -1,7 +1,7 @@ use bstr::{BStr, ByteSlice}; use git_attributes::ignore::pattern::Mode; use git_attributes::{parse, State}; -use git_testtools::fixture_path; +use git_testtools::fixture_bytes; #[test] fn byte_order_marks_are_no_patterns() { @@ -14,9 +14,9 @@ fn byte_order_marks_are_no_patterns() { #[test] fn line_numbers_are_counted_correctly() { - let ignore = std::fs::read(fixture_path("attributes/various.txt")).unwrap(); + let input = fixture_bytes("attributes/various.txt"); assert_eq!( - try_lines(&String::from_utf8(ignore).unwrap()).unwrap(), + try_lines(&String::from_utf8(input).unwrap()).unwrap(), vec![ (pattern(r"*.[oa]", Mode::NO_SUB_DIR), vec![set("c")], 2), ( @@ -44,13 +44,14 @@ fn line_endings_can_be_windows_or_unix() { } #[test] -fn comment_lines_are_ignored() { +fn comment_lines_are_ignored_as_well_as_empty_ones() { assert!(git_attributes::parse(b"# hello world").next().is_none()); assert!(git_attributes::parse(b"# \"hello world\"").next().is_none()); assert!( git_attributes::parse(b" \t\r# \"hello world\"").next().is_none(), "also behind leading whitespace" ); + assert!(git_attributes::parse(b"\n\r\n\t\t \n").next().is_none()); } #[test] @@ -266,10 +267,7 @@ fn try_line(input: &str) -> Result { } fn line(input: &str) -> ExpandedAttribute { - let mut lines = git_attributes::parse(input.as_bytes()); - let res = expand(lines.next().expect("single line")).unwrap(); - assert!(lines.next().is_none(), "expected only one line"); - res + try_line(input).unwrap() } fn try_lines(input: &str) -> Result, parse::Error> { diff --git a/git-attributes/tests/parse/ignore.rs b/git-attributes/tests/parse/ignore.rs index 7ad310a54e7..e39a194e493 100644 --- a/git-attributes/tests/parse/ignore.rs +++ b/git-attributes/tests/parse/ignore.rs @@ -1,5 +1,5 @@ use git_attributes::ignore::pattern::Mode; -use git_testtools::fixture_path; +use git_testtools::fixture_bytes; #[test] fn byte_order_marks_are_no_patterns() { @@ -11,8 +11,8 @@ fn byte_order_marks_are_no_patterns() { #[test] fn line_numbers_are_counted_correctly() { - let ignore = std::fs::read(fixture_path("ignore/various.txt")).unwrap(); - let actual: Vec<_> = git_attributes::parse::ignore(&ignore).collect(); + let input = fixture_bytes("ignore/various.txt"); + let actual: Vec<_> = git_attributes::parse::ignore(&input).collect(); assert_eq!( actual, vec![ @@ -66,8 +66,9 @@ fn mark_ends_with_pattern_specifically() { } #[test] -fn comments_are_ignored() { +fn comments_are_ignored_as_well_as_empty_ones() { assert!(git_attributes::parse::ignore(b"# hello world").next().is_none()); + assert!(git_attributes::parse::ignore(b"\n\r\n\t\t \n").next().is_none()); } #[test] diff --git a/git-features/Cargo.toml b/git-features/Cargo.toml index ca76445e17c..8715ad6766b 100644 --- a/git-features/Cargo.toml +++ b/git-features/Cargo.toml @@ -125,12 +125,17 @@ time = { version = "0.3.2", optional = true, default-features = false, features # path ## make bstr utilities available in the `path` modules, which itself is gated by the `path` feature. -bstr = { version = "0.2.17", optional = true } +bstr = { version = "0.2.17", optional = true, default-features = false, features = ["std"] } document-features = { version = "0.2.0", optional = true } +[target.'cfg(unix)'.dependencies] +## for fs::open_options_no_follow() +# TODO: ideally we conditionally import it only if a plumbing crate wants to use the underlying feature. Wanted to avoid to add to feature-toggle complexity. +libc = { version = "0.2.119" } + [dev-dependencies] -bstr = "0.2.15" +bstr = { version = "0.2.15", default-features = false } [package.metadata.docs.rs] features = ["document-features"] diff --git a/git-features/src/fs.rs b/git-features/src/fs.rs index c969d7fd051..7e60d246c99 100644 --- a/git-features/src/fs.rs +++ b/git-features/src/fs.rs @@ -53,3 +53,19 @@ pub mod walkdir { #[cfg(any(feature = "walkdir", feature = "jwalk"))] pub use self::walkdir::{walkdir_new, walkdir_sorted_new, WalkDir}; + +/// Prepare open options which won't follow symlinks when the file is opened. +/// +/// Note: only effective on unix currently. +pub fn open_options_no_follow() -> std::fs::OpenOptions { + let mut options = std::fs::OpenOptions::new(); + #[cfg(unix)] + { + /// Make sure that it's impossible to follow through to the target of symlinks. + /// Note that this will still follow symlinks in the path, which is what we assume + /// has been checked separately. + use std::os::unix::fs::OpenOptionsExt; + options.custom_flags(libc::O_NOFOLLOW); + } + options +} diff --git a/git-mailmap/CHANGELOG.md b/git-mailmap/CHANGELOG.md new file mode 100644 index 00000000000..92b8fba9ea4 --- /dev/null +++ b/git-mailmap/CHANGELOG.md @@ -0,0 +1,29 @@ +# Changelog + +All notable changes to this project will be documented in this file. + +The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), +and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). + +## 0.0.0 (2022-03-26) + +An empty crate without any content to reserve the name for the gitoxide project. + +### Commit Statistics + + + + - 1 commit contributed to the release. + - 0 commits where understood as [conventional](https://www.conventionalcommits.org). + - 1 unique issue was worked on: [#366](https://github.com/Byron/gitoxide/issues/366) + +### Commit Details + + + +
view details + + * **[#366](https://github.com/Byron/gitoxide/issues/366)** + - empty git-mailmap crate ([`a30a5da`](https://github.com/Byron/gitoxide/commit/a30a5da60d67a4e52ba69727318edb9832b7cae2)) +
+ diff --git a/git-mailmap/Cargo.toml b/git-mailmap/Cargo.toml new file mode 100644 index 00000000000..41eb8930128 --- /dev/null +++ b/git-mailmap/Cargo.toml @@ -0,0 +1,26 @@ +[package] +name = "git-mailmap" +version = "0.0.0" +repository = "https://github.com/Byron/gitoxide" +license = "MIT/Apache-2.0" +description = "A WIP crate of the gitoxide project for parsing mailmap files" +authors = ["Sebastian Thiel "] +edition = "2018" + +[lib] +doctest = false + +[features] +## Data structures implement `serde::Serialize` and `serde::Deserialize`. +serde1 = ["serde", "bstr/serde1", "git-actor/serde1"] + +# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html + +[dependencies] +git-actor = { version = "^0.8.1", path = "../git-actor" } +bstr = { version = "0.2.13", default-features = false, features = ["std", "unicode"]} +quick-error = "2.0.0" +serde = { version = "1.0.114", optional = true, default-features = false, features = ["derive"]} + +[dev-dependencies] +git-testtools = { path = "../tests/tools"} diff --git a/git-mailmap/src/entry.rs b/git-mailmap/src/entry.rs new file mode 100644 index 00000000000..7b2d43963f1 --- /dev/null +++ b/git-mailmap/src/entry.rs @@ -0,0 +1,69 @@ +use crate::Entry; +use bstr::BStr; + +/// Acccess +impl<'a> Entry<'a> { + /// The name to map to. + pub fn new_name(&self) -> Option<&'a BStr> { + self.new_name + } + /// The email map to. + pub fn new_email(&self) -> Option<&'a BStr> { + self.new_email + } + /// The name to look for and replace. + pub fn old_name(&self) -> Option<&'a BStr> { + self.old_name + } + /// The email to look for and replace. + pub fn old_email(&self) -> &'a BStr { + self.old_email + } +} + +/// Constructors indicating what kind of mapping is created. +/// +/// Only these combinations of values are valid. +#[allow(missing_docs)] +impl<'a> Entry<'a> { + pub fn change_name_by_email(proper_name: impl Into<&'a BStr>, commit_email: impl Into<&'a BStr>) -> Self { + Entry { + new_name: Some(proper_name.into()), + old_email: commit_email.into(), + ..Default::default() + } + } + pub fn change_email_by_email(proper_email: impl Into<&'a BStr>, commit_email: impl Into<&'a BStr>) -> Self { + Entry { + new_email: Some(proper_email.into()), + old_email: commit_email.into(), + ..Default::default() + } + } + pub fn change_name_and_email_by_email( + proper_name: impl Into<&'a BStr>, + proper_email: impl Into<&'a BStr>, + commit_email: impl Into<&'a BStr>, + ) -> Self { + Entry { + new_name: Some(proper_name.into()), + new_email: Some(proper_email.into()), + old_email: commit_email.into(), + ..Default::default() + } + } + + pub fn change_name_and_email_by_name_and_email( + proper_name: impl Into<&'a BStr>, + proper_email: impl Into<&'a BStr>, + commit_name: impl Into<&'a BStr>, + commit_email: impl Into<&'a BStr>, + ) -> Self { + Entry { + new_name: Some(proper_name.into()), + new_email: Some(proper_email.into()), + old_name: Some(commit_name.into()), + old_email: commit_email.into(), + } + } +} diff --git a/git-mailmap/src/lib.rs b/git-mailmap/src/lib.rs new file mode 100644 index 00000000000..c471b5b10bf --- /dev/null +++ b/git-mailmap/src/lib.rs @@ -0,0 +1,57 @@ +#![forbid(unsafe_code)] +#![deny(rust_2018_idioms, missing_docs)] +//! [Parse][parse()] .mailmap files as used in git repositories and remap names and emails +//! using an [accelerated data-structure][Snapshot]. + +use bstr::BStr; + +/// +pub mod parse; + +/// Parse the given `buf` of bytes line by line into mapping [Entries][Entry]. +/// +/// Errors may occour per line, but it's up to the caller to stop iteration when +/// one is encountered. +pub fn parse(buf: &[u8]) -> parse::Lines<'_> { + parse::Lines::new(buf) +} + +/// Similar to [parse()], but will skip all lines that didn't parse correctly, silently squelching all errors. +pub fn parse_ignore_errors(buf: &[u8]) -> impl Iterator> { + parse(buf).filter_map(Result::ok) +} + +mod entry; + +/// +pub mod snapshot; + +/// A data-structure to efficiently store a list of entries for optimal, case-insensitive lookup by email and +/// optionally name to find mappings to new names and/or emails. +/// +/// The memory layout is efficient, even though lots of small allocations are performed to store strings of emails and names. +#[derive(Default, Clone)] +pub struct Snapshot { + /// Sorted by `old_email` + entries_by_old_email: Vec, +} + +/// An typical entry of a mailmap, which always contains an `old_email` by which +/// the mapping is performed to replace the given `new_name` and `new_email`. +/// +/// Optionally, `old_name` is also used for lookup. +/// +/// Typically created by [parse()]. +#[derive(PartialEq, Eq, Debug, Hash, Ord, PartialOrd, Clone, Copy, Default)] +#[cfg_attr(feature = "serde1", derive(serde::Serialize, serde::Deserialize))] +pub struct Entry<'a> { + #[cfg_attr(feature = "serde1", serde(borrow))] + /// The name to map to. + pub(crate) new_name: Option<&'a BStr>, + /// The email map to. + pub(crate) new_email: Option<&'a BStr>, + /// The name to look for and replace. + pub(crate) old_name: Option<&'a BStr>, + /// The email to look for and replace. + pub(crate) old_email: &'a BStr, +} diff --git a/git-mailmap/src/parse.rs b/git-mailmap/src/parse.rs new file mode 100644 index 00000000000..15209256b8a --- /dev/null +++ b/git-mailmap/src/parse.rs @@ -0,0 +1,116 @@ +mod error { + use bstr::BString; + use quick_error::quick_error; + + quick_error! { + /// The error returned by [`parse()`][crate::parse()]. + #[derive(Debug)] + #[allow(missing_docs)] + pub enum Error { + UnconsumedInput { line_number: usize, line: BString } { + display("Line {} has too many names or emails, or none at all: {}", line_number, line) + } + Malformed { line_number: usize, line: BString, message: String } { + display("{}: {:?}: {}", line_number, line, message) + } + } + } +} + +use crate::Entry; +use bstr::{BStr, ByteSlice}; +pub use error::Error; + +/// An iterator to parse mailmap lines on-demand. +pub struct Lines<'a> { + lines: bstr::Lines<'a>, + line_no: usize, +} + +impl<'a> Lines<'a> { + pub(crate) fn new(input: &'a [u8]) -> Self { + Lines { + lines: input.as_bstr().lines(), + line_no: 0, + } + } +} + +impl<'a> Iterator for Lines<'a> { + type Item = Result, Error>; + + fn next(&mut self) -> Option { + for line in self.lines.by_ref() { + self.line_no += 1; + match line.first() { + None => continue, + Some(b) if *b == b'#' => continue, + Some(_) => {} + } + let line = line.trim(); + if line.is_empty() { + continue; + } + return parse_line(line.into(), self.line_no).into(); + } + None + } +} + +fn parse_line(line: &BStr, line_number: usize) -> Result, Error> { + let (name1, email1, rest) = parse_name_and_email(line, line_number)?; + let (name2, email2, rest) = parse_name_and_email(rest, line_number)?; + if !rest.trim().is_empty() { + return Err(Error::UnconsumedInput { + line_number, + line: line.into(), + }); + } + Ok(match (name1, email1, name2, email2) { + (Some(proper_name), Some(commit_email), None, None) => Entry::change_name_by_email(proper_name, commit_email), + (None, Some(proper_email), None, Some(commit_email)) => { + Entry::change_email_by_email(proper_email, commit_email) + } + (Some(proper_name), Some(proper_email), None, Some(commit_email)) => { + Entry::change_name_and_email_by_email(proper_name, proper_email, commit_email) + } + (Some(proper_name), Some(proper_email), Some(commit_name), Some(commit_email)) => { + Entry::change_name_and_email_by_name_and_email(proper_name, proper_email, commit_name, commit_email) + } + _ => { + return Err(Error::Malformed { + line_number, + line: line.into(), + message: "Emails without a name or email to map to are invalid".into(), + }) + } + }) +} + +fn parse_name_and_email( + line: &BStr, + line_number: usize, +) -> Result<(Option<&'_ BStr>, Option<&'_ BStr>, &'_ BStr), Error> { + match line.find_byte(b'<') { + Some(start_bracket) => { + let email = &line[start_bracket + 1..]; + let closing_bracket = email.find_byte(b'>').ok_or_else(|| Error::Malformed { + line_number, + line: line.into(), + message: "Missing closing bracket '>' in email".into(), + })?; + let email = email[..closing_bracket].trim().as_bstr(); + if email.is_empty() { + return Err(Error::Malformed { + line_number, + line: line.into(), + message: "Email must not be empty".into(), + }); + } + let name = line[..start_bracket].trim().as_bstr(); + let rest = line[start_bracket + closing_bracket + 2..].as_bstr(); + Ok(((!name.is_empty()).then(|| name), Some(email), rest)) + } + None => Ok((None, None, line)), + } +} diff --git a/git-mailmap/src/snapshot.rs b/git-mailmap/src/snapshot.rs new file mode 100644 index 00000000000..06de41e9b3f --- /dev/null +++ b/git-mailmap/src/snapshot.rs @@ -0,0 +1,348 @@ +use crate::Snapshot; +use bstr::{BStr, BString, ByteSlice}; +use git_actor::SignatureRef; +use std::cmp::Ordering; +use std::ops::Deref; + +/// A resolved signature with borrowed fields for a mapped `name` and/or `email`. +pub struct ResolvedSignature<'a> { + /// The mapped name. + pub name: Option<&'a BStr>, + /// The mapped email. + pub email: Option<&'a BStr>, +} + +impl<'a> ResolvedSignature<'a> { + fn try_new(new_email: Option<&'a BString>, new_name: Option<&'a BString>) -> Option { + match (new_email, new_name) { + (None, None) => None, + (new_email, new_name) => Some(ResolvedSignature { + email: new_email.map(|v| v.as_bstr()), + name: new_name.map(|v| v.as_bstr()), + }), + } + } +} + +#[cfg_attr(test, derive(Debug))] +#[derive(Clone)] +enum EncodedString { + Utf8(String), + Unknown(BString), +} + +impl EncodedString { + fn as_bstr(&self) -> &BStr { + match self { + EncodedString::Utf8(v) => v.as_str().into(), + EncodedString::Unknown(v) => v.as_bstr(), + } + } + fn cmp_ref(&self, other: EncodedStringRef<'_>) -> Ordering { + match (self, other) { + (EncodedString::Utf8(a), EncodedStringRef::Utf8(b)) => { + let a = a.chars().map(|c| c.to_ascii_lowercase()); + let b = b.chars().map(|c| c.to_ascii_lowercase()); + a.cmp(b) + } + (EncodedString::Unknown(a), EncodedStringRef::Unknown(b)) => a.deref().as_bstr().cmp(b), + (EncodedString::Utf8(a), EncodedStringRef::Unknown(b)) => a.as_bytes().cmp(b.as_ref()), + (EncodedString::Unknown(a), EncodedStringRef::Utf8(b)) => a.deref().as_bytes().cmp(b.as_bytes()), + } + } +} + +#[cfg_attr(test, derive(Debug))] +#[derive(Clone, Copy)] +enum EncodedStringRef<'a> { + Utf8(&'a str), + Unknown(&'a BStr), +} + +impl<'a> From<&'a BStr> for EncodedStringRef<'a> { + fn from(v: &'a BStr) -> Self { + match v.to_str() { + Ok(v) => EncodedStringRef::Utf8(v), + Err(_) => EncodedStringRef::Unknown(v), + } + } +} + +impl<'a> From> for EncodedString { + fn from(v: EncodedStringRef<'a>) -> Self { + match v { + EncodedStringRef::Utf8(v) => EncodedString::Utf8(v.to_owned()), + EncodedStringRef::Unknown(v) => EncodedString::Unknown(v.to_owned()), + } + } +} + +impl<'a> From<&'a BStr> for EncodedString { + fn from(v: &'a BStr) -> Self { + match v.to_str() { + Ok(v) => EncodedString::Utf8(v.to_owned()), + Err(_) => EncodedString::Unknown(v.to_owned()), + } + } +} + +#[derive(Clone)] +struct NameEntry { + new_name: Option, + new_email: Option, + old_name: EncodedString, +} + +#[derive(Clone)] +pub(crate) struct EmailEntry { + new_name: Option, + new_email: Option, + old_email: EncodedString, + + entries_by_old_name: Vec, +} + +impl EmailEntry { + fn merge( + &mut self, + crate::Entry { + new_name, + new_email, + old_name, + old_email: _, + }: crate::Entry<'_>, + ) { + let new_email = new_email.map(ToOwned::to_owned); + let new_name = new_name.map(ToOwned::to_owned); + match old_name { + None => { + self.new_email = new_email; + self.new_name = new_name; + } + Some(old_name) => { + let old_name: EncodedStringRef<'_> = old_name.into(); + match self + .entries_by_old_name + .binary_search_by(|e| e.old_name.cmp_ref(old_name)) + { + Ok(pos) => { + let entry = &mut self.entries_by_old_name[pos]; + entry.new_name = new_name; + entry.new_email = new_email; + } + Err(insert_pos) => self.entries_by_old_name.insert( + insert_pos, + NameEntry { + new_name, + new_email, + old_name: old_name.into(), + }, + ), + } + } + } + } +} + +impl<'a> From> for EmailEntry { + fn from( + crate::Entry { + new_name, + new_email, + old_name, + old_email, + }: crate::Entry<'a>, + ) -> Self { + let mut new_name = new_name.map(ToOwned::to_owned); + let mut new_email = new_email.map(ToOwned::to_owned); + let entries_by_old_name = old_name + .map(|name| { + vec![NameEntry { + new_name: new_name.take(), + new_email: new_email.take(), + old_name: name.into(), + }] + }) + .unwrap_or_default(); + EmailEntry { + new_name, + new_email, + old_email: old_email.into(), + entries_by_old_name, + } + } +} + +impl Snapshot { + /// Create a new snapshot from the given bytes buffer, ignoring all parse errors that may occour on a line-by-line basis. + /// + /// This is similar to what git does. + pub fn from_bytes(buf: &[u8]) -> Self { + Self::new(crate::parse_ignore_errors(buf)) + } + + /// Create a new instance from `entries`. + /// + /// These can be obtained using [crate::parse()]. + pub fn new<'a>(entries: impl IntoIterator>) -> Self { + let mut snapshot = Self::default(); + snapshot.merge(entries); + snapshot + } + + /// Merge the given `entries` into this instance, possibly overwriting existing mappings with + /// new ones should they collide. + pub fn merge<'a>(&mut self, entries: impl IntoIterator>) -> &mut Self { + for entry in entries { + let old_email: EncodedStringRef<'_> = entry.old_email.into(); + assert!( + entry.new_name.is_some() || entry.new_email.is_some(), + "BUG: encountered entry without any mapped/new name or email." + ); + match self + .entries_by_old_email + .binary_search_by(|e| e.old_email.cmp_ref(old_email)) + { + Ok(pos) => self.entries_by_old_email[pos].merge(entry), + Err(insert_pos) => { + self.entries_by_old_email.insert(insert_pos, entry.into()); + } + }; + } + self + } + + /// Transform our acceleration structure into a list of entries. + /// + /// Note that the order is different from how they were obtained initially, and are explicitly ordered by + /// (old_email, old_name). + pub fn entries(&self) -> Vec> { + let mut out = Vec::with_capacity(self.entries_by_old_email.len()); + for entry in &self.entries_by_old_email { + if entry.new_email.is_some() || entry.new_name.is_some() { + out.push(crate::Entry { + new_name: entry.new_name.as_ref().map(|b| b.as_bstr()), + new_email: entry.new_email.as_ref().map(|b| b.as_bstr()), + old_name: None, + old_email: entry.old_email.as_bstr(), + }); + } + + for name_entry in &entry.entries_by_old_name { + out.push(crate::Entry { + new_name: name_entry.new_name.as_ref().map(|b| b.as_bstr()), + new_email: name_entry.new_email.as_ref().map(|b| b.as_bstr()), + old_name: name_entry.old_name.as_bstr().into(), + old_email: entry.old_email.as_bstr(), + }); + } + } + out + } + + /// Try to resolve `signature` by its contained email and name and provide resolved/mapped names as reference. + /// Return `None` if no such mapping was found. + /// + /// This is the fastest possible lookup as there is no allocation. + pub fn try_resolve_ref<'a>(&'a self, signature: &git_actor::SignatureRef<'_>) -> Option> { + let email: EncodedStringRef<'_> = signature.email.into(); + let pos = self + .entries_by_old_email + .binary_search_by(|e| e.old_email.cmp_ref(email)) + .ok()?; + let entry = &self.entries_by_old_email[pos]; + + let name: EncodedStringRef<'_> = signature.name.into(); + + match entry.entries_by_old_name.binary_search_by(|e| e.old_name.cmp_ref(name)) { + Ok(pos) => { + let entry = &entry.entries_by_old_name[pos]; + ResolvedSignature::try_new(entry.new_email.as_ref(), entry.new_name.as_ref()) + } + Err(_) => ResolvedSignature::try_new(entry.new_email.as_ref(), entry.new_name.as_ref()), + } + } + + /// Try to resolve `signature` by its contained email and name and provide resolved/mapped names as owned signature, + /// with the mapped name and/or email replaced accordingly. + /// + /// Return `None` if no such mapping was found. + pub fn try_resolve(&self, signature: &git_actor::SignatureRef<'_>) -> Option { + let new = self.try_resolve_ref(signature)?; + enriched_signature(signature, new) + } + + /// Like [`try_resolve()`][Snapshot::try_resolve()], but always returns an owned signature, which might be a copy + /// of `signature` if no mapping was found. + /// + /// Note that this method will always allocate. + pub fn resolve(&self, signature: &git_actor::SignatureRef<'_>) -> git_actor::Signature { + self.try_resolve(signature).unwrap_or_else(|| signature.to_owned()) + } +} + +fn enriched_signature( + SignatureRef { name, email, time }: &SignatureRef<'_>, + new: ResolvedSignature<'_>, +) -> Option { + let time = *time; + match (new.email, new.name) { + (Some(new_email), Some(new_name)) => git_actor::Signature { + email: new_email.to_owned(), + name: new_name.to_owned(), + time, + } + .into(), + (Some(new_email), None) => git_actor::Signature { + email: new_email.to_owned(), + name: (*name).to_owned(), + time, + } + .into(), + (None, Some(new_name)) => git_actor::Signature { + email: (*email).to_owned(), + name: new_name.to_owned(), + time, + } + .into(), + (None, None) => unreachable!("BUG: ResolvedSignatures don't exist here when nothing is set"), + } +} + +#[cfg(test)] +mod encoded_string { + use crate::snapshot::{EncodedString, EncodedStringRef}; + use std::cmp::Ordering; + + #[test] + fn basic_ascii_case_folding() { + assert_eq!( + EncodedString::Utf8("FooBar".into()).cmp_ref(EncodedStringRef::Utf8("foobar")), + Ordering::Equal + ); + } + + #[test] + fn no_advanced_unicode_folding() { + assert_ne!( + EncodedString::Utf8("Masse".into()).cmp_ref(EncodedStringRef::Utf8("Maße")), + Ordering::Equal + ); + } + + #[test] + fn unknown_encoding_pairs_do_not_try_to_ignore_cases() { + assert_ne!( + EncodedString::Utf8("Foo".into()).cmp_ref(EncodedStringRef::Unknown("foo".into())), + Ordering::Equal + ); + assert_ne!( + EncodedString::Unknown("Foo".into()).cmp_ref(EncodedStringRef::Utf8("foo")), + Ordering::Equal + ); + assert_ne!( + EncodedString::Unknown("Foo".into()).cmp_ref(EncodedStringRef::Unknown("foo".into())), + Ordering::Equal + ); + } +} diff --git a/git-mailmap/tests/fixtures/invalid.txt b/git-mailmap/tests/fixtures/invalid.txt new file mode 100644 index 00000000000..c579c5bd580 --- /dev/null +++ b/git-mailmap/tests/fixtures/invalid.txt @@ -0,0 +1,5 @@ +# comment + + +A-overwritten + +B +B-overwritten + +C old-C +C-overwritten old-C + + + diff --git a/git-mailmap/tests/fixtures/typical.txt b/git-mailmap/tests/fixtures/typical.txt new file mode 100644 index 00000000000..563b9ddd90a --- /dev/null +++ b/git-mailmap/tests/fixtures/typical.txt @@ -0,0 +1,8 @@ +# from the mailmap docs + +Joe R. Developer +Joe R. Developer Joe + +Jane Doe +Jane Doe +Jane Doe Jane diff --git a/git-mailmap/tests/mailmap.rs b/git-mailmap/tests/mailmap.rs new file mode 100644 index 00000000000..58111c7695e --- /dev/null +++ b/git-mailmap/tests/mailmap.rs @@ -0,0 +1,2 @@ +mod parse; +mod snapshot; diff --git a/git-mailmap/tests/parse/mod.rs b/git-mailmap/tests/parse/mod.rs new file mode 100644 index 00000000000..12166a2d615 --- /dev/null +++ b/git-mailmap/tests/parse/mod.rs @@ -0,0 +1,130 @@ +use git_mailmap::{parse, Entry}; +use git_testtools::fixture_bytes; + +#[test] +fn line_numbers_are_counted_correctly_in_errors() { + let input = fixture_bytes("invalid.txt"); + let mut actual = git_mailmap::parse(&input).collect::>().into_iter(); + assert_eq!(actual.len(), 2); + + let err = actual.next().expect("two items left").unwrap_err(); + assert!(matches!(err, parse::Error::Malformed { line_number: 3, .. })); + + let err = actual.next().expect("one item left").unwrap_err(); + assert!(matches!(err, parse::Error::UnconsumedInput { line_number: 5, .. })); +} + +#[test] +fn a_typical_mailmap() { + let input = fixture_bytes("typical.txt"); + let actual = git_mailmap::parse(&input).map(Result::unwrap).collect::>(); + assert_eq!( + actual, + vec![ + Entry::change_name_by_email("Joe R. Developer", "joe@example.com"), + Entry::change_name_and_email_by_name_and_email( + "Joe R. Developer", + "joe@example.com", + "Joe", + "bugs@example.com" + ), + Entry::change_name_and_email_by_email("Jane Doe", "jane@example.com", "jane@laptop.(none)"), + Entry::change_name_and_email_by_email("Jane Doe", "jane@example.com", "jane@desktop.(none)"), + Entry::change_name_and_email_by_name_and_email("Jane Doe", "jane@example.com", "Jane", "bugs@example.com"), + ] + ); +} + +#[test] +fn empty_lines_and_comments_are_ignored() { + assert!(git_mailmap::parse(b"# comment").next().is_none()); + assert!(git_mailmap::parse(b"\n\r\n\t\t \n").next().is_none()); + assert_eq!( + line(" # this is a name "), + Entry::change_name_by_email("# this is a name", "email"), + "whitespace before hashes counts as name though" + ); +} + +#[test] +fn windows_and_unix_line_endings_are_supported() { + let actual = git_mailmap::parse(b"a \n\r\nc ") + .map(Result::unwrap) + .collect::>(); + assert_eq!( + actual, + vec![ + Entry::change_name_by_email("a", "a@example.com"), + Entry::change_email_by_email("b-new", "b-old"), + Entry::change_name_by_email("c", "c@example.com") + ] + ); +} + +#[test] +fn valid_entries() { + assert_eq!( + line(" \t proper name "), + Entry::change_name_by_email("proper name", "commit-email") + ); + assert_eq!( + line(" \t "), + Entry::change_email_by_email("proper email", "commit-email") + ); + assert_eq!( + line(" proper name \t \t "), + Entry::change_name_and_email_by_email("proper name", "proper email", "commit-email") + ); + assert_eq!( + line(" proper name \tcommit name\t\t"), + Entry::change_name_and_email_by_name_and_email("proper name", "proper email", "commit name", "commit-email") + ); +} + +#[test] +fn error_if_there_is_just_a_name() { + assert!(matches!( + try_line("just a name"), + Err(parse::Error::UnconsumedInput { line_number: 1, .. }) + )); +} + +#[test] +fn error_if_there_is_just_an_email() { + assert!(matches!( + try_line(""), + Err(parse::Error::Malformed { line_number: 1, .. }) + )); + + assert!(matches!( + try_line(" \t "), + Err(parse::Error::Malformed { line_number: 1, .. }) + )); +} + +#[test] +fn error_if_email_is_empty() { + assert!(matches!( + try_line("hello <"), + Err(parse::Error::Malformed { line_number: 1, .. }) + )); + assert!(matches!( + try_line("hello < \t"), + Err(parse::Error::Malformed { line_number: 1, .. }) + )); + assert!(matches!( + try_line("hello < \t\r >"), + Err(parse::Error::Malformed { line_number: 1, .. }) + )); +} + +fn line(input: &str) -> Entry<'_> { + try_line(input).unwrap() +} + +fn try_line(input: &str) -> Result, parse::Error> { + let mut lines = git_mailmap::parse(input.as_bytes()); + let res = lines.next().expect("single line"); + assert!(lines.next().is_none(), "only one line provided"); + res +} diff --git a/git-mailmap/tests/snapshot/mod.rs b/git-mailmap/tests/snapshot/mod.rs new file mode 100644 index 00000000000..db52a924707 --- /dev/null +++ b/git-mailmap/tests/snapshot/mod.rs @@ -0,0 +1,127 @@ +use git_mailmap::Snapshot; +use git_testtools::fixture_bytes; + +#[test] +fn try_resolve() { + let snapshot = Snapshot::from_bytes(&fixture_bytes("typical.txt")); + assert_eq!( + snapshot.try_resolve(&signature("Foo", "Joe@example.com").to_ref()), + Some(signature("Joe R. Developer", "Joe@example.com")), + "resolved signatures contain all original fields, but normalizes only what's in the mapping, lookup is case-insensitive" + ); + assert_eq!( + snapshot.try_resolve(&signature("Joe", "bugs@example.com").to_ref()), + Some(signature("Joe R. Developer", "joe@example.com")), + "name and email can be mapped specifically" + ); + + assert_eq!( + snapshot.try_resolve(&signature("Jane", "jane@laptop.(none)").to_ref()), + Some(signature("Jane Doe", "jane@example.com")), + "fix name and email by email" + ); + assert_eq!( + snapshot.try_resolve(&signature("Jane", "jane@desktop.(none)").to_ref()), + Some(signature("Jane Doe", "jane@example.com")), + "fix name and email by other email" + ); + + assert_eq!( + snapshot.try_resolve(&signature("janE", "Bugs@example.com").to_ref()), + Some(signature("Jane Doe", "jane@example.com")), + "name and email can be mapped specifically, case insensitive matching of name" + ); + + let sig = signature("Jane", "other@example.com"); + assert_eq!(snapshot.try_resolve(&sig.to_ref()), None, "unmatched email"); + + assert_eq!( + snapshot.resolve(&sig.to_ref()), + sig, + "resolution always works here, returning a copy of the original" + ); + + let sig = signature("Jean", "bugs@example.com"); + assert_eq!( + snapshot.try_resolve(&sig.to_ref()), + None, + "matched email, unmatched name" + ); + assert_eq!(snapshot.resolve(&sig.to_ref()), sig); + + assert_eq!(snapshot.entries().len(), 5); +} + +#[test] +fn non_name_and_name_mappings_will_not_clash() { + let entries = vec![ + // add mapping from email + git_mailmap::Entry::change_name_and_email_by_email("new-name", "new-email", "old-email"), + // add mapping from name and email + git_mailmap::Entry::change_name_and_email_by_name_and_email( + "other-new-name", + "other-new-email", + "old-name", + "old-email", + ), + ]; + for entries in vec![entries.clone().into_iter().rev().collect::>(), entries] { + let snapshot = Snapshot::new(entries); + + assert_eq!( + snapshot.try_resolve(&signature("replace-by-email", "old-email").to_ref()), + Some(signature("new-name", "new-email")), + "it can match by email only" + ); + assert_eq!( + snapshot.try_resolve(&signature("old-name", "old-email").to_ref()), + Some(signature("other-new-name", "other-new-email")), + "it can match by email and name as well" + ); + + assert_eq!(snapshot.entries().len(), 2); + } +} + +#[test] +fn overwrite_entries() { + let snapshot = Snapshot::from_bytes(&fixture_bytes("overwrite.txt")); + assert_eq!( + snapshot.try_resolve(&signature("does not matter", "old-a-email").to_ref()), + Some(signature("A-overwritten", "old-a-email")), + "email only by email" + ); + + assert_eq!( + snapshot.try_resolve(&signature("to be replaced", "old-b-EMAIL").to_ref()), + Some(signature("B-overwritten", "new-b-email-overwritten")), + "name and email by email" + ); + + assert_eq!( + snapshot.try_resolve(&signature("old-c", "old-C-email").to_ref()), + Some(signature("C-overwritten", "new-c-email-overwritten")), + "name and email by name and email" + ); + + assert_eq!( + snapshot.try_resolve(&signature("unchanged", "old-d-email").to_ref()), + Some(signature("unchanged", "new-d-email-overwritten")), + "email by email" + ); + + assert_eq!(snapshot.entries().len(), 4); +} + +fn signature(name: &str, email: &str) -> git_actor::Signature { + git_actor::Signature { + name: name.into(), + email: email.into(), + time: git_actor::Time { + // marker + seconds_since_unix_epoch: 42, + offset_in_seconds: 53, + sign: git_actor::Sign::Minus, + }, + } +} diff --git a/git-repository/Cargo.toml b/git-repository/Cargo.toml index 58a924766f8..49d6350de29 100644 --- a/git-repository/Cargo.toml +++ b/git-repository/Cargo.toml @@ -41,7 +41,7 @@ one-stop-shop = [ "local", "local-time-support" ] #! ### Other ## Data structures implement `serde::Serialize` and `serde::Deserialize`. -serde1 = ["git-pack/serde1", "git-object/serde1", "git-protocol/serde1", "git-transport/serde1", "git-ref/serde1", "git-odb/serde1", "git-index/serde1"] +serde1 = ["git-pack/serde1", "git-object/serde1", "git-protocol/serde1", "git-transport/serde1", "git-ref/serde1", "git-odb/serde1", "git-index/serde1", "git-mailmap/serde1"] ## Activate other features that maximize performance, like usage of threads, `zlib-ng` and access to caching in object databases. ## **Note** that max-performance = ["git-features/parallel", "git-features/zlib-ng-compat", "git-pack/pack-cache-lru-static", "git-pack/pack-cache-lru-dynamic"] @@ -49,7 +49,7 @@ max-performance = ["git-features/parallel", "git-features/zlib-ng-compat", "git- local-time-support = ["git-actor/local-time-support"] ## Re-export stability tier 2 crates for convenience and make `Repository` struct fields with types from these crates publicly accessible. ## Doing so is less stable than the stability tier 1 that `git-repository` is a member of. -unstable = ["git-index", "git-worktree"] +unstable = ["git-index", "git-worktree", "git-mailmap"] ## Print debugging information about usage of object database caches, useful for tuning cache sizes. cache-efficiency-debug = ["git-features/cache-efficiency-debug"] @@ -73,6 +73,7 @@ git-traverse = { version = "^0.12.0", path = "../git-traverse" } git-protocol = { version = "^0.14.0", path = "../git-protocol", optional = true } git-transport = { version = "^0.15.0", path = "../git-transport", optional = true } git-diff = { version = "^0.13.0", path = "../git-diff", optional = true } +git-mailmap = { version = "0.0.0", path = "../git-mailmap", optional = true } git-features = { version = "^0.19.1", path = "../git-features", features = ["progress"] } # unstable only diff --git a/git-repository/src/lib.rs b/git-repository/src/lib.rs index 5fc2ef6fb51..eb0a52d9227 100644 --- a/git-repository/src/lib.rs +++ b/git-repository/src/lib.rs @@ -89,6 +89,7 @@ //! * [`bstr`][bstr] //! * [`index`] //! * [`worktree`] +//! * [`mailmap`] //! * [`objs`] //! * [`odb`] //! * [`pack`][odb::pack] @@ -239,6 +240,29 @@ mod config { } } +/// +pub mod mailmap { + #[cfg(all(feature = "unstable", feature = "git-worktree"))] + pub use git_mailmap::*; + + /// + pub mod load { + /// The error returned by [`crate::Repository::load_mailmap_into()`]. + #[derive(Debug, thiserror::Error)] + #[allow(missing_docs)] + pub enum Error { + #[error("The mailmap file declared in `mailmap.file` could not be read")] + Io(#[from] std::io::Error), + #[error("The configured mailmap.blob could not be parsed")] + BlobSpec(#[from] git_hash::decode::Error), + #[error(transparent)] + PathInterpolate(#[from] git_config::values::path::interpolate::Error), + #[error("Could not find object configured in `mailmap.blob`")] + FindExisting(#[from] crate::object::find::existing::OdbError), + } + } +} + /// pub mod init { use std::{convert::TryInto, path::Path}; diff --git a/git-repository/src/repository/location.rs b/git-repository/src/repository/location.rs index d00c2051ef5..0d05f779773 100644 --- a/git-repository/src/repository/location.rs +++ b/git-repository/src/repository/location.rs @@ -9,6 +9,11 @@ impl crate::Repository { self.work_tree.as_deref() } + /// The directory of the binary path of the current process. + pub fn install_directory(&self) -> std::io::Result { + std::env::current_exe() + } + /// Return the kind of repository, either bare or one with a work tree. pub fn kind(&self) -> crate::Kind { match self.work_tree { @@ -23,23 +28,4 @@ impl crate::Repository { pub fn git_dir(&self) -> &std::path::Path { self.refs.base() } - - // TODO: tests - /// Load the index file of this repository's workspace, if present. - /// - /// Note that it is loaded into memory each time this method is called, but also is independent of the workspace. - #[cfg(feature = "git-index")] - pub fn load_index(&self) -> Option> { - // TODO: choose better/correct options - let opts = git_index::decode::Options { - object_hash: self.object_hash, - thread_limit: None, - min_extension_block_in_bytes_for_threading: 1024 * 256, - }; - match git_index::File::at(self.git_dir().join("index"), opts) { - Ok(index) => Some(Ok(index)), - Err(git_index::file::init::Error::Io(err)) if err.kind() == std::io::ErrorKind::NotFound => None, - Err(err) => Some(Err(err)), - } - } } diff --git a/git-repository/src/repository/mod.rs b/git-repository/src/repository/mod.rs index 7a2253d912a..e13f2483387 100644 --- a/git-repository/src/repository/mod.rs +++ b/git-repository/src/repository/mod.rs @@ -78,6 +78,8 @@ mod init { mod location; +mod snapshots; + mod trait_impls; mod cache; diff --git a/git-repository/src/repository/snapshots.rs b/git-repository/src/repository/snapshots.rs new file mode 100644 index 00000000000..f6d6778331e --- /dev/null +++ b/git-repository/src/repository/snapshots.rs @@ -0,0 +1,106 @@ +impl crate::Repository { + // TODO: tests + /// Load the index file of this repository's workspace, if present. + /// + /// Note that it is loaded into memory each time this method is called, but also is independent of the workspace. + #[cfg(feature = "git-index")] + pub fn load_index(&self) -> Option> { + // TODO: choose better/correct options + let opts = git_index::decode::Options { + object_hash: self.object_hash, + thread_limit: None, + min_extension_block_in_bytes_for_threading: 1024 * 256, + }; + match git_index::File::at(self.git_dir().join("index"), opts) { + Ok(index) => Some(Ok(index)), + Err(git_index::file::init::Error::Io(err)) if err.kind() == std::io::ErrorKind::NotFound => None, + Err(err) => Some(Err(err)), + } + } + + // TODO: tests + /// Try to merge mailmaps from the following locations into `target`: + /// + /// - read the `.mailmap` file without following symlinks from the working tree, if present + /// - OR read `HEAD:.mailmap` if this repository is bare (i.e. has no working tree), if the `mailmap.blob` is not set. + /// - read the mailmap as configured in `mailmap.blob`, if set. + /// - read the file as configured by `mailmap.file`, following symlinks, if set. + /// + /// Only the first error will be reported, and as many source mailmaps will be merged into `target` as possible. + /// Parsing errors will be ignored. + #[cfg(feature = "git-mailmap")] + pub fn load_mailmap_into(&self, target: &mut git_mailmap::Snapshot) -> Result<(), crate::mailmap::load::Error> { + let mut err = None::; + let mut buf = Vec::new(); + let mut blob_id = self + .config + .get_raw_value("mailmap", None, "blob") + .ok() + .and_then(|spec| { + // TODO: actually resolve this as spec (once we can do that) + git_hash::ObjectId::from_hex(spec.as_ref()) + .map_err(|e| err.get_or_insert(e.into())) + .ok() + }); + match self.work_tree() { + None => { + // TODO: replace with ref-spec `HEAD:.mailmap` for less verbose way of getting the blob id + blob_id = blob_id.or_else(|| { + self.head().ok().and_then(|mut head| { + let commit = head.peel_to_commit_in_place().ok()?; + let tree = commit.tree().ok()?; + tree.lookup_path(std::iter::once(".mailmap")).ok()?.map(|e| e.oid) + }) + }); + } + Some(root) => { + if let Ok(mut file) = git_features::fs::open_options_no_follow() + .read(true) + .open(root.join(".mailmap")) + .map_err(|e| { + if e.kind() != std::io::ErrorKind::NotFound { + err.get_or_insert(e.into()); + } + }) + { + buf.clear(); + std::io::copy(&mut file, &mut buf) + .map_err(|e| err.get_or_insert(e.into())) + .ok(); + target.merge(git_mailmap::parse_ignore_errors(&buf)); + } + } + } + + if let Some(blob) = blob_id.and_then(|id| self.find_object(id).map_err(|e| err.get_or_insert(e.into())).ok()) { + target.merge(git_mailmap::parse_ignore_errors(&blob.data)); + } + + let configured_path = self + .config + .value::>("mailmap", None, "file") + .ok() + .and_then(|path| { + let install_dir = self.install_directory().ok()?; + match path.interpolate(Some(install_dir.as_path())) { + Ok(path) => Some(path), + Err(e) => { + err.get_or_insert(e.into()); + None + } + } + }); + + if let Some(mut file) = + configured_path.and_then(|path| std::fs::File::open(path).map_err(|e| err.get_or_insert(e.into())).ok()) + { + buf.clear(); + std::io::copy(&mut file, &mut buf) + .map_err(|e| err.get_or_insert(e.into())) + .ok(); + target.merge(git_mailmap::parse_ignore_errors(&buf)); + } + + err.map(Err).unwrap_or(Ok(())) + } +} diff --git a/git-worktree/Cargo.toml b/git-worktree/Cargo.toml index fb1d7268b65..688ec6dc470 100644 --- a/git-worktree/Cargo.toml +++ b/git-worktree/Cargo.toml @@ -42,9 +42,6 @@ bstr = { version = "0.2.13", default-features = false } document-features = { version = "0.2.0", optional = true } io-close = "0.3.7" -[target.'cfg(unix)'.dependencies] -libc = "0.2.119" - [dev-dependencies] git-testtools = { path = "../tests/tools" } git-odb = { path = "../git-odb" } diff --git a/git-worktree/src/index/entry.rs b/git-worktree/src/index/entry.rs index bc55df893b5..2cdb507c914 100644 --- a/git-worktree/src/index/entry.rs +++ b/git-worktree/src/index/entry.rs @@ -155,19 +155,11 @@ fn open_options(path: &Path, destination_is_initially_empty: bool, overwrite_exi if overwrite_existing || !destination_is_initially_empty { debug_assert_dest_is_no_symlink(path); } - let mut options = OpenOptions::new(); + let mut options = git_features::fs::open_options_no_follow(); options .create_new(destination_is_initially_empty && !overwrite_existing) .create(!destination_is_initially_empty || overwrite_existing) .write(true); - #[cfg(unix)] - { - /// Make sure that it's impossible to follow through to the target of symlinks. - /// Note that this will still follow symlinks in the path, which is what we assume - /// has been checked separately. - use std::os::unix::fs::OpenOptionsExt; - options.custom_flags(libc::O_NOFOLLOW); - } options } diff --git a/gitoxide-core/src/lib.rs b/gitoxide-core/src/lib.rs index ed27b601f77..30af836bfce 100644 --- a/gitoxide-core/src/lib.rs +++ b/gitoxide-core/src/lib.rs @@ -46,6 +46,7 @@ pub mod commitgraph; #[cfg(feature = "estimate-hours")] pub mod hours; pub mod index; +pub mod mailmap; #[cfg(feature = "organize")] pub mod organize; pub mod pack; diff --git a/gitoxide-core/src/mailmap.rs b/gitoxide-core/src/mailmap.rs new file mode 100644 index 00000000000..95c6abccbb7 --- /dev/null +++ b/gitoxide-core/src/mailmap.rs @@ -0,0 +1,42 @@ +use crate::OutputFormat; +use anyhow::{bail, Context}; +use git_repository as git; +use std::collections::HashSet; +use std::io::Write; +use std::path::Path; + +pub const PROGRESS_RANGE: std::ops::RangeInclusive = 1..=2; + +pub fn verify(path: impl AsRef, format: OutputFormat, mut out: impl Write) -> anyhow::Result<()> { + if format != OutputFormat::Human { + bail!("Only 'human' format is currently supported"); + } + let path = path.as_ref(); + let buf = std::fs::read(path).with_context(|| format!("Failed to read mailmap file at '{}'", path.display()))?; + let mut err_count = 0; + for err in git::mailmap::parse(&buf).filter_map(Result::err) { + err_count += 1; + writeln!(out, "{}", err)?; + } + + let mut seen = HashSet::<(_, _)>::default(); + for entry in git::mailmap::parse(&buf).filter_map(Result::ok) { + if !seen.insert((entry.old_email(), entry.old_name())) { + writeln!( + out, + "NOTE: entry ({:?}, {:?}) -> ({:?}, {:?}) is being overwritten", + entry.old_email(), + entry.old_name(), + entry.new_email(), + entry.new_name() + )?; + } + } + + if err_count == 0 { + writeln!(out, "{} lines OK", git::mailmap::parse(&buf).count())?; + Ok(()) + } else { + bail!("{} lines in '{}' could not be parsed", err_count, path.display()); + } +} diff --git a/gitoxide-core/src/repository/mailmap.rs b/gitoxide-core/src/repository/mailmap.rs new file mode 100644 index 00000000000..96ddb1a9c0e --- /dev/null +++ b/gitoxide-core/src/repository/mailmap.rs @@ -0,0 +1,52 @@ +use crate::OutputFormat; +use git_repository as git; +use git_repository::bstr::ByteSlice; +use git_repository::mailmap::Entry; +use std::io; +use std::path::PathBuf; + +#[cfg(feature = "serde1")] +#[cfg_attr(feature = "serde1", derive(serde::Serialize))] +struct JsonEntry { + new_name: Option, + new_email: Option, + old_name: Option, + old_email: String, +} + +#[cfg(feature = "serde1")] +impl<'a> From> for JsonEntry { + fn from(v: Entry<'a>) -> Self { + JsonEntry { + new_name: v.new_name().map(|s| s.to_str_lossy().into_owned()), + new_email: v.new_email().map(|s| s.to_str_lossy().into_owned()), + old_name: v.old_name().map(|s| s.to_str_lossy().into_owned()), + old_email: v.old_email().to_str_lossy().into_owned(), + } + } +} + +pub fn entries( + repository: PathBuf, + format: OutputFormat, + out: impl io::Write, + mut err: impl io::Write, +) -> anyhow::Result<()> { + if format == OutputFormat::Human { + writeln!(err, "Defaulting to JSON as human format isn't implemented").ok(); + } + + let repo = git::open(repository)?.apply_environment(); + let mut mailmap = git::mailmap::Snapshot::default(); + if let Err(e) = repo.load_mailmap_into(&mut mailmap) { + writeln!(err, "Error while loading mailmap, the first error is: {}", e).ok(); + } + + #[cfg(feature = "serde1")] + serde_json::to_writer_pretty( + out, + &mailmap.entries().into_iter().map(JsonEntry::from).collect::>(), + )?; + + Ok(()) +} diff --git a/gitoxide-core/src/repository/mod.rs b/gitoxide-core/src/repository/mod.rs index 0ebbe799e36..afd9e668a42 100644 --- a/gitoxide-core/src/repository/mod.rs +++ b/gitoxide-core/src/repository/mod.rs @@ -12,3 +12,5 @@ pub mod tree; pub mod verify; pub mod odb; + +pub mod mailmap; diff --git a/src/plumbing/main.rs b/src/plumbing/main.rs index dd72bd6f48c..56735c157e7 100644 --- a/src/plumbing/main.rs +++ b/src/plumbing/main.rs @@ -12,6 +12,7 @@ use clap::Parser; use gitoxide_core as core; use gitoxide_core::pack::verify; +use crate::plumbing::options::mailmap; use crate::plumbing::options::pack::multi_index; #[cfg(any(feature = "gitoxide-core-async-client", feature = "gitoxide-core-blocking-client"))] use crate::plumbing::options::remote; @@ -74,6 +75,16 @@ pub fn main() -> Result<()> { })?; match cmd { + Subcommands::Mailmap(mailmap::Platform { path, cmd }) => match cmd { + mailmap::Subcommands::Verify => prepare_and_run( + "mailmap-verify", + verbose, + progress, + progress_keep_open, + core::mailmap::PROGRESS_RANGE, + move |_progress, out, _err| core::mailmap::verify(path, format, out), + ), + }, Subcommands::Index(index::Platform { object_hash, index_path, @@ -147,6 +158,16 @@ pub fn main() -> Result<()> { ), }, Subcommands::Repository(repo::Platform { repository, cmd }) => match cmd { + repo::Subcommands::Mailmap { cmd } => match cmd { + repo::mailmap::Subcommands::Entries => prepare_and_run( + "repository-mailmap-entries", + verbose, + progress, + progress_keep_open, + None, + move |_progress, out, err| core::repository::mailmap::entries(repository, format, out, err), + ), + }, repo::Subcommands::Odb { cmd } => match cmd { repo::odb::Subcommands::Entries => prepare_and_run( "repository-odb-entries", diff --git a/src/plumbing/options.rs b/src/plumbing/options.rs index 4a75745dcf6..c329fafd8d9 100644 --- a/src/plumbing/options.rs +++ b/src/plumbing/options.rs @@ -60,6 +60,8 @@ pub enum Subcommands { Index(index::Platform), /// Subcommands for interacting with entire git repositories Repository(repo::Platform), + /// Subcommands for interacting with mailmaps + Mailmap(mailmap::Platform), } /// @@ -358,6 +360,19 @@ pub mod repo { #[clap(subcommand)] cmd: odb::Subcommands, }, + /// Interact with the mailmap. + Mailmap { + #[clap(subcommand)] + cmd: mailmap::Subcommands, + }, + } + + pub mod mailmap { + #[derive(Debug, clap::Subcommand)] + pub enum Subcommands { + /// Print all entries in configured mailmaps, inform about errors as well. + Entries, + } } pub mod odb { @@ -448,6 +463,28 @@ pub mod index { } } +/// +pub mod mailmap { + use std::path::PathBuf; + + #[derive(Debug, clap::Parser)] + pub struct Platform { + /// The path to the mailmap file. + #[clap(short = 'p', long, default_value = ".mailmap")] + pub path: PathBuf, + + /// Subcommands + #[clap(subcommand)] + pub cmd: Subcommands, + } + + #[derive(Debug, clap::Subcommand)] + pub enum Subcommands { + /// Parse all entries in the mailmap and report malformed lines or collisions. + Verify, + } +} + /// pub mod commitgraph { use std::path::PathBuf; diff --git a/tests/tools/src/lib.rs b/tests/tools/src/lib.rs index c3123ce2faa..f39132cacad 100644 --- a/tests/tools/src/lib.rs +++ b/tests/tools/src/lib.rs @@ -26,6 +26,12 @@ pub fn hex_to_id(hex: &str) -> git_hash::ObjectId { pub fn fixture_path(path: impl AsRef) -> PathBuf { PathBuf::from("tests").join("fixtures").join(path.as_ref()) } +pub fn fixture_bytes(path: impl AsRef) -> Vec { + match std::fs::read(fixture_path(path.as_ref())) { + Ok(res) => res, + Err(_) => panic!("File at '{}' not found", path.as_ref().display()), + } +} pub fn scripted_fixture_repo_read_only( script_name: impl AsRef, ) -> std::result::Result> {