Skip to content

Commit

Permalink
Merge branch 'fixes-for-crates-index-diff'
Browse files Browse the repository at this point in the history
  • Loading branch information
Byron committed Nov 8, 2022
2 parents cf04218 + efe0a51 commit 255be4d
Show file tree
Hide file tree
Showing 16 changed files with 186 additions and 113 deletions.
3 changes: 3 additions & 0 deletions git-diff/src/blob.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
//! For using text diffs, please have a look at the [`imara-diff` documentation](https://docs.rs/imara-diff),
//! maintained by [Pascal Kuthe](https://github.com/pascalkuthe).
pub use imara_diff::*;
2 changes: 1 addition & 1 deletion git-diff/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,4 +6,4 @@
pub mod tree;

///
pub mod text;
pub mod blob;
26 changes: 0 additions & 26 deletions git-diff/src/text.rs

This file was deleted.

2 changes: 2 additions & 0 deletions git-diff/tests/blob/mod.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
#[test]
fn currently_there_is_no_api_surface_to_test_as_it_is_reexporting_imara_diff() {}
3 changes: 2 additions & 1 deletion git-diff/tests/diff.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,4 +2,5 @@ pub type Result<T = ()> = std::result::Result<T, Box<dyn std::error::Error>>;

pub use git_testtools::hex_to_id;

mod visit;
mod blob;
mod tree;
File renamed without changes.
1 change: 0 additions & 1 deletion git-index/tests/index/file/write.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
use filetime::FileTime;
use git_index::{entry, extension, verify::extensions::no_find, write, write::Options, State, Version};
use git_repository as git;

use crate::{fixture_index_path, index::Fixture::*, loose_file_path};

Expand Down
12 changes: 6 additions & 6 deletions git-repository/src/config/cache/access.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ use crate::{

/// Access
impl Cache {
pub(crate) fn diff_algorithm(&self) -> Result<git_diff::text::Algorithm, crate::config::diff::algorithm::Error> {
pub(crate) fn diff_algorithm(&self) -> Result<git_diff::blob::Algorithm, crate::config::diff::algorithm::Error> {
use crate::config::diff::algorithm::Error;
self.diff_algorithm
.get_or_try_init(|| {
Expand All @@ -20,14 +20,14 @@ impl Cache {
.string("diff", None, "algorithm")
.unwrap_or_else(|| Cow::Borrowed("myers".into()));
if name.eq_ignore_ascii_case(b"myers") || name.eq_ignore_ascii_case(b"default") {
Ok(git_diff::text::Algorithm::Myers)
Ok(git_diff::blob::Algorithm::Myers)
} else if name.eq_ignore_ascii_case(b"minimal") {
Ok(git_diff::text::Algorithm::MyersMinimal)
Ok(git_diff::blob::Algorithm::MyersMinimal)
} else if name.eq_ignore_ascii_case(b"histogram") {
Ok(git_diff::text::Algorithm::Histogram)
Ok(git_diff::blob::Algorithm::Histogram)
} else if name.eq_ignore_ascii_case(b"patience") {
if self.lenient_config {
Ok(git_diff::text::Algorithm::Histogram)
Ok(git_diff::blob::Algorithm::Histogram)
} else {
Err(Error::Unimplemented {
name: name.into_owned(),
Expand All @@ -39,7 +39,7 @@ impl Cache {
})
}
};
check_lenient_default(res, self.lenient_config, || git_diff::text::Algorithm::Myers)
check_lenient_default(res, self.lenient_config, || git_diff::blob::Algorithm::Myers)
})
.copied()
}
Expand Down
2 changes: 1 addition & 1 deletion git-repository/src/config/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -132,7 +132,7 @@ pub(crate) struct Cache {
#[cfg(any(feature = "blocking-network-client", feature = "async-network-client"))]
pub(crate) url_scheme: OnceCell<remote::url::SchemePermission>,
/// The algorithm to use when diffing blobs
pub(crate) diff_algorithm: OnceCell<git_diff::text::Algorithm>,
pub(crate) diff_algorithm: OnceCell<git_diff::blob::Algorithm>,
/// The config section filter from the options used to initialize this instance. Keep these in sync!
filter_config_section: fn(&git_config::file::Metadata) -> bool,
/// The object kind to pick if a prefix is ambiguous.
Expand Down
148 changes: 148 additions & 0 deletions git-repository/src/object/blob.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,148 @@
///
pub mod diff {
use crate::bstr::ByteSlice;
use crate::object::blob::diff::line::Change;
use std::ops::Range;

/// A platform to keep temporary information to perform line diffs on modified blobs.
///
pub struct Platform<'old, 'new> {
/// The previous version of the blob.
pub old: crate::Object<'old>,
/// The new version of the blob.
pub new: crate::Object<'new>,
/// The algorithm to use when calling [imara_diff::diff()][git_diff::blob::diff()].
/// This value is determined by the `diff.algorithm` configuration.
pub algo: git_diff::blob::Algorithm,
}

///
pub mod init {
/// The error returned by [`Platform::from_ids()`][super::Platform::from_ids()].
#[derive(Debug, thiserror::Error)]
#[allow(missing_docs)]
pub enum Error {
#[error("Could not find the previous blob or the new blob to diff against")]
FindExisting(#[from] crate::object::find::existing::Error),
#[error("Could not obtain diff algorithm from configuration")]
DiffAlgorithm(#[from] crate::config::diff::algorithm::Error),
}
}

impl<'old, 'new> Platform<'old, 'new> {
/// Produce a platform for performing various diffs after obtaining the object data of `previous_id` and `new_id`.
///
/// Note that these objects are treated as raw data and are assumed to be blobs.
pub fn from_ids(
previous_id: &crate::Id<'old>,
new_id: &crate::Id<'new>,
) -> Result<Platform<'old, 'new>, init::Error> {
match previous_id
.object()
.and_then(|old| new_id.object().map(|new| (old, new)))
{
Ok((old, new)) => {
let algo = match new_id.repo.config.diff_algorithm() {
Ok(algo) => algo,
Err(err) => return Err(err.into()),
};
Ok(Platform { old, new, algo })
}
Err(err) => Err(err.into()),
}
}
}

///
pub mod line {
use crate::bstr::BStr;

/// A change to a hunk of lines.
pub enum Change<'a, 'data> {
/// Lines were added.
Addition {
/// The lines themselves without terminator.
lines: &'a [&'data BStr],
},
/// Lines were removed.
Deletion {
/// The lines themselves without terminator.
lines: &'a [&'data BStr],
},
/// Lines have been replaced.
Modification {
/// The replaced lines without terminator.
lines_before: &'a [&'data BStr],
/// The new lines without terminator.
lines_after: &'a [&'data BStr],
},
}
}

impl<'old, 'new> Platform<'old, 'new> {
/// Perform a diff on lines between the old and the new version of a blob, passing each hunk of lines to `process_hunk`.
/// The diffing algorithm is determined by the `diff.algorithm` configuration.
///
/// Note that you can invoke the diff more flexibly as well.
// TODO: more tests (only tested insertion right now)
pub fn lines<FnH, E>(&self, mut process_hunk: FnH) -> Result<(), E>
where
FnH: FnMut(line::Change<'_, '_>) -> Result<(), E>,
E: std::error::Error,
{
let input = git_diff::blob::intern::InternedInput::new(self.old.data.as_bytes(), self.new.data.as_bytes());
let mut err = None;
let mut lines = Vec::new();
git_diff::blob::diff(self.algo, &input, |before: Range<u32>, after: Range<u32>| {
if err.is_some() {
return;
}
lines.clear();
lines.extend(
input.before[before.start as usize..before.end as usize]
.iter()
.map(|&line| input.interner[line].as_bstr()),
);
let end_of_before = lines.len();
lines.extend(
input.after[after.start as usize..after.end as usize]
.iter()
.map(|&line| input.interner[line].as_bstr()),
);
let hunk_before = &lines[..end_of_before];
let hunk_after = &lines[end_of_before..];
if hunk_after.is_empty() {
err = process_hunk(Change::Deletion { lines: hunk_before }).err();
} else if hunk_before.is_empty() {
err = process_hunk(Change::Addition { lines: hunk_after }).err();
} else {
err = process_hunk(Change::Modification {
lines_before: hunk_before,
lines_after: hunk_after,
})
.err();
}
});

match err {
Some(err) => Err(err),
None => Ok(()),
}
}

/// Count the amount of removed and inserted lines efficiently.
pub fn line_counts(&self) -> git_diff::blob::sink::Counter<()> {
let tokens = self.line_tokens();
git_diff::blob::diff(self.algo, &tokens, git_diff::blob::sink::Counter::default())
}

/// Return a tokenizer which treats lines as smallest unit for use in a [diff operation][git_diff::blob::diff()].
///
/// The line separator is determined according to normal git rules and filters.
pub fn line_tokens(&self) -> git_diff::blob::intern::InternedInput<&[u8]> {
// TODO: make use of `core.eol` and/or filters to do line-counting correctly. It's probably
// OK to just know how these objects are saved to know what constitutes a line.
git_diff::blob::intern::InternedInput::new(self.old.data.as_bytes(), self.new.data.as_bytes())
}
}
}
2 changes: 2 additions & 0 deletions git-repository/src/object/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,8 @@ pub(crate) mod cache {
}
pub use errors::{conversion, find, write};
///
pub mod blob;
///
pub mod commit;
mod impls;
pub mod peel;
Expand Down
73 changes: 5 additions & 68 deletions git-repository/src/object/tree/diff/change.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
use git_object::tree::EntryMode;

use crate::{bstr::ByteSlice, Id, Repository};
use crate::Id;

/// An event emitted when finding differences between two trees.
#[derive(Debug, Clone, Copy)]
Expand Down Expand Up @@ -34,83 +34,20 @@ pub enum Event<'old, 'new> {
},
}

/// A platform to keep temporary information to perform line diffs.
pub struct DiffPlatform<'old, 'new> {
old: crate::Object<'old>,
new: crate::Object<'new>,
algo: git_diff::text::Algorithm,
}

impl<'old, 'new> Event<'old, 'new> {
fn repo(&self) -> &Repository {
match self {
Event::Addition { id, .. } => id.repo,
Event::Deletion { id, .. } => id.repo,
Event::Modification { id, .. } => id.repo,
}
}
}

///
pub mod event {
///
pub mod diff {
/// The error returned by [`Event::diff()`][super::super::Event::diff()].
#[derive(Debug, thiserror::Error)]
#[allow(missing_docs)]
pub enum Error {
#[error("Could not find the previous object to diff against")]
FindPrevious(#[from] crate::object::find::existing::Error),
#[error("Could not obtain diff algorithm from configuration")]
DiffAlgorithm(#[from] crate::config::diff::algorithm::Error),
}
}
}

impl<'old, 'new> Event<'old, 'new> {
/// Produce a platform for performing a line-diff, or `None` if this is not a [`Modification`][Event::Modification]
/// or one of the entries to compare is not a blob.
pub fn diff(&self) -> Option<Result<DiffPlatform<'old, 'new>, event::diff::Error>> {
pub fn diff(
&self,
) -> Option<Result<crate::object::blob::diff::Platform<'old, 'new>, crate::object::blob::diff::init::Error>> {
match self {
Event::Modification {
previous_entry_mode: EntryMode::BlobExecutable | EntryMode::Blob,
previous_id,
entry_mode: EntryMode::BlobExecutable | EntryMode::Blob,
id,
} => match previous_id.object().and_then(|old| id.object().map(|new| (old, new))) {
Ok((old, new)) => {
let algo = match self.repo().config.diff_algorithm() {
Ok(algo) => algo,
Err(err) => return Some(Err(err.into())),
};
Some(Ok(DiffPlatform { old, new, algo }))
}
Err(err) => Some(Err(err.into())),
},
} => Some(crate::object::blob::diff::Platform::from_ids(previous_id, id)),
_ => None,
}
}
}

impl<'old, 'new> DiffPlatform<'old, 'new> {
/// Perform a diff on lines between the old and the new version of a blob.
/// The algorithm is determined by the `diff.algorithm` configuration.
/// Note that the [`Sink`][git_diff::text::imara::Sink] implementation is
/// what makes the diff usable and relies heavily on what the caller requires, as created by `make_sink`.
pub fn lines<FnS, S>(&self, new_sink: FnS) -> S::Out
where
FnS: for<'a> FnOnce(&git_diff::text::imara::intern::InternedInput<&'a [u8]>) -> S,
S: git_diff::text::imara::Sink,
{
git_diff::text::with(
self.old.data.as_bstr(),
self.new.data.as_bstr(),
self.algo,
// TODO: make use of `core.eol` and/or filters to do line-counting correctly. It's probably
// OK to just know how these objects are saved to know what constitutes a line.
git_diff::text::imara::intern::InternedInput::new,
new_sink,
)
.1
}
}
2 changes: 2 additions & 0 deletions git-repository/tests/object/blob.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
// TODO: needs repos with specific known objects for proper testing
mod diff {}
1 change: 1 addition & 0 deletions git-repository/tests/object/mod.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
mod blob;
mod commit;
mod tree;

Expand Down
18 changes: 12 additions & 6 deletions git-repository/tests/object/tree.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ mod diff {

use git_object::{bstr::ByteSlice, tree::EntryMode};
use git_repository as git;
use git_repository::object::blob::diff::line::Change;
use git_repository::object::tree::diff::change::Event;

use crate::named_repo;
Expand Down Expand Up @@ -30,14 +31,19 @@ mod diff {
Event::Deletion { .. } | Event::Addition { .. } => unreachable!("only modification is expected"),
};

let count = change
.event
.diff()
.expect("changed file")
.expect("objects available")
.lines(|_| git_diff::text::imara::sink::Counter::default());
let diff = change.event.diff().expect("changed file").expect("objects available");
let count = diff.line_counts();
assert_eq!(count.insertions, 1);
assert_eq!(count.removals, 0);
diff.lines(|hunk| {
match hunk {
Change::Deletion { .. } => unreachable!("there was no deletion"),
Change::Addition { lines } => assert_eq!(lines, vec!["a1".as_bytes().as_bstr()]),
Change::Modification { .. } => unreachable!("there was no modification"),
};
Ok::<_, Infallible>(())
})
.expect("infallible");
Ok(Default::default())
})
.unwrap();
Expand Down

0 comments on commit 255be4d

Please sign in to comment.