From 94dac4bfe89c2e4e53fe3f0963ab1a345e71ed37 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christoph=20R=C3=BC=C3=9Fler?= Date: Thu, 30 Oct 2025 13:10:10 +0100 Subject: [PATCH 1/4] Add script to generate test-cases for `gix-diff` to validate that diff-algorithms match (diff-slider problem) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We assume that most people would expect the output of `gix-diff` to be identical to that of `git diff`. Therefore, we want an easy way to compare the output of `gix-diff` and `git diff` on a large corpus of diffs to make sure they match. This commit tries to make as much of that process scripted and as easily reproducible as possible. There is a repository that contains “\[t\]ools for experimenting \[with\] diff "slider" heuristics”: [diff-slider-tools]. We can use `diff-slider-tools` to, for any git repository, generate a list of locations where a diff between two files is not unambiguous. This commit creates a tool that takes this a list of locations generated by `diff-slider-tools` and turns it into test cases that can be run as part of `gix-diff`’s test suite. This enables us to, whenever we want, run large-scale tests comparing `gix-diff` to `gix diff`, hopefully uncovering any edge case we might have missed in our slider heuristic and making sure we conform to our users’ expectations. [diff-slider-tools]: https://github.com/mhagger/diff-slider-tools 1. Follow these instructions to generate a file containing sliders: https://github.com/mhagger/diff-slider-tools/blob/b59ed13d7a2a6cfe14a8f79d434b6221cc8b04dd/README.md?plain=1#L122-L146 2. Run `create-diff-cases` to create the script in `gix-diff/tests/fixtures/`. The script which will be called `make_diff_for_sliders_repo.sh`. ``` # run inside `gitoxide` cargo run --package internal-tools -- create-diff-cases --sliders-file $DIFF_SLIDER_TOOLS/corpus/git.sliders --worktree-dir $DIFF_SLIDER_TOOLS/corpus/git.git/ --destination-dir gix-diff/tests/fixtures/ ``` 3. Run `cargo test sliders -- --nocapture` inside `gix-diff/tests` to run the actual tests. --- gix-diff/src/blob/unified_diff/impls.rs | 3 +- gix-diff/tests/diff/blob/mod.rs | 1 + gix-diff/tests/diff/blob/slider.rs | 258 ++++++++++++++++++ .../fixtures/make_diff_for_sliders_repo.sh | 23 ++ tests/it/src/args.rs | 28 ++ tests/it/src/commands/create_diff_cases.rs | 110 ++++++++ tests/it/src/commands/mod.rs | 3 + tests/it/src/main.rs | 8 + 8 files changed, 433 insertions(+), 1 deletion(-) create mode 100644 gix-diff/tests/diff/blob/slider.rs create mode 100755 gix-diff/tests/fixtures/make_diff_for_sliders_repo.sh create mode 100644 tests/it/src/commands/create_diff_cases.rs diff --git a/gix-diff/src/blob/unified_diff/impls.rs b/gix-diff/src/blob/unified_diff/impls.rs index 959560fba7f..16512f3ebfa 100644 --- a/gix-diff/src/blob/unified_diff/impls.rs +++ b/gix-diff/src/blob/unified_diff/impls.rs @@ -260,7 +260,8 @@ where } impl DiffLineKind { - const fn to_prefix(self) -> char { + /// Returns a one-character representation for use in unified diffs. + pub const fn to_prefix(self) -> char { match self { DiffLineKind::Context => ' ', DiffLineKind::Add => '+', diff --git a/gix-diff/tests/diff/blob/mod.rs b/gix-diff/tests/diff/blob/mod.rs index 1959c4e6fdb..5742dfd4350 100644 --- a/gix-diff/tests/diff/blob/mod.rs +++ b/gix-diff/tests/diff/blob/mod.rs @@ -1,3 +1,4 @@ pub(crate) mod pipeline; mod platform; +mod slider; mod unified_diff; diff --git a/gix-diff/tests/diff/blob/slider.rs b/gix-diff/tests/diff/blob/slider.rs new file mode 100644 index 00000000000..e23c036cd1f --- /dev/null +++ b/gix-diff/tests/diff/blob/slider.rs @@ -0,0 +1,258 @@ +use std::{iter::Peekable, path::PathBuf}; + +use gix_diff::blob::{ + intern::TokenSource, + unified_diff::{ConsumeHunk, ContextSize, HunkHeader}, + Algorithm, UnifiedDiff, +}; +use gix_object::bstr::{self, BString, ByteVec}; + +#[derive(Debug, PartialEq)] +struct DiffHunk { + header: HunkHeader, + lines: BString, +} + +struct DiffHunkRecorder { + inner: Vec, +} + +impl DiffHunkRecorder { + fn new() -> Self { + Self { inner: Vec::new() } + } +} + +impl ConsumeHunk for DiffHunkRecorder { + type Out = Vec; + + fn consume_hunk( + &mut self, + header: HunkHeader, + lines: &[(gix_diff::blob::unified_diff::DiffLineKind, &[u8])], + ) -> std::io::Result<()> { + let mut buf = Vec::new(); + + for &(kind, line) in lines { + buf.push(kind.to_prefix() as u8); + buf.extend_from_slice(line); + buf.push(b'\n'); + } + + let diff_hunk = DiffHunk { + header, + lines: buf.into(), + }; + + self.inner.push(diff_hunk); + + Ok(()) + } + + fn finish(self) -> Self::Out { + self.inner + } +} + +struct Baseline<'a> { + lines: Peekable>, +} + +mod baseline { + use std::path::Path; + + use gix_diff::blob::unified_diff::HunkHeader; + use gix_object::bstr::ByteSlice; + + use super::{Baseline, DiffHunk}; + + static START_OF_HEADER: &[u8; 4] = b"@@ -"; + + impl Baseline<'_> { + pub fn collect(baseline_path: impl AsRef) -> std::io::Result> { + let content = std::fs::read(baseline_path)?; + + let mut baseline = Baseline { + lines: content.lines().peekable(), + }; + + baseline.skip_header(); + + Ok(baseline.collect()) + } + + fn skip_header(&mut self) { + // diff --git a/aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa b/bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb + // index ccccccc..ddddddd 100644 + // --- a/aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa + // +++ b/bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb + + let line = self.lines.next().expect("line to be present"); + assert!(line.starts_with(b"diff --git ")); + + let line = self.lines.next().expect("line to be present"); + assert!(line.starts_with(b"index ")); + + let line = self.lines.next().expect("line to be present"); + assert!(line.starts_with(b"--- ")); + + let line = self.lines.next().expect("line to be present"); + assert!(line.starts_with(b"+++ ")); + } + + /// Parse diff hunk headers that conform to the unified diff hunk header format. + /// + /// The parser is very primitive and relies on the fact that `+18` is parsed as `18`. This + /// allows us to split the input on ` ` and `,` only. + /// + /// @@ -18,6 +18,7 @@ abc def ghi + /// @@ -{before_hunk_start},{before_hunk_len} +{after_hunk_start},{after_hunk_len} @@ + fn parse_hunk_header(&self, line: &[u8]) -> gix_testtools::Result { + let Some(line) = line.strip_prefix(START_OF_HEADER) else { + todo!() + }; + + let parts: Vec<_> = line.split(|b| *b == b' ' || *b == b',').collect(); + let [before_hunk_start, before_hunk_len, after_hunk_start, after_hunk_len, ..] = parts[..] else { + todo!() + }; + + Ok(HunkHeader { + before_hunk_start: self.parse_number(before_hunk_start), + before_hunk_len: self.parse_number(before_hunk_len), + after_hunk_start: self.parse_number(after_hunk_start), + after_hunk_len: self.parse_number(after_hunk_len), + }) + } + + fn parse_number(&self, bytes: &[u8]) -> u32 { + bytes + .to_str() + .expect("to be a valid UTF-8 string") + .parse::() + .expect("to be a number") + } + } + + impl Iterator for Baseline<'_> { + type Item = DiffHunk; + + fn next(&mut self) -> Option { + let mut hunk_header = None; + let mut hunk_lines = Vec::new(); + + while let Some(line) = self.lines.next() { + if line.starts_with(START_OF_HEADER) { + assert!(hunk_header.is_none(), "should not overwrite existing hunk_header"); + hunk_header = self.parse_hunk_header(line).ok(); + + continue; + } + + match line[0] { + b' ' | b'+' | b'-' => { + hunk_lines.extend_from_slice(line); + hunk_lines.push(b'\n'); + } + _ => todo!(), + } + + match self.lines.peek() { + Some(next_line) if next_line.starts_with(START_OF_HEADER) => break, + None => break, + _ => {} + } + } + + hunk_header.map(|hunk_header| DiffHunk { + header: hunk_header, + lines: hunk_lines.into(), + }) + } + } +} + +#[test] +fn sliders() -> gix_testtools::Result { + let worktree_path = fixture_path()?; + let asset_dir = worktree_path.join("assets"); + + let dir = std::fs::read_dir(&worktree_path)?; + + for entry in dir { + let entry = entry?; + let file_name = entry.file_name().into_string().expect("to be string"); + + if !file_name.ends_with(".baseline") { + continue; + } + + let parts: Vec<_> = file_name.split('.').collect(); + let [name, algorithm, ..] = parts[..] else { + unimplemented!() + }; + let algorithm = match algorithm { + "myers" => Algorithm::Myers, + "histogram" => Algorithm::Histogram, + _ => unimplemented!(), + }; + + let parts: Vec<_> = name.split('-').collect(); + let [old_blob_id, new_blob_id] = parts[..] else { + unimplemented!(); + }; + + let old_data = std::fs::read(asset_dir.join(format!("{old_blob_id}.blob")))?; + let new_data = std::fs::read(asset_dir.join(format!("{new_blob_id}.blob")))?; + + let interner = gix_diff::blob::intern::InternedInput::new( + tokens_for_diffing(old_data.as_slice()), + tokens_for_diffing(new_data.as_slice()), + ); + + let actual = gix_diff::blob::diff( + algorithm, + &interner, + UnifiedDiff::new(&interner, DiffHunkRecorder::new(), ContextSize::symmetrical(3)), + )?; + + let baseline_path = worktree_path.join(file_name); + let baseline = Baseline::collect(baseline_path).unwrap(); + + let actual = actual + .iter() + .fold(BString::default(), |mut acc, diff_hunk| { + acc.push_str(diff_hunk.header.to_string().as_str()); + acc.push(b'\n'); + + acc.extend_from_slice(&diff_hunk.lines); + + acc + }) + .to_string(); + + let baseline = baseline + .iter() + .fold(BString::default(), |mut acc, diff_hunk| { + acc.push_str(diff_hunk.header.to_string().as_str()); + acc.push(b'\n'); + + acc.extend_from_slice(&diff_hunk.lines); + + acc + }) + .to_string(); + + pretty_assertions::assert_eq!(actual, baseline); + } + + Ok(()) +} + +fn tokens_for_diffing(data: &[u8]) -> impl TokenSource { + gix_diff::blob::sources::byte_lines(data) +} + +fn fixture_path() -> gix_testtools::Result { + gix_testtools::scripted_fixture_read_only_standalone("make_diff_for_sliders_repo.sh") +} diff --git a/gix-diff/tests/fixtures/make_diff_for_sliders_repo.sh b/gix-diff/tests/fixtures/make_diff_for_sliders_repo.sh new file mode 100755 index 00000000000..aa303c4f0fc --- /dev/null +++ b/gix-diff/tests/fixtures/make_diff_for_sliders_repo.sh @@ -0,0 +1,23 @@ +#!/usr/bin/env bash +set -eu -o pipefail + +# This script is a placeholder for a script generated by the +# `create-diff-cases` subcommand of `internal-tools`. The placeholder does +# nothing except making the associated test trivially pass. +# +# ## Usage +# +# Follow these instructions to generate a file containing sliders: +# https://github.com/mhagger/diff-slider-tools/blob/b59ed13d7a2a6cfe14a8f79d434b6221cc8b04dd/README.md?plain=1#L122-L146 +# +# Run `create-diff-cases` to create the script in `gix-diff/tests/fixtures/`. +# The script will be called `make_diff_for_sliders_repo.sh` and it will +# overwrite the placeholder. +# +# ``` +# # run inside `gitoxide` +# cargo run --package internal-tools -- create-diff-cases --sliders-file $PATH_TO_DIFF_SLIDER_TOOLS/corpus/git.sliders --worktree-dir $PATH_TO_DIFF_SLIDER_TOOLS/corpus/git.git/ --destination-dir gix-diff/tests/fixtures/ +# ``` +# +# Run `cargo test sliders -- --nocapture` inside `gix-diff/tests` to run the +# actual tests. diff --git a/tests/it/src/args.rs b/tests/it/src/args.rs index 78f1873d64b..cad0be3bb04 100644 --- a/tests/it/src/args.rs +++ b/tests/it/src/args.rs @@ -123,6 +123,34 @@ pub enum Subcommands { #[clap(value_parser = AsPathSpec)] patterns: Vec, }, + /// Take a slider file generated with the help of [diff-slider-tools] and turn it into a series + /// of baseline diffs to be used in [slider-rs]. + /// + /// See [make-diff-for-sliders-repo] for details. + /// + /// [diff-slider-tools]: https://github.com/mhagger/diff-slider-tools + /// [slider-rs]: gix-diff/tests/diff/blob/slider.rs + /// [make-diff-for-sliders-repo]: gix-diff/tests/fixtures/make_diff_for_sliders_repo.sh + CreateDiffCases { + /// Don't really copy anything. + #[clap(long, short = 'n')] + dry_run: bool, + /// The `.sliders` file that contains a list of sliders. + #[clap(long)] + sliders_file: PathBuf, + /// The git root to extract the diff-related parts from. + #[clap(long)] + worktree_dir: PathBuf, + /// The directory into which to copy the files. + #[clap(long)] + destination_dir: PathBuf, + /// The number of sliders to generate test cases for. + #[clap(long, default_value_t = 10)] + count: usize, + /// The directory to place assets in. + #[clap(long)] + asset_dir: Option, + }, /// Check for executable bits that disagree with shebangs. /// /// This checks committed and staged files, but not anything unstaged, to find shell scripts diff --git a/tests/it/src/commands/create_diff_cases.rs b/tests/it/src/commands/create_diff_cases.rs new file mode 100644 index 00000000000..acdb5b682bf --- /dev/null +++ b/tests/it/src/commands/create_diff_cases.rs @@ -0,0 +1,110 @@ +pub(super) mod function { + use anyhow::Context; + use std::{ + collections::HashSet, + path::{Path, PathBuf}, + }; + + use gix::{ + bstr::{BString, ByteSlice}, + objs::FindExt, + }; + + pub fn create_diff_cases( + dry_run: bool, + sliders_file: PathBuf, + worktree_dir: &Path, + destination_dir: PathBuf, + count: usize, + asset_dir: Option, + ) -> anyhow::Result<()> { + let prefix = if dry_run { "WOULD" } else { "Will" }; + let sliders = std::fs::read_to_string(&sliders_file)?; + + eprintln!( + "Read '{}' which has {} lines", + sliders_file.display(), + sliders.lines().count() + ); + + let sliders: HashSet<_> = sliders + .lines() + .take(count) + .map(|line| { + let parts: Vec<_> = line.split_ascii_whitespace().collect(); + + match parts[..] { + [before, after, ..] => (before, after), + _ => todo!(), + } + }) + .collect(); + + let repo = gix::open(worktree_dir)?; + + let asset_dir = asset_dir.unwrap_or("assets".into()); + let assets = destination_dir.join(asset_dir.to_os_str()?); + + eprintln!("{prefix} create directory '{assets}'", assets = assets.display()); + if !dry_run { + std::fs::create_dir_all(&assets)?; + } + + let mut buf = Vec::new(); + + let script_name = "make_diff_for_sliders_repo.sh"; + + let mut blocks: Vec = vec![format!( + r#"#!/usr/bin/env bash +set -eu -o pipefail + +ROOT="$(cd "$(dirname "${{BASH_SOURCE[0]}}")" && pwd)" + +mkdir -p {asset_dir} +"# + )]; + + for (before, after) in sliders.iter() { + let revspec = repo.rev_parse(*before)?; + let old_blob_id = revspec + .single() + .context(format!("rev-spec '{before}' must resolve to a single object"))?; + + let revspec = repo.rev_parse(*after)?; + let new_blob_id = revspec + .single() + .context(format!("rev-spec '{after}' must resolve to a single object"))?; + + let dst_old_blob = assets.join(format!("{old_blob_id}.blob")); + let dst_new_blob = assets.join(format!("{new_blob_id}.blob")); + if !dry_run { + let old_blob = repo.objects.find_blob(&old_blob_id, &mut buf)?.data; + std::fs::write(dst_old_blob, old_blob)?; + + let new_blob = repo.objects.find_blob(&new_blob_id, &mut buf)?.data; + std::fs::write(dst_new_blob, new_blob)?; + } + + blocks.push(format!( + r#"git -c diff.algorithm=myers diff --no-index "$ROOT/{asset_dir}/{old_blob_id}.blob" "$ROOT/{asset_dir}/{new_blob_id}.blob" > {old_blob_id}-{new_blob_id}.myers.baseline || true +git -c diff.algorithm=histogram diff --no-index "$ROOT/{asset_dir}/{old_blob_id}.blob" "$ROOT/{asset_dir}/{new_blob_id}.blob" > {old_blob_id}-{new_blob_id}.histogram.baseline || true +cp "$ROOT/{asset_dir}/{old_blob_id}.blob" assets/ +cp "$ROOT/{asset_dir}/{new_blob_id}.blob" assets/ +"# + )); + } + + let script_file = destination_dir.join(script_name); + eprintln!( + "{prefix} write script file at '{script_file}'", + script_file = script_file.display() + ); + + if !dry_run { + let script = blocks.join("\n"); + std::fs::write(script_file, script)?; + } + + Ok(()) + } +} diff --git a/tests/it/src/commands/mod.rs b/tests/it/src/commands/mod.rs index 6bec01671df..ed3abfd0cfc 100644 --- a/tests/it/src/commands/mod.rs +++ b/tests/it/src/commands/mod.rs @@ -7,6 +7,9 @@ pub use copy_royal::function::copy_royal; pub mod git_to_sh; pub use git_to_sh::function::git_to_sh; +pub mod create_diff_cases; +pub use create_diff_cases::function::create_diff_cases; + pub mod check_mode; pub use check_mode::function::check_mode; diff --git a/tests/it/src/main.rs b/tests/it/src/main.rs index df22a7e8c6f..c0b3ff22741 100644 --- a/tests/it/src/main.rs +++ b/tests/it/src/main.rs @@ -46,6 +46,14 @@ fn main() -> anyhow::Result<()> { destination_dir, patterns, } => commands::copy_royal(dry_run, &worktree_root, destination_dir, patterns), + Subcommands::CreateDiffCases { + dry_run, + sliders_file, + worktree_dir, + destination_dir, + count, + asset_dir, + } => commands::create_diff_cases(dry_run, sliders_file, &worktree_dir, destination_dir, count, asset_dir), Subcommands::CheckMode {} => commands::check_mode(), Subcommands::Env {} => commands::env(), } From 0ed01645e2284f86e1afaf1486f08cad3243ed76 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Thu, 30 Oct 2025 13:14:28 +0100 Subject: [PATCH 2/4] feat: add `Repository::set_workdir()`. Force this repository instance to use the given worktree directory. --- gitoxide-core/src/repository/merge_base.rs | 1 - gix/src/repository/location.rs | 30 ++++++++++++++++++ gix/src/repository/worktree.rs | 5 +-- gix/tests/gix/repository/open.rs | 24 +++++++++++++- gix/tests/gix/repository/worktree.rs | 37 ++++++++++++++++++++++ 5 files changed, 93 insertions(+), 4 deletions(-) diff --git a/gitoxide-core/src/repository/merge_base.rs b/gitoxide-core/src/repository/merge_base.rs index b6140ec9d4b..91af2323835 100644 --- a/gitoxide-core/src/repository/merge_base.rs +++ b/gitoxide-core/src/repository/merge_base.rs @@ -16,7 +16,6 @@ pub fn merge_base( let first_id = repo.rev_parse_single(first.as_str())?; let other_ids: Vec<_> = others .iter() - .cloned() .map(|other| repo.rev_parse_single(other.as_str()).map(gix::Id::detach)) .collect::>()?; diff --git a/gix/src/repository/location.rs b/gix/src/repository/location.rs index 6f38b3c958d..db490687e05 100644 --- a/gix/src/repository/location.rs +++ b/gix/src/repository/location.rs @@ -58,6 +58,36 @@ impl crate::Repository { self.work_tree.as_deref() } + /// Forcefully set the given `workdir` to be the worktree of this repository, *in memory*, + /// no matter if it had one or not, or unset it with `None`. + /// Return the previous working directory if one existed. + /// + /// Fail if the `workdir`, if not `None`, isn't accessible or isn't a directory. + /// No change is performed on error. + /// + /// ### About Worktrees + /// + /// * When setting a main worktree to a linked worktree directory, this repository instance + /// will still claim that it is the [main worktree](crate::Worktree::is_main()) as that depends + /// on the `git_dir`, not the worktree dir. + /// * When setting a linked worktree to a main worktree directory, this repository instance + /// will still claim that it is *not* a [main worktree](crate::Worktree::is_main()) as that depends + /// on the `git_dir`, not the worktree dir. + #[doc(alias = "git2")] + pub fn set_workdir(&mut self, workdir: impl Into>) -> Result, std::io::Error> { + let workdir = workdir.into(); + Ok(match workdir { + None => self.work_tree.take(), + Some(new_workdir) => { + _ = std::fs::read_dir(&new_workdir)?; + + let old = self.work_tree.take(); + self.work_tree = Some(new_workdir); + old + } + }) + } + /// Return the work tree containing all checked out files, if there is one. pub fn workdir(&self) -> Option<&std::path::Path> { self.work_tree.as_deref() diff --git a/gix/src/repository/worktree.rs b/gix/src/repository/worktree.rs index e3475d2cc45..db395c01dc3 100644 --- a/gix/src/repository/worktree.rs +++ b/gix/src/repository/worktree.rs @@ -30,6 +30,7 @@ impl crate::Repository { res.sort_by(|a, b| a.git_dir.cmp(&b.git_dir)); Ok(res) } + /// Return the repository owning the main worktree, typically from a linked worktree. /// /// Note that it might be the one that is currently open if this repository doesn't point to a linked worktree. @@ -41,7 +42,7 @@ impl crate::Repository { /// Return the currently set worktree if there is one, acting as platform providing a validated worktree base path. /// - /// Note that there would be `None` if this repository is `bare` and the parent [`Repository`][crate::Repository] was instantiated without + /// Note that there would be `None` if this repository is `bare` and the parent [`Repository`](crate::Repository) was instantiated without /// registered worktree in the current working dir, even if no `.git` file or directory exists. /// It's merely based on configuration, see [Worktree::dot_git_exists()] for a way to perform more validation. pub fn worktree(&self) -> Option> { @@ -50,7 +51,7 @@ impl crate::Repository { /// Return true if this repository is bare, and has no main work tree. /// - /// This is not to be confused with the [`worktree()`][crate::Repository::worktree()] worktree, which may exists if this instance + /// This is not to be confused with the [`worktree()`](crate::Repository::worktree()) method, which may exist if this instance /// was opened in a worktree that was created separately. pub fn is_bare(&self) -> bool { self.config.is_bare && self.workdir().is_none() diff --git a/gix/tests/gix/repository/open.rs b/gix/tests/gix/repository/open.rs index 48eea8ad821..6541a584836 100644 --- a/gix/tests/gix/repository/open.rs +++ b/gix/tests/gix/repository/open.rs @@ -154,7 +154,7 @@ fn non_bare_non_git_repo_without_worktree() -> crate::Result { #[test] fn none_bare_repo_without_index() -> crate::Result { - let repo = named_subrepo_opts( + let mut repo = named_subrepo_opts( "make_basic_repo.sh", "non-bare-repo-without-index", gix::open::Options::isolated(), @@ -175,6 +175,28 @@ fn none_bare_repo_without_index() -> crate::Result { .is_ok(), "this is a minimal path" ); + + let old = repo.set_workdir(None).expect("should never fail"); + assert_eq!( + old.as_ref().and_then(|wd| wd.file_name()?.to_str()), + Some("non-bare-repo-without-index") + ); + assert!(repo.workdir().is_none(), "the workdir was unset"); + assert!(repo.worktree().is_none(), "the worktree was unset"); + assert!( + !repo.is_bare(), + "this is based on `core.bare`, not on the lack of worktree" + ); + + assert_eq!( + repo.set_workdir(old.clone()).expect("does not fail as it exists"), + None, + "nothing was set before" + ); + assert_eq!(repo.workdir(), old.as_deref()); + + let worktree = repo.worktree().expect("should be present after setting"); + assert!(worktree.is_main(), "it's still the main worktree"); Ok(()) } diff --git a/gix/tests/gix/repository/worktree.rs b/gix/tests/gix/repository/worktree.rs index ad233eff627..9ab8eb95c8c 100644 --- a/gix/tests/gix/repository/worktree.rs +++ b/gix/tests/gix/repository/worktree.rs @@ -254,6 +254,43 @@ fn from_nonbare_parent_repo() { run_assertions(repo, false /* bare */); } +#[test] +fn from_nonbare_parent_repo_set_workdir() -> gix_testtools::Result { + if gix_testtools::should_skip_as_git_version_is_smaller_than(2, 31, 0) { + return Ok(()); + } + + let dir = gix_testtools::scripted_fixture_read_only("make_worktree_repo.sh").unwrap(); + let mut repo = gix::open(dir.join("repo")).unwrap(); + + assert!(repo.worktree().is_some_and(|wt| wt.is_main()), "we have main worktree"); + + let worktrees = repo.worktrees()?; + assert_eq!(worktrees.len(), 6); + + let linked_wt_dir = worktrees.first().unwrap().base().expect("this linked worktree exists"); + repo.set_workdir(linked_wt_dir).expect("works as the dir exists"); + + assert!( + repo.worktree().is_some_and(|wt| wt.is_main()), + "it's still the main worktree as that depends on the git_dir" + ); + + let mut wt_repo = repo.worktrees()?.first().unwrap().clone().into_repo()?; + assert!( + wt_repo.worktree().is_some_and(|wt| !wt.is_main()), + "linked worktrees are never main" + ); + + wt_repo.set_workdir(Some(repo.workdir().unwrap().to_owned()))?; + assert!( + wt_repo.worktree().is_some_and(|wt| !wt.is_main()), + "it's still the linked worktree as that depends on the git_dir" + ); + + Ok(()) +} + fn run_assertions(main_repo: gix::Repository, should_be_bare: bool) { assert_eq!(main_repo.is_bare(), should_be_bare); let mut baseline = Baseline::collect( From 9a9bd293e966ed0b8095bbe8b3bcea15ff39d89e Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Sun, 2 Nov 2025 09:08:50 +0100 Subject: [PATCH 3/4] Improve test instructions to be more foolproof Also put them in a place where they won't be overwritten. --- gix-diff/tests/README.md | 26 +++++++++++++++++++ gix-diff/tests/fixtures/.gitignore | 2 ++ .../fixtures/generated-archives/.gitignore | 2 ++ .../fixtures/make_diff_for_sliders_repo.sh | 17 +----------- 4 files changed, 31 insertions(+), 16 deletions(-) create mode 100644 gix-diff/tests/README.md create mode 100644 gix-diff/tests/fixtures/.gitignore create mode 100644 gix-diff/tests/fixtures/generated-archives/.gitignore diff --git a/gix-diff/tests/README.md b/gix-diff/tests/README.md new file mode 100644 index 00000000000..59e1101a413 --- /dev/null +++ b/gix-diff/tests/README.md @@ -0,0 +1,26 @@ +## How to run diff-slider tests + +The idea is to use https://github.com/mhagger/diff-slider-tools to create slider information for use to generate a test which invokes our own implementation and compares it to Git itself. +Follow these instructions to set it up. + +1. DIFF_SLIDER_TOOLS=/your/anticipated/path/to/diff-slider-tools +2. `git clone https://github.com/mhagger/diff-slider-tools $DIFF_SLIDER_TOOLS` +3. `pushd $DIFF_SLIDER_TOOLS` +4. Follow [these instructions](https://github.com/mhagger/diff-slider-tools/blob/b59ed13d7a2a6cfe14a8f79d434b6221cc8b04dd/README.md?plain=1#L122-L146) to generate a file containing the slider information. Be sure to set the `repo` variable as it's used in later script invocations. + - Note that `get-corpus` must be run with `./get-corpus`. + - You can use an existing repository, for instance via `repo=git-human`, so there is no need to find your own repository to test. + - The script suite is very slow, and it's recommended to only set a range of commits, or use a small repository for testing. + +Finally, run the `internal-tools` program to turn that file into a fixture called `make_diff_for_sliders_repo.sh`. + +```shell +# run inside `gitoxide` +popd +cargo run --package internal-tools -- \ + create-diff-cases \ + --sliders-file $DIFF_SLIDER_TOOLS/corpus/$repo.sliders \ + --worktree-dir $DIFF_SLIDER_TOOLS/corpus/$repo.git/ \ + --destination-dir gix-diff/tests/fixtures/ +``` + +Finally, run `cargo test -p gix-diff-tests sliders -- --nocapture` to execute the actual tests to compare. diff --git a/gix-diff/tests/fixtures/.gitignore b/gix-diff/tests/fixtures/.gitignore new file mode 100644 index 00000000000..1f3f8401d34 --- /dev/null +++ b/gix-diff/tests/fixtures/.gitignore @@ -0,0 +1,2 @@ +# The slider assets that are needed for that slider fixture script to work and build the fixture. +/assets/ \ No newline at end of file diff --git a/gix-diff/tests/fixtures/generated-archives/.gitignore b/gix-diff/tests/fixtures/generated-archives/.gitignore new file mode 100644 index 00000000000..d08ec9b1c7a --- /dev/null +++ b/gix-diff/tests/fixtures/generated-archives/.gitignore @@ -0,0 +1,2 @@ +# The auto-generated sliders fixtures. For now it's experimental, but we may store it later once it's all working. +/make_diff_for_sliders_repo.tar \ No newline at end of file diff --git a/gix-diff/tests/fixtures/make_diff_for_sliders_repo.sh b/gix-diff/tests/fixtures/make_diff_for_sliders_repo.sh index aa303c4f0fc..deab2a921e7 100755 --- a/gix-diff/tests/fixtures/make_diff_for_sliders_repo.sh +++ b/gix-diff/tests/fixtures/make_diff_for_sliders_repo.sh @@ -5,19 +5,4 @@ set -eu -o pipefail # `create-diff-cases` subcommand of `internal-tools`. The placeholder does # nothing except making the associated test trivially pass. # -# ## Usage -# -# Follow these instructions to generate a file containing sliders: -# https://github.com/mhagger/diff-slider-tools/blob/b59ed13d7a2a6cfe14a8f79d434b6221cc8b04dd/README.md?plain=1#L122-L146 -# -# Run `create-diff-cases` to create the script in `gix-diff/tests/fixtures/`. -# The script will be called `make_diff_for_sliders_repo.sh` and it will -# overwrite the placeholder. -# -# ``` -# # run inside `gitoxide` -# cargo run --package internal-tools -- create-diff-cases --sliders-file $PATH_TO_DIFF_SLIDER_TOOLS/corpus/git.sliders --worktree-dir $PATH_TO_DIFF_SLIDER_TOOLS/corpus/git.git/ --destination-dir gix-diff/tests/fixtures/ -# ``` -# -# Run `cargo test sliders -- --nocapture` inside `gix-diff/tests` to run the -# actual tests. +# See the `gix-diff/tests/README.md` file for more information. From fb2ee849673e8d5353098a86991c0b3dc7177851 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Sun, 2 Nov 2025 10:01:14 +0100 Subject: [PATCH 4/4] refactor --- gix-diff/tests/diff/blob/slider.rs | 345 ++++++++++----------- tests/it/src/commands/create_diff_cases.rs | 11 +- 2 files changed, 175 insertions(+), 181 deletions(-) diff --git a/gix-diff/tests/diff/blob/slider.rs b/gix-diff/tests/diff/blob/slider.rs index e23c036cd1f..f152f00ffb2 100644 --- a/gix-diff/tests/diff/blob/slider.rs +++ b/gix-diff/tests/diff/blob/slider.rs @@ -1,136 +1,166 @@ -use std::{iter::Peekable, path::PathBuf}; - -use gix_diff::blob::{ - intern::TokenSource, - unified_diff::{ConsumeHunk, ContextSize, HunkHeader}, - Algorithm, UnifiedDiff, -}; -use gix_object::bstr::{self, BString, ByteVec}; - -#[derive(Debug, PartialEq)] -struct DiffHunk { - header: HunkHeader, - lines: BString, -} - -struct DiffHunkRecorder { - inner: Vec, -} +use gix_diff::blob::intern::TokenSource; +use gix_diff::blob::unified_diff::ContextSize; +use gix_diff::blob::{Algorithm, UnifiedDiff}; +use gix_testtools::bstr::{BString, ByteVec}; -impl DiffHunkRecorder { - fn new() -> Self { - Self { inner: Vec::new() } - } -} +#[test] +fn baseline() -> gix_testtools::Result { + let worktree_path = gix_testtools::scripted_fixture_read_only_standalone("make_diff_for_sliders_repo.sh")?; + let asset_dir = worktree_path.join("assets"); -impl ConsumeHunk for DiffHunkRecorder { - type Out = Vec; + let dir = std::fs::read_dir(&worktree_path)?; - fn consume_hunk( - &mut self, - header: HunkHeader, - lines: &[(gix_diff::blob::unified_diff::DiffLineKind, &[u8])], - ) -> std::io::Result<()> { - let mut buf = Vec::new(); + let mut count = 0; + for entry in dir { + let entry = entry?; + let file_name = entry.file_name().into_string().expect("to be string"); - for &(kind, line) in lines { - buf.push(kind.to_prefix() as u8); - buf.extend_from_slice(line); - buf.push(b'\n'); + if !file_name.ends_with(".baseline") { + continue; } + count += 1; + + let parts: Vec<_> = file_name.split('.').collect(); + let [name, algorithm, ..] = parts[..] else { + unreachable!() + }; + let algorithm = match algorithm { + "myers" => Algorithm::Myers, + "histogram" => Algorithm::Histogram, + _ => unreachable!(), + }; - let diff_hunk = DiffHunk { - header, - lines: buf.into(), + let parts: Vec<_> = name.split('-').collect(); + let [old_blob_id, new_blob_id] = parts[..] else { + unreachable!(); }; - self.inner.push(diff_hunk); + let old_data = std::fs::read(asset_dir.join(format!("{old_blob_id}.blob")))?; + let new_data = std::fs::read(asset_dir.join(format!("{new_blob_id}.blob")))?; - Ok(()) + let interner = gix_diff::blob::intern::InternedInput::new( + tokens_for_diffing(old_data.as_slice()), + tokens_for_diffing(new_data.as_slice()), + ); + + let actual = gix_diff::blob::diff( + algorithm, + &interner, + UnifiedDiff::new( + &interner, + baseline::DiffHunkRecorder::new(), + ContextSize::symmetrical(3), + ), + )?; + + let baseline_path = worktree_path.join(file_name); + let baseline = std::fs::read(baseline_path)?; + let baseline = baseline::Baseline::new(&baseline); + + let actual = actual + .iter() + .fold(BString::default(), |mut acc, diff_hunk| { + acc.push_str(diff_hunk.header.to_string().as_str()); + acc.push(b'\n'); + + acc.extend_from_slice(&diff_hunk.lines); + + acc + }) + .to_string(); + + let baseline = baseline + .fold(BString::default(), |mut acc, diff_hunk| { + acc.push_str(diff_hunk.header.to_string().as_str()); + acc.push(b'\n'); + + acc.extend_from_slice(&diff_hunk.lines); + + acc + }) + .to_string(); + + pretty_assertions::assert_eq!(actual, baseline); } - fn finish(self) -> Self::Out { - self.inner + if count == 0 { + eprintln!("Slider baseline isn't setup - look at ./gix-diff/tests/README.md for instructions"); } + + Ok(()) } -struct Baseline<'a> { - lines: Peekable>, +fn tokens_for_diffing(data: &[u8]) -> impl TokenSource { + gix_diff::blob::sources::byte_lines(data) } mod baseline { - use std::path::Path; - - use gix_diff::blob::unified_diff::HunkHeader; use gix_object::bstr::ByteSlice; + use std::iter::Peekable; - use super::{Baseline, DiffHunk}; + use gix_diff::blob::unified_diff::{ConsumeHunk, HunkHeader}; + use gix_object::bstr::{self, BString}; static START_OF_HEADER: &[u8; 4] = b"@@ -"; - impl Baseline<'_> { - pub fn collect(baseline_path: impl AsRef) -> std::io::Result> { - let content = std::fs::read(baseline_path)?; - - let mut baseline = Baseline { - lines: content.lines().peekable(), - }; + #[derive(Debug, PartialEq)] + pub struct DiffHunk { + pub header: HunkHeader, + pub lines: BString, + } - baseline.skip_header(); + pub struct DiffHunkRecorder { + inner: Vec, + } - Ok(baseline.collect()) + impl DiffHunkRecorder { + pub fn new() -> Self { + Self { inner: Vec::new() } } + } - fn skip_header(&mut self) { - // diff --git a/aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa b/bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb - // index ccccccc..ddddddd 100644 - // --- a/aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa - // +++ b/bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb + impl ConsumeHunk for DiffHunkRecorder { + type Out = Vec; - let line = self.lines.next().expect("line to be present"); - assert!(line.starts_with(b"diff --git ")); + fn consume_hunk( + &mut self, + header: HunkHeader, + lines: &[(gix_diff::blob::unified_diff::DiffLineKind, &[u8])], + ) -> std::io::Result<()> { + let mut buf = Vec::new(); - let line = self.lines.next().expect("line to be present"); - assert!(line.starts_with(b"index ")); + for &(kind, line) in lines { + buf.push(kind.to_prefix() as u8); + buf.extend_from_slice(line); + buf.push(b'\n'); + } + + let diff_hunk = DiffHunk { + header, + lines: buf.into(), + }; - let line = self.lines.next().expect("line to be present"); - assert!(line.starts_with(b"--- ")); + self.inner.push(diff_hunk); - let line = self.lines.next().expect("line to be present"); - assert!(line.starts_with(b"+++ ")); + Ok(()) } - /// Parse diff hunk headers that conform to the unified diff hunk header format. - /// - /// The parser is very primitive and relies on the fact that `+18` is parsed as `18`. This - /// allows us to split the input on ` ` and `,` only. - /// - /// @@ -18,6 +18,7 @@ abc def ghi - /// @@ -{before_hunk_start},{before_hunk_len} +{after_hunk_start},{after_hunk_len} @@ - fn parse_hunk_header(&self, line: &[u8]) -> gix_testtools::Result { - let Some(line) = line.strip_prefix(START_OF_HEADER) else { - todo!() - }; + fn finish(self) -> Self::Out { + self.inner + } + } - let parts: Vec<_> = line.split(|b| *b == b' ' || *b == b',').collect(); - let [before_hunk_start, before_hunk_len, after_hunk_start, after_hunk_len, ..] = parts[..] else { - todo!() - }; + type Lines<'a> = Peekable>; - Ok(HunkHeader { - before_hunk_start: self.parse_number(before_hunk_start), - before_hunk_len: self.parse_number(before_hunk_len), - after_hunk_start: self.parse_number(after_hunk_start), - after_hunk_len: self.parse_number(after_hunk_len), - }) - } + pub struct Baseline<'a> { + lines: Lines<'a>, + } - fn parse_number(&self, bytes: &[u8]) -> u32 { - bytes - .to_str() - .expect("to be a valid UTF-8 string") - .parse::() - .expect("to be a number") + impl<'a> Baseline<'a> { + pub fn new(content: &'a [u8]) -> Baseline<'a> { + let mut lines = content.lines().peekable(); + skip_header(&mut lines); + Baseline { lines } } } @@ -144,7 +174,7 @@ mod baseline { while let Some(line) = self.lines.next() { if line.starts_with(START_OF_HEADER) { assert!(hunk_header.is_none(), "should not overwrite existing hunk_header"); - hunk_header = self.parse_hunk_header(line).ok(); + hunk_header = parse_hunk_header(line).ok(); continue; } @@ -154,7 +184,7 @@ mod baseline { hunk_lines.extend_from_slice(line); hunk_lines.push(b'\n'); } - _ => todo!(), + _ => unreachable!("BUG: expecting unified diff format"), } match self.lines.peek() { @@ -170,89 +200,54 @@ mod baseline { }) } } -} -#[test] -fn sliders() -> gix_testtools::Result { - let worktree_path = fixture_path()?; - let asset_dir = worktree_path.join("assets"); + fn skip_header(lines: &mut Lines) { + // diff --git a/aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa b/bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb + // index ccccccc..ddddddd 100644 + // --- a/aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa + // +++ b/bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb - let dir = std::fs::read_dir(&worktree_path)?; + let line = lines.next().expect("line to be present"); + assert!(line.starts_with(b"diff --git ")); - for entry in dir { - let entry = entry?; - let file_name = entry.file_name().into_string().expect("to be string"); + let line = lines.next().expect("line to be present"); + assert!(line.starts_with(b"index ")); - if !file_name.ends_with(".baseline") { - continue; - } + let line = lines.next().expect("line to be present"); + assert!(line.starts_with(b"--- ")); - let parts: Vec<_> = file_name.split('.').collect(); - let [name, algorithm, ..] = parts[..] else { - unimplemented!() - }; - let algorithm = match algorithm { - "myers" => Algorithm::Myers, - "histogram" => Algorithm::Histogram, - _ => unimplemented!(), - }; + let line = lines.next().expect("line to be present"); + assert!(line.starts_with(b"+++ ")); + } - let parts: Vec<_> = name.split('-').collect(); - let [old_blob_id, new_blob_id] = parts[..] else { - unimplemented!(); + /// Parse diff hunk headers that conform to the unified diff hunk header format. + /// + /// The parser is very primitive and relies on the fact that `+18` is parsed as `18`. This + /// allows us to split the input on ` ` and `,` only. + /// + /// @@ -18,6 +18,7 @@ abc def ghi + /// @@ -{before_hunk_start},{before_hunk_len} +{after_hunk_start},{after_hunk_len} @@ + fn parse_hunk_header(line: &[u8]) -> gix_testtools::Result { + let line = line.strip_prefix(START_OF_HEADER).unwrap(); + + let parts: Vec<_> = line.split(|b| *b == b' ' || *b == b',').collect(); + let [before_hunk_start, before_hunk_len, after_hunk_start, after_hunk_len, ..] = parts[..] else { + unreachable!() }; - let old_data = std::fs::read(asset_dir.join(format!("{old_blob_id}.blob")))?; - let new_data = std::fs::read(asset_dir.join(format!("{new_blob_id}.blob")))?; - - let interner = gix_diff::blob::intern::InternedInput::new( - tokens_for_diffing(old_data.as_slice()), - tokens_for_diffing(new_data.as_slice()), - ); - - let actual = gix_diff::blob::diff( - algorithm, - &interner, - UnifiedDiff::new(&interner, DiffHunkRecorder::new(), ContextSize::symmetrical(3)), - )?; - - let baseline_path = worktree_path.join(file_name); - let baseline = Baseline::collect(baseline_path).unwrap(); - - let actual = actual - .iter() - .fold(BString::default(), |mut acc, diff_hunk| { - acc.push_str(diff_hunk.header.to_string().as_str()); - acc.push(b'\n'); - - acc.extend_from_slice(&diff_hunk.lines); - - acc - }) - .to_string(); - - let baseline = baseline - .iter() - .fold(BString::default(), |mut acc, diff_hunk| { - acc.push_str(diff_hunk.header.to_string().as_str()); - acc.push(b'\n'); - - acc.extend_from_slice(&diff_hunk.lines); - - acc - }) - .to_string(); - - pretty_assertions::assert_eq!(actual, baseline); + Ok(HunkHeader { + before_hunk_start: parse_number(before_hunk_start), + before_hunk_len: parse_number(before_hunk_len), + after_hunk_start: parse_number(after_hunk_start), + after_hunk_len: parse_number(after_hunk_len), + }) } - Ok(()) -} - -fn tokens_for_diffing(data: &[u8]) -> impl TokenSource { - gix_diff::blob::sources::byte_lines(data) -} - -fn fixture_path() -> gix_testtools::Result { - gix_testtools::scripted_fixture_read_only_standalone("make_diff_for_sliders_repo.sh") + fn parse_number(bytes: &[u8]) -> u32 { + bytes + .to_str() + .expect("to be a valid UTF-8 string") + .parse::() + .expect("to be a number") + } } diff --git a/tests/it/src/commands/create_diff_cases.rs b/tests/it/src/commands/create_diff_cases.rs index acdb5b682bf..e5b4d6079c1 100644 --- a/tests/it/src/commands/create_diff_cases.rs +++ b/tests/it/src/commands/create_diff_cases.rs @@ -35,12 +35,12 @@ pub(super) mod function { match parts[..] { [before, after, ..] => (before, after), - _ => todo!(), + _ => unreachable!(), } }) .collect(); - let repo = gix::open(worktree_dir)?; + let repo = gix::open_opts(worktree_dir, gix::open::Options::isolated())?; let asset_dir = asset_dir.unwrap_or("assets".into()); let assets = destination_dir.join(asset_dir.to_os_str()?); @@ -51,7 +51,6 @@ pub(super) mod function { } let mut buf = Vec::new(); - let script_name = "make_diff_for_sliders_repo.sh"; let mut blocks: Vec = vec![format!( @@ -64,13 +63,13 @@ mkdir -p {asset_dir} "# )]; - for (before, after) in sliders.iter() { - let revspec = repo.rev_parse(*before)?; + for (before, after) in sliders.iter().copied() { + let revspec = repo.rev_parse(before)?; let old_blob_id = revspec .single() .context(format!("rev-spec '{before}' must resolve to a single object"))?; - let revspec = repo.rev_parse(*after)?; + let revspec = repo.rev_parse(after)?; let new_blob_id = revspec .single() .context(format!("rev-spec '{after}' must resolve to a single object"))?;