Skip to content

Commit

Permalink
basic version of index checkout via command-line (#301)
Browse files Browse the repository at this point in the history
For now this only creates empty files, but it already shows that a single
thread is faster than git: 27k files created per second, compared to 14.5k
for a single thread of git.

We do the least amount of work necessary even without an lstat cache,
but that probably changes once we have to access the ODB to obtain
actual data.

Note that git might also do an additional exists check per file
to see if it changed, something we don't do as we may assume
exclusive access to the directory and just go with that for now.

Long story short: it looks like there is a lot of potential for
performance improvements, and I think there is a lot of room for
being faster, especially in multi-threaded mode.
  • Loading branch information
Byron committed Mar 4, 2022
1 parent 039e822 commit f23b8d2
Show file tree
Hide file tree
Showing 9 changed files with 97 additions and 18 deletions.
1 change: 1 addition & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 3 additions & 0 deletions git-index/src/access.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,9 @@ impl State {
/// Return a shared slice view of the entries stored in this state.
pub fn entries(&self) -> &[Entry] {
&self.entries
}
/// Return a mutable slice view of the entries stored in this state.
///
/// The slice allows changing individual entries in place, but not adding or removing entries.
pub fn entries_mut(&mut self) -> &mut [Entry] {
&mut self.entries
}
pub fn entries_mut_with_paths(&mut self) -> impl Iterator<Item = (&mut Entry, &BStr)> {
let paths = &self.path_backing;
self.entries.iter_mut().map(move |e| {
Expand Down
3 changes: 2 additions & 1 deletion git-repository/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ max-performance = ["git-features/parallel", "git-features/zlib-ng-compat", "git-
local-time-support = ["git-actor/local-time-support"]
## Re-export stability tier 2 crates for convenience and make `Repository` struct fields with types from these crates publicly accessible.
## Doing so is less stable than the stability tier 1 that `git-repository` is a member of.
unstable = ["git-index"]
unstable = ["git-index", "git-worktree"]
## Print debugging information about usage of object database caches, useful for tuning cache sizes.
cache-efficiency-debug = ["git-features/cache-efficiency-debug"]

Expand Down Expand Up @@ -77,6 +77,7 @@ git-features = { version = "^0.19.1", path = "../git-features", features = ["pro

# unstable only
git-index = { version ="^0.1.0", path = "../git-index", optional = true }
git-worktree = { version ="^0.0.0", path = "../git-worktree", optional = true }

signal-hook = { version = "0.3.9", default-features = false }
thiserror = "1.0.26"
Expand Down
3 changes: 3 additions & 0 deletions git-repository/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,7 @@
//! * [`actor`]
//! * [`bstr`][bstr]
//! * [`index`]
//! * [`worktree`]
//! * [`objs`]
//! * [`odb`]
//! * [`pack`][odb::pack]
Expand Down Expand Up @@ -144,6 +145,8 @@ pub use git_url as url;
#[doc(inline)]
#[cfg(all(feature = "unstable", feature = "git-url"))]
pub use git_url::Url;
#[cfg(all(feature = "unstable", feature = "git-worktree"))]
pub use git_worktree as worktree;
pub use hash::{oid, ObjectId};

pub mod interrupt;
Expand Down
4 changes: 0 additions & 4 deletions git-worktree/src/index/mod.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
use git_features::progress;
use git_features::progress::Progress;
use git_hash::oid;

Expand Down Expand Up @@ -27,9 +26,6 @@ where
let mut buf = Vec::new();
let mut collisions = Vec::new();

files.init(Some(index.entries().len()), progress::count("files"));
bytes.init(Some(index.entries().len()), progress::bytes());

for (entry, entry_path) in index.entries_mut_with_paths() {
// TODO: write test for that
if entry.flags.contains(git_index::entry::Flags::SKIP_WORKTREE) {
Expand Down
9 changes: 1 addition & 8 deletions git-worktree/tests/index/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -173,14 +173,7 @@ mod checkout {
let odb = git_odb::at(git_dir.join("objects"))?;
let destination = tempfile::tempdir()?;

let outcome = index::checkout(
&mut index,
&destination,
move |oid, buf| odb.find_blob(oid, buf).ok(),
&mut progress::Discard,
&mut progress::Discard,
opts,
)?;
let outcome = index::checkout(&mut index)?;
Ok((source_tree, destination, index, outcome))
}

Expand Down
67 changes: 67 additions & 0 deletions gitoxide-core/src/index/mod.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
use anyhow::bail;
use std::path::Path;

use git_repository as git;
use git_repository::Progress;

pub struct Options {
pub object_hash: git::hash::Kind,
Expand Down Expand Up @@ -98,3 +100,68 @@ fn parse_file(index_path: impl AsRef<Path>, object_hash: git::hash::Kind) -> any
)
.map_err(Into::into)
}

/// Check out the index at `index_path` into the new directory `dest_directory`, creating an
/// empty file per entry instead of writing actual object data.
///
/// * `index_path` - path to the index file, parsed using the `object_hash` taken from `Options`;
///   all other option fields are ignored.
/// * `dest_directory` - must not exist yet; it is created here, including missing parents.
/// * `progress` - receives informational messages and the two child progress instances
///   ("checkout" and "writing") that are handed to the checkout.
///
/// Entries whose mode is exactly `DIR`, `SYMLINK` or `COMMIT` are flagged with `SKIP_WORKTREE`
/// before the checkout runs, and are not counted in the number of files reported at the end.
///
/// # Errors
///
/// Returns an error if `dest_directory` already exists or cannot be created, if the index file
/// cannot be parsed, or if the checkout itself fails.
pub fn checkout_exclusive(
    index_path: impl AsRef<Path>,
    dest_directory: impl AsRef<Path>,
    mut progress: impl Progress,
    Options { object_hash, .. }: Options,
) -> anyhow::Result<()> {
    let dest_directory = dest_directory.as_ref();
    if dest_directory.exists() {
        bail!(
            "Refusing to checkout index into existing directory '{}' - remove it and try again",
            dest_directory.display()
        )
    }
    std::fs::create_dir_all(dest_directory)?;

    let mut index = parse_file(index_path, object_hash)?;

    let mut num_skipped = 0;
    for entry in index.entries_mut().iter_mut().filter(|e| {
        // Compare against each mode individually: `contains(DIR | SYMLINK | COMMIT)` would only
        // match if *all* bits of the combined value were set, which is not the same as
        // "is one of these three kinds".
        e.mode == git::index::entry::Mode::DIR
            || e.mode == git::index::entry::Mode::SYMLINK
            || e.mode == git::index::entry::Mode::COMMIT
    }) {
        entry.flags.insert(git::index::entry::Flags::SKIP_WORKTREE);
        num_skipped += 1;
    }
    if num_skipped > 0 {
        progress.info(format!("Skipping {} DIR/SYMLINK/COMMIT entries", num_skipped));
    }

    let opts = git::worktree::index::checkout::Options {
        fs: git::worktree::fs::Capabilities::probe(dest_directory),

        // TODO: turn the two following flags into an enum
        destination_is_initially_empty: true,
        overwrite_existing: false,
        ..Default::default()
    };

    let mut files = progress.add_child("checkout");
    let mut bytes = progress.add_child("writing");

    // Only entries that were not flagged above are expected to be written.
    let entries_for_checkout = index.entries().len() - num_skipped;
    files.init(Some(entries_for_checkout), git::progress::count("files"));
    bytes.init(Some(entries_for_checkout), git::progress::bytes());

    let start = std::time::Instant::now();
    git::worktree::index::checkout(
        &mut index,
        dest_directory,
        // Object lookup stand-in: hand out an empty blob for every id, so no object database
        // access is needed.
        |_, buf| {
            buf.clear();
            Some(git::objs::BlobRef { data: buf })
        },
        &mut files,
        &mut bytes,
        opts,
    )?;

    files.show_throughput(start);
    bytes.show_throughput(start);

    progress.done(format!("Created {} empty files", entries_for_checkout));
    Ok(())
}
15 changes: 15 additions & 0 deletions src/plumbing/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,21 @@ pub fn main() -> Result<()> {
index_path,
cmd,
}) => match cmd {
index::Subcommands::CheckoutExclusive { directory } => prepare_and_run(
"index-checkout",
verbose,
progress,
progress_keep_open,
None,
move |progress, _out, _err| {
core::index::checkout_exclusive(
index_path,
directory,
progress,
core::index::Options { object_hash, format },
)
},
),
index::Subcommands::Info { no_details } => prepare_and_run(
"index-entries",
verbose,
Expand Down
10 changes: 5 additions & 5 deletions src/plumbing/options.rs
Original file line number Diff line number Diff line change
Expand Up @@ -206,11 +206,9 @@ pub mod pack {
sink_compress: bool,

/// The '.pack' or '.idx' file to explode into loose objects
#[clap(parse(from_os_str))]
pack_path: PathBuf,

/// The path into which all objects should be written. Commonly '.git/objects'
#[clap(parse(from_os_str))]
object_path: Option<PathBuf>,
},
/// Verify the integrity of a pack, index or multi-index file
Expand All @@ -219,7 +217,6 @@ pub mod pack {
args: VerifyOptions,

/// The '.pack', '.idx' or 'multi-pack-index' file to validate.
#[clap(parse(from_os_str))]
path: PathBuf,
},
}
Expand Down Expand Up @@ -316,7 +313,6 @@ pub mod pack {
/// The folder into which to place the pack and the generated index file
///
/// If unset, only informational output will be provided to standard output.
#[clap(parse(from_os_str))]
directory: Option<PathBuf>,
},
}
Expand Down Expand Up @@ -371,6 +367,11 @@ pub mod index {
#[clap(long)]
no_details: bool,
},
/// Checkout the index into a directory with exclusive write access, similar to what would happen during clone.
CheckoutExclusive {
/// The directory into which to write all index entries.
directory: PathBuf,
},
}
}

Expand All @@ -383,7 +384,6 @@ pub mod commitgraph {
/// Verify the integrity of a commit graph
Verify {
/// The path to '.git/objects/info/', '.git/objects/info/commit-graphs/', or '.git/objects/info/commit-graph' to validate.
#[clap(parse(from_os_str))]
path: PathBuf,
/// output statistical information about the pack
#[clap(long, short = 's')]
Expand Down

0 comments on commit f23b8d2

Please sign in to comment.