Skip to content
10 changes: 8 additions & 2 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion git-odb/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ all-features = true
git-features = { version = "^0.16.0", path = "../git-features", features = ["rustsha1", "walkdir", "zlib"] }
git-hash = { version = "^0.5.0", path = "../git-hash" }
git-object = { version ="0.12.0", path = "../git-object" }
git-pack = { version ="0.8.0", path = "../git-pack" }
git-pack = { version ="^0.9.0", path = "../git-pack" }

btoi = "0.4.2"
tempfile = "3.1.0"
Expand Down
2 changes: 2 additions & 0 deletions git-odb/src/store/compound/init.rs
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,8 @@ impl compound::Store {
p.extension().unwrap_or_default() == "idx"
&& p.file_name().unwrap_or_default().to_string_lossy().starts_with("pack-")
})
// TODO: make this configurable, git for instance sorts by modification date
// https://github.com/libgit2/libgit2/blob/main/src/odb_pack.c#L41-L158
.map(|(p, md)| pack::Bundle::at(p).map(|b| (b, md.len())))
.collect::<Result<Vec<_>, _>>()?;
packs_and_sizes.sort_by_key(|e| e.1);
Expand Down
6 changes: 3 additions & 3 deletions git-pack/Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "git-pack"
version = "0.8.2"
version = "0.9.0"
repository = "https://github.com/Byron/gitoxide"
authors = ["Sebastian Thiel <sebastian.thiel@icloud.com>"]
license = "MIT/Apache-2.0"
Expand All @@ -13,7 +13,7 @@ doctest = false

[features]
pack-cache-lru-static = ["uluru"]
pack-cache-lru-dynamic = ["memory-lru"]
pack-cache-lru-dynamic = ["clru"]
serde1 = ["serde", "git-object/serde1"]
internal-testing-git-features-parallel = ["git-features/parallel"]
internal-testing-to-avoid-being-run-by-cargo-test-all = []
Expand Down Expand Up @@ -49,7 +49,7 @@ bytesize = "1.0.1"
parking_lot = { version = "0.11.0", default-features = false }
thiserror = "1.0.26"
uluru = { version = "3.0.0", optional = true }
memory-lru = { version = "0.1.0", optional = true }
clru = { version = "0.5.0", optional = true }
dashmap = "4.0.2"

[dev-dependencies]
Expand Down
38 changes: 30 additions & 8 deletions git-pack/src/cache.rs
Original file line number Diff line number Diff line change
Expand Up @@ -44,21 +44,28 @@ pub mod lru {
#[cfg(feature = "pack-cache-lru-dynamic")]
mod memory {
use super::DecodeEntry;
use clru::WeightScale;
use std::num::NonZeroUsize;

struct Entry {
data: Vec<u8>,
kind: git_object::Kind,
compressed_size: usize,
}

impl memory_lru::ResidentSize for Entry {
fn resident_size(&self) -> usize {
self.data.len()
type Key = (u32, u64);
struct CustomScale;

impl WeightScale<Key, Entry> for CustomScale {
fn weight(&self, _key: &Key, value: &Entry) -> usize {
value.data.len()
}
}

/// An LRU cache with hash map backing and an eviction rule based on the memory usage for object data in bytes.
pub struct MemoryCappedHashmap {
inner: memory_lru::MemoryLruCache<(u32, u64), Entry>,
inner: clru::CLruCache<Key, Entry, std::collections::hash_map::RandomState, CustomScale>,
free_list: Vec<Vec<u8>>,
debug: git_features::cache::Debug,
}

Expand All @@ -67,7 +74,11 @@ pub mod lru {
/// object data.
pub fn new(memory_cap_in_bytes: usize) -> MemoryCappedHashmap {
MemoryCappedHashmap {
inner: memory_lru::MemoryLruCache::new(memory_cap_in_bytes),
inner: clru::CLruCache::with_config(
clru::CLruCacheConfig::new(NonZeroUsize::new(memory_cap_in_bytes).expect("non zero"))
.with_scale(CustomScale),
),
free_list: Vec::new(),
debug: git_features::cache::Debug::new(format!("MemoryCappedHashmap({}B)", memory_cap_in_bytes)),
}
}
Expand All @@ -76,14 +87,25 @@ pub mod lru {
impl DecodeEntry for MemoryCappedHashmap {
fn put(&mut self, pack_id: u32, offset: u64, data: &[u8], kind: git_object::Kind, compressed_size: usize) {
self.debug.put();
self.inner.insert(
if let Ok(Some(previous_entry)) = self.inner.put_with_weight(
(pack_id, offset),
Entry {
data: Vec::from(data),
data: self
.free_list
.pop()
.map(|mut v| {
v.clear();
v.resize(data.len(), 0);
v.copy_from_slice(data);
v
})
.unwrap_or_else(|| Vec::from(data)),
kind,
compressed_size,
},
)
) {
self.free_list.push(previous_entry.data)
}
}

fn get(&mut self, pack_id: u32, offset: u64, out: &mut Vec<u8>) -> Option<(git_object::Kind, usize)> {
Expand Down
35 changes: 31 additions & 4 deletions git-pack/src/data/output/count/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,19 +12,46 @@ pub struct Count {
/// The hash of the object to write
pub id: ObjectId,
/// A way to locate a pack entry in the object database, only available if the object is in a pack.
pub entry_pack_location: Option<crate::bundle::Location>,
pub entry_pack_location: PackLocation,
}

/// Specifies how the pack location was handled during counting
#[derive(PartialEq, Eq, Debug, Hash, Ord, PartialOrd, Clone)]
#[cfg_attr(feature = "serde1", derive(serde::Serialize, serde::Deserialize))]
pub enum PackLocation {
/// We did not lookup this object
NotLookedUp,
/// The object was looked up and there may be a location in a pack, along with enty information
LookedUp(Option<crate::bundle::Location>),
}

impl PackLocation {
/// Directly go through to LookedUp variant, panic otherwise
pub fn is_none(&self) -> bool {
match self {
PackLocation::LookedUp(opt) => opt.is_none(),
PackLocation::NotLookedUp => unreachable!("must have been resolved"),
}
}
/// Directly go through to LookedUp variant, panic otherwise
pub fn as_ref(&self) -> Option<&crate::bundle::Location> {
match self {
PackLocation::LookedUp(opt) => opt.as_ref(),
PackLocation::NotLookedUp => unreachable!("must have been resolved"),
}
}
}

impl Count {
/// Create a new instance from the given `oid` and its corresponding git `obj`ect data.
pub fn from_data(oid: impl Into<ObjectId>, obj: &data::Object<'_>) -> Self {
Count {
id: oid.into(),
entry_pack_location: obj.pack_location.clone(),
entry_pack_location: PackLocation::LookedUp(obj.pack_location.clone()),
}
}
}

///
pub mod iter_from_objects;
pub use iter_from_objects::{objects, objects_unthreaded};
pub mod objects;
pub use objects::{objects, objects_unthreaded};
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,7 @@ where
cache,
progress,
should_interrupt,
true,
)
}
},
Expand Down Expand Up @@ -125,6 +126,7 @@ where
pack_cache,
&mut progress,
should_interrupt,
false,
)
}

Expand All @@ -139,6 +141,7 @@ fn expand_inner<Find, IterErr, Oid>(
cache: &mut impl crate::cache::DecodeEntry,
progress: &mut impl Progress,
should_interrupt: &AtomicBool,
allow_pack_lookups: bool,
) -> Result<find::existing::Error<Find::Error>, IterErr>
where
Find: crate::Find + Send + Sync,
Expand Down Expand Up @@ -205,12 +208,20 @@ where
&mut tree_traversal_state,
|oid, buf| {
stats.decoded_objects += 1;
db.find_existing_tree_iter(oid, buf, cache).ok()
match db.find_existing(oid, buf, cache).ok() {
Some(obj) => {
progress.inc();
stats.expanded_objects += 1;
out.push(output::Count::from_data(oid, &obj));
obj.into_tree_iter()
}
None => None,
}
},
&mut traverse_delegate,
)
.map_err(Error::TreeTraverse)?;
&traverse_delegate.objects
&traverse_delegate.non_trees
} else {
for commit_id in &parent_commit_ids {
let parent_tree_id = {
Expand Down Expand Up @@ -259,7 +270,7 @@ where
&changes_delegate.objects
};
for id in objects.iter() {
out.push(id_to_count(db, buf2, id, progress, stats));
out.push(id_to_count(db, buf2, id, progress, stats, allow_pack_lookups));
}
break;
}
Expand All @@ -280,13 +291,21 @@ where
&mut tree_traversal_state,
|oid, buf| {
stats.decoded_objects += 1;
db.find_existing_tree_iter(oid, buf, cache).ok()
match db.find_existing(oid, buf, cache).ok() {
Some(obj) => {
progress.inc();
stats.expanded_objects += 1;
out.push(output::Count::from_data(oid, &obj));
obj.into_tree_iter()
}
None => None,
}
},
&mut traverse_delegate,
)
.map_err(Error::TreeTraverse)?;
for id in traverse_delegate.objects.iter() {
out.push(id_to_count(db, buf1, id, progress, stats));
for id in traverse_delegate.non_trees.iter() {
out.push(id_to_count(db, buf1, id, progress, stats, allow_pack_lookups));
}
break;
}
Expand Down Expand Up @@ -318,7 +337,7 @@ where

mod tree {
pub mod changes {
use crate::data::output::count::iter_from_objects::util::InsertImmutable;
use crate::data::output::count::objects::util::InsertImmutable;
use git_diff::tree::{
visit::{Action, Change},
Visit,
Expand Down Expand Up @@ -374,13 +393,13 @@ mod tree {
}

pub mod traverse {
use crate::data::output::count::iter_from_objects::util::InsertImmutable;
use crate::data::output::count::objects::util::InsertImmutable;
use git_hash::ObjectId;
use git_object::{bstr::BStr, immutable::tree::Entry};
use git_traverse::tree::visit::{Action, Visit};

pub struct AllUnseen<'a, H> {
pub objects: Vec<ObjectId>,
pub non_trees: Vec<ObjectId>,
all_seen: &'a H,
}

Expand All @@ -390,12 +409,12 @@ mod tree {
{
pub fn new(all_seen: &'a H) -> Self {
AllUnseen {
objects: Default::default(),
non_trees: Default::default(),
all_seen,
}
}
pub fn clear(&mut self) {
self.objects.clear();
self.non_trees.clear();
}
}

Expand All @@ -414,7 +433,6 @@ mod tree {
fn visit_tree(&mut self, entry: &Entry<'_>) -> Action {
let inserted = self.all_seen.insert(entry.oid.to_owned());
if inserted {
self.objects.push(entry.oid.to_owned());
Action::Continue
} else {
Action::Skip
Expand All @@ -424,14 +442,15 @@ mod tree {
fn visit_nontree(&mut self, entry: &Entry<'_>) -> Action {
let inserted = self.all_seen.insert(entry.oid.to_owned());
if inserted {
self.objects.push(entry.oid.to_owned());
self.non_trees.push(entry.oid.to_owned());
}
Action::Continue
}
}
}
}

#[inline]
fn push_obj_count_unique(
out: &mut Vec<output::Count>,
all_seen: &impl util::InsertImmutable<ObjectId>,
Expand All @@ -452,18 +471,24 @@ fn push_obj_count_unique(
}
}

#[inline]
fn id_to_count<Find: crate::Find>(
db: &Find,
buf: &mut Vec<u8>,
id: &oid,
progress: &mut impl Progress,
statistics: &mut Outcome,
allow_pack_lookups: bool,
) -> output::Count {
progress.inc();
statistics.expanded_objects += 1;
output::Count {
id: id.to_owned(),
entry_pack_location: db.location_by_oid(id, buf),
entry_pack_location: if allow_pack_lookups {
PackLocation::LookedUp(db.location_by_oid(id, buf))
} else {
PackLocation::NotLookedUp
},
}
}

Expand Down Expand Up @@ -625,6 +650,7 @@ mod types {
Interrupted,
}
}
use crate::data::output::count::PackLocation;
use std::cell::RefCell;
use std::collections::HashSet;
pub use types::{Error, ObjectExpansion, Options, Outcome};
Expand Down
Loading