Skip to content

Commit

Permalink
refactor!: move git_pack::data::Object to git_object::Data, massively…
Browse files Browse the repository at this point in the history
… alter git_odb::Find trait (#266)

This will break a lot, but has to happen to prepare these traits for the
next generation of object databases.
  • Loading branch information
Byron committed Nov 30, 2021
1 parent f788310 commit e22a710
Show file tree
Hide file tree
Showing 49 changed files with 377 additions and 302 deletions.
1 change: 1 addition & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion etc/check-package-size.sh
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ echo "in root: gitoxide CLI"
(enter git-object && indent cargo diet -n --package-size-limit 25KB)
(enter git-commitgraph && indent cargo diet -n --package-size-limit 25KB)
(enter git-pack && indent cargo diet -n --package-size-limit 90KB)
(enter git-odb && indent cargo diet -n --package-size-limit 70KB)
(enter git-odb && indent cargo diet -n --package-size-limit 75KB)
(enter git-protocol && indent cargo diet -n --package-size-limit 50KB)
(enter git-packetline && indent cargo diet -n --package-size-limit 35KB)
(enter git-repository && indent cargo diet -n --package-size-limit 70KB)
Expand Down
20 changes: 12 additions & 8 deletions experiments/diffing/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,11 @@ fn main() -> anyhow::Result<()> {

let start = Instant::now();
let all_commits = commit_id
.ancestors(|oid, buf| db.find_commit_iter(oid, buf, &mut odb::pack::cache::Never).ok())
.ancestors(|oid, buf| {
db.find_commit_iter(oid, buf, &mut odb::pack::cache::Never)
.ok()
.map(|t| t.0)
})
.collect::<Result<Vec<_>, _>>()?;
let num_diffs = all_commits.len();
let elapsed = start.elapsed();
Expand All @@ -64,17 +68,17 @@ fn main() -> anyhow::Result<()> {
obj_cache: &mut memory_lru::MemoryLruCache<ObjectId, ObjectInfo>,
db: &odb::linked::Store,
pack_cache: &mut impl odb::pack::cache::DecodeEntry,
) -> Option<odb::data::Object<'b>> {
) -> Option<git_repository::objs::Data<'b>> {
let oid = oid.to_owned();
match obj_cache.get(&oid) {
Some(ObjectInfo { kind, data }) => {
buf.resize(data.len(), 0);
buf.copy_from_slice(data);
Some(odb::data::Object::new(*kind, buf))
Some(git_repository::objs::Data::new(*kind, buf))
}
None => {
let obj = db.find(oid, buf, pack_cache).ok();
if let Some(ref obj) = obj {
if let Some((obj, _location)) = &obj {
obj_cache.insert(
oid,
ObjectInfo {
Expand All @@ -83,7 +87,7 @@ fn main() -> anyhow::Result<()> {
},
);
}
obj
obj.map(|t| t.0)
}
}
}
Expand Down Expand Up @@ -201,7 +205,7 @@ fn do_libgit2_treediff(commits: &[ObjectId], repo_dir: &std::path::Path, mode: C
fn do_gitoxide_tree_diff<C, L>(commits: &[ObjectId], make_find: C, mode: Computation) -> anyhow::Result<usize>
where
C: Fn() -> L + Sync,
L: for<'b> FnMut(&oid, &'b mut Vec<u8>) -> Option<odb::data::Object<'b>>,
L: for<'b> FnMut(&oid, &'b mut Vec<u8>) -> Option<git_repository::objs::Data<'b>>,
{
let changes: usize = match mode {
Computation::MultiThreaded => {
Expand Down Expand Up @@ -252,14 +256,14 @@ where

fn find_tree_iter<'b, L>(id: &oid, buf: &'b mut Vec<u8>, mut find: L) -> Option<TreeRefIter<'b>>
where
L: for<'a> FnMut(&oid, &'a mut Vec<u8>) -> Option<odb::data::Object<'a>>,
L: for<'a> FnMut(&oid, &'a mut Vec<u8>) -> Option<git_repository::objs::Data<'a>>,
{
find(id, buf).and_then(|o| o.try_into_tree_iter())
}

fn tree_iter_by_commit<'b, L>(id: &oid, buf: &'b mut Vec<u8>, mut find: L) -> TreeRefIter<'b>
where
L: for<'a> FnMut(&oid, &'a mut Vec<u8>) -> Option<odb::data::Object<'a>>,
L: for<'a> FnMut(&oid, &'a mut Vec<u8>) -> Option<git_repository::objs::Data<'a>>,
{
let tid = find(id, buf)
.expect("commit present")
Expand Down
3 changes: 2 additions & 1 deletion experiments/object-access/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -324,8 +324,9 @@ fn do_link_git_in_parallel<C>(
where
C: odb::pack::cache::DecodeEntry,
{
use rayon::prelude::*;
use std::iter::FromIterator;

use rayon::prelude::*;
let bytes = std::sync::atomic::AtomicU64::default();
let repo = link_git::odb::Odb {
loose: link_git::odb::backend::Loose::at(repo.objects_dir()),
Expand Down
1 change: 1 addition & 0 deletions experiments/odb-redesign/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ thread-safe = []
[dependencies]
git-pack = { path = "../../git-pack", version = "*" }
git-odb = { path = "../../git-odb", version = "*" }
git-object = { path = "../../git-object", version = "*" }
git-hash = { path = "../../git-hash", version ="^0.8.0" }
git-ref = { path = "../../git-ref", version ="^0.10.0"}
parking_lot = { version = "0.11.0", default-features = false }
Expand Down
18 changes: 8 additions & 10 deletions experiments/odb-redesign/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -59,13 +59,10 @@ mod features {
}

mod odb {
use git_hash::oid;
use std::path::PathBuf;

use git_odb::{
data::Object,
pack::{bundle::Location, cache::DecodeEntry, find::Entry, Bundle},
};
use git_hash::oid;
use git_odb::pack::{bundle::Location, cache::DecodeEntry, find::Entry, Bundle};

use crate::{
features,
Expand Down Expand Up @@ -483,9 +480,10 @@ mod odb {
.do_load(|path| {
git_pack::data::File::at(path).map(features::OwnShared::new).map_err(
|err| match err {
git_odb::data::header::decode::Error::Io { source, .. } => {
source
}
git_odb::pack::data::header::decode::Error::Io {
source,
..
} => source,
other => std::io::Error::new(std::io::ErrorKind::Other, other),
},
)
Expand Down Expand Up @@ -572,7 +570,7 @@ mod odb {
}
}

impl git_odb::Find for Handle {
impl git_odb::pack::Find for Handle {
type Error = git_odb::compound::find::Error;

fn contains(&self, id: impl AsRef<oid>) -> bool {
Expand All @@ -584,7 +582,7 @@ mod odb {
id: impl AsRef<git_hash::oid>,
buffer: &'a mut Vec<u8>,
pack_cache: &mut impl DecodeEntry,
) -> Result<Option<Object<'a>>, Self::Error> {
) -> Result<Option<(git_object::Data<'a>, Option<git_pack::bundle::Location>)>, Self::Error> {
// TODO: if the generation changes, we need to clear the pack-cache as it depends on pack-ids.
// Can we simplify this so it's more obvious what generation does?
todo!()
Expand Down
23 changes: 16 additions & 7 deletions experiments/traversal/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,11 @@ fn main() -> anyhow::Result<()> {

let start = Instant::now();
let all_commits = commit_id
.ancestors(|oid, buf| db.find_commit_iter(oid, buf, &mut odb::pack::cache::Never).ok())
.ancestors(|oid, buf| {
db.find_commit_iter(oid, buf, &mut odb::pack::cache::Never)
.ok()
.map(|t| t.0)
})
.collect::<Result<Vec<_>, _>>()?;
let elapsed = start.elapsed();
println!(
Expand Down Expand Up @@ -145,7 +149,7 @@ where
C: odb::pack::cache::DecodeEntry,
{
let mut cache = new_cache();
let ancestors = tip.ancestors(|oid, buf| db.find_commit_iter(oid, buf, &mut cache).ok());
let ancestors = tip.ancestors(|oid, buf| db.find_commit_iter(oid, buf, &mut cache).ok().map(|t| t.0));
let mut commits = 0;
for commit_id in ancestors {
let _ = commit_id?;
Expand Down Expand Up @@ -208,13 +212,17 @@ where
for commit in commits {
let tree_id = db
.try_find(commit, &mut buf, &mut cache)?
.and_then(|o| o.try_into_commit_iter().and_then(|mut c| c.tree_id()))
.and_then(|(o, _l)| o.try_into_commit_iter().and_then(|mut c| c.tree_id()))
.expect("commit as starting point");

let mut count = Count { entries: 0, seen };
db.find_tree_iter(tree_id, &mut buf2, &mut cache)?.traverse(
db.find_tree_iter(tree_id, &mut buf2, &mut cache)?.0.traverse(
&mut state,
|oid, buf| db.find(oid, buf, &mut cache).ok().and_then(|o| o.try_into_tree_iter()),
|oid, buf| {
db.find(oid, buf, &mut cache)
.ok()
.and_then(|(o, _l)| o.try_into_tree_iter())
},
&mut count,
)?;
entries += count.entries as u64;
Expand Down Expand Up @@ -271,12 +279,13 @@ where
|(count, buf, buf2, cache, state), commit| {
let tid = db
.find_commit_iter(commit, buf, cache)?
.0
.tree_id()
.expect("commit as starting point");
count.entries = 0;
db.find_tree_iter(tid, buf2, cache)?.traverse(
db.find_tree_iter(tid, buf2, cache)?.0.traverse(
state,
|oid, buf| db.find_tree_iter(oid, buf, cache).ok(),
|oid, buf| db.find_tree_iter(oid, buf, cache).ok().map(|t| t.0),
count,
)?;
entries.fetch_add(count.entries as u64, std::sync::atomic::Ordering::Relaxed);
Expand Down
16 changes: 10 additions & 6 deletions git-diff/tests/visit/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ mod changes {
use git_diff::tree::{recorder, recorder::Change::*};
use git_hash::{oid, ObjectId};
use git_object::{bstr::ByteSlice, tree::EntryMode, TreeRefIter};
use git_odb::{linked, pack, Find};
use git_odb::{linked, pack, pack::Find};

use crate::hex_to_id;

Expand All @@ -26,6 +26,7 @@ mod changes {
let tree_id = db
.try_find(commit, buf, &mut pack::cache::Never)?
.ok_or_else(|| format!("start commit {:?} to be present", commit))?
.0
.decode()?
.into_commit()
.expect("id is actually a commit")
Expand All @@ -34,6 +35,7 @@ mod changes {
Ok(db
.try_find(tree_id, buf, &mut pack::cache::Never)?
.expect("main tree present")
.0
.try_into_tree_iter()
.expect("id to be a tree"))
}
Expand All @@ -53,7 +55,7 @@ mod changes {
db.try_find(oid, buf, &mut pack::cache::Never)
.ok()
.flatten()
.and_then(|obj| obj.try_into_tree_iter())
.and_then(|obj| obj.0.try_into_tree_iter())
},
&mut recorder,
)?;
Expand All @@ -66,6 +68,7 @@ mod changes {
let commit = db
.try_find(commit_id, &mut buf, &mut pack::cache::Never)?
.ok_or_else(|| format!("start commit {:?} to be present", commit_id))?
.0
.decode()?
.into_commit()
.expect("id is actually a commit");
Expand All @@ -78,17 +81,18 @@ mod changes {
let current_tree = db
.try_find(main_tree_id, &mut buf, &mut pack::cache::Never)?
.expect("main tree present")
.0
.try_into_tree_iter()
.expect("id to be a tree");
let mut buf2 = Vec::new();
let previous_tree: Option<_> = {
parent_commit_id
.and_then(|id| db.try_find(id, &mut buf2, &mut pack::cache::Never).ok().flatten())
.and_then(|c| c.decode().ok())
.and_then(|(c, _l)| c.decode().ok())
.and_then(|c| c.into_commit())
.map(|c| c.tree())
.and_then(|tree| db.try_find(tree, &mut buf2, &mut pack::cache::Never).ok().flatten())
.and_then(|tree| tree.try_into_tree_iter())
.and_then(|(tree, _)| tree.try_into_tree_iter())
};

let mut recorder = git_diff::tree::Recorder::default();
Expand All @@ -99,7 +103,7 @@ mod changes {
db.try_find(oid, buf, &mut pack::cache::Never)
.ok()
.flatten()
.and_then(|obj| obj.try_into_tree_iter())
.and_then(|(obj, _)| obj.try_into_tree_iter())
},
&mut recorder,
)?;
Expand Down Expand Up @@ -133,7 +137,7 @@ mod changes {
db.try_find(oid, buf, &mut pack::cache::Never)
.ok()
.flatten()
.and_then(|o| o.try_into_commit_iter())
.and_then(|t| t.0.try_into_commit_iter())
})
.collect::<Vec<_>>()
.into_iter()
Expand Down
4 changes: 1 addition & 3 deletions git-object/src/data.rs
Original file line number Diff line number Diff line change
@@ -1,8 +1,6 @@
//! Contains a borrowed Object bound to a buffer holding its decompressed data.

use crate::{BlobRef, CommitRef, CommitRefIter, Kind, ObjectRef, TagRef, TagRefIter, TreeRef, TreeRefIter};

use crate::Data;
use crate::{BlobRef, CommitRef, CommitRefIter, Data, Kind, ObjectRef, TagRef, TagRefIter, TreeRef, TreeRefIter};

impl<'a> Data<'a> {
/// Constructs a new data object from `kind` and `data`.
Expand Down
3 changes: 2 additions & 1 deletion git-object/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,8 @@ pub mod tag;
pub mod tree;

mod blob;
mod data;
///
pub mod data;

mod traits;
pub use traits::WriteTo;
Expand Down
65 changes: 65 additions & 0 deletions git-odb/src/find.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
/// Error type for lookups that treat a missing object as a failure.
pub mod existing {
use git_hash::ObjectId;

/// The error returned by the [`find(…)`][crate::FindExt::find()] trait methods.
///
/// It is generic over `T`, the error type produced by the underlying lookup.
#[derive(Debug, thiserror::Error)]
#[allow(missing_docs)]
pub enum Error<T: std::error::Error + 'static> {
/// The underlying lookup failed; `transparent` forwards its message and source unchanged.
#[error(transparent)]
Find(T),
/// No object with the given `oid` could be found.
#[error("An object with id {} could not be found", .oid)]
NotFound { oid: ObjectId },
}
}

///
pub mod existing_object {
use git_hash::ObjectId;

/// The error returned by the various [`find_*`][crate::FindExt::find_commit()] trait methods.
#[derive(Debug, thiserror::Error)]
#[allow(missing_docs)]
pub enum Error<T: std::error::Error + 'static> {
#[error(transparent)]
Find(T),
#[error(transparent)]
Decode(git_object::decode::Error),
#[error("An object with id {} could not be found", .oid)]
NotFound { oid: ObjectId },
#[error("Expected object of kind {} something else", .expected)]
ObjectKind { expected: git_object::Kind },
}
}

///
pub mod existing_iter {
use git_hash::ObjectId;

/// The error returned by the various [`find_*`][crate::FindExt::find_commit()] trait methods.
#[derive(Debug, thiserror::Error)]
#[allow(missing_docs)]
pub enum Error<T: std::error::Error + 'static> {
#[error(transparent)]
Find(T),
#[error("An object with id {} could not be found", .oid)]
NotFound { oid: ObjectId },
#[error("Expected object of kind {} something else", .expected)]
ObjectKind { expected: git_object::Kind },
}
}

/// An entry in a pack, providing access to its data.
///
/// It's commonly retrieved by reading from a pack index file followed by a read from a pack data file.
#[derive(PartialEq, Eq, Debug, Hash, Ord, PartialOrd, Clone, Copy)]
#[cfg_attr(feature = "serde1", derive(serde::Serialize, serde::Deserialize))]
#[allow(missing_docs)]
pub struct Entry<'a> {
/// The pack-data encoded bytes of the pack data entry as present in the pack file, including the header followed by compressed data.
pub data: &'a [u8],
/// The crc32 hash over the entirety of `data`, or `None` if the pack file format doesn't support it yet.
pub crc32: Option<u32>,
/// The version of the pack file containing `data`.
pub version: crate::pack::data::Version,
}
5 changes: 3 additions & 2 deletions git-odb/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,12 +16,13 @@
//! * [`linked::Store`]
//! * A database containing various [`compound::Stores`][compound::Store] as gathered from `alternates` files.
pub use git_pack as pack;
pub use pack::{data, Find, FindExt};

mod store;
pub use store::{compound, linked, loose, sink, Sink};

pub mod alternate;

///
pub mod find;
mod traits;
pub use traits::Write;
pub use traits::{Find, FindExt, Write};

0 comments on commit e22a710

Please sign in to comment.