Merge branch 'fix-819'

Byron committed Apr 20, 2023
2 parents b5d2654 + 07e11cf commit 69faad0

Showing 19 changed files with 304 additions and 44 deletions.
6 changes: 1 addition & 5 deletions gix-object/src/data.rs
@@ -69,11 +69,7 @@ pub mod verify {
/// hash of `self`.
pub fn verify_checksum(&self, desired: impl AsRef<gix_hash::oid>) -> Result<(), Error> {
let desired = desired.as_ref();
let mut hasher = gix_features::hash::hasher(desired.kind());
hasher.update(&crate::encode::loose_header(self.kind, self.data.len()));
hasher.update(self.data);

let actual_id = gix_hash::ObjectId::from(hasher.digest());
let actual_id = crate::compute_hash(desired.kind(), self.kind, self.data);
if desired != actual_id {
return Err(Error::ChecksumMismatch {
desired: desired.into(),
11 changes: 11 additions & 0 deletions gix-object/src/lib.rs
@@ -375,3 +375,14 @@ pub mod decode {
Ok((kind, size, size_end + 1))
}
}

/// A standalone function to compute a hash of kind `hash_kind` for an object of `object_kind` and its `data`.
pub fn compute_hash(hash_kind: gix_hash::Kind, object_kind: Kind, data: &[u8]) -> gix_hash::ObjectId {
let header = encode::loose_header(object_kind, data.len());

let mut hasher = gix_features::hash::hasher(hash_kind);
hasher.update(&header);
hasher.update(data);

hasher.digest().into()
}
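For orientation, a minimal usage sketch of the new function as seen from a dependent crate (assuming `gix-object` and `gix-hash` as dependencies); the expected id is the well-known hash of an empty blob:

fn main() {
    let hash_kind = gix_hash::Kind::Sha1;
    // `compute_hash` prepends the loose header (`blob 0\0`) before hashing,
    // so the result matches what `git hash-object` would print for an empty file.
    let id = gix_object::compute_hash(hash_kind, gix_object::Kind::Blob, b"");
    assert_eq!(id, gix_hash::ObjectId::empty_blob(hash_kind));
    println!("{id}"); // e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
}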
File renamed without changes.
14 changes: 14 additions & 0 deletions gix-object/tests/object.rs
@@ -4,6 +4,20 @@ use gix_hash::ObjectId;

mod encode;
mod immutable;
mod loose;

#[test]
fn compute_hash() {
let hk = gix_hash::Kind::Sha1;
assert_eq!(
gix_object::compute_hash(hk, gix_object::Kind::Blob, &[]),
gix_hash::ObjectId::empty_blob(hk)
);
assert_eq!(
gix_object::compute_hash(hk, gix_object::Kind::Tree, &[]),
gix_hash::ObjectId::empty_tree(hk)
);
}

type Result<T = ()> = std::result::Result<T, Box<dyn std::error::Error>>;

3 changes: 1 addition & 2 deletions gix-odb/src/lib.rs
@@ -65,8 +65,7 @@ pub fn sink(object_hash: gix_hash::Kind) -> Sink {
}
}

///
pub mod sink;
mod sink;

///
pub mod find;
36 changes: 16 additions & 20 deletions gix-odb/src/sink.rs
@@ -30,7 +30,6 @@ impl crate::traits::Write for Sink {
mut from: impl io::Read,
) -> Result<gix_hash::ObjectId, Self::Error> {
let mut size = size.try_into().expect("object size to fit into usize");
use gix_features::hash::Sha1;
let mut buf = [0u8; 8096];
let header = gix_object::encode::loose_header(kind, size);

@@ -40,27 +39,24 @@
}
Ok(())
};
match self.object_hash {
gix_hash::Kind::Sha1 => {
let mut hasher = Sha1::default();
hasher.update(&header);
possibly_compress(&header)?;

while size != 0 {
let bytes = size.min(buf.len());
from.read_exact(&mut buf[..bytes])?;
hasher.update(&buf[..bytes]);
possibly_compress(&buf[..bytes])?;
size -= bytes;
}
if let Some(compressor) = self.compressor.as_ref() {
let mut c = compressor.borrow_mut();
c.flush()?;
c.reset();
}
let mut hasher = gix_features::hash::hasher(self.object_hash);
hasher.update(&header);
possibly_compress(&header)?;

Ok(hasher.digest().into())
}
while size != 0 {
let bytes = size.min(buf.len());
from.read_exact(&mut buf[..bytes])?;
hasher.update(&buf[..bytes]);
possibly_compress(&buf[..bytes])?;
size -= bytes;
}
if let Some(compressor) = self.compressor.as_ref() {
let mut c = compressor.borrow_mut();
c.flush()?;
c.reset();
}

Ok(hasher.digest().into())
}
}
41 changes: 40 additions & 1 deletion gix-odb/src/store_impls/loose/write.rs
@@ -98,6 +98,16 @@ impl crate::traits::Write for Store {

type CompressedTempfile = deflate::Write<NamedTempFile>;

/// Access
impl Store {
/// Return the path to the object with `id`.
///
/// Note that it may not exist yet.
pub fn object_path(&self, id: &gix_hash::oid) -> PathBuf {
loose::hash_path(id, self.path.clone())
}
}
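A usage sketch of the new accessor; the two-level `aa/bbbb…` fan-out and the literal path below are assumptions based on `git`'s loose object layout:

use gix_odb::loose;

fn main() {
    let store = loose::Store::at("/repo/.git/objects", gix_hash::Kind::Sha1);
    let id = gix_hash::ObjectId::empty_blob(gix_hash::Kind::Sha1);
    // Only the path is computed; the object itself may not exist yet.
    assert_eq!(
        store.object_path(&id),
        std::path::Path::new("/repo/.git/objects/e6/9de29bb2d1d6434b8b29ae775ad8c2e48c5391")
    );
}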

impl Store {
fn dest(&self) -> Result<hash::Write<CompressedTempfile>, Error> {
Ok(hash::Write::new(
@@ -126,7 +136,36 @@ impl Store {
}
}
let file = file.into_inner();
file.persist(&object_path).map_err(|err| Error::Persist {
let res = file.persist(&object_path);
// On windows, we assume that such errors are due to its special filesystem semantics,
// on any other platform that would be a legitimate error though.
#[cfg(windows)]
if let Err(err) = &res {
if err.error.kind() == std::io::ErrorKind::PermissionDenied
|| err.error.kind() == std::io::ErrorKind::AlreadyExists
{
return Ok(id);
}
}
#[cfg(unix)]
if let Ok(mut perm) = object_path.metadata().map(|m| m.permissions()) {
use std::os::unix::fs::PermissionsExt;
/// For now we assume the default with a standard umask. This could be more sophisticated,
/// but it covers the bare minimum.
fn comp_mode(_mode: u32) -> u32 {
0o444
}
let new_mode = comp_mode(perm.mode());
if (perm.mode() ^ new_mode) & !0o170000 != 0 {
perm.set_mode(new_mode);
std::fs::set_permissions(&object_path, perm).map_err(|err| Error::Io {
source: err,
message: "Failed to set permission bits",
path: object_path.clone(),
})?;
}
}
res.map_err(|err| Error::Persist {
source: err,
target: object_path,
})?;
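The mode check above compares permission bits only, since `!0o170000` masks out the file-type bits; a standalone sketch of that check, with example modes that are purely illustrative:

// Minimal sketch of the comparison used above; the concrete modes are assumptions.
fn needs_chmod(current_mode: u32, target_mode: u32) -> bool {
    // `0o170000` covers the file-type bits (S_IFMT), so XOR-ing and masking them out
    // leaves only the permission bits to compare.
    (current_mode ^ target_mode) & !0o170000 != 0
}

fn main() {
    assert!(needs_chmod(0o100644, 0o444)); // a regular 0644 file would be chmod-ed to 0444
    assert!(!needs_chmod(0o100444, 0o444)); // an already read-only file is left alone
}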
1 change: 1 addition & 0 deletions gix-odb/tests/fixtures/generated-archives/.gitignore
@@ -0,0 +1 @@
repo_with_loose_objects.tar.xz
12 changes: 12 additions & 0 deletions gix-odb/tests/fixtures/repo_with_loose_objects.sh
@@ -0,0 +1,12 @@
#!/bin/bash
set -eu -o pipefail

git init -q

git checkout -b main
touch this
git add this
git commit -q -m c1
echo hello >> this
git commit -q -am c2

56 changes: 56 additions & 0 deletions gix-odb/tests/odb/store/loose.rs
@@ -70,6 +70,62 @@ mod write {
}
Ok(())
}

#[test]
#[cfg(unix)]
fn it_writes_objects_with_similar_permissions() -> crate::Result {
let hk = gix_hash::Kind::Sha1;
let git_store = loose::Store::at(
gix_testtools::scripted_fixture_read_only("repo_with_loose_objects.sh")?.join(".git/objects"),
hk,
);
let expected_perm = git_store
.object_path(&gix_hash::ObjectId::empty_blob(hk))
.metadata()?
.permissions();

let tmp = tempfile::TempDir::new()?;
let store = loose::Store::at(tmp.path(), hk);
store.write_buf(gix_object::Kind::Blob, &[])?;
let actual_perm = store
.object_path(&gix_hash::ObjectId::empty_blob(hk))
.metadata()?
.permissions();
assert_eq!(
actual_perm, expected_perm,
"we explicitly equalize permissions to be similar to what `git` would do"
);
Ok(())
}

#[test]
fn collisions_do_not_cause_failure() -> crate::Result {
let dir = tempfile::tempdir()?;

fn write_empty_trees(dir: &std::path::Path) {
let db = loose::Store::at(dir, gix_hash::Kind::Sha1);
let empty_tree = gix_object::Tree::empty();
for _ in 0..2 {
let id = db.write(&empty_tree).expect("works");
assert!(db.contains(id), "written objects are actually available");

let empty_blob = db.write_buf(gix_object::Kind::Blob, &[]).expect("works");
assert!(db.contains(empty_blob), "written objects are actually available");
let id = db
.write_stream(gix_object::Kind::Blob, 0, &mut [].as_slice())
.expect("works");
assert_eq!(id, empty_blob);
assert!(db.contains(empty_blob), "written objects are actually available");
}
}

gix_features::parallel::threads(|scope| {
scope.spawn(|| write_empty_trees(dir.path()));
scope.spawn(|| write_empty_trees(dir.path()));
});

Ok(())
}
}

mod contains {
6 changes: 1 addition & 5 deletions gix-pack/src/index/traverse/mod.rs
@@ -216,11 +216,7 @@
E: std::error::Error + Send + Sync + 'static,
{
if check.object_checksum() {
let mut hasher = gix_features::hash::hasher(index_entry.oid.kind());
hasher.update(&gix_object::encode::loose_header(object_kind, decompressed.len()));
hasher.update(decompressed);

let actual_oid = gix_hash::ObjectId::from(hasher.digest());
let actual_oid = gix_object::compute_hash(index_entry.oid.kind(), object_kind, decompressed);
if actual_oid != index_entry.oid {
return Err(Error::PackObjectMismatch {
actual: actual_oid,
6 changes: 1 addition & 5 deletions gix-worktree/src/status/content.rs
@@ -53,11 +53,7 @@ impl CompareBlobs for FastEq {
return Ok(Some(()));
}
let blob = worktree_blob.read_data()?;
let header = loose_header(gix_object::Kind::Blob, blob.len());
let mut hasher = hash::hasher(entry.id.kind());
hasher.update(&header);
hasher.update(blob);
let file_hash: ObjectId = hasher.digest().into();
let file_hash = gix_object::compute_hash(entry.id.kind(), gix_object::Kind::Blob, blob);
Ok((entry.id != file_hash).then_some(()))
}
}
8 changes: 8 additions & 0 deletions gix/src/object/mod.rs
@@ -90,6 +90,14 @@ impl<'repo> Object<'repo> {
}
}

/// Transform this object into a tag, or panic if it is no tag.
pub fn into_tag(self) -> Tag<'repo> {
match self.try_into() {
Ok(tag) => tag,
Err(this) => panic!("Tried to use {} as tag, but was {}", this.id, this.kind),
}
}

/// Transform this object into a commit, or return it as part of the `Err` if it is no commit.
pub fn try_into_commit(self) -> Result<Commit<'repo>, try_into::Error> {
self.try_into().map_err(|this: Self| try_into::Error {
11 changes: 11 additions & 0 deletions gix/src/object/tag.rs
@@ -1,6 +1,17 @@
use crate::{ext::ObjectIdExt, Tag};

impl<'repo> Tag<'repo> {
/// Decode the entire tag object and return it for accessing all tag information.
///
/// This never allocates.
///
/// Note that the returned tag object makes lookups easy and should be reused
/// for successive calls to string-ish information to avoid decoding the object
/// more than once.
pub fn decode(&self) -> Result<gix_object::TagRef<'_>, gix_object::decode::Error> {
gix_object::TagRef::from_bytes(&self.data)
}

/// Decode this tag partially and return the id of its target.
pub fn target_id(&self) -> Result<crate::Id<'repo>, gix_object::decode::Error> {
gix_object::TagRefIter::from_bytes(&self.data)
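A hedged usage sketch of the new `decode()`; the `gix::Tag` handle and the `TagRef` field names are taken from the surrounding crates, and error handling is kept minimal:

// Sketch: decode once, then reuse the returned `TagRef` for several lookups.
fn print_tag(tag: &gix::Tag<'_>) -> Result<(), gix_object::decode::Error> {
    let decoded = tag.decode()?;
    println!("tag '{}' points at a {:?}", decoded.name, decoded.target_kind);
    Ok(())
}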
52 changes: 46 additions & 6 deletions gix/src/repository/object.rs
@@ -1,5 +1,6 @@
#![allow(clippy::result_large_err)]
use std::convert::TryInto;
use std::ops::DerefMut;

use gix_hash::ObjectId;
use gix_odb::{Find, FindExt, Write};
@@ -58,32 +59,71 @@ impl crate::Repository {
}
}

fn shared_empty_buf(&self) -> std::cell::RefMut<'_, Vec<u8>> {
let mut bufs = self.bufs.borrow_mut();
if bufs.last().is_none() {
bufs.push(Vec::with_capacity(512));
}
std::cell::RefMut::map(bufs, |bufs| {
let buf = bufs.last_mut().expect("we assure one is present");
buf.clear();
buf
})
}

/// Write the given object into the object database and return its object id.
///
/// Note that we hash the object in memory to avoid storing objects that are already present. That way,
/// we avoid writing duplicate objects using slow disks that will eventually have to be garbage collected.
pub fn write_object(&self, object: impl gix_object::WriteTo) -> Result<Id<'_>, object::write::Error> {
let mut buf = self.shared_empty_buf();
object.write_to(buf.deref_mut())?;

let oid = gix_object::compute_hash(self.object_hash(), object.kind(), &buf);
if self.objects.contains(oid) {
return Ok(oid.attach(self));
}

self.objects
.write(object)
.write_buf(object.kind(), &buf)
.map(|oid| oid.attach(self))
.map_err(Into::into)
}
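A brief sketch of the dedup-aware `write_object` path shown above (assumes `gix-object` and `gix-hash` are available alongside `gix`):

// Sketch: writing an object that already exists returns its id without storing it again.
fn write_empty_tree(repo: &gix::Repository) -> Result<(), Box<dyn std::error::Error>> {
    let id = repo.write_object(&gix_object::Tree::empty())?;
    assert_eq!(id.detach(), gix_hash::ObjectId::empty_tree(repo.object_hash()));
    Ok(())
}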

/// Write a blob from the given `bytes`.
///
/// We avoid writing duplicate objects to slow disks that will eventually have to be garbage collected by
/// pre-hashing the data, and checking if the object is already present.
pub fn write_blob(&self, bytes: impl AsRef<[u8]>) -> Result<Id<'_>, object::write::Error> {
let bytes = bytes.as_ref();
let oid = gix_object::compute_hash(self.object_hash(), gix_object::Kind::Blob, bytes);
if self.objects.contains(oid) {
return Ok(oid.attach(self));
}
self.objects
.write_buf(gix_object::Kind::Blob, bytes.as_ref())
.write_buf(gix_object::Kind::Blob, bytes)
.map(|oid| oid.attach(self))
}

/// Write a blob from the given `Read` implementation.
///
/// Note that we hash the object in memory to avoid storing objects that are already present. That way,
/// we avoid writing duplicate objects using slow disks that will eventually have to be garbage collected.
///
/// If that is prohibitive, use the object database directly.
pub fn write_blob_stream(
&self,
mut bytes: impl std::io::Read + std::io::Seek,
) -> Result<Id<'_>, object::write::Error> {
let current = bytes.stream_position()?;
let len = bytes.seek(std::io::SeekFrom::End(0))? - current;
bytes.seek(std::io::SeekFrom::Start(current))?;
let mut buf = self.shared_empty_buf();
std::io::copy(&mut bytes, buf.deref_mut())?;
let oid = gix_object::compute_hash(self.object_hash(), gix_object::Kind::Blob, &buf);
if self.objects.contains(oid) {
return Ok(oid.attach(self));
}

self.objects
.write_stream(gix_object::Kind::Blob, len, bytes)
.write_buf(gix_object::Kind::Blob, &buf)
.map(|oid| oid.attach(self))
}

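And a companion sketch for the blob-writing paths above; the second write is assumed to hit the early-return branch once the object is already present:

// Sketch: both calls yield the same id; the duplicate short-circuits via `contains()`.
fn write_twice(repo: &gix::Repository) -> Result<(), Box<dyn std::error::Error>> {
    let from_bytes = repo.write_blob(b"hello world\n")?;
    let from_stream = repo.write_blob_stream(std::io::Cursor::new(b"hello world\n"))?;
    assert_eq!(from_bytes.detach(), from_stream.detach());
    Ok(())
}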
Git LFS file not shown
