diff --git a/Cargo.lock b/Cargo.lock index 62a1dfe258f..84b0cfae4e0 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1133,11 +1133,13 @@ name = "git-object" version = "0.15.0" dependencies = [ "bstr", + "btoi", "git-actor", "git-hash", "git-testtools", "git-validate", "hex", + "itoa", "nom", "pretty_assertions", "quick-error", diff --git a/git-actor/src/signature/mod.rs b/git-actor/src/signature/mod.rs index 8932914f93c..cdb745ffb5e 100644 --- a/git-actor/src/signature/mod.rs +++ b/git-actor/src/signature/mod.rs @@ -97,6 +97,10 @@ mod write { pub fn write_to(&self, out: impl io::Write) -> io::Result<()> { self.to_ref().write_to(out) } + /// Computes the number of bytes necessary to serialize this signature + pub fn size(&self) -> usize { + self.to_ref().size() + } } impl<'a> SignatureRef<'a> { @@ -104,11 +108,14 @@ mod write { pub fn write_to(&self, mut out: impl io::Write) -> io::Result<()> { out.write_all(validated_token(self.name)?)?; out.write_all(SPACE)?; - out.write_all(&b"<"[..])?; + out.write_all(b"<")?; out.write_all(validated_token(self.email)?)?; - out.write_all(&b"> "[..])?; - self.time.write_to(out)?; - Ok(()) + out.write_all(b"> ")?; + self.time.write_to(out) + } + /// Computes the number of bytes necessary to serialize this signature + pub fn size(&self) -> usize { + self.name.len() + self.email.len() + self.time.size() + 4 } } diff --git a/git-actor/src/time.rs b/git-actor/src/time.rs index 14f7fc5684f..458648a44b0 100644 --- a/git-actor/src/time.rs +++ b/git-actor/src/time.rs @@ -17,10 +17,10 @@ impl Time { pub fn write_to(&self, mut out: impl io::Write) -> io::Result<()> { itoa::write(&mut out, self.time)?; out.write_all(SPACE)?; - out.write_all(&[match self.sign { - Sign::Plus => b'+', - Sign::Minus => b'-', - }])?; + out.write_all(match self.sign { + Sign::Plus => b"+", + Sign::Minus => b"-", + })?; const ZERO: &[u8; 1] = b"0"; @@ -40,4 +40,31 @@ impl Time { } itoa::write(&mut out, minutes).map(|_| ()) } + /// Computes the number of bytes necessary to render this time + pub fn size(&self) -> usize { + // TODO: this is not year 2038 safe... + (if self.time >= 1_000_000_000 { + 10 + } else if self.time >= 100_000_000 { + 9 + } else if self.time >= 10_000_000 { + 8 + } else if self.time >= 1_000_000 { + 7 + } else if self.time >= 100_000 { + 6 + } else if self.time >= 10_000 { + 5 + } else if self.time >= 1_000 { + 4 + } else if self.time >= 100 { + 3 + } else if self.time >= 10 { + 2 + } else { + 1 + }) + 2 + + 2 + + 2 + } } diff --git a/git-object/Cargo.toml b/git-object/Cargo.toml index 65b7114de54..12d5a4ee26a 100644 --- a/git-object/Cargo.toml +++ b/git-object/Cargo.toml @@ -23,11 +23,13 @@ git-hash = { version ="^0.8.0", path = "../git-hash" } git-validate = { version ="^0.5.3", path = "../git-validate" } git-actor = { version ="^0.6.0", path = "../git-actor" } +btoi = "0.4.2" +itoa = "0.4.6" quick-error = "2.0.0" hex = "0.4.2" bstr = { version = "0.2.13", default-features = false, features = ["std", "unicode"] } nom = { version = "7", default-features = false, features = ["std"]} -smallvec = "1.4.0" +smallvec = { version = "1.4.0", features = ["write"] } serde = { version = "1.0.114", optional = true, default-features = false, features = ["derive"]} [dev-dependencies] diff --git a/git-object/src/blob.rs b/git-object/src/blob.rs index 32945cdf033..ff2eeafc831 100644 --- a/git-object/src/blob.rs +++ b/git-object/src/blob.rs @@ -8,6 +8,10 @@ impl<'a> crate::WriteTo for BlobRef<'a> { out.write_all(self.data) } + fn size(&self) -> usize { + self.data.len() + } + fn kind(&self) -> Kind { Kind::Blob } @@ -19,6 +23,10 @@ impl crate::WriteTo for Blob { self.to_ref().write_to(out) } + fn size(&self) -> usize { + self.to_ref().size() + } + fn kind(&self) -> Kind { Kind::Blob } diff --git a/git-object/src/commit/write.rs b/git-object/src/commit/write.rs index 3eceb9e2e9e..b62d3874010 100644 --- a/git-object/src/commit/write.rs +++ b/git-object/src/commit/write.rs @@ -1,3 +1,4 @@ +use bstr::ByteSlice; use std::io; use crate::{encode, encode::NL, Commit, CommitRef, Kind}; @@ -21,6 +22,38 @@ impl crate::WriteTo for Commit { out.write_all(&self.message) } + fn size(&self) -> usize { + let hashsize = self.tree.kind().len_in_hex(); + b"tree".len() + + 1 + + hashsize + + 1 + + self.parents.iter().count() * (b"parent".len() + 1 + hashsize + 1) + + b"author".len() + + 1 + + self.author.size() + + 1 + + b"committer".len() + + 1 + + self.committer.size() + + 1 + + self + .encoding + .as_ref() + .map(|e| b"encoding".len() + 1 + e.len() + 1) + .unwrap_or(0) + + self + .extra_headers + .iter() + .map(|(name, value)| { + // each header *value* is preceded by a space and followed by a newline + name.len() + value.split_str("\n").map(|s| s.len() + 2).sum::() + }) + .sum::() + + 1 + + self.message.len() + } + fn kind(&self) -> Kind { Kind::Commit } @@ -45,6 +78,38 @@ impl<'a> crate::WriteTo for CommitRef<'a> { out.write_all(self.message) } + fn size(&self) -> usize { + let hashsize = self.tree().kind().len_in_hex(); + b"tree".len() + + 1 + + hashsize + + 1 + + self.parents.iter().count() * (b"parent".len() + 1 + hashsize + 1) + + b"author".len() + + 1 + + self.author.size() + + 1 + + b"committer".len() + + 1 + + self.committer.size() + + 1 + + self + .encoding + .as_ref() + .map(|e| b"encoding".len() + 1 + e.len() + 1) + .unwrap_or(0) + + self + .extra_headers + .iter() + .map(|(name, value)| { + // each header *value* is preceded by a space and followed by a newline + name.len() + value.split_str("\n").map(|s| s.len() + 2).sum::() + }) + .sum::() + + 1 + + self.message.len() + } + fn kind(&self) -> Kind { Kind::Commit } diff --git a/git-object/src/encode.rs b/git-object/src/encode.rs index c2b556e1ba5..480441dc1cb 100644 --- a/git-object/src/encode.rs +++ b/git-object/src/encode.rs @@ -1,4 +1,5 @@ -use std::io; +//! Encoding utilities +use std::io::{self, Write}; use bstr::{BString, ByteSlice}; use quick_error::quick_error; @@ -15,13 +16,28 @@ quick_error! { } } +macro_rules! check { + ($e: expr) => { + $e.expect("Writing to a Vec should never fail.") + }; +} +/// Generates a loose header buffer +pub fn loose_header(kind: crate::Kind, size: usize) -> smallvec::SmallVec<[u8; 28]> { + let mut v = smallvec::SmallVec::new(); + check!(v.write_all(kind.as_bytes())); + check!(v.write_all(SPACE)); + check!(itoa::write(&mut v, size)); + check!(v.write_all(b"\0")); + v +} + impl From for io::Error { fn from(other: Error) -> io::Error { io::Error::new(io::ErrorKind::Other, other) } } -pub fn header_field_multi_line(name: &[u8], value: &[u8], mut out: impl io::Write) -> io::Result<()> { +pub(crate) fn header_field_multi_line(name: &[u8], value: &[u8], mut out: impl io::Write) -> io::Result<()> { let mut lines = value.as_bstr().split_str(b"\n"); trusted_header_field(name, lines.next().ok_or(Error::EmptyValue)?, &mut out)?; for line in lines { @@ -32,14 +48,14 @@ pub fn header_field_multi_line(name: &[u8], value: &[u8], mut out: impl io::Writ Ok(()) } -pub fn trusted_header_field(name: &[u8], value: &[u8], mut out: impl io::Write) -> io::Result<()> { +pub(crate) fn trusted_header_field(name: &[u8], value: &[u8], mut out: impl io::Write) -> io::Result<()> { out.write_all(name)?; out.write_all(SPACE)?; out.write_all(value)?; out.write_all(NL) } -pub fn trusted_header_signature( +pub(crate) fn trusted_header_signature( name: &[u8], value: &git_actor::SignatureRef<'_>, mut out: impl io::Write, @@ -50,14 +66,14 @@ pub fn trusted_header_signature( out.write_all(NL) } -pub fn trusted_header_id(name: &[u8], value: &git_hash::ObjectId, mut out: impl io::Write) -> io::Result<()> { +pub(crate) fn trusted_header_id(name: &[u8], value: &git_hash::ObjectId, mut out: impl io::Write) -> io::Result<()> { out.write_all(name)?; - out.write_all(&SPACE[..])?; + out.write_all(SPACE)?; value.write_hex_to(&mut out)?; - out.write_all(&NL[..]) + out.write_all(NL) } -pub fn header_field(name: &[u8], value: &[u8], out: impl io::Write) -> io::Result<()> { +pub(crate) fn header_field(name: &[u8], value: &[u8], out: impl io::Write) -> io::Result<()> { if value.is_empty() { return Err(Error::EmptyValue.into()); } diff --git a/git-object/src/lib.rs b/git-object/src/lib.rs index 572ab6cb69c..8c0ef553fe4 100644 --- a/git-object/src/lib.rs +++ b/git-object/src/lib.rs @@ -23,7 +23,7 @@ mod blob; mod traits; pub use traits::WriteTo; -mod encode; +pub mod encode; pub(crate) mod parse; /// @@ -228,79 +228,122 @@ impl Tree { } /// -#[cfg(feature = "verbose-object-parsing-errors")] pub mod decode { - use crate::bstr::{BString, ByteSlice}; + #[cfg(feature = "verbose-object-parsing-errors")] + mod _decode { + use crate::bstr::{BString, ByteSlice}; - /// The type to be used for parse errors. - pub type ParseError<'a> = nom::error::VerboseError<&'a [u8]>; - /// The owned type to be used for parse errors. - pub type ParseErrorOwned = nom::error::VerboseError; + /// The type to be used for parse errors. + pub type ParseError<'a> = nom::error::VerboseError<&'a [u8]>; + /// The owned type to be used for parse errors. + pub type ParseErrorOwned = nom::error::VerboseError; - /// A type to indicate errors during parsing and to abstract away details related to `nom`. - #[derive(Debug, Clone)] - pub struct Error { - /// The actual error - pub inner: ParseErrorOwned, - } + /// A type to indicate errors during parsing and to abstract away details related to `nom`. + #[derive(Debug, Clone)] + pub struct Error { + /// The actual error + pub inner: ParseErrorOwned, + } - impl<'a> From>> for Error { - fn from(v: nom::Err>) -> Self { - Error { - inner: match v { - nom::Err::Error(err) | nom::Err::Failure(err) => nom::error::VerboseError { - errors: err - .errors - .into_iter() - .map(|(i, v)| (i.as_bstr().to_owned(), v)) - .collect(), + impl<'a> From>> for Error { + fn from(v: nom::Err>) -> Self { + Error { + inner: match v { + nom::Err::Error(err) | nom::Err::Failure(err) => nom::error::VerboseError { + errors: err + .errors + .into_iter() + .map(|(i, v)| (i.as_bstr().to_owned(), v)) + .collect(), + }, + nom::Err::Incomplete(_) => unreachable!("we don't have streaming parsers"), }, - nom::Err::Incomplete(_) => unreachable!("we don't have streaming parsers"), - }, + } } } - } - impl std::fmt::Display for Error { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - self.inner.fmt(f) + impl std::fmt::Display for Error { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + self.inner.fmt(f) + } } } - impl std::error::Error for Error {} -} + /// + #[cfg(not(feature = "verbose-object-parsing-errors"))] + mod _decode { + /// The type to be used for parse errors, discards everything and is zero size + pub type ParseError<'a> = (); + /// The owned type to be used for parse errors, discards everything and is zero size + pub type ParseErrorOwned = (); -/// -#[cfg(not(feature = "verbose-object-parsing-errors"))] -pub mod decode { - /// The type to be used for parse errors, discards everything and is zero size - pub type ParseError<'a> = (); - /// The owned type to be used for parse errors, discards everything and is zero size - pub type ParseErrorOwned = (); + /// A type to indicate errors during parsing and to abstract away details related to `nom`. + #[derive(Debug, Clone)] + pub struct Error { + /// The actual error + pub inner: ParseErrorOwned, + } - /// A type to indicate errors during parsing and to abstract away details related to `nom`. - #[derive(Debug, Clone)] - pub struct Error { - /// The actual error - pub inner: ParseErrorOwned, - } + impl<'a> From>> for Error { + fn from(v: nom::Err>) -> Self { + Error { + inner: match v { + nom::Err::Error(err) | nom::Err::Failure(err) => err, + nom::Err::Incomplete(_) => unreachable!("we don't have streaming parsers"), + }, + } + } + } - impl<'a> From>> for Error { - fn from(v: nom::Err>) -> Self { - Error { - inner: match v { - nom::Err::Error(err) | nom::Err::Failure(err) => err, - nom::Err::Incomplete(_) => unreachable!("we don't have streaming parsers"), - }, + impl std::fmt::Display for Error { + fn fmt(&self, _f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + Ok(()) } } } + pub use _decode::{Error, ParseError, ParseErrorOwned}; + impl std::error::Error for Error {} - impl std::fmt::Display for Error { - fn fmt(&self, _f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - Ok(()) + use quick_error::quick_error; + quick_error! { + /// Returned by [`loose_header()`] + #[derive(Debug)] + #[allow(missing_docs)] + pub enum LooseHeaderDecodeError { + ParseIntegerError( + source: btoi::ParseIntegerError, + message: &'static str, + number: Vec + ) { + display("{}: {:?}", message, std::str::from_utf8(number)) + } + InvalidHeader(s: &'static str) { + display("{}", s) + } + ObjectHeader(err: super::kind::Error) { from() } } } - impl std::error::Error for Error {} + use bstr::ByteSlice; + /// Decode a loose object header, being ` \0`, returns + /// ([`kind`](super::Kind), `size`, `consumed bytes`). + /// + /// `size` is the uncompressed size of the payload in bytes. + pub fn loose_header(input: &[u8]) -> Result<(super::Kind, usize, usize), LooseHeaderDecodeError> { + use LooseHeaderDecodeError::*; + let kind_end = input.find_byte(0x20).ok_or(InvalidHeader("Expected ' '"))?; + let kind = super::Kind::from_bytes(&input[..kind_end])?; + let size_end = input + .find_byte(0x0) + .ok_or(InvalidHeader("Did not find 0 byte in header"))?; + let size_bytes = &input[kind_end + 1..size_end]; + let size = btoi::btoi(size_bytes).map_err(|source| { + ParseIntegerError( + source, + "Object size in header could not be parsed", + size_bytes.to_owned(), + ) + })?; + Ok((kind, size, size_end + 1)) + } } diff --git a/git-object/src/object/mod.rs b/git-object/src/object/mod.rs index b1b7c58d027..f7f827cf2b4 100644 --- a/git-object/src/object/mod.rs +++ b/git-object/src/object/mod.rs @@ -20,6 +20,16 @@ mod write { } } + fn size(&self) -> usize { + use crate::ObjectRef::*; + match self { + Tree(v) => v.size(), + Blob(v) => v.size(), + Commit(v) => v.size(), + Tag(v) => v.size(), + } + } + fn kind(&self) -> Kind { self.kind() } @@ -38,6 +48,16 @@ mod write { } } + fn size(&self) -> usize { + use crate::Object::*; + match self { + Tree(v) => v.size(), + Blob(v) => v.size(), + Commit(v) => v.size(), + Tag(v) => v.size(), + } + } + fn kind(&self) -> Kind { self.kind() } @@ -142,9 +162,26 @@ impl Object { } } +use crate::decode::{loose_header, Error as DecodeError, LooseHeaderDecodeError}; use crate::{BlobRef, CommitRef, Kind, ObjectRef, TagRef, TreeRef}; +use quick_error::quick_error; + +quick_error! { + #[derive(Debug)] + #[allow(missing_docs)] + pub enum LooseDecodeError { + InvalidHeader(err: LooseHeaderDecodeError) { from() } + InvalidContent(err: DecodeError) { from() } + } +} impl<'a> ObjectRef<'a> { + /// Deserialize an object from a loose serialisation + pub fn from_loose(data: &'a [u8]) -> Result, LooseDecodeError> { + let (kind, size, offset) = loose_header(data)?; + Ok(Self::from_bytes(kind, &data[offset..offset + size])?) + } + /// Deserialize an object of `kind` from the given `data`. pub fn from_bytes(kind: Kind, data: &'a [u8]) -> Result, crate::decode::Error> { Ok(match kind { diff --git a/git-object/src/tag/write.rs b/git-object/src/tag/write.rs index 527f1649e77..f91a3cf7e11 100644 --- a/git-object/src/tag/write.rs +++ b/git-object/src/tag/write.rs @@ -47,6 +47,32 @@ impl crate::WriteTo for Tag { Ok(()) } + fn size(&self) -> usize { + b"object".len() + + 1 + + self.target.kind().len_in_hex() + + 1 + + b"type".len() + + 1 + + self.target_kind.as_bytes().len() + + 1 + + b"tag".len() + + 1 + + self.name.len() + + 1 + + self + .tagger + .as_ref() + .map(|t| b"tagger".len() + 1 + t.size() + 1) + .unwrap_or(0) + + if self.message.is_empty() { + 0 + } else { + self.message.len() + 1 + } + + self.pgp_signature.as_ref().map(|m| m.len() + 1).unwrap_or(0) + } + fn kind(&self) -> Kind { Kind::Tag } @@ -72,6 +98,32 @@ impl<'a> crate::WriteTo for TagRef<'a> { Ok(()) } + fn size(&self) -> usize { + b"object".len() + + 1 + + self.target().kind().len_in_hex() + + 1 + + b"type".len() + + 1 + + self.target_kind.as_bytes().len() + + 1 + + b"tag".len() + + 1 + + self.name.len() + + 1 + + self + .tagger + .as_ref() + .map(|t| b"tagger".len() + 1 + t.size() + 1) + .unwrap_or(0) + + if self.message.is_empty() { + 0 + } else { + self.message.len() + 1 + } + + self.pgp_signature.as_ref().map(|m| m.len() + 1).unwrap_or(0) + } + fn kind(&self) -> Kind { Kind::Tag } diff --git a/git-object/src/traits.rs b/git-object/src/traits.rs index 9b828769472..193cd78c348 100644 --- a/git-object/src/traits.rs +++ b/git-object/src/traits.rs @@ -9,6 +9,20 @@ pub trait WriteTo { /// Returns the type of this object. fn kind(&self) -> Kind; + + /// Returns the size of this object's representation (the amount + /// of data which would be written by [`write_to`](Self::write_to)). + /// + /// [`size`](Self::size)'s value has no bearing on the validity of + /// the object, as such it's possible for [`size`](Self::size) to + /// return a sensible value but [`write_to`](Self::write_to) to + /// fail because the object was not actually valid in some way. + fn size(&self) -> usize; + + /// Returns a loose object header based on the object's data + fn loose_header(&self) -> smallvec::SmallVec<[u8; 28]> { + crate::encode::loose_header(self.kind(), self.size()) + } } impl WriteTo for &T @@ -19,6 +33,10 @@ where ::write_to(self, out) } + fn size(&self) -> usize { + ::size(self) + } + fn kind(&self) -> Kind { ::kind(self) } diff --git a/git-object/src/tree/write.rs b/git-object/src/tree/write.rs index 08f53aec818..8c19b944773 100644 --- a/git-object/src/tree/write.rs +++ b/git-object/src/tree/write.rs @@ -54,6 +54,13 @@ impl crate::WriteTo for Tree { Ok(()) } + fn size(&self) -> usize { + self.entries + .iter() + .map(|Entry { mode, filename, oid }| mode.as_bytes().len() + 1 + filename.len() + 1 + oid.as_bytes().len()) + .sum() + } + fn kind(&self) -> Kind { Kind::Tree } @@ -87,6 +94,15 @@ impl<'a> crate::WriteTo for TreeRef<'a> { Ok(()) } + fn size(&self) -> usize { + self.entries + .iter() + .map(|EntryRef { mode, filename, oid }| { + mode.as_bytes().len() + 1 + filename.len() + 1 + oid.as_bytes().len() + }) + .sum() + } + fn kind(&self) -> Kind { Kind::Tree } diff --git a/git-object/tests/encode/mod.rs b/git-object/tests/encode/mod.rs index ddf51273fc8..f524880b37b 100644 --- a/git-object/tests/encode/mod.rs +++ b/git-object/tests/encode/mod.rs @@ -1,14 +1,26 @@ +use quick_error::quick_error; +quick_error! { + /// Because the TryFrom implementations don't return proper errors + /// on failure + #[derive(Debug)] + enum Error { + TryFromError {} + } +} + macro_rules! round_trip { ($owned:ty, $borrowed:ty, $( $files:literal ), +) => { #[test] fn round_trip() -> Result<(), Box> { + use std::convert::TryFrom; + use std::io::Write; use crate::fixture_bytes; - use git_object::{ObjectRef, Object}; + use git_object::{ObjectRef, Object, WriteTo}; use bstr::ByteSlice; + for input in &[ $( $files ),* ] { - use git_object::WriteTo; let input = fixture_bytes(input); // Test the parse->borrowed->owned->write chain for an object kind let mut output = Vec::new(); @@ -31,6 +43,17 @@ macro_rules! round_trip { output.clear(); item.write_to(&mut output)?; assert_eq!(output.as_bstr(), input.as_bstr()); + + // Test the loose serialisation -> parse chain for an object kind + let item = <$borrowed>::from_bytes(&input)?; + output.clear(); + // serialise to a tagged loose object + let w = &mut output; + w.write_all(&item.loose_header())?; + item.write_to(w)?; + let parsed = ObjectRef::from_loose(&output)?; + let item2 = <$borrowed>::try_from(parsed).or(Err(super::Error::TryFromError))?; + assert_eq!(item2, item); } Ok(()) } diff --git a/git-object/tests/loose.rs b/git-object/tests/loose.rs new file mode 100644 index 00000000000..2bf113c0e0b --- /dev/null +++ b/git-object/tests/loose.rs @@ -0,0 +1,20 @@ +use bstr::ByteSlice; +use git_object::{decode, encode, Kind}; + +#[test] +fn all() -> Result<(), Box> { + for (kind, size, expected) in &[ + (Kind::Tree, 1234, "tree 1234\0".as_bytes()), + (Kind::Blob, 0, b"blob 0\0"), + (Kind::Commit, 24241, b"commit 24241\0"), + (Kind::Tag, 9999999999, b"tag 9999999999\0"), + ] { + let buf = encode::loose_header(*kind, *size); + assert_eq!(buf.as_bstr(), expected.as_bstr()); + let (actual_kind, actual_size, actual_read) = decode::loose_header(&buf)?; + assert_eq!(actual_kind, *kind); + assert_eq!(actual_size, *size); + assert_eq!(actual_read, buf.len()); + } + Ok(()) +} diff --git a/git-odb/src/store/loose/find.rs b/git-odb/src/store/loose/find.rs index c32c409ffab..edf63027c16 100644 --- a/git-odb/src/store/loose/find.rs +++ b/git-odb/src/store/loose/find.rs @@ -1,7 +1,8 @@ -use std::{convert::TryInto, fs, io::Read, path::PathBuf}; +use std::{fs, io::Read, path::PathBuf}; use git_features::zlib; -use git_pack::{data, loose::object::header}; +use git_object::decode; +use git_pack::data; use crate::store::loose::{sha1_path, Store, HEADER_READ_UNCOMPRESSED_BYTES}; @@ -15,7 +16,7 @@ pub enum Error { path: PathBuf, }, #[error(transparent)] - Decode(#[from] header::Error), + Decode(#[from] decode::LooseHeaderDecodeError), #[error("Could not {action} data at '{path}'")] Io { source: std::io::Error, @@ -102,8 +103,8 @@ impl Store { ); let decompressed_start = bytes_read; - let (kind, size, header_size) = header::decode(&buf[decompressed_start..decompressed_start + consumed_out])?; - let size: usize = size.try_into().expect("object size fits into machine architecture"); + let (kind, size, header_size) = + decode::loose_header(&buf[decompressed_start..decompressed_start + consumed_out])?; if status == zlib::Status::StreamEnd { let decompressed_body_bytes_sans_header = diff --git a/git-odb/src/store/loose/write.rs b/git-odb/src/store/loose/write.rs index 3b346cf39ab..c7b2ff9a8a3 100644 --- a/git-odb/src/store/loose/write.rs +++ b/git-odb/src/store/loose/write.rs @@ -1,6 +1,7 @@ -use std::{fs, io, io::Write, path::PathBuf}; +use std::{convert::TryInto, fs, io, io::Write, path::PathBuf}; use git_features::{hash, zlib::stream::deflate}; +use git_object::WriteTo; use tempfile::NamedTempFile; use super::Store; @@ -28,6 +29,22 @@ pub enum Error { impl crate::write::Write for Store { type Error = Error; + fn write(&self, object: impl WriteTo, hash: git_hash::Kind) -> Result { + let mut to = self.dest(hash)?; + to.write_all(&object.loose_header()).map_err(|err| Error::Io { + source: err, + message: "write header to tempfile in", + path: self.path.to_owned(), + })?; + object.write_to(&mut to).map_err(|err| Error::Io { + source: err, + message: "stream all data into tempfile in", + path: self.path.to_owned(), + })?; + to.flush()?; + self.finalize_object(to) + } + /// Write the given buffer in `from` to disk in one syscall at best. /// /// This will cost at least 4 IO operations. @@ -37,18 +54,21 @@ impl crate::write::Write for Store { from: &[u8], hash: git_hash::Kind, ) -> Result { - match hash { - git_hash::Kind::Sha1 => { - let mut to = self.write_header(kind, from.len() as u64, hash)?; - to.write_all(from).map_err(|err| Error::Io { - source: err, - message: "stream all data into tempfile in", - path: self.path.to_owned(), - })?; - to.flush()?; - self.finalize_object(to) - } - } + let mut to = self.dest(hash)?; + to.write_all(&git_object::encode::loose_header(kind, from.len())) + .map_err(|err| Error::Io { + source: err, + message: "write header to tempfile in", + path: self.path.to_owned(), + })?; + + to.write_all(from).map_err(|err| Error::Io { + source: err, + message: "stream all data into tempfile in", + path: self.path.to_owned(), + })?; + to.flush()?; + self.finalize_object(to) } /// Write the given stream in `from` to disk with at least one syscall. @@ -61,45 +81,39 @@ impl crate::write::Write for Store { mut from: impl io::Read, hash: git_hash::Kind, ) -> Result { - match hash { - git_hash::Kind::Sha1 => { - let mut to = self.write_header(kind, size, hash)?; - io::copy(&mut from, &mut to).map_err(|err| Error::Io { - source: err, - message: "stream all data into tempfile in", - path: self.path.to_owned(), - })?; - to.flush()?; - self.finalize_object(to) - } - } + let mut to = self.dest(hash)?; + to.write_all(&git_object::encode::loose_header( + kind, + size.try_into().expect("object size to fit into usize"), + )) + .map_err(|err| Error::Io { + source: err, + message: "write header to tempfile in", + path: self.path.to_owned(), + })?; + + io::copy(&mut from, &mut to).map_err(|err| Error::Io { + source: err, + message: "stream all data into tempfile in", + path: self.path.to_owned(), + })?; + to.flush()?; + self.finalize_object(to) } } type CompressedTempfile = deflate::Write; impl Store { - fn write_header( - &self, - kind: git_object::Kind, - size: u64, - hash: git_hash::Kind, - ) -> Result, Error> { - let mut to = hash::Write::new( + fn dest(&self, hash: git_hash::Kind) -> Result, Error> { + Ok(hash::Write::new( deflate::Write::new(NamedTempFile::new_in(&self.path).map_err(|err| Error::Io { source: err, message: "create named temp file in", path: self.path.to_owned(), })?), hash, - ); - - git_pack::loose::object::header::encode(kind, size, &mut to).map_err(|err| Error::Io { - source: err, - message: "write header to tempfile in", - path: self.path.to_owned(), - })?; - Ok(to) + )) } fn finalize_object( diff --git a/git-odb/src/store/sink.rs b/git-odb/src/store/sink.rs index 88e3911834e..76cbb222d8b 100644 --- a/git-odb/src/store/sink.rs +++ b/git-odb/src/store/sink.rs @@ -41,8 +41,10 @@ impl crate::write::Write for Sink { mut from: impl io::Read, hash: git_hash::Kind, ) -> Result { + let mut size = size.try_into().expect("object size to fit into usize"); use git_features::hash::Sha1; let mut buf = [0u8; 8096]; + let header = git_object::encode::loose_header(kind, size); let possibly_compress = |buf: &[u8]| -> io::Result<()> { if let Some(compressor) = self.compressor.as_ref() { @@ -53,11 +55,9 @@ impl crate::write::Write for Sink { match hash { git_hash::Kind::Sha1 => { let mut hasher = Sha1::default(); - let header_len = git_pack::loose::object::header::encode(kind, size, &mut buf[..])?; - hasher.update(&buf[..header_len]); - possibly_compress(&buf[..header_len])?; + hasher.update(&header); + possibly_compress(&header)?; - let mut size: usize = size.try_into().expect("object size to fit into usize"); while size != 0 { let bytes = size.min(buf.len()); from.read_exact(&mut buf[..bytes])?; diff --git a/git-pack/src/data/object.rs b/git-pack/src/data/object.rs index 4de8ae9baf4..610d2556557 100644 --- a/git-pack/src/data/object.rs +++ b/git-pack/src/data/object.rs @@ -4,6 +4,8 @@ use git_object::{BlobRef, CommitRef, CommitRefIter, ObjectRef, TagRef, TagRefIte use crate::data::Object; +// FIXME: this entire thing should be in git-object! (probably) with +// the possible exception of checksum checking impl<'a> Object<'a> { /// Constructs a new data object from `kind` and `data`. pub fn new(kind: git_object::Kind, data: &'a [u8]) -> Object<'a> { @@ -57,12 +59,8 @@ impl<'a> Object<'a> { /// Types supporting object hash verification pub mod verify { - use std::io; - use git_features::hash; - use crate::loose; - /// Returned by [`crate::data::Object::verify_checksum()`] #[derive(thiserror::Error, Debug)] #[allow(missing_docs)] @@ -80,12 +78,11 @@ pub mod verify { /// hash of `self`. pub fn verify_checksum(&self, desired: impl AsRef) -> Result<(), Error> { let desired = desired.as_ref(); - let mut sink = hash::Write::new(io::sink(), desired.kind()); - - loose::object::header::encode(self.kind, self.data.len() as u64, &mut sink).expect("hash to always work"); - sink.hash.update(self.data); + let mut hasher = hash::hasher(desired.kind()); + hasher.update(&git_object::encode::loose_header(self.kind, self.data.len())); + hasher.update(self.data); - let actual_id = git_hash::ObjectId::from(sink.hash.digest()); + let actual_id = git_hash::ObjectId::from(hasher.digest()); if desired != actual_id { return Err(Error::ChecksumMismatch { desired: desired.into(), diff --git a/git-pack/src/index/traverse/indexed.rs b/git-pack/src/index/traverse/indexed.rs index 50d761c93b9..8c6a58a871a 100644 --- a/git-pack/src/index/traverse/indexed.rs +++ b/git-pack/src/index/traverse/indexed.rs @@ -78,14 +78,14 @@ impl index::File { thread_limit, &should_interrupt, pack.pack_end() as u64, - || (new_processor(), [0u8; 64]), + || new_processor(), |data, progress, Context { entry: pack_entry, entry_end, decompressed: bytes, - state: (ref mut processor, ref mut header_buf), + state: ref mut processor, level, }| { let object_kind = pack_entry.header.as_kind().expect("non-delta object"); @@ -99,7 +99,6 @@ impl index::File { object_kind, bytes, progress, - header_buf, &data.index_entry, || { // TODO: Fix this - we overwrite the header of 'data' which also changes the computed entry size, diff --git a/git-pack/src/index/traverse/mod.rs b/git-pack/src/index/traverse/mod.rs index 7edfc84651a..cd7d89ecb1a 100644 --- a/git-pack/src/index/traverse/mod.rs +++ b/git-pack/src/index/traverse/mod.rs @@ -159,7 +159,6 @@ impl index::File { cache: &mut C, buf: &mut Vec, progress: &mut P, - header_buf: &mut [u8; 64], index_entry: &crate::index::Entry, processor: &mut impl FnMut(git_object::Kind, &[u8], &index::Entry, &mut P) -> Result<(), E>, ) -> Result> @@ -194,7 +193,6 @@ impl index::File { object_kind, buf, progress, - header_buf, index_entry, || pack.entry_crc32(index_entry.pack_offset, entry_len), processor, @@ -209,7 +207,6 @@ fn process_entry( object_kind: git_object::Kind, decompressed: &[u8], progress: &mut P, - header_buf: &mut [u8; 64], index_entry: &crate::index::Entry, pack_entry_crc32: impl FnOnce() -> u32, processor: &mut impl FnMut(git_object::Kind, &[u8], &index::Entry, &mut P) -> Result<(), E>, @@ -219,11 +216,8 @@ where E: std::error::Error + Send + Sync + 'static, { if check.object_checksum() { - let header_size = - crate::loose::object::header::encode(object_kind, decompressed.len() as u64, &mut header_buf[..]) - .expect("header buffer to be big enough"); let mut hasher = git_features::hash::Sha1::default(); - hasher.update(&header_buf[..header_size]); + hasher.update(&git_object::encode::loose_header(object_kind, decompressed.len())); hasher.update(decompressed); let actual_oid = git_hash::ObjectId::new_sha1(hasher.digest()); diff --git a/git-pack/src/index/traverse/with_lookup.rs b/git-pack/src/index/traverse/with_lookup.rs index e113b78b401..7c30c030d7b 100644 --- a/git-pack/src/index/traverse/with_lookup.rs +++ b/git-pack/src/index/traverse/with_lookup.rs @@ -126,7 +126,6 @@ impl index::File { ))), ); let mut stats = Vec::with_capacity(entries.len()); - let mut header_buf = [0u8; 64]; for index_entry in entries.iter() { let result = self.decode_and_process_entry( check, @@ -134,7 +133,6 @@ impl index::File { cache, buf, progress, - &mut header_buf, index_entry, processor, ); diff --git a/git-pack/src/index/write/mod.rs b/git-pack/src/index/write/mod.rs index 194d3dfc6bc..92b25c627c0 100644 --- a/git-pack/src/index/write/mod.rs +++ b/git-pack/src/index/write/mod.rs @@ -3,10 +3,7 @@ use std::{convert::TryInto, io, sync::atomic::AtomicBool}; pub use error::Error; use git_features::progress::{self, Progress}; -use crate::{ - cache::delta::{traverse::Context, Tree}, - loose, -}; +use crate::cache::delta::{traverse::Context, Tree}; mod encode; mod error; @@ -225,11 +222,10 @@ fn modify_base( hash: git_hash::Kind, ) { fn compute_hash(kind: git_object::Kind, bytes: &[u8], hash_kind: git_hash::Kind) -> git_hash::ObjectId { - let mut write = git_features::hash::Write::new(io::sink(), hash_kind); - loose::object::header::encode(kind, bytes.len() as u64, &mut write) - .expect("write to sink and hash cannot fail"); - write.hash.update(bytes); - git_hash::ObjectId::from(write.hash.digest()) + let mut hasher = git_features::hash::hasher(hash_kind); + hasher.update(&git_object::encode::loose_header(kind, bytes.len())); + hasher.update(bytes); + git_hash::ObjectId::from(hasher.digest()) } let object_kind = pack_entry.header.as_kind().expect("base object as source of iteration"); diff --git a/git-pack/src/lib.rs b/git-pack/src/lib.rs old mode 100644 new mode 100755 index 25c49ad55c1..7a323270c3d --- a/git-pack/src/lib.rs +++ b/git-pack/src/lib.rs @@ -35,6 +35,3 @@ pub mod data; mod find_traits; /// pub mod index; - -/// -pub mod loose; diff --git a/git-pack/src/loose.rs b/git-pack/src/loose.rs deleted file mode 100644 index b4d9f244675..00000000000 --- a/git-pack/src/loose.rs +++ /dev/null @@ -1,98 +0,0 @@ -/// -pub mod object { - /// - pub mod header { - //! loose object header encoding and decoding. - //! - //! Note that these are still relevant for packs as they are part of the computed hash of any git object, packed or not. - //! It just so happened that loose objects where the first ones used for implementing an object database. - use byteorder::WriteBytesExt; - use git_object as object; - - /// Returned by [`decode()`] - #[derive(thiserror::Error, Debug)] - #[allow(missing_docs)] - pub enum Error { - #[error("{message}: {:?}", std::str::from_utf8(.number))] - ParseIntegerError { - source: btoi::ParseIntegerError, - message: &'static str, - number: Vec, - }, - #[error("{0}")] - InvalidHeader(&'static str), - #[error(transparent)] - ObjectHeader(#[from] object::kind::Error), - } - - /// Decode a loose object header, being ` \0`, returns ([`Kind`][object::Kind], `size`, `consumed bytes`). - /// - /// `size` is the uncompressed size of the payload in bytes. - pub fn decode(input: &[u8]) -> Result<(object::Kind, u64, usize), Error> { - let header_end = input - .iter() - .position(|&b| b == 0) - .ok_or(Error::InvalidHeader("Did not find 0 byte in header"))?; - let header = &input[..header_end]; - let mut split = header.split(|&b| b == b' '); - match (split.next(), split.next()) { - (Some(kind), Some(size)) => Ok(( - object::Kind::from_bytes(kind)?, - btoi::btoi(size).map_err(|source| Error::ParseIntegerError { - message: "Object size in header could not be parsed", - number: size.to_owned(), - source, - })?, - header_end + 1, // account for 0 byte - )), - _ => Err(Error::InvalidHeader("Expected ' '")), - } - } - - fn kind_to_bytes_with_space(object: object::Kind) -> &'static [u8] { - use object::Kind::*; - match object { - Tree => b"tree ", - Blob => b"blob ", - Commit => b"commit ", - Tag => b"tag ", - } - } - - /// Encode the objects `Kind` and `size` into a format suitable for use with [`decode()`]. - pub fn encode(object: object::Kind, size: u64, mut out: impl std::io::Write) -> Result { - let mut written = out.write(kind_to_bytes_with_space(object))?; - written += itoa::write(&mut out, size)?; - out.write_u8(0)?; - Ok(written + 1) - } - - #[cfg(test)] - mod tests { - mod encode_decode_round_trip { - use git_object::bstr::ByteSlice; - - use crate::loose::object::header; - - #[test] - fn all() -> Result<(), Box> { - let mut buf = [0; 20]; - for (kind, size, expected) in &[ - (git_object::Kind::Tree, 1234, &b"tree 1234\0"[..]), - (git_object::Kind::Blob, 0, b"blob 0\0"), - (git_object::Kind::Commit, 24241, b"commit 24241\0"), - (git_object::Kind::Tag, 9999999999, b"tag 9999999999\0"), - ] { - let written = header::encode(*kind, *size, &mut buf[..])?; - assert_eq!(buf[..written].as_bstr(), expected.as_bstr()); - let (actual_kind, actual_size, actual_read) = header::decode(&buf[..written])?; - assert_eq!(actual_kind, *kind); - assert_eq!(actual_size, *size); - assert_eq!(actual_read, written); - } - Ok(()) - } - } - } - } -}