Skip to content

Commit

Permalink
Fully implement --encode and --re-encode flags
Browse files Browse the repository at this point in the history
  • Loading branch information
Byron committed Jul 17, 2020
1 parent 6bed200 commit a7cfac8
Show file tree
Hide file tree
Showing 8 changed files with 100 additions and 24 deletions.
2 changes: 1 addition & 1 deletion git-object/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

use bstr::{BStr, BString, ByteSlice};

/// For convenience
/// For convenience to allow using `bstr` without adding it to own cargo manifest
pub use bstr;

pub mod borrowed;
Expand Down
2 changes: 1 addition & 1 deletion git-object/src/owned/tree.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ use std::io;
quick_error! {
#[derive(Debug)]
pub enum Error {
NewlineInFilename(name: BString){
NewlineInFilename(name: BString) {
display("Newlines are invalid in file paths: {:?}", name)
}
}
Expand Down
42 changes: 39 additions & 3 deletions git-odb/src/pack/index/verify.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ use crate::{
pack::{cache, data::decode},
};
use git_features::progress::{self, Progress};
use git_object::{owned, SHA1_SIZE};
use git_object::{borrowed, bstr::BString, owned, SHA1_SIZE};
use quick_error::quick_error;
use smallvec::alloc::collections::BTreeMap;
use std::time::Instant;
Expand All @@ -24,6 +24,16 @@ quick_error! {
display("Object {} at offset {} could not be decoded", id, offset)
cause(err)
}
ObjectDecode(err: borrowed::Error, kind: git_object::Kind, oid: owned::Id) {
display("{} object {} could not be decoded", kind, oid)
cause(err)
}
ObjectEncodeMismatch(kind: git_object::Kind, oid: owned::Id, expected: BString, actual: BString) {
display("{} object {} could not be decoded, wanted\n{}\ngot\n{}", kind, oid, expected, actual)
}
ObjectEncode(err: std::io::Error) {
from()
}
PackMismatch { expected: owned::Id, actual: owned::Id } {
display("The packfiles checksum didn't match the index file checksum: expected {}, got {}", expected, actual)
}
Expand Down Expand Up @@ -244,6 +254,7 @@ impl index::File {
(
make_cache(),
Vec::with_capacity(2048),
Vec::with_capacity(2048),
reduce_progress.lock().unwrap().add_child(format!("thread {}", index)),
)
};
Expand All @@ -253,9 +264,12 @@ impl index::File {
input_chunks,
thread_limit,
state_per_thread,
|entries: &[index::Entry], (cache, buf, progress)| -> Result<Vec<decode::Outcome>, Error> {
|entries: &[index::Entry],
(cache, buf, ref mut encode_buf, progress)|
-> Result<Vec<decode::Outcome>, Error> {
progress.init(Some(entries.len() as u32), Some("entries"));
let mut stats = Vec::with_capacity(entries.len());
let mut header_buf = [0u8; 64];
for (idx, index_entry) in entries.iter().enumerate() {
let pack_entry = pack.entry(index_entry.pack_offset);
let pack_entry_data_offset = pack_entry.data_offset;
Expand All @@ -275,7 +289,6 @@ impl index::File {
let consumed_input = entry_stats.compressed_size;
stats.push(entry_stats);

let mut header_buf = [0u8; 64];
let header_size =
crate::loose::object::header::encode(object_kind, buf.len(), &mut header_buf[..])
.expect("header buffer to be big enough");
Expand Down Expand Up @@ -305,6 +318,29 @@ impl index::File {
});
}
}
if let Mode::Sha1CRC32Decode | Mode::Sha1CRC32DecodeEncode = mode {
use git_object::Kind::*;
match object_kind {
Tree | Commit | Tag => {
let obj = borrowed::Object::from_bytes(object_kind, buf.as_slice())
.map_err(|err| Error::ObjectDecode(err, object_kind, index_entry.oid))?;
if let Mode::Sha1CRC32DecodeEncode = mode {
let object = owned::Object::from(obj);
encode_buf.clear();
object.write_to(&mut *encode_buf)?;
if encode_buf != buf {
return Err(Error::ObjectEncodeMismatch(
object_kind,
index_entry.oid,
buf.clone().into(),
encode_buf.clone().into(),
));
}
}
}
Blob => {}
};
}
progress.set(idx as u32);
}
Ok(stats)
Expand Down
4 changes: 3 additions & 1 deletion gitoxide-core/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@ use git_odb::pack::{self, index};
use std::str::FromStr;
use std::{io, path::Path};

pub use index::verify::Mode as VerifyMode;

#[derive(Debug, Eq, PartialEq, Hash, Clone, Copy)]
pub enum OutputFormat {
Human,
Expand Down Expand Up @@ -59,7 +61,7 @@ impl Default for Context<Vec<u8>, Vec<u8>> {
Context {
output_statistics: None,
thread_limit: None,
mode: VerifyMode::Sha1CRC32,
mode: index::verify::Mode::Sha1CRC32,
out: Vec::new(),
err: Vec::new(),
}
Expand Down
39 changes: 23 additions & 16 deletions src/plumbing/lean.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,22 +10,6 @@ mod options {
/// print the program version.
pub version: bool,

#[argh(switch)]
/// Decode and parse tags, commits and trees to validate their correctness beyond hashing correctly.
///
/// Malformed objects should not usually occur, but could be injected on purpose or accident.
/// This will reduce overall performance.
pub decode: bool,

#[argh(switch)]
/// Decode and parse tags, commits and trees to validate their correctness, and re-encode them.
///
/// This flag is primarily to test the implementation of encoding, and requires to decode the object first.
/// Encoding an object after decoding it should yield exactly the same bytes.
/// This will reduce overall performance even more, as re-encoding requires to transform zero-copy objects into
/// owned objects, causing plenty of allocation to occour.
pub re_encode: bool,

#[argh(option, short = 't')]
/// the amount of threads to use for some operations.
///
Expand All @@ -46,6 +30,22 @@ mod options {
#[derive(FromArgs, PartialEq, Debug)]
#[argh(subcommand, name = "verify-pack")]
pub struct VerifyPack {
#[argh(switch)]
/// decode and parse tags, commits and trees to validate their correctness beyond hashing correctly.
///
/// Malformed objects should not usually occur, but could be injected on purpose or accident.
/// This will reduce overall performance.
pub decode: bool,

#[argh(switch)]
/// decode and parse tags, commits and trees to validate their correctness, and re-encode them.
///
/// This flag is primarily to test the implementation of encoding, and requires to decode the object first.
/// Encoding an object after decoding it should yield exactly the same bytes.
/// This will reduce overall performance even more, as re-encoding requires to transform zero-copy objects into
/// owned objects, causing plenty of allocation to occour.
pub re_encode: bool,

/// output statistical information about the pack
#[argh(switch, short = 's')]
pub statistics: bool,
Expand Down Expand Up @@ -97,6 +97,8 @@ pub fn main() -> Result<()> {
path,
verbose,
statistics,
decode,
re_encode,
}) => {
let (_handle, progress) = prepare(verbose, "verify-pack");
core::verify_pack_or_pack_index(
Expand All @@ -109,6 +111,11 @@ pub fn main() -> Result<()> {
None
},
thread_limit,
mode: match (decode, re_encode) {
(true, false) => core::VerifyMode::Sha1CRC32Decode,
(true, true) | (false, true) => core::VerifyMode::Sha1CRC32DecodeEncode,
(false, false) => core::VerifyMode::Sha1CRC32,
},
out: stdout(),
err: stderr(),
},
Expand Down
27 changes: 26 additions & 1 deletion src/plumbing/pretty.rs
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,22 @@ mod options {
#[structopt(long, conflicts_with("verbose"))]
progress: bool,

#[structopt(long, conflicts_with("re-encode"))]
/// decode and parse tags, commits and trees to validate their correctness beyond hashing correctly.
///
/// Malformed objects should not usually occur, but could be injected on purpose or accident.
/// This will reduce overall performance.
decode: bool,

#[structopt(long)]
/// decode and parse tags, commits and trees to validate their correctness, and re-encode them.
///
/// This flag is primarily to test the implementation of encoding, and requires to decode the object first.
/// Encoding an object after decoding it should yield exactly the same bytes.
/// This will reduce overall performance even more, as re-encoding requires to transform zero-copy objects into
/// owned objects, causing plenty of allocation to occour.
re_encode: bool,

/// the progress TUI will stay up even though the work is already completed.
///
/// Use this to be able to read progress messages or additional information visible in the TUI log pane.
Expand Down Expand Up @@ -160,6 +176,8 @@ pub fn main() -> Result<()> {
verbose,
progress,
format,
decode,
re_encode,
progress_keep_open,
statistics,
} => prepare_and_run(
Expand All @@ -168,12 +186,19 @@ pub fn main() -> Result<()> {
progress,
progress_keep_open,
move |progress, out, err| {
let mode = match (decode, re_encode) {
(true, false) => core::VerifyMode::Sha1CRC32Decode,
(true, true) | (false, true) => core::VerifyMode::Sha1CRC32DecodeEncode,
(false, false) => core::VerifyMode::Sha1CRC32,
};
let output_statistics = if statistics { Some(format) } else { None };
core::verify_pack_or_pack_index(
path,
progress,
core::Context {
output_statistics,
thread_limit,
output_statistics: if statistics { Some(format) } else { None },
mode,
out,
err,
},
Expand Down
2 changes: 1 addition & 1 deletion tasks.md
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
* [x] tree
* [x] commit
* **pack verify**
* add '--some-flag' to run every non-blob through a decode/encode cycle to see if all objects can be parsed
* [ ] add '--some-flag' to run every non-blob through a decode/encode cycle to see if all objects can be parsed
* add that to the stress test
* **plumbing - explode pack**
* [ ] write loose object
Expand Down
6 changes: 6 additions & 0 deletions tests/stateless-journey.sh
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,12 @@ title "CLI ${kind}"
expect_run $SUCCESSFULLY "$exe_plumbing" verify-pack --decode "$PACK_INDEX_FILE"
}
)
(with "decode"
it "verifies the pack index successfully and with desired output, and re-encodes all objects" && {
WITH_SNAPSHOT="$snapshot/plumbing-verify-pack-index-success" \
expect_run $SUCCESSFULLY "$exe_plumbing" verify-pack --re-encode "$PACK_INDEX_FILE"
}
)
if test "$kind" = "max"; then
(with "statistics (JSON)"
it "verifies the pack index successfully and with desired output" && {
Expand Down

0 comments on commit a7cfac8

Please sign in to comment.