Commit: refactor
Byron committed Apr 25, 2021
1 parent b4027e3 commit ca98221
Showing 17 changed files with 85 additions and 80 deletions.
2 changes: 1 addition & 1 deletion git-odb/src/pack/bundle/write/error.rs
@@ -6,7 +6,7 @@ pub enum Error {
#[error("An IO error occurred when reading the pack or creating a temporary file")]
Io(#[from] io::Error),
#[error(transparent)]
PackIter(#[from] pack::data::iter::Error),
PackIter(#[from] pack::data::input::Error),
#[error("Could not move a temporary file into its desired place")]
PeristError(#[from] tempfile::PersistError),
#[error(transparent)]
10 changes: 5 additions & 5 deletions git-odb/src/pack/bundle/write/mod.rs
@@ -48,10 +48,10 @@ impl pack::Bundle {
// However, this is exactly what's happening in the ZipReader implementation that is eventually used.
// The performance impact of this is probably negligible, compared to all the other work that is done anyway :D.
let buffered_pack = io::BufReader::new(pack);
let pack_entries_iter = pack::data::Iter::new_from_header(
let pack_entries_iter = pack::data::EntriesFromBytesIter::new_from_header(
buffered_pack,
options.iteration_mode,
pack::data::iter::CompressedBytesMode::Crc32,
pack::data::input::CompressedBytesMode::Crc32,
)?;
let pack_kind = pack_entries_iter.kind();
let (outcome, data_path, index_path) =
@@ -91,10 +91,10 @@ impl pack::Bundle {
};
let eight_pages = 4096 * 8;
let buffered_pack = io::BufReader::with_capacity(eight_pages, pack);
let pack_entries_iter = pack::data::Iter::new_from_header(
let pack_entries_iter = pack::data::EntriesFromBytesIter::new_from_header(
buffered_pack,
options.iteration_mode,
pack::data::iter::CompressedBytesMode::Crc32,
pack::data::input::CompressedBytesMode::Crc32,
)?;
let pack_kind = pack_entries_iter.kind();
let num_objects = pack_entries_iter.size_hint().0;
@@ -122,7 +122,7 @@ impl pack::Bundle {
}: Options,
data_file: Arc<parking_lot::Mutex<NamedTempFile>>,
data_path: PathBuf,
pack_entries_iter: impl Iterator<Item = Result<pack::data::iter::Entry, pack::data::iter::Error>>,
pack_entries_iter: impl Iterator<Item = Result<pack::data::input::Entry, pack::data::input::Error>>,
) -> Result<(pack::index::write::Outcome, Option<PathBuf>, Option<PathBuf>), Error> {
let indexing_progress = progress.add_child("create index file");
Ok(match directory {
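
For orientation, a minimal sketch of the renamed entry point as it is used above. Module paths and signatures are assumptions derived only from the call sites in this diff:

// Sketch, not the crate's documented API: assumes `EntriesFromBytesIter` is
// re-exported at `pack::data` and that `Mode`/`CompressedBytesMode` live in
// `pack::data::input`, as the hunks above suggest.
use std::{fs, io};
use git_odb::pack;

fn count_pack_entries(path: &std::path::Path) -> Result<usize, Box<dyn std::error::Error>> {
    let pack = io::BufReader::new(fs::File::open(path)?);
    let entries = pack::data::EntriesFromBytesIter::new_from_header(
        pack,
        pack::data::input::Mode::Verify,
        pack::data::input::CompressedBytesMode::Crc32,
    )?;
    // The lower bound of size_hint() is the object count parsed from the pack header.
    Ok(entries.size_hint().0)
}
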
4 changes: 2 additions & 2 deletions git-odb/src/pack/bundle/write/types.rs
@@ -10,7 +10,7 @@ pub struct Options {
/// The amount of threads to use at most when resolving the pack. If `None`, all logical cores are used.
pub thread_limit: Option<usize>,
/// Determine how much processing to spend on protecting against corruption or recovering from errors.
pub iteration_mode: pack::data::iter::Mode,
pub iteration_mode: pack::data::input::Mode,
/// The version of pack index to write, should be [`pack::index::Version::default()`]
pub index_kind: pack::index::Version,
}
@@ -20,7 +20,7 @@ impl Default for Options {
fn default() -> Self {
Options {
thread_limit: None,
iteration_mode: pack::data::iter::Mode::Verify,
iteration_mode: pack::data::input::Mode::Verify,
index_kind: Default::default(),
}
}
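
Given the Default impl above, callers that want the safe settings need not spell out each field. A hedged sketch, assuming `Options` is exported at `pack::bundle::write`:

// Sketch: equivalent to the Default impl shown in this hunk
// (Verify mode plus the default index version).
fn default_write_options() -> git_odb::pack::bundle::write::Options {
    git_odb::pack::bundle::write::Options::default()
}
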
26 changes: 15 additions & 11 deletions git-odb/src/pack/data/iter.rs → git-odb/src/pack/data/input.rs
@@ -3,7 +3,7 @@ use flate2::Decompress;
use git_features::hash::Sha1;
use std::{fs, io};

/// Returned by [`Iter::new_from_header()`] and as part of `Item` of [`Iter`]
/// Returned by [`EntriesFromBytesIter::new_from_header()`] and as part of `Item` of [`EntriesFromBytesIter`]
#[derive(thiserror::Error, Debug)]
#[allow(missing_docs)]
pub enum Error {
@@ -20,7 +20,7 @@ pub enum Error {
IncompletePack { actual: u64, expected: u64 },
}

/// An item of the iteration produced by [`Iter`]
/// An item of the iteration produced by [`EntriesFromBytesIter`]
#[derive(PartialEq, Eq, Debug, Hash, Ord, PartialOrd, Clone)]
#[cfg_attr(feature = "serde1", derive(serde::Serialize, serde::Deserialize))]
pub struct Entry {
@@ -51,7 +51,7 @@ pub struct Entry {
/// An iterator over [`Entries`][Entry] in a byte stream.
///
/// The iterator is used as part of [Bundle::write_stream_to_directory(…)][pack::Bundle::write_stream_to_directory()].
pub struct Iter<R> {
pub struct EntriesFromBytesIter<R> {
read: R,
decompressor: Option<Box<Decompress>>,
offset: u64,
@@ -116,7 +116,7 @@ impl CompressedBytesMode {
}
}

impl<R> Iter<R>
impl<R> EntriesFromBytesIter<R>
where
R: io::BufRead,
{
@@ -133,7 +133,11 @@ where
/// Obtain an iterator from a `read` stream to a pack data file and configure it using `mode` and `compressed`.
///
/// Note that `read` is expected at the beginning of a valid pack data file with a header, entries and a trailer.
pub fn new_from_header(mut read: R, mode: Mode, compressed: CompressedBytesMode) -> Result<Iter<R>, Error> {
pub fn new_from_header(
mut read: R,
mode: Mode,
compressed: CompressedBytesMode,
) -> Result<EntriesFromBytesIter<R>, Error> {
let mut header_data = [0u8; 12];
read.read_exact(&mut header_data)?;

@@ -143,7 +147,7 @@ where
pack::data::Version::V2,
"let's stop here if we see undocumented pack formats"
);
Ok(Iter {
Ok(EntriesFromBytesIter {
read,
decompressor: None,
compressed,
@@ -289,7 +293,7 @@ fn read_and_pass_to<R: io::Read, W: io::Write>(read: &mut R, to: W) -> PassThrou
PassThrough { read, write: to }
}

impl<R> Iterator for Iter<R>
impl<R> Iterator for EntriesFromBytesIter<R>
where
R: io::BufRead,
{
@@ -315,7 +319,7 @@ where
(self.objects_left as usize, Some(self.objects_left as usize))
}
}
impl<R> std::iter::ExactSizeIterator for Iter<R> where R: io::BufRead {}
impl<R> std::iter::ExactSizeIterator for EntriesFromBytesIter<R> where R: io::BufRead {}

struct PassThrough<R, W> {
read: R,
@@ -357,9 +361,9 @@ where
}

impl pack::data::File {
/// Returns an iterator over [`Entries`][pack::data::iter::Entry], without making use of the memory mapping.
pub fn streaming_iter(&self) -> Result<Iter<impl io::BufRead>, Error> {
/// Returns an iterator over [`Entries`][pack::data::input::Entry], without making use of the memory mapping.
pub fn streaming_iter(&self) -> Result<EntriesFromBytesIter<impl io::BufRead>, Error> {
let reader = io::BufReader::with_capacity(4096 * 8, fs::File::open(&self.path)?);
Iter::new_from_header(reader, Mode::Verify, CompressedBytesMode::KeepAndCrc32)
EntriesFromBytesIter::new_from_header(reader, Mode::Verify, CompressedBytesMode::KeepAndCrc32)
}
}
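
A short consumption sketch for the renamed iterator; the public `Entry` fields (`pack_offset`, `header`) are taken from the destructuring in git-odb/src/pack/index/write/mod.rs further down, the rest is assumed:

// Sketch: walk all entries of a pack file without memory-mapping it.
fn last_entry_offset(pack: &git_odb::pack::data::File) -> Result<u64, git_odb::pack::data::input::Error> {
    let mut last_offset = 0;
    for entry in pack.streaming_iter()? {
        // Each item is a Result<input::Entry, input::Error>.
        last_offset = entry?.pack_offset;
    }
    Ok(last_offset)
}
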
8 changes: 5 additions & 3 deletions git-odb/src/pack/data/mod.rs
@@ -14,10 +14,12 @@ pub mod header;
pub mod entry;
pub use entry::Entry;

pub mod encode;
///
pub mod iter;
pub use iter::Iter;
pub mod input;
pub use input::EntriesFromBytesIter;

/// Utilities to encode pack data entries and write them to a `Write` implementation to resemble a pack data file.
pub mod output;

/// A slice into a pack file denoting a pack entry.
///
@@ -1,10 +1,9 @@
//! Utilities to encode pack data entries and write them to a `Write` implementation to resemble a pack data file.
use crate::pack::data;
use git_hash::ObjectId;

///
pub mod entries;
pub use entries::entries;
pub mod objects;
pub use objects::to_entry_iter;

///
pub mod write;
@@ -62,7 +61,7 @@ impl Entry {

///
pub mod entry {
use crate::{data, pack::data::encode};
use crate::{data, pack::data::output};
use git_hash::ObjectId;
use std::io::Write;

@@ -87,18 +86,18 @@ pub mod entry {
},
}

/// The error returned by [`encode::Entry::from_data()`].
/// The error returned by [`output::Entry::from_data()`].
#[allow(missing_docs)]
#[derive(Debug, thiserror::Error)]
pub enum Error {
#[error("{0}")]
ZlibDeflate(#[from] std::io::Error),
}

impl encode::Entry {
impl output::Entry {
/// Create a new instance from the given `oid` and its corresponding git `obj`ect data.
pub fn from_data(oid: impl Into<ObjectId>, obj: &data::Object<'_>) -> Result<Self, Error> {
Ok(encode::Entry {
Ok(output::Entry {
id: oid.into(),
object_kind: obj.kind,
entry_kind: Kind::Base,
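
A usage sketch for the renamed constructor; the `git_odb::data::Object` path is inferred from the `use crate::{data, ...}` statement in this hunk and may differ:

// Sketch: turn a located object into a base pack entry via from_data().
use git_hash::ObjectId;
use git_odb::{data, pack::data::output};

fn base_entry(id: ObjectId, obj: &data::Object<'_>) -> Result<output::Entry, output::entry::Error> {
    // from_data() compresses the object's bytes and records its kind, per the impl above.
    output::Entry::from_data(id, obj)
}
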
@@ -1,8 +1,8 @@
use crate::{pack, pack::data::encode};
use crate::{pack, pack::data::output};
use git_features::{hash, parallel, progress::Progress};
use git_hash::{oid, ObjectId};

/// The error returned by the pack generation functions in [this module][crate::pack::data::encode].
/// The error returned by the pack generation functions in [this module][crate::pack::data::output].
#[derive(Debug, thiserror::Error)]
#[allow(missing_docs)]
pub enum Error<LocateErr>
@@ -16,24 +16,24 @@ where
#[error("Entry expected to have hash {expected}, but it had {actual}")]
PackToPackCopyCrc32Mismatch { actual: u32, expected: u32 },
#[error(transparent)]
NewEntry(encode::entry::Error),
NewEntry(output::entry::Error),
}

/// The way input objects are handled
#[derive(PartialEq, Eq, Debug, Hash, Ord, PartialOrd, Clone, Copy)]
#[cfg_attr(feature = "serde1", derive(serde::Serialize, serde::Deserialize))]
pub enum ObjectExpansion {
pub enum Expansion {
/// Don't do anything with the input objects except for transforming them into pack entries
AsIs,
}

impl Default for ObjectExpansion {
impl Default for Expansion {
fn default() -> Self {
ObjectExpansion::AsIs
Expansion::AsIs
}
}

/// Configuration options for the pack generation functions provided in [this module][crate::pack::data::encode].
/// Configuration options for the pack generation functions provided in [this module][crate::pack::data::output].
#[derive(PartialEq, Eq, Debug, Hash, Ord, PartialOrd, Clone, Copy)]
#[cfg_attr(feature = "serde1", derive(serde::Serialize, serde::Deserialize))]
pub struct Options {
@@ -45,7 +45,7 @@ pub struct Options {
/// The pack data version to produce
pub version: crate::pack::data::Version,
/// The way input objects are handled
pub input_object_expansion: ObjectExpansion,
pub input_object_expansion: Expansion,
}

impl Default for Options {
@@ -88,7 +88,7 @@ impl Default for Options {
/// so with minimal overhead (especially compared to `gixp index-from-pack`)~~ Probably works now by chaining Iterators
/// or keeping enough state to write a pack and then generate an index with recorded data.
///
pub fn entries<Locate, Iter, Oid, Cache>(
pub fn to_entry_iter<Locate, Iter, Oid, Cache>(
db: Locate,
make_cache: impl Fn() -> Cache + Send + Clone + Sync + 'static,
objects: Iter,
@@ -99,9 +99,9 @@
input_object_expansion,
chunk_size,
}: Options,
) -> impl Iterator<Item = Result<Vec<encode::Entry>, Error<Locate::Error>>>
) -> impl Iterator<Item = Result<Vec<output::Entry>, Error<Locate::Error>>>
+ parallel::reduce::Finalize<
Reduce = parallel::reduce::IdentityWithResult<Vec<encode::Entry>, Error<Locate::Error>>,
Reduce = parallel::reduce::IdentityWithResult<Vec<output::Entry>, Error<Locate::Error>>,
>
where
Locate: crate::Locate + Clone + Send + Sync + 'static,
@@ -136,7 +136,7 @@ where
)
},
move |oids: Vec<Oid>, (buf, cache)| {
use ObjectExpansion::*;
use Expansion::*;
let mut out = Vec::new();
match input_object_expansion {
AsIs => {
@@ -154,18 +154,18 @@
}
}
if pack_entry.header.is_base() {
encode::Entry {
output::Entry {
id: id.as_ref().into(),
object_kind: pack_entry.header.to_kind().expect("non-delta"),
entry_kind: encode::entry::Kind::Base,
entry_kind: output::entry::Kind::Base,
decompressed_size: obj.data.len(),
compressed_data: entry.data[pack_entry.data_offset as usize..].into(),
}
} else {
encode::Entry::from_data(id.as_ref(), &obj).map_err(Error::NewEntry)?
output::Entry::from_data(id.as_ref(), &obj).map_err(Error::NewEntry)?
}
}
_ => encode::Entry::from_data(id.as_ref(), &obj).map_err(Error::NewEntry)?,
_ => output::Entry::from_data(id.as_ref(), &obj).map_err(Error::NewEntry)?,
});
}
}
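
To see the renamed types side by side, a hedged construction sketch; the `output::objects` module path is inferred from the `pub mod objects;` declaration earlier in this commit:

// Sketch: build generation options with the renamed `Expansion` enum,
// leaning on the Default impl in this file for the remaining fields.
use git_odb::pack::data::output;

fn generation_options() -> output::objects::Options {
    output::objects::Options {
        input_object_expansion: output::objects::Expansion::AsIs,
        ..Default::default()
    }
}
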
@@ -1,4 +1,4 @@
use crate::{hash, pack, pack::data::encode};
use crate::{hash, pack, pack::data::output};
use std::io::Write;

/// The error returned by `next()` in the [`Entries`] iterator.
@@ -14,10 +14,10 @@
Input(E),
}

/// An implementation of [`Iterator`] to write [encoded entries][encode::Entry] to an inner implementation each time
/// An implementation of [`Iterator`] to write [encoded entries][output::Entry] to an inner implementation each time
/// `next()` is called.
pub struct Entries<I, W> {
/// An iterator for input [`encode::Entry`] instances
/// An iterator for input [`output::Entry`] instances
pub input: I,
/// A way of writing encoded bytes.
output: hash::Write<W>,
Expand All @@ -32,11 +32,11 @@ pub struct Entries<I, W> {

impl<I, W, E> Entries<I, W>
where
I: Iterator<Item = Result<Vec<encode::Entry>, E>>,
I: Iterator<Item = Result<Vec<output::Entry>, E>>,
W: std::io::Write,
E: std::error::Error + 'static,
{
/// Create a new instance reading [entries][encode::Entry] from an `input` iterator and write pack data bytes to
/// Create a new instance reading [entries][output::Entry] from an `input` iterator and write pack data bytes to
/// `output` writer, resembling a pack of `version` with exactly `num_entries` amount of objects contained in it.
/// `hash_kind` is the kind of hash to use for the pack checksum and maybe other places, depending on the version.
///
@@ -99,7 +99,7 @@ where

impl<I, W, E> Iterator for Entries<I, W>
where
I: Iterator<Item = Result<Vec<encode::Entry>, E>>,
I: Iterator<Item = Result<Vec<output::Entry>, E>>,
W: std::io::Write,
E: std::error::Error + 'static,
{
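
The writer's constructor is elided from this hunk, so only the input shape it consumes is sketched; the `Result<Vec<output::Entry>, E>` chunking mirrors the Iterator bounds above:

// Sketch: the `Entries` writer is driven by an iterator of entry chunks.
use git_odb::pack::data::output;

fn single_chunk(
    entries: Vec<output::Entry>,
) -> impl Iterator<Item = Result<Vec<output::Entry>, std::io::Error>> {
    // One Ok chunk; a real producer (e.g. to_entry_iter()) yields many.
    std::iter::once(Ok(entries))
}
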
2 changes: 1 addition & 1 deletion git-odb/src/pack/index/write/error.rs
@@ -8,7 +8,7 @@ pub enum Error {
#[error("An IO error occurred when reading the pack or creating a temporary file")]
Io(#[from] io::Error),
#[error("A pack entry could not be extracted")]
PackEntryDecode(#[from] pack::data::iter::Error),
PackEntryDecode(#[from] pack::data::input::Error),
#[error("Indices of type {} cannot be written, only {} are supported", *.0 as usize, pack::index::Version::default() as usize)]
Unsupported(pack::index::Version),
#[error("Ref delta objects are not supported as there is no way to look them up. Resolve them beforehand.")]
4 changes: 2 additions & 2 deletions git-odb/src/pack/index/write/mod.rs
@@ -59,7 +59,7 @@ impl pack::index::File {
pub fn write_data_iter_to_stream<F, F2>(
kind: pack::index::Version,
make_resolver: F,
entries: impl Iterator<Item = Result<pack::data::iter::Entry, pack::data::iter::Error>>,
entries: impl Iterator<Item = Result<pack::data::input::Entry, pack::data::input::Error>>,
thread_limit: Option<usize>,
mut root_progress: impl Progress,
out: impl io::Write,
@@ -87,7 +87,7 @@ impl pack::index::File {
let mut pack_entries_end: u64 = 0;

for (eid, entry) in entries.enumerate() {
let pack::data::iter::Entry {
let pack::data::input::Entry {
header,
pack_offset,
crc32,
2 changes: 1 addition & 1 deletion git-odb/tests/odb/pack/bundle.rs
@@ -143,7 +143,7 @@ mod write_to_directory {
progress::Discard,
bundle::write::Options {
thread_limit: None,
iteration_mode: pack::data::iter::Mode::Verify,
iteration_mode: pack::data::input::Mode::Verify,
index_kind: pack::index::Version::V2,
},
)
