Skip to content

Commit

Permalink
traverse(refactor)!: Avoid duplicate module paths in 'tree' and 'comm…
Browse files Browse the repository at this point in the history
…it' (#164)
  • Loading branch information
Byron committed Sep 13, 2021
1 parent 066f59b commit 2f2d856
Show file tree
Hide file tree
Showing 8 changed files with 147 additions and 145 deletions.
4 changes: 2 additions & 2 deletions experiments/traversal/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -178,7 +178,7 @@ where
seen: HashSet<ObjectId>,
}

impl tree::visit::Visit for Count {
impl tree::Visit for Count {
fn pop_front_tracked_path_and_set_current(&mut self) {}
fn push_back_tracked_path_component(&mut self, _component: &BStr) {}
fn push_path_component(&mut self, _component: &BStr) {}
Expand Down Expand Up @@ -229,7 +229,7 @@ where
seen: &'a DashSet<ObjectId>,
}

impl<'a> tree::visit::Visit for Count<'a> {
impl<'a> tree::Visit for Count<'a> {
fn pop_front_tracked_path_and_set_current(&mut self) {}
fn push_back_tracked_path_component(&mut self, _component: &BStr) {}
fn push_path_component(&mut self, _component: &BStr) {}
Expand Down
2 changes: 1 addition & 1 deletion git-pack/src/data/output/count/objects.rs
Original file line number Diff line number Diff line change
Expand Up @@ -400,7 +400,7 @@ mod tree {
pub mod traverse {
use git_hash::ObjectId;
use git_object::{bstr::BStr, tree::EntryRef};
use git_traverse::tree::visit::{Action, Visit};
use git_traverse::tree::{visit::Action, Visit};

use crate::data::output::count::objects::util::InsertImmutable;

Expand Down
8 changes: 4 additions & 4 deletions git-repository/src/ext/object_id.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
use git_hash::ObjectId;
use git_traverse::commit::ancestors::{Ancestors, State};
use git_traverse::commit::{ancestors, Ancestors};

use crate::easy;

Expand All @@ -8,7 +8,7 @@ pub trait Sealed {}
/// An extension trait to add functionality to [`ObjectId`]s.
pub trait ObjectIdExt: Sealed {
/// Create an iterator over the ancestry of the commits reachable from this id, which must be a commit.
fn ancestors<Find>(self, find: Find) -> Ancestors<Find, fn(&git_hash::oid) -> bool, State>
fn ancestors<Find>(self, find: Find) -> Ancestors<Find, fn(&git_hash::oid) -> bool, ancestors::State>
where
Find: for<'a> FnMut(&git_hash::oid, &'a mut Vec<u8>) -> Option<git_object::CommitRefIter<'a>>;

Expand All @@ -19,11 +19,11 @@ pub trait ObjectIdExt: Sealed {
impl Sealed for ObjectId {}

impl ObjectIdExt for ObjectId {
fn ancestors<Find>(self, find: Find) -> Ancestors<Find, fn(&git_hash::oid) -> bool, State>
fn ancestors<Find>(self, find: Find) -> Ancestors<Find, fn(&git_hash::oid) -> bool, ancestors::State>
where
Find: for<'a> FnMut(&git_hash::oid, &'a mut Vec<u8>) -> Option<git_object::CommitRefIter<'a>>,
{
Ancestors::new(Some(self), State::default(), find)
Ancestors::new(Some(self), ancestors::State::default(), find)
}

fn attach<A: easy::Access + Sized>(self, access: &A) -> easy::Oid<'_, A> {
Expand Down
19 changes: 9 additions & 10 deletions git-traverse/src/commit.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,10 @@
/// An iterator over the ancestors one or more starting commits
pub struct Ancestors<Find, Predicate, StateMut> {
find: Find,
predicate: Predicate,
state: StateMut,
}

///
pub mod ancestors {
use std::{
Expand All @@ -9,6 +16,8 @@ pub mod ancestors {
use git_object::CommitRefIter;
use quick_error::quick_error;

use crate::commit::Ancestors;

quick_error! {
/// The error is part of the item returned by the [Ancestors] iterator.
#[derive(Debug)]
Expand Down Expand Up @@ -41,13 +50,6 @@ pub mod ancestors {
}
}

/// An iterator over the ancestors one or more starting commits
pub struct Ancestors<Find, Predicate, StateMut> {
find: Find,
predicate: Predicate,
state: StateMut,
}

impl<Find, StateMut> Ancestors<Find, fn(&oid) -> bool, StateMut>
where
Find: for<'a> FnMut(&oid, &'a mut Vec<u8>) -> Option<CommitRefIter<'a>>,
Expand Down Expand Up @@ -149,6 +151,3 @@ pub mod ancestors {
}
}
}
// TODO: put this here as a breaking change.
#[doc(inline)]
pub use ancestors::Ancestors;
133 changes: 70 additions & 63 deletions git-traverse/src/tree/breadthfirst.rs
Original file line number Diff line number Diff line change
@@ -1,11 +1,8 @@
use std::{borrow::BorrowMut, collections::VecDeque};
use std::collections::VecDeque;

use git_hash::{oid, ObjectId};
use git_object::{tree, TreeRefIter};
use git_hash::ObjectId;
use quick_error::quick_error;

use crate::tree::visit::Visit;

quick_error! {
/// The error is part of the item returned by the [`traverse()`] function.
#[derive(Debug)]
Expand Down Expand Up @@ -39,72 +36,82 @@ impl State {
}
}

/// Start a breadth-first iteration over the `root` trees entries.
///
/// * `root`
/// * the tree to iterate in a nested fashion.
/// * `state` - all state used for the iteration. If multiple iterations are performed, allocations can be minimized by reusing
/// this state.
/// * `find` - a way to lookup new object data during traversal by their ObjectId, writing their data into buffer and returning
/// an iterator over entries if the object is present and is a tree. Caching should be implemented within this function
/// as needed. The return value is `Option<TreeIter>` which degenerates all error information. Not finding a commit should also
/// be considered an errors as all objects in the tree DAG should be present in the database. Hence [`Error::NotFound`] should
/// be escalated into a more specific error if its encountered by the caller.
/// * `delegate` - A way to observe entries and control the iteration while allowing the optimizer to let you pay only for what you use.
pub fn traverse<StateMut, Find, V>(
root: TreeRefIter<'_>,
mut state: StateMut,
mut find: Find,
delegate: &mut V,
) -> Result<(), Error>
where
Find: for<'a> FnMut(&oid, &'a mut Vec<u8>) -> Option<TreeRefIter<'a>>,
StateMut: BorrowMut<State>,
V: Visit,
{
let state = state.borrow_mut();
state.clear();
let mut tree = root;
loop {
for entry in tree {
let entry = entry?;
match entry.mode {
tree::EntryMode::Tree => {
use super::visit::Action::*;
delegate.push_path_component(entry.filename);
let action = delegate.visit_tree(&entry);
match action {
Skip => {}
Continue => {
delegate.pop_path_component();
delegate.push_back_tracked_path_component(entry.filename);
state.next.push_back((true, entry.oid.to_owned()))
pub(crate) mod impl_ {
use std::borrow::BorrowMut;

use git_hash::oid;
use git_object::{tree::EntryMode, TreeRefIter};

use super::{Error, State};
use crate::tree::Visit;

/// Start a breadth-first iteration over the `root` trees entries.
///
/// * `root`
/// * the tree to iterate in a nested fashion.
/// * `state` - all state used for the iteration. If multiple iterations are performed, allocations can be minimized by reusing
/// this state.
/// * `find` - a way to lookup new object data during traversal by their ObjectId, writing their data into buffer and returning
/// an iterator over entries if the object is present and is a tree. Caching should be implemented within this function
/// as needed. The return value is `Option<TreeIter>` which degenerates all error information. Not finding a commit should also
/// be considered an errors as all objects in the tree DAG should be present in the database. Hence [`Error::NotFound`] should
/// be escalated into a more specific error if its encountered by the caller.
/// * `delegate` - A way to observe entries and control the iteration while allowing the optimizer to let you pay only for what you use.
pub fn traverse<StateMut, Find, V>(
root: TreeRefIter<'_>,
mut state: StateMut,
mut find: Find,
delegate: &mut V,
) -> Result<(), Error>
where
Find: for<'a> FnMut(&oid, &'a mut Vec<u8>) -> Option<TreeRefIter<'a>>,
StateMut: BorrowMut<State>,
V: Visit,
{
let state = state.borrow_mut();
state.clear();
let mut tree = root;
loop {
for entry in tree {
let entry = entry?;
match entry.mode {
EntryMode::Tree => {
use crate::tree::visit::Action::*;
delegate.push_path_component(entry.filename);
let action = delegate.visit_tree(&entry);
match action {
Skip => {}
Continue => {
delegate.pop_path_component();
delegate.push_back_tracked_path_component(entry.filename);
state.next.push_back((true, entry.oid.to_owned()))
}
Cancel => {
return Err(Error::Cancelled);
}
}
Cancel => {
}
_non_tree => {
delegate.push_path_component(entry.filename);
if delegate.visit_nontree(&entry).cancelled() {
return Err(Error::Cancelled);
}
}
}
_non_tree => {
delegate.push_path_component(entry.filename);
if delegate.visit_nontree(&entry).cancelled() {
return Err(Error::Cancelled);
}
}
delegate.pop_path_component();
}
delegate.pop_path_component();
}
match state.next.pop_front() {
Some((should_pop_path, oid)) => {
if should_pop_path {
delegate.pop_front_tracked_path_and_set_current();
}
match find(&oid, &mut state.buf) {
Some(tree_iter) => tree = tree_iter,
None => return Err(Error::NotFound { oid: oid.to_owned() }),
match state.next.pop_front() {
Some((should_pop_path, oid)) => {
if should_pop_path {
delegate.pop_front_tracked_path_and_set_current();
}
match find(&oid, &mut state.buf) {
Some(tree_iter) => tree = tree_iter,
None => return Err(Error::NotFound { oid: oid.to_owned() }),
}
}
None => break Ok(()),
}
None => break Ok(()),
}
}
}
65 changes: 58 additions & 7 deletions git-traverse/src/tree/mod.rs
Original file line number Diff line number Diff line change
@@ -1,14 +1,65 @@
use std::collections::VecDeque;

use git_object::bstr::{BStr, BString};

/// A trait to allow responding to a traversal designed to observe all entries in a tree, recursively while keeping track of
/// paths if desired.
pub trait Visit {
/// Sets the full path path in front of the queue so future calls to push and pop components affect it instead.
fn pop_front_tracked_path_and_set_current(&mut self);
/// Append a `component` to the end of a path, which may be empty.
fn push_back_tracked_path_component(&mut self, component: &BStr);
/// Append a `component` to the end of a path, which may be empty.
fn push_path_component(&mut self, component: &BStr);
/// Removes the last component from the path, which may leave it empty.
fn pop_path_component(&mut self);

/// Observe a tree entry that is a tree and return an instruction whether to continue or not.
/// [`Action::Skip`][visit::Action::Skip] can be used to prevent traversing it, for example if it's known to the caller already.
///
/// The implementation may use the current path to learn where in the tree the change is located.
fn visit_tree(&mut self, entry: &git_object::tree::EntryRef<'_>) -> visit::Action;

/// Observe a tree entry that is NO tree and return an instruction whether to continue or not.
/// [`Action::Skip`][visit::Action::Skip] has no effect here.
///
/// The implementation may use the current path to learn where in the tree the change is located.
fn visit_nontree(&mut self, entry: &git_object::tree::EntryRef<'_>) -> visit::Action;
}

/// A [Visit][Visit] implementation to record every observed change and keep track of the changed paths.
#[derive(Clone, Debug, Default)]
pub struct Recorder {
path_deque: VecDeque<BString>,
path: BString,
/// The observed entries.
pub records: Vec<recorder::Entry>,
}

///
pub mod visit;
#[doc(inline)]
pub use visit::Visit;
pub mod visit {
/// What to do after an entry was [recorded][super::Visit::visit_tree()].
#[derive(Clone, Copy, PartialOrd, PartialEq, Ord, Eq, Hash)]
pub enum Action {
/// Continue the traversal of entries.
Continue,
/// Stop the traversal of entries, making this te last call to [`visit_(tree|nontree)(…)`][super::Visit::visit_nontree()].
Cancel,
/// Don't dive into the entry, skipping children effectively. Only useful in [`visit_tree(…)`][super::Visit::visit_tree()].
Skip,
}

impl Action {
/// Returns true if this action means to stop the traversal.
pub fn cancelled(&self) -> bool {
matches!(self, Action::Cancel)
}
}
}

///
pub mod recorder;
#[doc(inline)]
pub use recorder::Recorder;

///
pub mod breadthfirst;
#[doc(inline)]
pub use breadthfirst::traverse as breadthfirst;
pub use breadthfirst::impl_::traverse as breadthfirst;
17 changes: 3 additions & 14 deletions git-traverse/src/tree/recorder.rs
Original file line number Diff line number Diff line change
@@ -1,14 +1,12 @@
use std::collections::VecDeque;

use git_hash::ObjectId;
use git_object::{
bstr::{BStr, BString, ByteSlice, ByteVec},
tree,
};

use crate::tree::{visit, visit::Action};
use crate::tree::{visit::Action, Recorder, Visit};

/// An owned entry as observed by a call to [`visit_(tree|nontree)(…)`][visit::Visit::visit_tree()], enhanced with the full path to it.
/// An owned entry as observed by a call to [`visit_(tree|nontree)(…)`][Visit::visit_tree()], enhanced with the full path to it.
/// Otherwise similar to [`git_object::tree::EntryRef`].
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct Entry {
Expand All @@ -32,15 +30,6 @@ impl Entry {
}
}

/// A [Visit][visit::Visit] implementation to record every observed change and keep track of the changed paths.
#[derive(Clone, Debug, Default)]
pub struct Recorder {
path_deque: VecDeque<BString>,
path: BString,
/// The observed entries.
pub records: Vec<Entry>,
}

impl Recorder {
fn pop_element(&mut self) {
if let Some(pos) = self.path.rfind_byte(b'/') {
Expand All @@ -62,7 +51,7 @@ impl Recorder {
}
}

impl visit::Visit for Recorder {
impl Visit for Recorder {
fn pop_front_tracked_path_and_set_current(&mut self) {
self.path = self
.path_deque
Expand Down

0 comments on commit 2f2d856

Please sign in to comment.