Skip to content

Commit

Permalink
frame for traversing tree entries (#301)
Browse files Browse the repository at this point in the history
  • Loading branch information
Byron committed Mar 6, 2022
1 parent d16821a commit 0e55fbb
Show file tree
Hide file tree
Showing 7 changed files with 311 additions and 89 deletions.
13 changes: 4 additions & 9 deletions git-repository/examples/stats.rs
Original file line number Diff line number Diff line change
@@ -1,11 +1,8 @@
#![allow(unused)]

use git_odb::FindExt;
use git_repository as git;
use git_repository::Reference;

fn main() -> Result<(), Box<dyn std::error::Error>> {
let repo = git::discover(".")?;
let repo = git::discover(".")?.apply_environment();
println!(
"Repo: {}",
repo.work_tree().as_deref().unwrap_or(repo.git_dir()).display()
Expand Down Expand Up @@ -63,8 +60,6 @@ fn main() -> Result<(), Box<dyn std::error::Error>> {
}

mod visit {
use std::process::id;

use git_hash::oid;
use git_object::{bstr::BStr, tree::EntryRef};
use git_repository as git;
Expand Down Expand Up @@ -102,13 +97,13 @@ mod visit {
impl git_traverse::tree::Visit for Tree {
fn pop_front_tracked_path_and_set_current(&mut self) {}

fn push_back_tracked_path_component(&mut self, component: &BStr) {}
fn push_back_tracked_path_component(&mut self, _component: &BStr) {}

fn push_path_component(&mut self, component: &BStr) {}
fn push_path_component(&mut self, _component: &BStr) {}

fn pop_path_component(&mut self) {}

fn visit_tree(&mut self, entry: &EntryRef<'_>) -> Action {
fn visit_tree(&mut self, _entry: &EntryRef<'_>) -> Action {
self.num_trees += 1;
Action::Continue
}
Expand Down
41 changes: 41 additions & 0 deletions git-repository/src/object/tree.rs
Original file line number Diff line number Diff line change
Expand Up @@ -107,3 +107,44 @@ impl<'a, 'repo> Traversal<'a, 'repo> {
)
}
}

pub use iter::EntryRef;

///
mod iter {
use super::Tree;
use crate::Repository;

/// An entry within a tree, paired with the repository it was read from so that
/// its object id can be handed out as a repository-attached [`crate::Id`].
pub struct EntryRef<'repo, 'a> {
/// The actual entry ref we are wrapping.
pub inner: git_object::tree::EntryRef<'a>,

// The repository passed along when creating the id returned by `id()`.
repo: &'repo Repository,
}

impl<'repo, 'a> EntryRef<'repo, 'a> {
    /// The mode of this entry, that is the kind of object to which [`id()`][Self::id()] is pointing.
    pub fn mode(&self) -> git_object::tree::EntryMode {
        let entry = &self.inner;
        entry.mode
    }

    /// The name under which this entry is stored in its parent tree.
    pub fn filename(&self) -> &git_object::bstr::BStr {
        let entry = &self.inner;
        entry.filename
    }

    /// Return the entry's id, connected to the underlying repository.
    pub fn id(&self) -> crate::Id<'repo> {
        let oid = self.inner.oid;
        crate::Id::from_id(oid, self.repo)
    }
}

impl<'repo> Tree<'repo> {
    /// Return an iterator over the entries of this tree.
    ///
    /// Each successfully decoded entry is wrapped into an [`EntryRef`] that also carries
    /// this tree's repository; decoding errors are passed through unchanged.
    pub fn iter(&self) -> impl Iterator<Item = Result<EntryRef<'repo, '_>, git_object::decode::Error>> {
        let repo = self.repo;
        let raw_entries = git_object::TreeRefIter::from_bytes(&self.data);
        raw_entries.map(move |decoded| match decoded {
            Ok(inner) => Ok(EntryRef { inner, repo }),
            Err(err) => Err(err),
        })
    }
}
}
76 changes: 2 additions & 74 deletions gitoxide-core/src/repository.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,78 +7,6 @@ pub fn init(directory: Option<PathBuf>) -> Result<git_repository::Path> {
.with_context(|| "Repository initialization failed")
}

pub mod verify {
use std::{path::PathBuf, sync::atomic::AtomicBool};
pub mod tree;

use git_repository as git;
use git_repository::Progress;

use crate::{pack, OutputFormat};

/// A general purpose context for many operations provided here
pub struct Context {
/// If set, provide statistics to `out` in the given format
pub output_statistics: Option<OutputFormat>,
/// If set, don't use more than this amount of threads.
/// Otherwise, usually use as many threads as there are logical cores.
/// A value of 0 is interpreted as no-limit
pub thread_limit: Option<usize>,
pub verify_mode: pack::verify::Mode,
pub algorithm: pack::verify::Algorithm,
}

pub const PROGRESS_RANGE: std::ops::RangeInclusive<u8> = 1..=3;

pub fn integrity(
repo: PathBuf,
mut out: impl std::io::Write,
progress: impl Progress,
should_interrupt: &AtomicBool,
Context {
output_statistics,
thread_limit,
verify_mode,
algorithm,
}: Context,
) -> anyhow::Result<()> {
let repo = git_repository::open(repo)?;
#[cfg_attr(not(feature = "serde1"), allow(unused))]
let mut outcome = repo.objects.store_ref().verify_integrity(
progress,
should_interrupt,
git_repository::odb::pack::index::verify::integrity::Options {
verify_mode,
traversal: algorithm.into(),
thread_limit,
// TODO: a way to get the pack cache from a handle
make_pack_lookup_cache: || git_repository::odb::pack::cache::Never,
},
)?;
// TODO: make this work for indices in multiple workspaces, once we have workspace support
if let Some(index) = repo.load_index().transpose()? {
index.verify_integrity()?;
index.verify_entries()?;
index.verify_extensions(true, {
use git::odb::FindExt;
let objects = repo.objects;
move |oid, buf: &mut Vec<u8>| objects.find_tree_iter(oid, buf).ok()
})?;
outcome.progress.info(format!("Index at '{}' OK", index.path.display()));
}
match output_statistics {
Some(OutputFormat::Human) => writeln!(out, "Human output is currently unsupported, use JSON instead")?,
#[cfg(feature = "serde1")]
Some(OutputFormat::Json) => {
serde_json::to_writer_pretty(
out,
&serde_json::json!({
"index_statistics" : outcome.index_statistics,
"loose_object-stores" : outcome.loose_object_stores
}),
)?;
}
None => {}
}
Ok(())
}
}
pub mod verify;
130 changes: 130 additions & 0 deletions gitoxide-core/src/repository/tree.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,130 @@
use anyhow::bail;
use std::io;
use std::path::PathBuf;

use crate::OutputFormat;
use git_repository as git;
use git_repository::prelude::ObjectIdExt;

mod entries {
use git_repository as git;

use git::hash::oid;
use git::objs::{bstr::BStr, tree::EntryRef};
use git::traverse::tree::visit::Action;

/// Statistics gathered while visiting the entries of a tree, along with the
/// repository used to look up blobs when summing up their sizes.
pub struct Traverse {
/// Number of tree entries seen, incremented once per `visit_tree` call.
pub num_trees: usize,
/// Number of entries with mode `Link`.
pub num_links: usize,
/// Number of entries with mode `Blob`.
pub num_blobs: usize,
/// Number of entries with mode `BlobExecutable`.
pub num_blobs_exec: usize,
/// Number of entries with mode `Commit`.
pub num_submodules: usize,
/// Sum of the data lengths of all blobs (executable or not) that could be found in `repo`.
pub num_bytes: u64,
/// The repository in which `count_bytes` looks up objects.
pub repo: git::Repository,
}

impl Traverse {
    /// Create a new instance with all counters at zero, looking up objects in `repo`.
    pub fn new(repo: git::Repository) -> Self {
        Self {
            repo,
            num_bytes: 0,
            num_submodules: 0,
            num_blobs_exec: 0,
            num_blobs: 0,
            num_links: 0,
            num_trees: 0,
        }
    }

    /// Add the data length of the object at `oid` to `num_bytes`.
    ///
    /// Objects that cannot be found are ignored, leaving the total untouched.
    pub(crate) fn count_bytes(&mut self, oid: &oid) {
        let num_bytes = match self.repo.find_object(oid) {
            Ok(object) => object.data.len() as u64,
            Err(_) => return,
        };
        self.num_bytes += num_bytes;
    }
}

impl git::traverse::tree::Visit for Traverse {
    // Paths are not tracked by this visitor, hence all path related callbacks do nothing.
    fn pop_front_tracked_path_and_set_current(&mut self) {}

    fn push_back_tracked_path_component(&mut self, _component: &BStr) {}

    fn push_path_component(&mut self, _component: &BStr) {}

    fn pop_path_component(&mut self) {}

    /// Count the tree and keep going.
    fn visit_tree(&mut self, _entry: &EntryRef<'_>) -> Action {
        self.num_trees += 1;
        Action::Continue
    }

    /// Count the entry according to its mode, adding up the size of blobs, and keep going.
    fn visit_nontree(&mut self, entry: &EntryRef<'_>) -> Action {
        use git::objs::tree::EntryMode;

        match entry.mode {
            // Trees are passed to `visit_tree()`, so seeing one here is a bug.
            EntryMode::Tree => unreachable!("BUG"),
            EntryMode::Link => self.num_links += 1,
            EntryMode::Commit => self.num_submodules += 1,
            EntryMode::BlobExecutable => {
                self.count_bytes(entry.oid);
                self.num_blobs_exec += 1;
            }
            EntryMode::Blob => {
                self.count_bytes(entry.oid);
                self.num_blobs += 1;
            }
        }
        Action::Continue
    }
}
}

/// List the entries of a tree of the repository at `repository`.
///
/// * `treeish` - the hex object id of the object to list, which is converted with `try_into_tree()`.
///   If `None`, the tree of the commit that `HEAD` peels to is used.
/// * `recursive` - if `true`, traverse the entire tree breadth-first instead of listing
///   only its direct entries.
/// * `extended` - if `true`, look up the object of each listed entry to obtain the length of its data.
/// * `format` - must not be [`OutputFormat::Json`], which is rejected with an error.
/// * `out` - the stream handed to `format_entry()` for each listed entry.
/// * `_err` - currently unused.
///
/// # Errors
///
/// Fails if JSON output is requested, if the repository cannot be opened, if the tree cannot
/// be obtained, if an entry cannot be decoded or written, or if the traversal fails.
pub fn entries(
    repository: PathBuf,
    treeish: Option<&str>,
    recursive: bool,
    extended: bool,
    format: OutputFormat,
    out: &mut dyn io::Write,
    _err: &mut dyn io::Write,
) -> anyhow::Result<()> {
    if format == OutputFormat::Json {
        bail!("Only human output format is supported at the moment");
    }

    let tree_repo = git::open(repository)?;
    let mut repo = tree_repo.clone().apply_environment();
    repo.object_cache_size(128 * 1024);

    let tree = match treeish {
        Some(hex) => git::hash::ObjectId::from_hex(hex.as_bytes())
            .map(|id| id.attach(&repo))?
            .object()?
            .try_into_tree()?,
        None => repo.head()?.peel_to_commit_in_place()?.tree()?,
    };

    if recursive {
        // Only walk the whole tree when asked to. Previously this ran unconditionally, making
        // non-recursive listings pay for (and possibly fail on) a full traversal.
        // NOTE(review): the statistics collected by the delegate are not reported yet.
        let mut delegate = entries::Traverse::new(tree_repo);
        tree.traverse().breadthfirst(&mut delegate)?;
    } else {
        for entry in tree.iter() {
            let entry = entry?;
            // The object is only looked up if its size was requested.
            let size = extended
                .then(|| entry.id().object().map(|o| o.data.len()))
                .transpose()?;
            format_entry(&mut *out, &entry.inner, size)?;
        }
    }

    Ok(())
}

/// Write a single tree entry to `_out`, optionally along with the `_size` of its object.
///
/// # Panics
///
/// Always, as this is a `todo!()` placeholder.
// NOTE(review): `entries()` calls this for every entry of a non-recursive listing, so such
// listings panic on the first entry until this is implemented.
fn format_entry(
mut _out: impl io::Write,
_entry: &git::objs::tree::EntryRef<'_>,
_size: Option<usize>,
) -> std::io::Result<()> {
todo!()
}
73 changes: 73 additions & 0 deletions gitoxide-core/src/repository/verify.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
use std::{path::PathBuf, sync::atomic::AtomicBool};

use git_repository as git;
use git_repository::Progress;

use crate::{pack, OutputFormat};

/// A general purpose context for many operations provided here
pub struct Context {
/// If set, provide statistics to `out` in the given format
pub output_statistics: Option<OutputFormat>,
/// If set, don't use more than this amount of threads.
/// Otherwise, usually use as many threads as there are logical cores.
/// A value of 0 is interpreted as no-limit
pub thread_limit: Option<usize>,
/// The verification mode, forwarded as `verify_mode` option of the integrity check.
pub verify_mode: pack::verify::Mode,
/// The algorithm which is converted into the `traversal` option of the integrity check.
pub algorithm: pack::verify::Algorithm,
}

/// The inclusive range `1..=3`.
// NOTE(review): presumably the range of progress levels worth displaying for `integrity()` -
// confirm against the callers of this constant.
pub const PROGRESS_RANGE: std::ops::RangeInclusive<u8> = 1..=3;

/// Verify the integrity of the object store of the repository at `repo` and, if one can be
/// loaded, of its index as well.
///
/// * `out` - receives the statistics if `output_statistics` is set in the [`Context`].
/// * `progress` - passed on to the object store verification.
/// * `should_interrupt` - passed on to the object store verification.
///
/// # Errors
///
/// Fails if the repository cannot be opened, if any of the verification steps fails,
/// or if writing to `out` fails.
pub fn integrity(
repo: PathBuf,
mut out: impl std::io::Write,
progress: impl Progress,
should_interrupt: &AtomicBool,
Context {
output_statistics,
thread_limit,
verify_mode,
algorithm,
}: Context,
) -> anyhow::Result<()> {
let repo = git_repository::open(repo)?;
// Without the `serde1` feature the outcome's statistics are never read, see the match below.
#[cfg_attr(not(feature = "serde1"), allow(unused))]
let mut outcome = repo.objects.store_ref().verify_integrity(
progress,
should_interrupt,
git_repository::odb::pack::index::verify::integrity::Options {
verify_mode,
traversal: algorithm.into(),
thread_limit,
// TODO: a way to get the pack cache from a handle
make_pack_lookup_cache: || git_repository::odb::pack::cache::Never,
},
)?;
// TODO: make this work for indices in multiple workspaces, once we have workspace support
if let Some(index) = repo.load_index().transpose()? {
index.verify_integrity()?;
index.verify_entries()?;
index.verify_extensions(true, {
use git::odb::FindExt;
// This moves the object database out of `repo`, so `repo` must not be used as a whole afterwards.
let objects = repo.objects;
// Trees that cannot be found are reported as `None`.
move |oid, buf: &mut Vec<u8>| objects.find_tree_iter(oid, buf).ok()
})?;
outcome.progress.info(format!("Index at '{}' OK", index.path.display()));
}
match output_statistics {
// There is no human-readable rendering of the statistics, so only a hint is written.
Some(OutputFormat::Human) => writeln!(out, "Human output is currently unsupported, use JSON instead")?,
#[cfg(feature = "serde1")]
Some(OutputFormat::Json) => {
serde_json::to_writer_pretty(
out,
&serde_json::json!({
"index_statistics" : outcome.index_statistics,
"loose_object-stores" : outcome.loose_object_stores
}),
)?;
}
None => {}
}
Ok(())
}

0 comments on commit 0e55fbb

Please sign in to comment.