Skip to content

Commit

Permalink
Merge branch 'write-index-files' into write-index-v2
Browse files Browse the repository at this point in the history
  • Loading branch information
Sidney Douw committed Aug 3, 2022
2 parents 2f506c7 + a66403c commit cddc2ca
Show file tree
Hide file tree
Showing 3 changed files with 231 additions and 152 deletions.
21 changes: 21 additions & 0 deletions git-index/src/entry.rs
Original file line number Diff line number Diff line change
Expand Up @@ -112,6 +112,11 @@ impl Flags {
/// Return the stage stored in these flags.
///
/// Only the bits selected by `Flags::STAGE_MASK` are kept; the result is then
/// shifted right by 12 so the stage ends up in the lowest bits.
pub fn stage(&self) -> Stage {
    let masked = *self & Flags::STAGE_MASK;
    masked.bits >> 12
}

/// Convert these in-memory flags into their `at_rest` (on-disk) representation.
///
/// The raw bits are cast to `u16` and handed to `at_rest::Flags::from_bits`.
///
/// # Panics
///
/// Panics if `from_bits` returns `None` for the cast value.
pub fn to_storage(&self) -> at_rest::Flags {
    // TODO: extended flags / v3
    let storage_bits = self.bits() as u16;
    at_rest::Flags::from_bits(storage_bits).unwrap()
}
}

#[derive(Debug, PartialEq, Eq, Hash, Ord, PartialOrd, Clone, Copy)]
Expand Down Expand Up @@ -173,3 +178,19 @@ mod _impls {
}
}
}

#[cfg(test)]
mod tests {
    use crate::entry::at_rest;

    /// Storage flags converted to memory and back must yield the original bits.
    #[test]
    fn in_mem_flags_to_storage_flags_v2() {
        let expected_bits = u16::from_be_bytes(*b"\x00\x01");
        let in_memory = at_rest::Flags::from_bits(expected_bits).unwrap().to_memory();

        let round_tripped = in_memory.to_storage();

        assert_eq!(round_tripped.bits(), expected_bits);
    }
}
281 changes: 155 additions & 126 deletions git-index/src/write.rs
Original file line number Diff line number Diff line change
@@ -1,162 +1,191 @@
use std::{
collections::{hash_map, HashMap},
ops::Range,
};

use bstr::ByteVec;

use crate::{extension, State, Version};
use std::io::Write;

impl State {
    /// Serialize this state into a newly created byte buffer.
    ///
    /// A `Writer` is built from `self` and `options`, asked to `generate` its
    /// output, and its accumulated `data` is returned to the caller.
    pub fn write_to(&self, options: Options) -> Vec<u8> {
        let mut index_writer = Writer::new(self, options);
        index_writer.generate();
        index_writer.data
    }
}

#[derive(Default)]
pub struct Options {
hash_kind: git_hash::Kind,
}

struct Writer<'a> {
state: &'a State,
options: Options,
data: Vec<u8>,
index_table: HashMap<&'static str, Range<usize>>,
/// The hash kind to use when writing the index file.
///
/// It is not always possible to infer the hash kind when reading an index, so this is required.
pub hash_kind: git_hash::Kind,
pub version: Version,
pub tree_cache: bool,
pub end_of_index_entry: bool,
}

impl<'a> Writer<'a> {
pub fn new(state: &'a State, options: Options) -> Self {
impl Default for Options {
fn default() -> Self {
Self {
state,
options,
data: Vec::default(),
index_table: Default::default(),
hash_kind: git_hash::Kind::default(),
version: Version::V2,
tree_cache: true,
end_of_index_entry: true,
}
}
}

pub fn generate(&mut self) {
self.header();
self.entries();
impl State {
pub fn write_to(&self, out: &mut impl std::io::Write, options: Options) -> std::io::Result<()> {
let mut write_counter = WriteCounter::new(out);
let num_entries = self.entries().len() as u32;
let header_offset = header(&mut write_counter, options.version, num_entries)?;
let entries_offset = entries(&mut write_counter, self, header_offset)?;
let tree_offset = if options.tree_cache {
tree(&mut write_counter, self.tree())?
} else {
entries_offset
};

// TODO: Tree extension is always included, I think
if let Some(t) = self.state.tree() {
self.tree(t)
if num_entries > 0 && options.end_of_index_entry {
end_of_index_entry(write_counter.inner, options.hash_kind, entries_offset, tree_offset)?;
}

self.end_of_index();
Ok(())
}
}

fn header<T: std::io::Write>(
out: &mut WriteCounter<'_, T>,
version: Version,
num_entries: u32,
) -> Result<u32, std::io::Error> {
let signature = b"DIRC";

let version = match version {
Version::V2 => 2_u32.to_be_bytes(),
Version::V3 => 3_u32.to_be_bytes(),
Version::V4 => 4_u32.to_be_bytes(),
};

out.write_all(signature)?;
out.write_all(&version)?;
out.write_all(&num_entries.to_be_bytes())?;

fn push(&mut self, data: &[u8], key: &'static str) {
let start = self.data.len();
let end = start + data.len();
Ok(out.count)
}

match self.index_table.entry(key) {
hash_map::Entry::Occupied(mut e) => e.get_mut().end = end,
hash_map::Entry::Vacant(e) => {
e.insert(start..end);
fn entries<T: std::io::Write>(
out: &mut WriteCounter<'_, T>,
state: &State,
header_size: u32,
) -> Result<u32, std::io::Error> {
for entry in state.entries() {
out.write_all(&entry.stat.ctime.secs.to_be_bytes())?;
out.write_all(&entry.stat.ctime.nsecs.to_be_bytes())?;
out.write_all(&entry.stat.mtime.secs.to_be_bytes())?;
out.write_all(&entry.stat.mtime.nsecs.to_be_bytes())?;
out.write_all(&entry.stat.dev.to_be_bytes())?;
out.write_all(&entry.stat.ino.to_be_bytes())?;
out.write_all(&entry.mode.bits().to_be_bytes())?;
out.write_all(&entry.stat.uid.to_be_bytes())?;
out.write_all(&entry.stat.gid.to_be_bytes())?;
out.write_all(&entry.stat.size.to_be_bytes())?;
out.write_all(entry.id.as_bytes())?;
let path = entry.path(state);
out.write_all(&(entry.flags.to_storage().bits() | path.len() as u16).to_be_bytes())?;
out.write_all(path)?;
out.write_all(b"\0")?;

match (out.count - header_size) % 8 {
0 => {}
n => {
let byte_offset = 8 - n;
for _ in 0..byte_offset {
out.write_all(b"\0")?;
}
}
};

self.data.push_str(data);
}

fn header(&mut self) {
let signature = b"DIRC";
let version = match self.state.version() {
Version::V2 => 2_u32.to_be_bytes(),
Version::V3 => 3_u32.to_be_bytes(),
Version::V4 => 4_u32.to_be_bytes(),
};
let num_entries = self.state.entries().len() as u32;
Ok(out.count)
}

self.push(signature, "header");
self.push(&version, "header");
self.push(&(num_entries).to_be_bytes(), "header");
}
fn tree<T: std::io::Write>(
out: &mut WriteCounter<'_, T>,
tree: Option<&extension::Tree>,
) -> Result<u32, std::io::Error> {
if let Some(tree) = tree {
let signature = b"TREE";

fn entries(&mut self) {
for e in self.state.entries() {
self.push(&e.stat.ctime.secs.to_be_bytes(), "entries");
self.push(&e.stat.ctime.nsecs.to_be_bytes(), "entries");
self.push(&e.stat.mtime.secs.to_be_bytes(), "entries");
self.push(&e.stat.mtime.nsecs.to_be_bytes(), "entries");
self.push(&e.stat.dev.to_be_bytes(), "entries");
self.push(&e.stat.ino.to_be_bytes(), "entries");
self.push(&e.mode.bits().to_be_bytes(), "entries");
self.push(&e.stat.uid.to_be_bytes(), "entries");
self.push(&e.stat.gid.to_be_bytes(), "entries");
self.push(&e.stat.size.to_be_bytes(), "entries");
self.push(e.id.as_bytes(), "entries");
//FIXME: correct flag values
// probably convert 'in-memory' Flags to at_rest::Flags
// self.push(&e.flags.bits().to_be_bytes(), "entries");
self.push(b"\x00\x01\x61\x00", "entries");

println!("{:?}", e.flags.bits());
}
// TODO: Can this work without allocating?
let mut entries: Vec<u8> = Vec::new();
tree_entry(&mut entries, tree)?;

out.write_all(signature)?;
out.write_all(&(entries.len() as u32).to_be_bytes())?;
out.write_all(&entries)?;
}

fn tree(&mut self, tree: &extension::Tree) {
let signature = b"TREE";
let mut size: u32 = 0;
Ok(out.count)
}

self.push(signature, "tree");
self.push(&size.to_be_bytes(), "tree");
fn tree_entry(out: &mut impl std::io::Write, tree: &extension::Tree) -> Result<(), std::io::Error> {
let num_entries_ascii = tree.num_entries.to_string();
let num_children_ascii = tree.children.len().to_string();

self.tree_entry(tree);
out.write_all(tree.name.as_slice())?;
out.write_all(b"\0")?;
out.write_all(num_entries_ascii.as_bytes())?;
out.write_all(b" ")?;
out.write_all(num_children_ascii.as_bytes())?;
out.write_all(b"\n")?;
out.write_all(tree.id.as_bytes())?;

if let Some(range) = self.index_table.get("tree") {
size = (range.end - (range.start + 8)) as u32;
self.data[range.start + 4..range.start + 8].copy_from_slice(&size.to_be_bytes());
}
for child in &tree.children {
tree_entry(out, child)?;
}

fn tree_entry(&mut self, tree: &extension::Tree) {
let path = [tree.name.as_slice(), b"\0"].concat();
Ok(())
}

let num_entries_ascii = tree.num_entries.to_string();
let num_children_ascii = tree.children.len().to_string();
fn end_of_index_entry(
out: &mut impl std::io::Write,
hash_kind: git_hash::Kind,
entries_offset: u32,
tree_offset: u32,
) -> Result<(), std::io::Error> {
let signature = b"EOIE";
let extension_size = 4 + hash_kind.len_in_bytes() as u32;

let mut hasher = git_features::hash::hasher(hash_kind);
let tree_size = tree_offset - 8 - entries_offset;
if tree_size > 0 {
hasher.update(b"TREE");
hasher.update(&tree_size.to_be_bytes());
}
let hash = hasher.digest();

self.push(path.as_slice(), "tree");
self.push(num_entries_ascii.as_bytes(), "tree");
self.push(b" ", "tree");
self.push(num_children_ascii.as_bytes(), "tree");
self.push(b"\n", "tree");
self.push(tree.id.as_bytes(), "tree");
out.write_all(signature)?;
out.write_all(&extension_size.to_be_bytes())?;
out.write_all(&entries_offset.to_be_bytes())?;
out.write_all(&hash)?;

for child in &tree.children {
self.tree_entry(child);
}
}
Ok(())
}

fn end_of_index(&mut self) {
match self.index_table.get("entries") {
Some(range) => {
let signature = b"EOIE";
let extension_size = 4 + self.options.hash_kind.len_in_bytes() as u32;
let offset: u32 = range.end as u32;

let mut hasher = git_features::hash::hasher(self.options.hash_kind);

match self.index_table.get("tree") {
Some(range) => {
hasher.update(b"TREE");
hasher.update(&self.data[range.start + 4..range.start + 8]);
}
None => {}
}
/// A wrapper around a mutably borrowed writer that carries a byte counter alongside it.
struct WriteCounter<'a, T> {
    /// Running byte total; a freshly constructed counter starts at zero.
    count: u32,
    /// The wrapped writer that receives the actual output.
    inner: &'a mut T,
}

impl<'a, T: std::io::Write> WriteCounter<'a, T> {
    /// Wrap `inner`, starting with a count of zero.
    pub fn new(inner: &'a mut T) -> Self {
        Self { count: 0, inner }
    }
}

let hash = hasher.digest();
impl<'a, T> std::io::Write for WriteCounter<'a, T>
where
T: std::io::Write,
{
fn write(&mut self, buf: &[u8]) -> std::io::Result<usize> {
let written = self.inner.write(buf)?;
self.count += written as u32;
Ok(written)
}

self.data.push_str(signature);
self.data.push_str(extension_size.to_be_bytes());
self.data.push_str(offset.to_be_bytes());
self.data.push_str(hash);
}
None => {}
}
fn flush(&mut self) -> std::io::Result<()> {
self.inner.flush()
}
}

0 comments on commit cddc2ca

Please sign in to comment.