diff --git a/src/disktree/tree.rs b/src/disktree/tree.rs index 6b521bb..9f5e3a1 100755 --- a/src/disktree/tree.rs +++ b/src/disktree/tree.rs @@ -8,7 +8,7 @@ use byteorder::ReadBytesExt; use memmap::MmapOptions; use std::{ fs::File, - io::{Cursor, Read, Seek, SeekFrom}, + io::{Cursor, Read, Seek, SeekFrom, Write}, marker::Send, ops::Range, path::Path, @@ -60,6 +60,25 @@ impl DiskTreeMap { } } + /// Merge several `DiskTreeMap`s into a single disktree written to `wtr`. + /// + /// Each map must contain exactly one of: + /// - one res0 node + /// - any number of nodes which are all children of exactly one res0 node. + pub fn merge(wtr: W, subtrees: &[DiskTreeMap], f: Option) -> Result + where + W: Write + Seek, + F: FnMut(&mut dyn Write, &&[u8]) -> std::result::Result<(), E>, + E: std::error::Error + Sync + Send + 'static, + { + if let Some(f) = f { + crate::disktree::writer::DiskTreeWriter::new(wtr).merge(subtrees, f) + } else { + let f = |wtr: &mut dyn Write, val: &&[u8]| wtr.write_all(val); + crate::disktree::writer::DiskTreeWriter::new(wtr).merge(subtrees, f) + } + } + /// Returns `(Cell, &[u8])`, if present. 
pub fn get(&self, cell: Cell) -> Result> { let base_cell_pos = Self::base_cell_dptr(cell); diff --git a/src/disktree/writer.rs b/src/disktree/writer.rs index 11cfd9e..62adcc0 100644 --- a/src/disktree/writer.rs +++ b/src/disktree/writer.rs @@ -1,12 +1,12 @@ use crate::{ compaction::Compactor, - disktree::{dptr::Dp, dtseek::DtSeek, tree::HDR_MAGIC, varint}, + disktree::{dptr::Dp, dtseek::DtSeek, tree::HDR_MAGIC, varint, DiskTreeMap}, error::{Error, Result}, node::Node, HexTreeMap, }; use byteorder::WriteBytesExt; -use std::io::Write; +use std::io::{Cursor, Write}; impl HexTreeMap where @@ -72,6 +72,76 @@ where Ok(()) } + pub fn merge(&mut self, subtrees: &[DiskTreeMap], mut f: F) -> Result + where + F: FnMut(&mut dyn Write, &&[u8]) -> std::result::Result<(), E>, + E: std::error::Error + Sync + Send + 'static, + { + { + // Write magic string + self.wtr.write_all(HDR_MAGIC)?; + // Write version field + const VERSION: u8 = 0; + self.wtr.write_u8(0xFE - VERSION)?; + } + + { + // We currently need to read the incoming disktrees into memory + // before writing them out. This creates a lifetime problem + // where we have to do res0 fixups immediately after writing a + // res0 node. Therefore, to ensure offsets are correct, we pad + // out the file with 122 zeroed res0 slots first, so node data + // never overwrites not-yet-written res0 base offsets. 
+ let pos = self.pos()?; + self.wtr.write_all(&vec![0; 122 * Dp::size()])?; + self.seek(pos)?; + } + + let root_disk_trees = { + let mut root_disk_trees: Box<[Option<&DiskTreeMap>]> = (0..122).map(|_| None).collect(); + for disktree in subtrees { + let tree_roots = crate::disktree::iter::Iter::read_base_nodes(&mut Cursor::new( + (*disktree.0).as_ref(), + ))?; + if !tree_roots.is_empty() { + assert!(root_disk_trees[tree_roots[0].0 as usize].is_none()); + root_disk_trees[tree_roots[0].0 as usize] = Some(disktree); + } + } + root_disk_trees + }; + + for (idx, maybe_disktree) in root_disk_trees.iter().enumerate() { + match maybe_disktree { + None => Dp::null().write(&mut self.wtr)?, + Some(disktree) => { + let mut fixups: Vec<(Dp, &Node<&[u8]>)> = Vec::new(); + let mut hextree: HexTreeMap<&[u8]> = HexTreeMap::new(); + for res in disktree.iter()? { + let (cell, val) = res?; + hextree.insert(cell, val); + } + if let Some(node) = hextree + .nodes + .get(idx) + .expect("we already determined this node should exist") + .as_deref() + { + fixups.push((self.pos()?, node)); + Dp::null().write(&mut self.wtr)? + } + for (fixee_dptr, node) in fixups { + let node_dptr = self.write_node(node, &mut f)?; + self.seek(fixee_dptr)?; + node_dptr.write(&mut self.wtr)?; + } + } + } + } + + Ok(()) + } + fn write_node(&mut self, node: &Node, f: &mut F) -> Result where F: FnMut(&mut dyn Write, &V) -> std::result::Result<(), E>,