Skip to content

Commit

Permalink
Make strong type for disk pointer
Browse files Browse the repository at this point in the history
  • Loading branch information
JayKickliter committed Nov 9, 2023
1 parent 23e8aec commit 8b6ab04
Show file tree
Hide file tree
Showing 4 changed files with 123 additions and 90 deletions.
120 changes: 79 additions & 41 deletions src/disktree/dptr.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,49 +4,87 @@ use std::{
mem::size_of,
};

/// The on-disk little-endian byte-representation of an offset.
pub(crate) type Dptr = [u8; 5];
pub(crate) const DPTR_SZ: usize = size_of::<Dptr>();
#[allow(clippy::cast_possible_truncation)]
pub(crate) const DPTR_MAX: u64 = 2_u64.pow(DPTR_SZ as u32 * 8) - 1;
pub(crate) const DPTR_NULL: u64 = 0;

/// Reads 5 bytes from disk and parses them as a little-endian `u64`.
pub(crate) fn read<R>(src: &mut R) -> Result<u64>
where
R: Read,
{
let mut buf = [0u8; size_of::<u64>()];
src.read_exact(&mut buf[..DPTR_SZ])?;
let dptr = u64::from_le_bytes(buf);
Ok(dptr)
/// A 'disk' pointer: a byte offset that is stored on disk in 5
/// little-endian bytes.
///
/// In memory the offset is held widened to a `u64`; only the low
/// `DPtr::DISK_REPR_SZ` bytes are ever read from or written to disk.
#[derive(Clone, Copy, Debug, Eq, Ord, PartialEq, PartialOrd)]
#[repr(transparent)]
pub(crate) struct DPtr(u64);

impl DPtr {
    /// Largest offset representable in `DISK_REPR_SZ` bytes.
    // The cast cannot truncate: `DISK_REPR_SZ` is the byte length of
    // `DiskRepr`, far below `u32::MAX`.
    #[allow(clippy::cast_possible_truncation)]
    const MAX: u64 = 2_u64.pow(Self::DISK_REPR_SZ as u32 * 8) - 1;
    /// Number of bytes one pointer occupies on disk.
    const DISK_REPR_SZ: usize = size_of::<DiskRepr>();
    /// Raw value of the null pointer.
    const NULL: u64 = 0;

    /// Returns `true` if this is the null pointer.
    pub(crate) const fn is_null(self) -> bool {
        self.0 == Self::NULL
    }

    /// Returns the null pointer.
    pub(crate) const fn null() -> DPtr {
        DPtr(Self::NULL)
    }

    /// Returns the number of bytes a pointer occupies on disk, as a `u64`.
    pub(crate) const fn size() -> u64 {
        Self::DISK_REPR_SZ as u64
    }

    /// Reads 5 bytes from `src` and parses them as a little-endian `u64`.
    ///
    /// # Errors
    ///
    /// Returns an error if `src` cannot supply all 5 bytes.
    pub(crate) fn read<R>(src: &mut R) -> Result<Self>
    where
        R: Read,
    {
        let mut buf = [0u8; size_of::<u64>()];
        src.read_exact(&mut buf[..Self::DISK_REPR_SZ])?;
        // Only the low `DISK_REPR_SZ` bytes are filled and the rest stay zero,
        // so the value can never exceed `Self::MAX`. Construct the pointer
        // directly, exactly as `read_n` does.
        Ok(DPtr(u64::from_le_bytes(buf)))
    }

    /// Reads 5 * `n` bytes from `src`, for up to n=7, and parses them as
    /// little-endian `u64`s.
    ///
    /// # Errors
    ///
    /// Returns an error if `src` cannot supply all 5 * `n` bytes.
    ///
    /// # Panics
    ///
    /// Panics if `n > 7`: the debug assertion fires in debug builds, and the
    /// slice bound check on the fixed-size buffer fires otherwise.
    pub(crate) fn read_n<R>(src: &mut R, n: usize) -> Result<Vec<DPtr>>
    where
        R: Read,
    {
        debug_assert!(n <= 7);
        let mut buf = [0u8; Self::DISK_REPR_SZ * 7];
        let raw = &mut buf[..Self::DISK_REPR_SZ * n];
        src.read_exact(raw)?;
        Ok(raw
            // `raw` is a whole multiple of `DISK_REPR_SZ`, so there is never
            // a remainder and every chunk has exactly the expected length.
            .chunks_exact(Self::DISK_REPR_SZ)
            .map(|chunk| {
                let mut widened = [0u8; size_of::<u64>()];
                widened[..Self::DISK_REPR_SZ].copy_from_slice(chunk);
                DPtr(u64::from_le_bytes(widened))
            })
            .collect())
    }

    /// Writes the 5 low-order bytes of the pointer to `dst`, little-endian.
    ///
    /// # Errors
    ///
    /// Returns an error if writing to `dst` fails.
    pub(crate) fn write<W>(self, dst: &mut W) -> Result
    where
        W: Write,
    {
        let buf = self.0.to_le_bytes();
        Ok(dst.write_all(&buf[..Self::DISK_REPR_SZ])?)
    }
}

/// Reads 5 * `n` bytes from disk, for up to n=7, and parses them as
/// little-endian `u64`s.
pub(crate) fn read_n<R>(src: &mut R, n: usize) -> Result<Vec<u64>>
where
R: Read,
{
assert!(n <= 7);
let mut buf = [0; DPTR_SZ * 7];
src.read_exact(&mut buf[..(DPTR_SZ * n)])?;
Ok(buf[..(DPTR_SZ * n)]
.chunks(DPTR_SZ)
.map(|chunk| {
let mut buf = [0u8; size_of::<u64>()];
buf[..DPTR_SZ].copy_from_slice(chunk);
u64::from_le_bytes(buf)
})
.collect())
impl From<DPtr> for u64 {
fn from(DPtr(raw): DPtr) -> u64 {
raw
}
}

/// Writes the 5 lower bytes of a `u64` to disk.
pub(crate) fn write<W>(dst: &mut W, dptr: u64) -> Result
where
W: Write,
{
assert!(dptr <= DPTR_MAX);
let buf = dptr.to_le_bytes();
Ok(dst.write_all(&buf[..DPTR_SZ])?)
// impl From<&DPtr> for u64 {
// fn from(DPtr(raw): &DPtr) -> u64 {
// *raw
// }
// }

/// Wraps a raw `u64` offset in a `DPtr`.
///
/// # Panics
///
/// Panics if `raw` is greater than `DPtr::MAX`, i.e. it does not fit in the
/// on-disk representation.
impl From<u64> for DPtr {
    fn from(raw: u64) -> DPtr {
        // Report the offending value and the limit so an out-of-range offset
        // can be diagnosed from the panic message alone.
        assert!(
            raw <= Self::MAX,
            "offset {:#x} exceeds the maximum disk pointer {:#x}",
            raw,
            Self::MAX
        );
        DPtr(raw)
    }
}

/// The on-disk byte representation of a `DPtr`: 5 little-endian bytes.
type DiskRepr = [u8; 5];
41 changes: 20 additions & 21 deletions src/disktree/iter.rs
Original file line number Diff line number Diff line change
@@ -1,62 +1,61 @@
use crate::{
cell::CellStack,
disktree::{dptr, tree::HDR_SZ, ReadVal},
disktree::{dptr::DPtr, tree::HDR_SZ, ReadVal},
error::Result,
};
use byteorder::ReadBytesExt;
use std::io::{Read, Seek, SeekFrom};

pub(crate) struct Iter<'a, R, F> {
cell_stack: CellStack,
curr: Option<(u8, u64)>,
curr: Option<(u8, DPtr)>,
rdr: &'a mut R,
recycle_bin: Vec<Vec<(u8, u64)>>,
stack: Vec<Vec<(u8, u64)>>,
recycle_bin: Vec<Vec<(u8, DPtr)>>,
stack: Vec<Vec<(u8, DPtr)>>,
f: F,
}

enum Node {
// File position for the first byte of value data.
Leaf(u64),
Leaf(DPtr),
// (H3 Cell digit, file position of child's node tag)
Parent(Vec<(u8, u64)>),
Parent(Vec<(u8, DPtr)>),
}

impl<'a, R, F> Iter<'a, R, F>
where
R: Seek + Read,
{
fn seek_to(&mut self, pos: u64) -> Result {
self.rdr.seek(SeekFrom::Start(pos))?;
Ok(())
fn seek_to(&mut self, dptr: DPtr) -> Result<DPtr> {
Ok(DPtr::from(self.rdr.seek(SeekFrom::Start(u64::from(dptr)))?))
}

fn read_base_nodes(rdr: &mut R) -> Result<Vec<(u8, u64)>> {
fn read_base_nodes(rdr: &mut R) -> Result<Vec<(u8, DPtr)>> {
let mut buf = Vec::with_capacity(122);
rdr.seek(SeekFrom::Start(HDR_SZ))?;
for digit in 0..122 {
let dptr = dptr::read(rdr)?;
if dptr != dptr::DPTR_NULL {
buf.push((digit, dptr))
let dptr = DPtr::read(rdr)?;
if !dptr.is_null() {
buf.push((digit, dptr));
}
}
buf.reverse();
Ok(buf)
}

// `dptr` is the position in the file of this node's tag.
fn read_node(&mut self, dptr: u64) -> Result<Node> {
self.seek_to(dptr)?;
fn read_node(&mut self, dptr: DPtr) -> Result<Node> {
let dptr = self.seek_to(dptr)?;
let node_tag = self.rdr.read_u8()?;
let base_pos = dptr + std::mem::size_of_val(&node_tag) as u64;
debug_assert_eq!(base_pos, self.rdr.stream_position().unwrap());
let base_pos = DPtr::from(u64::from(dptr) + std::mem::size_of_val(&node_tag) as u64);
debug_assert_eq!(base_pos, DPtr::from(self.rdr.stream_position().unwrap()));
assert!(node_tag == 0 || node_tag > 0b1000_0000);
if node_tag == 0 {
Ok(Node::Leaf(base_pos))
} else {
let mut children = self.node_buf();
let n_children = (node_tag & 0b0111_1111).count_ones() as usize;
let child_dptrs = dptr::read_n(&mut self.rdr, n_children)?;
let child_dptrs = DPtr::read_n(&mut self.rdr, n_children)?;
children.extend(
(0..7)
.rev()
Expand All @@ -71,7 +70,7 @@ where
/// allocates a new one.
///
/// See [`Iter::recycle_node_buf`].
fn node_buf(&mut self) -> Vec<(u8, u64)> {
fn node_buf(&mut self) -> Vec<(u8, DPtr)> {
let buf = self
.recycle_bin
.pop()
Expand All @@ -83,7 +82,7 @@ where
/// Accepts a used, empty, node buffer for later reuse.
///
/// See [`Iter::node_buf`].
fn recycle_node_buf(&mut self, buf: Vec<(u8, u64)>) {
fn recycle_node_buf(&mut self, buf: Vec<(u8, DPtr)>) {
debug_assert!(buf.is_empty());
self.recycle_bin.push(buf);
}
Expand Down Expand Up @@ -152,7 +151,7 @@ where
self.curr = Some((digit, dptr));
self.stack.push(children);
} else {
self.recycle_node_buf(children)
self.recycle_node_buf(children);
}
}
Ok(Node::Leaf(dptr)) => {
Expand Down
36 changes: 16 additions & 20 deletions src/disktree/tree.rs
Original file line number Diff line number Diff line change
@@ -1,10 +1,6 @@
use crate::{
digits::Digits,
disktree::{
dptr::{self, DPTR_NULL, DPTR_SZ},
iter::Iter,
ReadVal,
},
disktree::{dptr::DPtr, iter::Iter, ReadVal},
error::{Error, Result},
Cell,
};
Expand Down Expand Up @@ -52,10 +48,10 @@ impl<R: Read + Seek> DiskTree<R> {

/// Returns a reader pre-seeked to the value for cell, if present.
pub fn seek_to_cell(&mut self, cell: Cell) -> Result<Option<(Cell, &mut R)>> {
let base_cell_pos = Self::base_cell_offset(cell);
let base_cell_pos = Self::base_cell_dptr(cell);
self.seek_to_pos(base_cell_pos)?;
let node_dptr = dptr::read(&mut self.0)?;
if node_dptr == DPTR_NULL {
let node_dptr = DPtr::read(&mut self.0)?;
if node_dptr.is_null() {
return Ok(None);
}
let digits = Digits::new(cell);
Expand All @@ -68,10 +64,10 @@ impl<R: Read + Seek> DiskTree<R> {

/// Returns `true` if the tree fully contains `cell`.
pub fn contains(&mut self, cell: Cell) -> Result<bool> {
let base_cell_pos = Self::base_cell_offset(cell);
let base_cell_pos = Self::base_cell_dptr(cell);
self.seek_to_pos(base_cell_pos)?;
let node_dptr = dptr::read(&mut self.0)?;
if node_dptr == DPTR_NULL {
let node_dptr = DPtr::read(&mut self.0)?;
if node_dptr.is_null() {
return Ok(false);
}
let digits = Digits::new(cell);
Expand Down Expand Up @@ -99,7 +95,7 @@ impl<R: Read + Seek> DiskTree<R> {
fn _get(
&mut self,
res: u8,
node_dptr: u64,
node_dptr: DPtr,
cell: Cell,
mut digits: Digits,
) -> Result<Option<(Cell, u64)>> {
Expand All @@ -114,9 +110,9 @@ impl<R: Read + Seek> DiskTree<R> {
))),
(Some(digit), _) => {
let bit_cnt = (((node_tag as u16) << (8 - digit)) & 0xFF).count_ones();
self.seek_forward(bit_cnt as u64 * DPTR_SZ as u64)?;
let child_dptr = dptr::read(&mut self.0)?;
if child_dptr == DPTR_NULL {
self.seek_forward(u64::from(bit_cnt) * DPtr::size())?;
let child_dptr = DPtr::read(&mut self.0)?;
if child_dptr.is_null() {
Ok(None)
} else {
self._get(res + 1, child_dptr, cell, digits)
Expand All @@ -128,8 +124,8 @@ impl<R: Read + Seek> DiskTree<R> {
}
}

fn seek_to_pos(&mut self, pos: u64) -> Result {
self.0.seek(SeekFrom::Start(pos))?;
fn seek_to_pos(&mut self, dptr: DPtr) -> Result {
self.0.seek(SeekFrom::Start(u64::from(dptr)))?;
Ok(())
}

Expand All @@ -138,8 +134,8 @@ impl<R: Read + Seek> DiskTree<R> {
Ok(())
}

/// Returns the offset to a base (res0) cell dptr.
fn base_cell_offset(cell: Cell) -> u64 {
HDR_SZ + (DPTR_SZ as u64) * (cell.base() as u64)
/// Returns the DPtr to a base (res0) cell dptr.
fn base_cell_dptr(cell: Cell) -> DPtr {
DPtr::from(HDR_SZ + DPtr::size() * (cell.base() as u64))
}
}
16 changes: 8 additions & 8 deletions src/disktree/writer.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
use crate::{
compaction::Compactor,
disktree::dptr::{self, DPTR_NULL},
disktree::dptr::DPtr,
error::{Error, Result},
node::Node,
HexTreeMap,
Expand Down Expand Up @@ -38,31 +38,31 @@ impl<W: Write + Seek> DiskTreeWriter<W> {
// Node: | Dptr |
for base in hextree.nodes.iter() {
match base.as_deref() {
None => dptr::write(&mut self.0, DPTR_NULL)?,
None => DPtr::null().write(&mut self.0)?,
Some(node) => {
fixups.push((self.0.stream_position()?, node));
dptr::write(&mut self.0, DPTR_NULL)?
DPtr::null().write(&mut self.0)?
}
}
}

for (fixee_dptr, node) in fixups {
let node_dptr = self.write_node(node, &mut f)?;
self.0.seek(SeekFrom::Start(fixee_dptr))?;
dptr::write(&mut self.0, node_dptr)?;
node_dptr.write(&mut self.0)?;
}

Ok(())
}

/// Leaf: | 0_u8 | bincode bytes |
/// Parent: | 1_u8 | Dptr | Dptr | Dptr | Dptr | Dptr | Dptr | Dptr |
fn write_node<V, F, E>(&mut self, node: &Node<V>, f: &mut F) -> Result<u64>
fn write_node<V, F, E>(&mut self, node: &Node<V>, f: &mut F) -> Result<DPtr>
where
F: Fn(&mut W, &V) -> std::result::Result<(), E>,
E: std::error::Error + Sync + Send + 'static,
{
let node_pos = self.0.seek(SeekFrom::End(0))?;
let node_pos: DPtr = self.0.seek(SeekFrom::End(0))?.into();
let mut node_fixups: Vec<(u64, &Node<V>)> = Vec::new();
match node {
Node::Leaf(val) => {
Expand All @@ -86,7 +86,7 @@ impl<W: Write + Seek> DiskTreeWriter<W> {
// this node is empty.
tag = (tag >> 1) | 0b1000_0000;
node_fixups.push((self.0.stream_position()?, node));
dptr::write(&mut self.0, DPTR_NULL)?;
DPtr::null().write(&mut self.0)?;
}
}
}
Expand All @@ -101,7 +101,7 @@ impl<W: Write + Seek> DiskTreeWriter<W> {
for (fixee_dptr, node) in node_fixups {
let node_dptr = self.write_node(node, f)?;
self.0.seek(SeekFrom::Start(fixee_dptr))?;
dptr::write(&mut self.0, node_dptr)?;
node_dptr.write(&mut self.0)?;
}

Ok(node_pos)
Expand Down

0 comments on commit 8b6ab04

Please sign in to comment.