Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Refactor Dptr #39

Merged
merged 1 commit into from
Feb 16, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
64 changes: 51 additions & 13 deletions src/disktree/dptr.rs
Original file line number Diff line number Diff line change
@@ -1,15 +1,17 @@
use crate::Result;
use std::{
convert::TryFrom,
io::{Read, Write},
mem::size_of,
ops::Add,
};

/// A 'disk' pointer.
#[derive(Clone, Copy, Debug, Eq, Ord, PartialEq, PartialOrd)]
#[repr(transparent)]
pub(crate) struct Dptr(u64);
pub(crate) struct Dp(u64);

impl Dptr {
impl Dp {
#[allow(clippy::cast_possible_truncation)]
const MAX: u64 = 2_u64.pow(Self::DISK_REPR_SZ as u32 * 8) - 1;
const DISK_REPR_SZ: usize = 5;
Expand All @@ -19,12 +21,12 @@ impl Dptr {
self.0 == Self::NULL
}

pub(crate) const fn null() -> Dptr {
Dptr(Self::NULL)
pub(crate) const fn null() -> Dp {
Dp(Self::NULL)
}

pub(crate) const fn size() -> u64 {
Self::DISK_REPR_SZ as u64
pub(crate) const fn size() -> usize {
Self::DISK_REPR_SZ
}

/// Reads 5 bytes from disk and parses them as a little-endian `u64`.
Expand All @@ -40,7 +42,7 @@ impl Dptr {

/// Reads 5 * `n` bytes from disk, for up to n=7, and parses them as
/// little-endian `u64`s.
pub(crate) fn read_n<R>(src: &mut R, n: usize) -> Result<Vec<Dptr>>
pub(crate) fn read_n<R>(src: &mut R, n: usize) -> Result<Vec<Dp>>
where
R: Read,
{
Expand All @@ -54,7 +56,7 @@ impl Dptr {
buf[..Self::DISK_REPR_SZ].copy_from_slice(chunk);
u64::from_le_bytes(buf)
})
.map(Dptr)
.map(Dp::from)
.collect())
}

Expand All @@ -68,15 +70,51 @@ impl Dptr {
}
}

impl From<Dptr> for u64 {
fn from(Dptr(raw): Dptr) -> u64 {
impl Add<usize> for Dp {
type Output = Dp;

fn add(self, rhs: usize) -> Dp {
Dp::from(self.0 + rhs as u64)
}
}

impl Add<u64> for Dp {
    type Output = Dp;

    /// Returns a new `Dp` offset forward from `self` by `rhs`.
    ///
    /// # Panics
    ///
    /// Panics if the sum overflows `u64`, or (inside `Dp::from`) if the sum
    /// is larger than the biggest value a `Dp` may hold.
    fn add(self, rhs: u64) -> Dp {
        // Plain `+` wraps silently in release builds; a wrapped sum could
        // land back inside the valid range and pass the check in `Dp::from`.
        let sum = self
            .0
            .checked_add(rhs)
            .expect("Dp addition overflowed u64");
        Dp::from(sum)
    }
}

impl Add<u32> for Dp {
type Output = Dp;

fn add(self, rhs: u32) -> Dp {
Dp::from(self.0 + rhs as u64)
}
}

impl From<Dp> for u64 {
fn from(Dp(raw): Dp) -> u64 {
raw
}
}

impl From<u64> for Dptr {
fn from(raw: u64) -> Dptr {
impl From<u64> for Dp {
fn from(raw: u64) -> Dp {
assert!(raw <= Self::MAX);
Dptr(raw)
Dp(raw)
}
}

impl From<usize> for Dp {
fn from(raw: usize) -> Dp {
Dp::from(raw as u64)
}
}

impl From<Dp> for usize {
    /// Narrows a `Dp` to `usize`.
    ///
    /// # Panics
    ///
    /// Panics if the wrapped `u64` cannot be represented as `usize`, which
    /// can only happen on targets whose `usize` is narrower than 64 bits.
    fn from(Dp(raw): Dp) -> usize {
        // State the violated invariant instead of a bare `unwrap` so a
        // failure on a narrow target is immediately diagnosable.
        usize::try_from(raw).expect("Dp value does not fit in usize on this target")
    }
}
26 changes: 26 additions & 0 deletions src/disktree/dtseek.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
use crate::disktree::dptr::Dp;

/// Seeking operations expressed in terms of [`Dp`] instead of raw `u64`
/// offsets.
///
/// This module provides a blanket implementation for every type that
/// implements `std::io::Seek`.
pub(crate) trait DtSeek {
/// Returns the current position as a [`Dp`].
fn pos(&mut self) -> std::io::Result<Dp>;

/// Moves to the position `dp` and returns the resulting position.
fn seek(&mut self, dp: Dp) -> std::io::Result<Dp>;

/// Moves to the end and returns the resulting position (the blanket
/// implementation seeks to `SeekFrom::End(0)`).
fn fast_forward(&mut self) -> std::io::Result<Dp>;
}

/// Blanket implementation: any `std::io::Seek` type can be positioned with
/// [`Dp`] values.
///
/// Every returned offset goes through `Dp::from`, which asserts that the
/// offset is within the range a `Dp` may hold.
impl<S> DtSeek for S
where
    S: std::io::Seek,
{
    /// Reports the stream's current offset.
    fn pos(&mut self) -> std::io::Result<Dp> {
        let offset = std::io::Seek::stream_position(self)?;
        Ok(Dp::from(offset))
    }

    /// Seeks to `dp`, measured from the start of the stream.
    fn seek(&mut self, dp: Dp) -> std::io::Result<Dp> {
        let target = std::io::SeekFrom::Start(u64::from(dp));
        let offset = std::io::Seek::seek(self, target)?;
        Ok(Dp::from(offset))
    }

    /// Seeks to the end of the stream.
    fn fast_forward(&mut self) -> std::io::Result<Dp> {
        let target = std::io::SeekFrom::End(0);
        let offset = std::io::Seek::seek(self, target)?;
        Ok(Dp::from(offset))
    }
}
56 changes: 32 additions & 24 deletions src/disktree/iter.rs
Original file line number Diff line number Diff line change
@@ -1,40 +1,34 @@
use crate::{
cell::CellStack,
disktree::{dptr::Dptr, tree::HDR_SZ, varint},
error::Result,
disktree::{dptr::Dp, dtseek::DtSeek, tree::HDR_SZ, varint},
error::{Error, Result},
Cell,
};
use byteorder::ReadBytesExt;
use std::io::{Cursor, Seek, SeekFrom};
use std::io::Cursor;

pub(crate) struct Iter<'a> {
cell_stack: CellStack,
curr_node: Option<(u8, Dptr)>,
curr_node: Option<(u8, Dp)>,
disktree_buf: &'a [u8],
disktree_csr: Cursor<&'a [u8]>,
node_stack: Vec<Vec<(u8, Dptr)>>,
recycle_bin: Vec<Vec<(u8, Dptr)>>,
node_stack: Vec<Vec<(u8, Dp)>>,
recycle_bin: Vec<Vec<(u8, Dp)>>,
}

enum Node {
// File position for the first byte of value data.
Leaf(Dptr),
Leaf(Dp),
// (H3 Cell digit, file position of child's node tag)
Parent(Vec<(u8, Dptr)>),
Parent(Vec<(u8, Dp)>),
}

impl<'a> Iter<'a> {
fn seek_to(&mut self, dptr: Dptr) -> Result<Dptr> {
Ok(Dptr::from(
self.disktree_csr.seek(SeekFrom::Start(u64::from(dptr)))?,
))
}

fn read_base_nodes(rdr: &mut Cursor<&[u8]>) -> Result<Vec<(u8, Dptr)>> {
pub(crate) fn read_base_nodes(rdr: &mut Cursor<&[u8]>) -> Result<Vec<(u8, Dp)>> {
let mut buf = Vec::with_capacity(122);
rdr.seek(SeekFrom::Start(HDR_SZ))?;
rdr.seek(HDR_SZ.into())?;
for digit in 0..122 {
let dptr = Dptr::read(rdr)?;
let dptr = Dp::read(rdr)?;
if !dptr.is_null() {
buf.push((digit, dptr));
}
Expand All @@ -44,15 +38,15 @@ impl<'a> Iter<'a> {
}

// `dptr` is the position in the file of this node's tag.
fn read_node(&mut self, dptr: Dptr) -> Result<Node> {
let dptr = self.seek_to(dptr)?;
fn read_node(&mut self, dptr: Dp) -> Result<Node> {
let dptr = self.seek(dptr)?;
let node_tag = self.disktree_csr.read_u8()?;
if 0 == node_tag & 0b1000_0000 {
Ok(Node::Leaf(dptr))
} else {
let mut children = self.node_buf();
let n_children = (node_tag & 0b0111_1111).count_ones() as usize;
let child_dptrs = Dptr::read_n(&mut self.disktree_csr, n_children)?;
let child_dptrs = Dp::read_n(&mut self.disktree_csr, n_children)?;
children.extend(
(0..7)
.rev()
Expand All @@ -67,7 +61,7 @@ impl<'a> Iter<'a> {
/// allocates a new one.
///
/// See [`Iter::recycle_node_buf`].
fn node_buf(&mut self) -> Vec<(u8, Dptr)> {
fn node_buf(&mut self) -> Vec<(u8, Dp)> {
let buf = self
.recycle_bin
.pop()
Expand All @@ -79,7 +73,7 @@ impl<'a> Iter<'a> {
/// Accepts a used, empty, node buffer for later reuse.
///
/// See [`Iter::node_buf`].
fn recycle_node_buf(&mut self, buf: Vec<(u8, Dptr)>) {
fn recycle_node_buf(&mut self, buf: Vec<(u8, Dp)>) {
debug_assert!(buf.is_empty());
self.recycle_bin.push(buf);
}
Expand Down Expand Up @@ -150,9 +144,9 @@ impl<'a> Iterator for Iter<'a> {
}
Ok(Node::Leaf(dptr)) => {
self.curr_node = None;
if let Err(e) = self.seek_to(dptr) {
if let Err(e) = self.seek(dptr) {
self.stop_yielding();
return Some(Err(e));
return Some(Err(Error::from(e)));
}
match varint::read(&mut self.disktree_csr) {
Err(e) => {
Expand All @@ -174,3 +168,17 @@ impl<'a> Iterator for Iter<'a> {
None
}
}

impl<'a> DtSeek for Iter<'a> {
fn pos(&mut self) -> std::io::Result<Dp> {
self.disktree_csr.pos()
}

fn seek(&mut self, dp: Dp) -> std::io::Result<Dp> {
self.disktree_csr.seek(dp)
}

fn fast_forward(&mut self) -> std::io::Result<Dp> {
self.disktree_csr.fast_forward()
}
}
1 change: 1 addition & 0 deletions src/disktree/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ compile_warning!("disktree may silently fail on non-64bit systems");
pub use tree::DiskTreeMap;

mod dptr;
mod dtseek;
mod iter;
mod node;
mod tree;
Expand Down
26 changes: 11 additions & 15 deletions src/disktree/node.rs
Original file line number Diff line number Diff line change
@@ -1,46 +1,42 @@
use crate::{
disktree::{dptr::Dptr, varint},
disktree::{dptr::Dp, dtseek::DtSeek, varint},
error::Result,
};
use byteorder::ReadBytesExt;
use std::{
io::{Read, Seek},
mem::size_of,
ops::Range,
};
use std::{io::Read, mem::size_of, ops::Range};

// Enough bytes to read node tag and 7 child dptrs.
const NODE_BUF_SZ: usize = size_of::<u8>() + 7 * Dptr::size() as usize;
const NODE_BUF_SZ: usize = size_of::<u8>() + 7 * Dp::size();

pub(crate) enum Node {
// value_begin..value_end
Leaf(Range<usize>),
// (H3 Cell digit, file position of child's node tag)
Parent([Option<Dptr>; 7]),
Parent([Option<Dp>; 7]),
}

impl Node {
pub(crate) fn read<R>(rdr: &mut R) -> Result<Node>
where
R: Seek + Read,
R: Read + DtSeek,
{
let start_pos = rdr.stream_position()?;
let start_pos = rdr.pos()?;
let mut buf = [0u8; NODE_BUF_SZ];
let bytes_read = rdr.read(&mut buf)?;
let buf_rdr = &mut &buf[..bytes_read];
let node_tag = buf_rdr.read_u8()?;
if 0 == node_tag & 0b1000_0000 {
let (val_len, n_read) = varint::read(&mut &buf[..bytes_read])?;
let begin = (start_pos + n_read) as usize;
let end = begin + val_len as usize;
Ok(Node::Leaf(begin..end))
let begin = start_pos + n_read;
let end = begin + val_len;
Ok(Node::Leaf(usize::from(begin)..usize::from(end)))
} else {
let mut children: [Option<Dptr>; 7] = [None, None, None, None, None, None, None];
let mut children: [Option<Dp>; 7] = [None, None, None, None, None, None, None];
for (_digit, child) in (0..7)
.zip(children.iter_mut())
.filter(|(digit, _)| node_tag & (1 << digit) != 0)
{
*child = Some(Dptr::read(buf_rdr)?);
*child = Some(Dp::read(buf_rdr)?);
}
Ok(Node::Parent(children))
}
Expand Down
14 changes: 7 additions & 7 deletions src/disktree/tree.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
use crate::{
digits::Digits,
disktree::{dptr::Dptr, iter::Iter, node::Node},
disktree::{dptr::Dp, iter::Iter, node::Node},
error::Result,
Cell, Error,
};
Expand All @@ -15,10 +15,10 @@ use std::{
};

pub(crate) const HDR_MAGIC: &[u8] = b"hextree\0";
pub(crate) const HDR_SZ: u64 = HDR_MAGIC.len() as u64 + 1;
pub(crate) const HDR_SZ: usize = HDR_MAGIC.len() + 1;

/// An on-disk hextree map.
pub struct DiskTreeMap(Box<dyn AsRef<[u8]> + Send + Sync + 'static>);
pub struct DiskTreeMap(pub(crate) Box<dyn AsRef<[u8]> + Send + Sync + 'static>);

impl DiskTreeMap {
/// Opens a `DiskTree` at the specified path.
Expand Down Expand Up @@ -65,7 +65,7 @@ impl DiskTreeMap {
let base_cell_pos = Self::base_cell_dptr(cell);
let mut csr = Cursor::new((*self.0).as_ref());
csr.seek(SeekFrom::Start(base_cell_pos.into()))?;
let node_dptr = Dptr::read(&mut csr)?;
let node_dptr = Dp::read(&mut csr)?;
if node_dptr.is_null() {
return Ok(None);
}
Expand All @@ -92,7 +92,7 @@ impl DiskTreeMap {
fn _get(
csr: &mut Cursor<&[u8]>,
res: u8,
node_dptr: Dptr,
node_dptr: Dp,
cell: Cell,
mut digits: Digits,
) -> Result<Option<(Cell, Range<usize>)>> {
Expand All @@ -115,7 +115,7 @@ impl DiskTreeMap {
}

/// Returns the `Dp` locating the stored dptr of `cell`'s base (res0) cell.
fn base_cell_dptr(cell: Cell) -> Dptr {
Dptr::from(HDR_SZ + Dptr::size() * (cell.base() as u64))
fn base_cell_dptr(cell: Cell) -> Dp {
Dp::from(HDR_SZ + Dp::size() * cell.base() as usize)
}
}
Loading
Loading