Skip to content

Commit

Permalink
fix BTree creating shared references that are not entirely in-bounds
Browse files Browse the repository at this point in the history
  • Loading branch information
RalfJung committed Dec 9, 2018
1 parent 1ccb5b2 commit 0e70c26
Showing 1 changed file with 86 additions and 29 deletions.
115 changes: 86 additions & 29 deletions src/liballoc/collections/btree/node.rs
Expand Up @@ -58,9 +58,34 @@ pub const CAPACITY: usize = 2 * B - 1;
/// these should always be put behind pointers, and specifically behind `BoxedNode` in the owned
/// case.
///
/// We put the metadata first so that its position is the same for every `K` and `V`, in order
/// to statically allocate a single dummy node to avoid allocations. This struct is `repr(C)` to
/// prevent them from being reordered.
/// We have a separate type for the header and rely on it matching the prefix of `LeafNode`, in
/// order to statically allocate a single dummy node to avoid allocations. This struct is
/// `repr(C)` to prevent them from being reordered. `LeafNode` does not just contain a
/// `NodeHeader` because we do not want unnecessary padding between `len` and the keys.
/// Crucially, `NodeHeader` can be safely transmuted to different K and V. (This is exploited
/// by `as_header`.)
/// See `into_key_slice` for an explanation of K2. K2 cannot be safely transmuted around
/// because the size of `NodeHeader` depends on its alignment!
#[repr(C)]
struct NodeHeader<K, V, K2 = ()> {
/// We use `*const` as opposed to `*mut` so as to be covariant in `K` and `V`.
/// This either points to an actual node or is null.
parent: *const InternalNode<K, V>,

/// This node's index into the parent node's `edges` array.
/// `*node.parent.edges[node.parent_idx]` should be the same thing as `node`.
/// This is only guaranteed to be initialized when `parent` is non-null.
parent_idx: MaybeUninit<u16>,

/// The number of keys and values this node stores.
///
/// This next to `parent_idx` to encourage the compiler to join `len` and
/// `parent_idx` into the same 32-bit word, reducing space overhead.
len: u16,

/// See `into_key_slice`.
keys_start: [K2; 0],
}
#[repr(C)]
struct LeafNode<K, V> {
/// We use `*const` as opposed to `*mut` so as to be covariant in `K` and `V`.
Expand Down Expand Up @@ -98,24 +123,25 @@ impl<K, V> LeafNode<K, V> {
len: 0
}
}
}

impl<K, V> NodeHeader<K, V> {
fn is_shared_root(&self) -> bool {
ptr::eq(self, &EMPTY_ROOT_NODE as *const _ as *const _)
}
}

// We need to implement Sync here in order to make a static instance.
unsafe impl Sync for LeafNode<(), ()> {}
unsafe impl Sync for NodeHeader<(), ()> {}

// An empty node used as a placeholder for the root node, to avoid allocations.
// We use () in order to save space, since no operation on an empty tree will
// We use just a header in order to save space, since no operation on an empty tree will
// ever take a pointer past the first key.
static EMPTY_ROOT_NODE: LeafNode<(), ()> = LeafNode {
static EMPTY_ROOT_NODE: NodeHeader<(), ()> = NodeHeader {
parent: ptr::null(),
parent_idx: MaybeUninit::uninitialized(),
len: 0,
keys: MaybeUninit::uninitialized(),
vals: MaybeUninit::uninitialized(),
keys_start: [],
};

/// The underlying representation of internal nodes. As with `LeafNode`s, these should be hidden
Expand Down Expand Up @@ -306,6 +332,11 @@ impl<K, V> Root<K, V> {
/// `Leaf`, the `NodeRef` points to a leaf node, when this is `Internal` the
/// `NodeRef` points to an internal node, and when this is `LeafOrInternal` the
/// `NodeRef` could be pointing to either type of node.
/// Note that in case of a leaf node, this might still be the shared root! Only turn
/// this into a `LeafNode` reference if you know it is not a root! Shared references
/// must be dereferencable *for the entire size of their pointee*, so `&InternalNode`
/// pointing to the shared root is UB.
/// Turning this into a `NodeHeader` is always safe.
pub struct NodeRef<BorrowType, K, V, Type> {
height: usize,
node: NonNull<LeafNode<K, V>>,
Expand Down Expand Up @@ -352,7 +383,7 @@ impl<BorrowType, K, V, Type> NodeRef<BorrowType, K, V, Type> {
/// Finds the length of the node. This is the number of keys or values. In an
/// internal node, the number of edges is `len() + 1`.
pub fn len(&self) -> usize {
self.as_leaf().len as usize
self.as_header().len as usize
}

/// Returns the height of this node in the whole tree. Zero height denotes the
Expand Down Expand Up @@ -382,14 +413,19 @@ impl<BorrowType, K, V, Type> NodeRef<BorrowType, K, V, Type> {
}
}

fn as_leaf(&self) -> &LeafNode<K, V> {
/// Assert that this is indeed a proper leaf node, and not the shared root.
unsafe fn as_leaf(&self) -> &LeafNode<K, V> {
self.node.as_ref()
}

fn as_header(&self) -> &NodeHeader<K, V> {
unsafe {
self.node.as_ref()
&*(self.node.as_ptr() as *const NodeHeader<K, V>)
}
}

pub fn is_shared_root(&self) -> bool {
self.as_leaf().is_shared_root()
self.as_header().is_shared_root()
}

pub fn keys(&self) -> &[K] {
Expand Down Expand Up @@ -418,7 +454,7 @@ impl<BorrowType, K, V, Type> NodeRef<BorrowType, K, V, Type> {
>,
Self
> {
let parent_as_leaf = self.as_leaf().parent as *const LeafNode<K, V>;
let parent_as_leaf = self.as_header().parent as *const LeafNode<K, V>;
if let Some(non_zero) = NonNull::new(parent_as_leaf as *mut _) {
Ok(Handle {
node: NodeRef {
Expand All @@ -427,7 +463,7 @@ impl<BorrowType, K, V, Type> NodeRef<BorrowType, K, V, Type> {
root: self.root,
_marker: PhantomData
},
idx: unsafe { usize::from(*self.as_leaf().parent_idx.get_ref()) },
idx: unsafe { usize::from(*self.as_header().parent_idx.get_ref()) },
_marker: PhantomData
})
} else {
Expand Down Expand Up @@ -535,9 +571,8 @@ impl<'a, K, V, Type> NodeRef<marker::Mut<'a>, K, V, Type> {
}

fn as_leaf_mut(&mut self) -> &mut LeafNode<K, V> {
unsafe {
self.node.as_mut()
}
// We are mutable, so we cannot be the root, so this is okay.
unsafe { self.node.as_mut() }
}

fn keys_mut(&mut self) -> &mut [K] {
Expand All @@ -551,28 +586,50 @@ impl<'a, K, V, Type> NodeRef<marker::Mut<'a>, K, V, Type> {

impl<'a, K: 'a, V: 'a, Type> NodeRef<marker::Immut<'a>, K, V, Type> {
fn into_key_slice(self) -> &'a [K] {
// When taking a pointer to the keys, if our key has a stricter
// alignment requirement than the shared root does, then the pointer
// would be out of bounds, which LLVM assumes will not happen. If the
// alignment is more strict, we need to make an empty slice that doesn't
// use an out of bounds pointer.
// We have to be careful here because we might be pointing to the shared root.
// In that case, we must not create an `&LeafNode`. We could just return
// an empty slice whenever the lenght is 0 (this includes the shared root),
// but we want to avoid that run-time check.
// Instead, we create a slice pointing into the node whenever possible.
// We can sometimes do this even for the shared root, as the slice will be
// empty. We cannot *always* do this because if the type is too highly
// aligned, the offset of `keys` in a "full node" might be outside the bounds
// of the header! So we do an alignment check first, that will be
// evaluated at compile-time, and only do any run-time check in the rare case
// that the alignment is very big.
if mem::align_of::<K>() > mem::align_of::<LeafNode<(), ()>>() && self.is_shared_root() {
&[]
} else {
// Here either it's not the root, or the alignment is less strict,
// in which case the keys pointer will point "one-past-the-end" of
// the node, which is allowed by LLVM.
// Thanks to the alignment check above, we know that `keys` will be
// in-bounds of some allocation even if this is the shared root!
// (We might be one-past-the-end, but that is allowed by LLVM.)
// Getting the pointer is tricky though. `NodeHeader` does not have a `keys`
// field because we want its size to not depend on the alignment of `K`
// (needed becuase `as_header` should be safe). We cannot call `as_leaf`
// because we might be the shared root.
// For this reason, `NodeHeader` has this `K2` parameter (that's usually `()`
// and hence just adds a size-0-align-1 field, not affecting layout).
// We know that we can transmute `NodeHeader<K, V, ()>` to `NodeHeader<K, V, K>`
// because we did the alignment check above, and hence `NodeHeader<K, V, K>`
// is not bigger than `NodeHeader<K, V, ()>`! Then we can use `NodeHeader<K, V, K>`
// to compute the pointer where the keys start.
// This entire hack will become unnecessary once
// <https://github.com/rust-lang/rfcs/pull/2582> lands, then we can just take a raw
// pointer to the `keys` field of `*const InternalNode<K, V>`.

// This is a non-debug-assert because it can be completely compile-time evaluated.
assert!(mem::size_of::<NodeHeader<K, V>>() == mem::size_of::<NodeHeader<K, V, K>>());
let header = self.as_header() as *const _ as *const NodeHeader<K, V, K>;
let keys = unsafe { &(*header).keys_start as *const _ as *const K };
unsafe {
slice::from_raw_parts(
self.as_leaf().keys.as_ptr() as *const K,
self.len()
)
slice::from_raw_parts(keys, self.len())
}
}
}

fn into_val_slice(self) -> &'a [V] {
debug_assert!(!self.is_shared_root());
// We cannot be the root, so `as_leaf` is okay
unsafe {
slice::from_raw_parts(
self.as_leaf().vals.as_ptr() as *const V,
Expand Down

0 comments on commit 0e70c26

Please sign in to comment.