Skip to content

Commit

Permalink
Properly track safety invariants
Browse files Browse the repository at this point in the history
  • Loading branch information
Manishearth committed Jan 6, 2024
1 parent 17a81c7 commit 37f7472
Show file tree
Hide file tree
Showing 4 changed files with 91 additions and 33 deletions.
32 changes: 32 additions & 0 deletions gix-pack/src/cache/delta/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,10 @@ pub struct Item<T> {
/// Indices into our Tree's `items`, one for each pack entry that depends on us.
///
/// Limited to u32 as that's the maximum amount of objects in a pack.
// SAFETY INVARIANT:
// - only one Item in a tree may have any given child index. `future_child_offsets`
// should also not contain any indices found in `children`.\
// - These indices should be in bounds for tree.child_items
children: Vec<u32>,
}

Expand All @@ -46,13 +50,20 @@ enum NodeKind {
/// It does this by making the guarantee that iteration only happens once.
pub struct Tree<T> {
/// The root nodes, i.e. base objects
// SAFETY invariant: see Item.children
root_items: Vec<Item<T>>,
/// The child nodes, i.e. those that rely a base object, like ref and ofs delta objects
// SAFETY invariant: see Item.children
child_items: Vec<Item<T>>,
/// The last encountered node was either a root or a child.
last_seen: Option<NodeKind>,
/// Future child offsets, associating their offset into the pack with their index in the items array.
/// (parent_offset, child_index)
// SAFETY invariant:
// - None of these child indices should already have parents
// i.e. future_child_offsets[i].1 should never be also found
// in Item.children. Indices should be found here at most once.
// - These indices should be in bounds for tree.child_items.
future_child_offsets: Vec<(crate::data::Offset, usize)>,
}

Expand Down Expand Up @@ -94,6 +105,12 @@ impl<T> Tree<T> {
) -> Result<(), traverse::Error> {
if !self.future_child_offsets.is_empty() {
for (parent_offset, child_index) in self.future_child_offsets.drain(..) {
// SAFETY invariants upheld:
// - We are draining from future_child_offsets and adding to children, keeping things the same.
// - We can rely on the `future_child_offsets` invariant to be sure that `children` is
// not getting any indices that are already in use in `children` elsewhere
// - The indices are in bounds for child_items since they were in bounds for future_child_offsets,
// we can carry over the invariant.
if let Ok(i) = self.child_items.binary_search_by_key(&parent_offset, |i| i.offset) {
self.child_items[i].children.push(child_index as u32);
} else if let Ok(i) = self.root_items.binary_search_by_key(&parent_offset, |i| i.offset) {
Expand All @@ -120,6 +137,7 @@ impl<T> Tree<T> {
offset,
next_offset: 0,
data,
// SAFETY INVARIANT upheld: there are no children
children: Default::default(),
});
Ok(())
Expand All @@ -135,6 +153,19 @@ impl<T> Tree<T> {
self.assert_is_incrementing_and_update_next_offset(offset)?;

let next_child_index = self.child_items.len();
// SAFETY INVARIANT upheld:
// - This is one of two methods that modifies `children` and future_child_offsets. Out
// of the two, it is the only one that produces new indices in the system.
// - This always pushes next_child_index to *either* `children` or `future_child_offsets`,
// maintaining the cross-field invariant there.
// - This method will always push to child_items (at the end), incrementing
// future values of next_child_index. This means next_child_index is always
// unique for this method call.
// - As the only method producing new indices, this is the only time
// next_child_index will be added to children/future_child_offsets, upholding the invariant.
// - Since next_child_index will always be a valid index by the end of this method,
// this always produces valid in-bounds indices, upholding the bounds invariant.

if let Ok(i) = self.child_items.binary_search_by_key(&base_offset, |i| i.offset) {
self.child_items[i].children.push(next_child_index as u32);
} else if let Ok(i) = self.root_items.binary_search_by_key(&base_offset, |i| i.offset) {
Expand All @@ -148,6 +179,7 @@ impl<T> Tree<T> {
offset,
next_offset: 0,
data,
// SAFETY INVARIANT upheld: there are no children
children: Default::default(),
});
Ok(())
Expand Down
26 changes: 16 additions & 10 deletions gix-pack/src/cache/delta/traverse/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -154,16 +154,22 @@ where
},
{
move |node, state, threads_left, should_interrupt| {
resolve::deltas(
object_counter.clone(),
size_counter.clone(),
node,
state,
resolve_data,
object_hash.len_in_bytes(),
threads_left,
should_interrupt,
)
// SAFETY: This invariant is upheld since `child_items` and `node` come from the same Tree.
// This means we can rely on Tree's invariant that node.children will be the only `children` array in
// for nodes in this tree that will contain any of those children.
#[allow(unsafe_code)]
unsafe {
resolve::deltas(
object_counter.clone(),
size_counter.clone(),
node,
state,
resolve_data,
object_hash.len_in_bytes(),
threads_left,
should_interrupt,
)
}
}
},
|| (!should_interrupt.load(Ordering::Relaxed)).then(|| std::time::Duration::from_millis(50)),
Expand Down
45 changes: 31 additions & 14 deletions gix-pack/src/cache/delta/traverse/resolve.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,17 +19,25 @@ mod root {

/// An item returned by `iter_root_chunks`, allowing access to the `data` stored alongside nodes in a [`Tree`].
pub(crate) struct Node<'a, T: Send> {
// SAFETY INVARIANT: see Node::new(). That function is the only one used
// to create or modify these fields.
item: &'a mut Item<T>,
child_items: &'a ItemSliceSync<'a, Item<T>>,
}

impl<'a, T: Send> Node<'a, T> {
/// SAFETY: The child_items must be unique among between users of the `ItemSliceSync`.
/// SAFETY: `item.children` must uniquely reference elements in child_items that no other currently alive
/// item does. All child_items must also have unique children, unless the child_item is itself `item`,
/// in which case no other live item should reference it in its `item.children`.
///
/// This safety invariant can be reliably upheld by making sure `item` comes from a Tree and `child_items`
/// was constructed using that Tree's child_items. This works since Tree has this invariant as well: all
/// child_items are referenced at most once (really, exactly once) by a node in the tree.
///
/// Note that this invariant is a bit more relaxed than that on `deltas()`, because this function can be called
/// for traversal within a child item, which happens in into_child_iter()
#[allow(unsafe_code)]
pub(in crate::cache::delta::traverse) unsafe fn new(
item: &'a mut Item<T>,
child_items: &'a ItemSliceSync<'a, Item<T>>,
) -> Self {
pub(super) unsafe fn new(item: &'a mut Item<T>, child_items: &'a ItemSliceSync<'a, Item<T>>) -> Self {
Node { item, child_items }
}
}
Expand Down Expand Up @@ -60,18 +68,20 @@ mod root {
/// Children are `Node`s referring to pack entries whose base object is this pack entry.
pub fn into_child_iter(self) -> impl Iterator<Item = Node<'a, T>> + 'a {
let children = self.child_items;
// SAFETY: The index is a valid index into the children array.
// SAFETY: The resulting mutable pointer cannot be yielded by any other node.
#[allow(unsafe_code)]
self.item.children.iter().map(move |&index| Node {
item: unsafe { children.get_mut(index as usize) },
child_items: children,
self.item.children.iter().map(move |&index| {
// SAFETY: Due to the invariant on new(), we can rely on these indices
// being unique.
let item = unsafe { children.get_mut(index as usize) };
// SAFETY: Since every child_item is also required to uphold the uniqueness guarantee,
// creating a Node with one of the child_items that we are allowed access to is still fine.
unsafe { Node::new(item, child_items) }
})
}
}
}

pub(in crate::cache::delta::traverse) struct State<'items, F, MBFN, T: Send> {
pub(super) struct State<'items, F, MBFN, T: Send> {
pub delta_bytes: Vec<u8>,
pub fully_resolved_delta_bytes: Vec<u8>,
pub progress: Box<dyn Progress>,
Expand All @@ -80,8 +90,15 @@ pub(in crate::cache::delta::traverse) struct State<'items, F, MBFN, T: Send> {
pub child_items: &'items ItemSliceSync<'items, Item<T>>,
}

#[allow(clippy::too_many_arguments)]
pub(in crate::cache::delta::traverse) fn deltas<T, F, MBFN, E, R>(
/// SAFETY: `item.children` must uniquely reference elements in child_items that no other currently alive
/// item does. All child_items must also have unique children.
///
/// This safety invariant can be reliably upheld by making sure `item` comes from a Tree and `child_items`
/// was constructed using that Tree's child_items. This works since Tree has this invariant as well: all
/// child_items are referenced at most once (really, exactly once) by a node in the tree.
#[allow(clippy::too_many_arguments, unsafe_code)]
#[deny(unsafe_op_in_unsafe_fn)] // this is a big function, require unsafe for the one small unsafe op we have
pub(super) unsafe fn deltas<T, F, MBFN, E, R>(
objects: gix_features::progress::StepShared,
size: gix_features::progress::StepShared,
item: &mut Item<T>,
Expand Down Expand Up @@ -121,7 +138,7 @@ where
// each node is a base, and its children always start out as deltas which become a base after applying them.
// These will be pushed onto our stack until all are processed
let root_level = 0;
// SAFETY: The child items are unique, as `item` is the root of a tree of dependent child items.
// SAFETY: This invariant is required from the caller
#[allow(unsafe_code)]
let root_node = unsafe { root::Node::new(item, child_items) };
let mut nodes: Vec<_> = vec![(root_level, root_node)];
Expand Down
21 changes: 12 additions & 9 deletions gix-pack/src/cache/delta/traverse/util.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,21 +16,21 @@ use std::marker::PhantomData;
/// more than one base. And that's what one would really have to do for two threads to encounter the same child.
///
/// Thus I believe it's impossible for this data structure to end up in a place where it violates its assumption.
pub(in crate::cache::delta::traverse) struct ItemSliceSync<'a, T>
pub(super) struct ItemSliceSync<'a, T>
where
T: Send,
{
items: *mut T,
#[cfg(debug_assertions)]
len: usize,
phantom: PhantomData<&'a T>,
phantom: PhantomData<&'a mut T>,
}

impl<'a, T> ItemSliceSync<'a, T>
where
T: Send,
{
pub(in crate::cache::delta::traverse) fn new(items: &'a mut [T]) -> Self {
pub(super) fn new(items: &'a mut [T]) -> Self {
ItemSliceSync {
items: items.as_mut_ptr(),
#[cfg(debug_assertions)]
Expand All @@ -41,21 +41,24 @@ where

// SAFETY: The index must point into the slice and must not be reused concurrently.
#[allow(unsafe_code)]
pub(in crate::cache::delta::traverse) unsafe fn get_mut(&self, index: usize) -> &'a mut T {
pub(super) unsafe fn get_mut(&self, index: usize) -> &'a mut T {
#[cfg(debug_assertions)]
if index >= self.len {
panic!("index out of bounds: the len is {} but the index is {index}", self.len);
}
// SAFETY: The index is within the slice
// SAFETY: The children array is alive by the 'a lifetime.
// SAFETY:
// - The index is within the slice (required by documentation)
// - We have mutable access to `items` as ensured by Self::new()
// - This is the only method on this type giving access to items
// - The documentation requires that this access is unique
unsafe { &mut *self.items.add(index) }
}
}

// SAFETY: T is `Send`, and we only use the pointer for creating new pointers.
// SAFETY: This is logically an &mut T, which is Send if T is Send
// (note: this is different from &T, which also needs T: Sync)
#[allow(unsafe_code)]
unsafe impl<T> Send for ItemSliceSync<'_, T> where T: Send {}
// SAFETY: T is `Send`, and as long as the user follows the contract of
// `get_mut()`, we only ever access one T at a time.
// SAFETY: This is logically an &mut T, which is Sync if T is Sync
#[allow(unsafe_code)]
unsafe impl<T> Sync for ItemSliceSync<'_, T> where T: Send {}

0 comments on commit 37f7472

Please sign in to comment.