diff --git a/src/libstd/collections/hashmap/bench.rs b/src/libstd/collections/hashmap/bench.rs index 66d97ba044847..21bbb38f48936 100644 --- a/src/libstd/collections/hashmap/bench.rs +++ b/src/libstd/collections/hashmap/bench.rs @@ -38,7 +38,7 @@ fn new_insert_drop(b : &mut Bencher) { } #[bench] -fn insert(b: &mut Bencher) { +fn grow_by_insertion(b: &mut Bencher) { use super::HashMap; let mut m = HashMap::new(); diff --git a/src/libstd/collections/hashmap/map.rs b/src/libstd/collections/hashmap/map.rs index 7a3779a91a080..a50c6a59f7e04 100644 --- a/src/libstd/collections/hashmap/map.rs +++ b/src/libstd/collections/hashmap/map.rs @@ -16,19 +16,27 @@ use collections::{Collection, Mutable, MutableSet, Map, MutableMap}; use default::Default; use fmt::Show; use fmt; -use RandomSipHasher; -use hash::{Hash, Hasher}; -use iter::{Iterator, FromIterator, Extendable, range}; +use hash::{Hash, Hasher, RandomSipHasher}; +use iter::{Iterator, FromIterator, Extendable}; use iter; use mem::replace; use num; -use ops::Deref; +use ops::{Deref, DerefMut}; use option::{Some, None, Option}; use result::{Ok, Err}; use ops::Index; -use super::table::{BucketWithTable, FullBucketImm, RawTable, FullBucket, FullBucketMut, Bucket}; use super::table; +use super::table::{ + Bucket, + Empty, + Full, + FullBucket, + FullBucketImm, + FullBucketMut, + RawTable, + SafeHash +}; static INITIAL_LOG2_CAP: uint = 5; pub static INITIAL_CAPACITY: uint = 1 << INITIAL_LOG2_CAP; // 2^5 @@ -36,8 +44,9 @@ pub static INITIAL_CAPACITY: uint = 1 << INITIAL_LOG2_CAP; // 2^5 /// The default behavior of HashMap implements a load factor of 90.9%. /// This behavior is characterized by the following conditions: /// -/// - if `size * 1.1 < cap < size * 4` then shouldn't resize -/// - if `cap < minimum_capacity * 2` then shouldn't shrink +/// - if size > 0.909 * capacity: grow +/// - if size < 0.25 * capacity: shrink (if this won't bring capacity lower +/// than the minimum) #[deriving(Clone)] struct DefaultResizePolicy { /// Doubled minimal capacity. The capacity must never drop below @@ -55,7 +64,12 @@ impl DefaultResizePolicy { #[inline] fn capacity_range(&self, new_size: uint) -> (uint, uint) { - ((new_size * 11) / 10, max(new_size << 3, self.minimum_capacity2)) + // Here, we are rephrasing the logic by specifying the ranges: + // + // - if `size * 1.1 < cap < size * 4`: don't resize + // - if `cap < minimum_capacity * 2`: don't shrink + // - otherwise, resize accordingly + ((new_size * 11) / 10, max(new_size << 2, self.minimum_capacity2)) } #[inline] @@ -65,9 +79,9 @@ impl DefaultResizePolicy { } // The main performance trick in this hashmap is called Robin Hood Hashing. -// It gains its excellent performance from one crucial operation: +// It gains its excellent performance from one essential operation: // -// If an insertion collides with an existing element, and that elements +// If an insertion collides with an existing element, and that element's // "probe distance" (how far away the element is from its ideal location) // is higher than how far we've already probed, swap the elements. // @@ -94,6 +108,15 @@ impl DefaultResizePolicy { // α^3, etc. Therefore, the odds of colliding k times is α^k. The odds of NOT // colliding after k tries is 1-α^k. // +// The paper from 1986 cited below mentions an implementation which keeps track +// of the distance-to-initial-bucket histogram. This approach is not suitable +// for modern architectures because it requires maintaining an internal data +// structure. 
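To make the stealing rule concrete, here is a self-contained toy model of Robin Hood insertion. It uses current Rust syntax rather than the pre-1.0 dialect of this patch, and the `displacement` helper plus the `(ideal, value)` bucket representation are invented for illustration; the steal condition, however, mirrors the `ib < robin_ib` comparison in `insert_or_replace_with` later in this patch.

```rust
// Toy Robin Hood insertion: each occupied bucket records the entry's ideal
// (initial) bucket index and a payload value.
fn displacement(idx: usize, ideal: usize, cap: usize) -> usize {
    // Wrap-around distance from the ideal bucket to the current index.
    (idx + cap - ideal) & (cap - 1)
}

fn robin_hood_insert(table: &mut [Option<(usize, u64)>], ideal: usize, val: u64) {
    let cap = table.len();
    let (mut ideal, mut val) = (ideal, val);
    let mut idx = ideal & (cap - 1);
    loop {
        let occupant = table[idx];
        match occupant {
            None => {
                // Found a hole: the entry we are carrying finally lands here.
                table[idx] = Some((ideal, val));
                return;
            }
            Some((their_ideal, their_val)) => {
                // Steal the spot if the occupant is closer to its ideal bucket
                // ("richer") than the entry we are carrying; the displaced
                // occupant then keeps probing in our place.
                if displacement(idx, their_ideal, cap) < displacement(idx, ideal, cap) {
                    table[idx] = Some((ideal, val));
                    ideal = their_ideal;
                    val = their_val;
                }
                idx = (idx + 1) & (cap - 1);
            }
        }
    }
}

fn main() {
    let mut t: Vec<Option<(usize, u64)>> = vec![None; 8];
    robin_hood_insert(&mut t, 3, 10); // sits in its ideal bucket 3
    robin_hood_insert(&mut t, 4, 20); // sits in its ideal bucket 4
    robin_hood_insert(&mut t, 3, 11); // probes to 4, steals it, pushes 20 to 5
    assert_eq!(t[3], Some((3, 10)));
    assert_eq!(t[4], Some((3, 11)));
    assert_eq!(t[5], Some((4, 20)));
}
```

The effect is exactly the variance reduction described above: no entry ever sits much farther from its ideal bucket than its neighbours do.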
This allows very good first guesses, but we are most concerned +// with guessing entire cache lines, not individual indexes. Furthermore, array +// accesses are no longer linear and in one direction, as we have now. There +// is also memory and cache pressure that this would entail that would be very +// difficult to properly see in a microbenchmark. +// // Future Improvements (FIXME!) // ============================ // @@ -106,15 +129,6 @@ impl DefaultResizePolicy { // Future Optimizations (FIXME!) // ============================= // -// The paper cited below mentions an implementation which keeps track of the -// distance-to-initial-bucket histogram. I'm suspicious of this approach because -// it requires maintaining an internal map. If this map were replaced with a -// hashmap, it would be faster, but now our data structure is self-referential -// and blows up. Also, this allows very good first guesses, but array accesses -// are no longer linear and in one direction, as we have now. There is also -// memory and cache pressure that this map would entail that would be very -// difficult to properly see in a microbenchmark. -// // Another possible design choice that I made without any real reason is // parameterizing the raw table over keys and values. Technically, all we need // is the size and alignment of keys and values, and the code should be just as @@ -125,12 +139,56 @@ impl DefaultResizePolicy { // This would definitely be an avenue worth exploring if people start complaining // about the size of rust executables. // -// There's also an "optimization" that has been omitted regarding how the -// hashtable allocates. The vector type has set the expectation that a hashtable -// which never has an element inserted should not allocate. I'm suspicious of -// implementing this for hashtables, because supporting it has no performance -// benefit over using an `Option>`, and is significantly more -// complicated. +// Annotate exceedingly likely branches in `table::make_hash` +// and `search_hashed_generic` to reduce instruction cache pressure +// and mispredictions once it becomes possible (blocked on issue #11092). +// +// Shrinking the table could simply reallocate in place after moving buckets +// to the first half. +// +// The growth algorithm (fragment of the Proof of Correctness) +// -------------------- +// +// The growth algorithm is basically a fast path of the naive reinsertion- +// during-resize algorithm. Other paths should never be taken. +// +// Consider growing a robin hood hashtable of capacity n. Normally, we do this +// by allocating a new table of capacity `2n`, and then individually reinsert +// each element in the old table into the new one. This guarantees that the +// new table is a valid robin hood hashtable with all the desired statistical +// properties. Remark that the order we reinsert the elements in should not +// matter. For simplicity and efficiency, we will consider only linear +// reinsertions, which consist of reinserting all elements in the old table +// into the new one by increasing order of index. However we will not be +// starting our reinsertions from index 0 in general. If we start from index +// i, for the purpose of reinsertion we will consider all elements with real +// index j < i to have virtual index n + j. +// +// Our hash generation scheme consists of generating a 64-bit hash and +// truncating the most significant bits. When moving to the new table, we +// simply introduce a new bit to the front of the hash. 
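The "new bit" observation is just masking the same hash with one extra low-order bit. A tiny check with arbitrary hash values (current Rust syntax):

```rust
fn main() {
    let n: u64 = 32; // old capacity (a power of two)
    for &h in &[0x1234_5678_9abc_def0u64, 0x0fed_cba9_8765_4321] {
        let old_ideal = h & (n - 1);     // ideal index in the table of capacity n
        let new_ideal = h & (2 * n - 1); // ideal index after the capacity doubles
        // The new ideal index is either the old one, or the old one plus n,
        // depending on the single extra hash bit that the doubled mask exposes.
        assert!(new_ideal == old_ideal || new_ideal == old_ideal + n);
    }
}
```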
Therefore, if an +// elements has ideal index i in the old table, it can have one of two ideal +// locations in the new table. If the new bit is 0, then the new ideal index +// is i. If the new bit is 1, then the new ideal index is n + i. Intutively, +// we are producing two independent tables of size n, and for each element we +// independently choose which table to insert it into with equal probability. +// However the rather than wrapping around themselves on overflowing their +// indexes, the first table overflows into the first, and the first into the +// second. Visually, our new table will look something like: +// +// [yy_xxx_xxxx_xxx|xx_yyy_yyyy_yyy] +// +// Where x's are elements inserted into the first table, y's are elements +// inserted into the second, and _'s are empty sections. We now define a few +// key concepts that we will use later. Note that this is a very abstract +// perspective of the table. A real resized table would be at least half +// empty. +// +// Theorem: A linear robin hood reinsertion from the first ideal element +// produces identical results to a linear naive reinsertion from the same +// element. +// +// FIXME(Gankro, pczarn): review the proof and put it all in a separate doc.rs /// A hash map implementation which uses linear probing with Robin /// Hood bucket stealing. @@ -219,27 +277,31 @@ pub struct HashMap { // All hashes are keyed on these values, to prevent hash collision attacks. hasher: H, - table: table::RawTable, + table: RawTable, // We keep this at the end since it might as well have tail padding. resize_policy: DefaultResizePolicy, } /// Search for a pre-hashed key. -fn search_hashed_generic>>(table: M, hash: &table::SafeHash, is_match: |&K| -> bool) - -> Option> { +fn search_hashed_generic>>(table: M, + hash: &SafeHash, + is_match: |&K| -> bool) + -> SearchResult { let size = table.size(); let mut probe = Bucket::new(table, hash); let ib = probe.index(); while probe.index() != ib + size { let full = match probe.peek() { - table::Empty(_) => return None, // hit an empty bucket - table::Full(b) => b + Empty(b) => return TableRef(b.into_table()), // hit an empty bucket + Full(b) => b }; if full.distance() + ib < full.index() { - return None; + // We can finish the search early if we hit any bucket + // with a lower distance to initial bucket than we've probed. + return TableRef(full.into_table()); } // If the hash doesn't match, it can't be this one.. @@ -251,65 +313,149 @@ fn search_hashed_generic>>(table: M, hash: &table: // If the key doesn't match, it can't be this one.. if matched { - return Some(full); + return FoundExisting(full); } } probe = full.next(); } - None + TableRef(probe.into_table()) } -fn search_hashed>>(table: M, hash: &table::SafeHash, k: &K) - -> Option> { +fn search_hashed>>(table: M, hash: &SafeHash, k: &K) + -> SearchResult { search_hashed_generic(table, hash, |k_| *k == *k_) } fn pop_internal(starting_bucket: FullBucketMut) -> V { - let size = { - let table = starting_bucket.table(); - table.size() - }; let (empty, _k, retval) = starting_bucket.take(); let mut gap = match empty.gap_peek() { Some(b) => b, None => return retval }; - // COMPILER error! wrong enum optimization. sets ptr to 0 - for _ in range(0, size) { - if gap.full().distance() != 0 { - gap = match gap.shift() { - Some(b) => b, - None => return retval + while gap.full().distance() != 0 { + gap = match gap.shift() { + Some(b) => b, + None => break + }; + } + + // Now we've done all our shifting. Return the value we grabbed earlier. 
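The shifting loop above is what is sometimes called backward-shift deletion: after the entry is taken out, the following entries are pulled back one bucket each until a hole, or an entry that already sits in its ideal bucket, is reached. A compact stand-alone model of just this step (current Rust syntax; the `Slot` type and the function names are invented for illustration):

```rust
#[derive(Clone, Copy, Debug, PartialEq)]
struct Slot {
    ideal: usize, // the entry's initial bucket
}

fn displacement(idx: usize, ideal: usize, cap: usize) -> usize {
    (idx + cap - ideal) & (cap - 1)
}

fn backward_shift_delete(table: &mut [Option<Slot>], mut gap: usize) {
    let cap = table.len();
    table[gap] = None; // the bucket whose entry was just taken out
    loop {
        let next = (gap + 1) & (cap - 1);
        let entry = table[next];
        match entry {
            // Stop at a hole, or at an entry already in its ideal bucket.
            None => break,
            Some(s) if displacement(next, s.ideal, cap) == 0 => break,
            Some(s) => {
                // Slide the entry one bucket back, shrinking its displacement.
                table[gap] = Some(s);
                table[next] = None;
                gap = next;
            }
        }
    }
}

fn main() {
    // The entries at indexes 2, 3 and 4 all wanted bucket 2;
    // remove the one at index 2 and watch the others slide back.
    let mut t = [None, None, Some(Slot { ideal: 2 }),
                 Some(Slot { ideal: 2 }), Some(Slot { ideal: 2 }),
                 None, None, None];
    backward_shift_delete(&mut t, 2);
    assert_eq!(t[2], Some(Slot { ideal: 2 }));
    assert_eq!(t[3], Some(Slot { ideal: 2 }));
    assert_eq!(t[4], None);
}
```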
+ return retval; +} + +/// Perform robin hood bucket stealing at the given `bucket`. You must +/// also pass the position of that bucket's initial bucket so we don't have +/// to recalculate it. +/// +/// `hash`, `k`, and `v` are the elements to "robin hood" into the hashtable. +fn robin_hood<'a, K: 'a, V: 'a>(mut bucket: FullBucketMut<'a, K, V>, + mut ib: uint, + mut hash: SafeHash, + mut k: K, + mut v: V) + -> &'a mut V { + let starting_index = bucket.index(); + let size = { + let table = bucket.table(); // FIXME "lifetime too short". + table.size() + }; + // There can be at most `size - dib` buckets to displace, because + // in the worst case, there are `size` elements and we already are + // `distance` buckets away from the initial one. + let idx_end = starting_index + size - bucket.distance(); + + loop { + let (old_hash, old_key, old_val) = bucket.replace(hash, k, v); + loop { + let probe = bucket.next(); + assert!(probe.index() != idx_end); + + let full_bucket = match probe.peek() { + table::Empty(bucket) => { + // Found a hole! + let b = bucket.put(old_hash, old_key, old_val); + // Now that it's stolen, just read the value's pointer + // right out of the table! + let (_, v) = Bucket::at_index(b.into_table(), starting_index).peek() + .expect_full() + .into_mut_refs(); + return v; + }, + table::Full(bucket) => bucket }; - continue; + + let probe_ib = full_bucket.index() - full_bucket.distance(); + + bucket = full_bucket; + + // Robin hood! Steal the spot. + if ib < probe_ib { + ib = probe_ib; + hash = old_hash; + k = old_key; + v = old_val; + break; + } + } + } +} + +/// A result that works like Option> but preserves +/// the reference that grants us access to the table in any case. +enum SearchResult { + // This is an entry that holds the given key: + FoundExisting(FullBucket), + + // There was no such entry. The reference is given back: + TableRef(M) +} + +impl SearchResult { + fn into_option(self) -> Option> { + match self { + FoundExisting(bucket) => Some(bucket), + TableRef(_) => None } + } +} - break; +/// A newtyped mutable reference to the hashmap that allows e.g. Deref to be +/// implemented without making changes to the visible interface of HashMap. +/// Used internally because it's accepted by the search functions above. +struct MapMutRef<'a, K: 'a, V: 'a, H: 'a> { + map_ref: &'a mut HashMap +} + +impl<'a, K, V, H> Deref> for MapMutRef<'a, K, V, H> { + fn deref(&self) -> &RawTable { + &self.map_ref.table } +} - // Now we're done all our shifting. Return the value we grabbed - // earlier. - return retval; +impl<'a, K, V, H> DerefMut> for MapMutRef<'a, K, V, H> { + fn deref_mut(&mut self) -> &mut RawTable { + &mut self.map_ref.table + } } impl, V, S, H: Hasher> HashMap { - fn make_hash>(&self, x: &X) -> table::SafeHash { + fn make_hash>(&self, x: &X) -> SafeHash { table::make_hash(&self.hasher, x) } fn search_equiv<'a, Q: Hash + Equiv>(&'a self, q: &Q) -> Option> { let hash = self.make_hash(q); - search_hashed_generic(&self.table, &hash, |k| q.equiv(k)) + search_hashed_generic(&self.table, &hash, |k| q.equiv(k)).into_option() } fn search_equiv_mut<'a, Q: Hash + Equiv>(&'a mut self, q: &Q) -> Option> { let hash = self.make_hash(q); - search_hashed_generic(&mut self.table, &hash, |k| q.equiv(k)) + search_hashed_generic(&mut self.table, &hash, |k| q.equiv(k)).into_option() } /// Search for a key, yielding the index if it's found in the hashtable. @@ -317,25 +463,29 @@ impl, V, S, H: Hasher> HashMap { /// search_hashed. 
fn search<'a>(&'a self, k: &K) -> Option> { let hash = self.make_hash(k); - search_hashed(&self.table, &hash, k) + search_hashed(&self.table, &hash, k).into_option() } fn search_mut<'a>(&'a mut self, k: &K) -> Option> { let hash = self.make_hash(k); - search_hashed(&mut self.table, &hash, k) + search_hashed(&mut self.table, &hash, k).into_option() } - fn insert_hashed_ordered(&mut self, hash: table::SafeHash, k: K, v: V) { + // The caller should ensure that invariants by Robin Hood Hashing hold. + fn insert_hashed_ordered(&mut self, hash: SafeHash, k: K, v: V) { let cap = self.table.capacity(); let mut buckets = Bucket::new(&mut self.table, &hash); let ib = buckets.index(); + while buckets.index() != ib + cap { + // We don't need to compare hashes for value swap. + // Not even DIBs for Robin Hood. buckets = match buckets.peek() { - table::Empty(empty) => { + Empty(empty) => { empty.put(hash, k, v); return; } - table::Full(b) => b.into_bucket() + Full(b) => b.into_bucket() }; buckets.next(); } @@ -361,8 +511,8 @@ impl, V, S, H: Hasher> Mutable for HashMap { while buckets.index() != cap { buckets = match buckets.peek() { - table::Empty(b) => b.next(), - table::Full(full) => { + Empty(b) => b.next(), + Full(full) => { let (b, _, _) = full.take(); b.next() } @@ -401,7 +551,7 @@ impl, V, S, H: Hasher> MutableMap for HashMap self.make_some_room(potential_new_size); let mut retval = None; - self.insert_or_replace_with(hash, k, v, |val_ref, val| { + self.insert_or_replace_with(hash, k, v, |_, val_ref, val| { retval = Some(replace(val_ref, val)); }); retval @@ -472,7 +622,7 @@ impl, V, S, H: Hasher> HashMap { HashMap { hasher: hasher, resize_policy: DefaultResizePolicy::new(INITIAL_CAPACITY), - table: table::RawTable::new(0), + table: RawTable::new(0), } } @@ -500,7 +650,7 @@ impl, V, S, H: Hasher> HashMap { HashMap { hasher: hasher, resize_policy: DefaultResizePolicy::new(cap), - table: table::RawTable::new(cap), + table: RawTable::new(cap), } } @@ -537,49 +687,78 @@ impl, V, S, H: Hasher> HashMap { assert!(self.table.size() <= new_capacity); assert!(num::is_power_of_two(new_capacity)); - let mut old_table = replace(&mut self.table, table::RawTable::new(new_capacity)); + let mut old_table = replace(&mut self.table, RawTable::new(new_capacity)); let old_size = old_table.size(); - if old_table.capacity() == 0 { + if old_table.capacity() == 0 || old_table.size() == 0 { return; } if new_capacity < old_table.capacity() { + // Shrink the table. Naive algorithm for resizing: for (h, k, v) in old_table.move_iter() { self.insert_hashed_nocheck(h, k, v); } } else { + // Grow the table. + // Specialization of the other branch. let mut bucket = Bucket::first(&mut old_table); + // "So a few of the first shall be last: for many be called, + // but few chosen." + // + // We'll most likely encounter a few buckets at the beginning that + // have their initial buckets near the end of the table. They were + // placed at the beginning as the probe wrapped around the table + // during insertion. We must skip forward to a bucket that won't + // get reinserted too early and won't unfairly steal others spot. + // This eliminates the need for robin hood. loop { - match bucket.peek() { - table::Full(full) => { + bucket = match bucket.peek() { + Full(full) => { if full.distance() == 0 { + // This bucket occupies its ideal spot. + // It indicates the start of another "cluster". bucket = full.into_bucket(); break; } - bucket = full.next(); + // Leaving this bucket in the last cluster for later. 
+ full.into_bucket() } - table::Empty(b) => { - bucket = b.next(); - break; + Empty(b) => { + // Encountered a hole between clusters. + b.into_bucket() } }; + bucket.next(); } + // This is how the buckets might be laid out in memory: + // ($ marks an initialized bucket) + // ________________ + // |$$$_$$$$$$_$$$$$| + // + // But we've skipped the entire initial cluster of buckets + // and will continue iteration in this order: + // ________________ + // |$$$$$$_$$$$$ + // ^ wrap around once end is reached + // ________________ + // $$$_____________| + // ^ exit once table.size == 0 loop { bucket = match bucket.peek() { - table::Full(bucket) => { - { - let t = bucket.table(); - if t.size() == 0 { break } - } + Full(bucket) => { let h = bucket.hash(); let (b, k, v) = bucket.take(); self.insert_hashed_ordered(h, k, v); + { + let t = b.table(); // FIXME "lifetime too short". + if t.size() == 0 { break } + }; b.into_bucket() } - table::Empty(b) => b.into_bucket() + Empty(b) => b.into_bucket() }; bucket.next(); } @@ -612,41 +791,43 @@ impl, V, S, H: Hasher> HashMap { /// /// If the key already exists, the hashtable will be returned untouched /// and a reference to the existing element will be returned. - fn insert_hashed_nocheck<'a>( - &'a mut self, hash: table::SafeHash, k: K, v: V) -> &'a mut V { - self.insert_or_replace_with(hash, k, v, |_, _| ()) + fn insert_hashed_nocheck(&mut self, hash: SafeHash, k: K, v: V) -> &mut V { + self.insert_or_replace_with(hash, k, v, |_, _, _| ()) } - fn insert_or_replace_with<'a>( - &'a mut self, hash: table::SafeHash, k: K, v: V, - found_existing: |&mut V, V| - ) -> &'a mut V { - + fn insert_or_replace_with<'a>(&'a mut self, + hash: SafeHash, + k: K, + v: V, + found_existing: |&mut K, &mut V, V|) + -> &'a mut V { // Worst case, we'll find one empty bucket among `size + 1` buckets. let size = self.table.size(); - let mut rbucket = Bucket::new(&mut self.table, &hash); - let ib = rbucket.index(); + let mut probe = Bucket::new(&mut self.table, &hash); + let ib = probe.index(); loop { - let mut bucket = match rbucket.peek() { - table::Empty(bucket) => { + let mut bucket = match probe.peek() { + Empty(bucket) => { // Found a hole! let bucket = bucket.put(hash, k, v); let (_, val) = bucket.into_mut_refs(); return val; }, - table::Full(bucket) => bucket + Full(bucket) => bucket }; if bucket.hash() == hash { - let (bucket_k, bucket_v) = bucket.read_mut(); - // FIXME #12147 the conditional return confuses - // borrowck if we return bucket_v directly - let bv: *mut V = bucket_v; - if k == *bucket_k { + let found_match = { + let (bucket_k, _) = bucket.read_mut(); + k == *bucket_k + }; + if found_match { + let (bucket_k, bucket_v) = bucket.into_mut_refs(); + debug_assert!(k == *bucket_k); // Key already exists. Get its reference. - found_existing(bucket_v, v); - return unsafe {&mut *bv}; + found_existing(bucket_k, bucket_v, v); + return bucket_v; } } @@ -654,53 +835,18 @@ impl, V, S, H: Hasher> HashMap { if (ib as int) < robin_ib { // Found a luckier bucket than me. Better steal his spot. - let (mut hash, mut k, mut v) = bucket.replace(hash, k, v); - let robin_index = bucket.index(); - let mut robin_ib = robin_ib as uint; - let mut rbucket = bucket.next(); - loop { - let mut bucket = match rbucket.peek() { - table::Empty(bucket) => { - // Found a hole! - let b = bucket.put(hash, k, v); - // Now that it's stolen, just read the value's pointer - // right out of the table! 
- let (_, v) = match Bucket::at_index(b.into_table(), robin_index).peek() { - table::Full(b) => b.into_mut_refs(), - _ => fail!() - }; - return v; - }, - table::Full(bucket) => bucket - }; - - let probe_ib = bucket.index() - bucket.distance(); - - // Robin hood! Steal the spot. - if robin_ib < probe_ib { - robin_ib = probe_ib; - let (old_hash, old_key, old_val) = bucket.replace(hash, k, v); - hash = old_hash; - k = old_key; - v = old_val; - } - rbucket = bucket.next(); - if rbucket.index() == ib + size + 1 { - fail!("HashMap fatal error: 100% load factor?") - } - } - } - rbucket = bucket.next(); - if rbucket.index() == ib + size + 1 { - fail!("Internal HashMap error: Out of space.") + return robin_hood(bucket, robin_ib as uint, hash, k, v); } + + probe = bucket.next(); + assert!(probe.index() != ib + size + 1); } } /// Inserts an element which has already been hashed, returning a reference /// to that element inside the hashtable. This is more efficient that using /// `insert`, since the key will not be rehashed. - fn insert_hashed<'a>(&'a mut self, hash: table::SafeHash, k: K, v: V) -> &'a mut V { + fn insert_hashed(&mut self, hash: SafeHash, k: K, v: V) -> &mut V { let potential_new_size = self.table.size() + 1; self.make_some_room(potential_new_size); self.insert_hashed_nocheck(hash, k, v) @@ -721,7 +867,7 @@ impl, V, S, H: Hasher> HashMap { /// // Find the existing key /// assert_eq!(*map.find_or_insert("a", -2), 1); /// ``` - pub fn find_or_insert<'a>(&'a mut self, k: K, v: V) -> &'a mut V { + pub fn find_or_insert(&mut self, k: K, v: V) -> &mut V { self.find_with_or_insert_with(k, v, |_k, _v, _a| (), |_k, a| a) } @@ -768,7 +914,11 @@ impl, V, S, H: Hasher> HashMap { v: V, f: |&K, &mut V|) -> &'a mut V { - self.find_with_or_insert_with(k, v, |k, v, _a| f(k, v), |_k, a| a) + let potential_new_size = self.table.size() + 1; + self.make_some_room(potential_new_size); + + let hash = self.make_hash(&k); + self.insert_or_replace_with(hash, k, v, |kref, vref, _v| f(kref, vref)) } /// Modify and return the value corresponding to the key in the map, or @@ -820,21 +970,22 @@ impl, V, S, H: Hasher> HashMap { a: A, found: |&K, &mut V, A|, not_found: |&K, A| -> V) - -> &'a mut V { + -> &'a mut V + { let hash = self.make_hash(&k); - { - match search_hashed(&mut self.table, &hash, &k) { - Some(bucket) => { - let (_, v_ref) = bucket.into_mut_refs(); - found(&k, v_ref, a); - return v_ref; - } - _ => { - } - }; + let this = MapMutRef { map_ref: self }; + + match search_hashed(this, &hash, &k) { + FoundExisting(bucket) => { + let (_, v_ref) = bucket.into_mut_refs(); + found(&k, v_ref, a); + v_ref + } + TableRef(this) => { + let v = not_found(&k, a); + this.map_ref.insert_hashed(hash, k, v) + } } - let v = not_found(&k, a); - self.insert_hashed(hash, k, v) } /// Retrieves a value for the given key. 
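For callers, `find_with_or_insert_with` covers roughly the ground that the entry API covers in today's standard library. Purely as a usage-level comparison (current `std`, not part of this patch), the "mutate if present, construct if absent" pattern looks like this:

```rust
use std::collections::HashMap;

fn main() {
    let mut map: HashMap<&str, Vec<i32>> = HashMap::new();
    // "not_found" path: create the value; "found" path: mutate it in place.
    map.entry("key").or_insert_with(Vec::new).push(1);
    map.entry("key").or_insert_with(Vec::new).push(2);
    assert_eq!(map["key"], vec![1, 2]);
}
```

Either way, the key is hashed and located exactly once per call, which is the point of routing both paths through a single search.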
@@ -996,7 +1147,7 @@ impl, V, S, H: Hasher> HashMap { /// println!("{}", key); /// } /// ``` - pub fn keys<'a>(&'a self) -> Keys<'a, K, V> { + pub fn keys(&self) -> Keys { self.iter().map(|(k, _v)| k) } @@ -1017,7 +1168,7 @@ impl, V, S, H: Hasher> HashMap { /// println!("{}", key); /// } /// ``` - pub fn values<'a>(&'a self) -> Values<'a, K, V> { + pub fn values(&self) -> Values { self.iter().map(|(_k, v)| v) } @@ -1038,8 +1189,8 @@ impl, V, S, H: Hasher> HashMap { /// println!("key: {} val: {}", key, val); /// } /// ``` - pub fn iter<'a>(&'a self) -> Entries<'a, K, V> { - self.table.iter() + pub fn iter(&self) -> Entries { + Entries { inner: self.table.iter() } } /// An iterator visiting all key-value pairs in arbitrary order, @@ -1065,8 +1216,8 @@ impl, V, S, H: Hasher> HashMap { /// println!("key: {} val: {}", key, val); /// } /// ``` - pub fn mut_iter<'a>(&'a mut self) -> MutEntries<'a, K, V> { - self.table.mut_iter() + pub fn mut_iter(&mut self) -> MutEntries { + MutEntries { inner: self.table.mut_iter() } } /// Creates a consuming iterator, that is, one that moves each key-value @@ -1087,7 +1238,9 @@ impl, V, S, H: Hasher> HashMap { /// let vec: Vec<(&str, int)> = map.move_iter().collect(); /// ``` pub fn move_iter(self) -> MoveEntries { - self.table.move_iter().map(|(_, k, v)| (k, v)) + MoveEntries { + inner: self.table.move_iter().map(|(_, k, v)| (k, v)) + } } } @@ -1131,13 +1284,9 @@ impl, V: PartialEq, S, H: Hasher> PartialEq for HashMap) -> bool { if self.len() != other.len() { return false; } - self.iter() - .all(|(key, value)| { - match other.find(key) { - None => false, - Some(v) => *value == *v - } - }) + self.iter().all(|(key, value)| + other.find(key).map_or(false, |v| *value == *v) + ) } } @@ -1178,14 +1327,52 @@ impl, V, S, H: Hasher> Index for HashMap { }*/ /// HashMap iterator -pub type Entries<'a, K, V> = table::Entries<'a, K, V>; +pub struct Entries<'a, K: 'a, V: 'a> { + inner: table::Entries<'a, K, V> +} /// HashMap mutable values iterator -pub type MutEntries<'a, K, V> = table::MutEntries<'a, K, V>; +pub struct MutEntries<'a, K: 'a, V: 'a> { + inner: table::MutEntries<'a, K, V> +} /// HashMap move iterator -pub type MoveEntries = - iter::Map<'static, (table::SafeHash, K, V), (K, V), table::MoveEntries>; +pub struct MoveEntries { + inner: iter::Map<'static, (SafeHash, K, V), (K, V), table::MoveEntries> +} + +impl<'a, K, V> Iterator<(&'a K, &'a V)> for Entries<'a, K, V> { + #[inline] + fn next(&mut self) -> Option<(&'a K, &'a V)> { + self.inner.next() + } + #[inline] + fn size_hint(&self) -> (uint, Option) { + self.inner.size_hint() + } +} + +impl<'a, K, V> Iterator<(&'a K, &'a mut V)> for MutEntries<'a, K, V> { + #[inline] + fn next(&mut self) -> Option<(&'a K, &'a mut V)> { + self.inner.next() + } + #[inline] + fn size_hint(&self) -> (uint, Option) { + self.inner.size_hint() + } +} + +impl Iterator<(K, V)> for MoveEntries { + #[inline] + fn next(&mut self) -> Option<(K, V)> { + self.inner.next() + } + #[inline] + fn size_hint(&self) -> (uint, Option) { + self.inner.size_hint() + } +} /// HashMap keys iterator pub type Keys<'a, K, V> = @@ -1266,7 +1453,6 @@ mod test_map { k: uint } - impl Dropable { fn new(k: uint) -> Dropable { let v = drop_vector.get().unwrap(); @@ -1371,6 +1557,7 @@ mod test_map { hm }; + // By the way, ensure that cloning doesn't screw up the dropping. 
drop(hm.clone()); { @@ -1505,6 +1692,28 @@ mod test_map { assert_eq!(*m.find(&1).unwrap(), 2); } + #[test] + fn test_update_with() { + let mut m = HashMap::with_capacity(4); + assert!(m.insert(1i, 2i)); + + for i in range(1i, 1000) { + assert_eq!( + i + 2, + *m.insert_or_update_with(i + 1, i + 2, |_k, _v| { + fail!("Key not yet present"); + }) + ); + assert_eq!( + i + 1, + *m.insert_or_update_with(i, i + 3, |k, v| { + assert_eq!(*k, i); + assert_eq!(*v, i + 1); + }) + ); + } + } + #[test] fn test_conflict_remove() { let mut m = HashMap::with_capacity(4); @@ -1698,6 +1907,7 @@ mod test_map { m.insert(i, i); i += 1; } + // three quarters full assert_eq!(m.len(), i); assert_eq!(m.table.capacity(), cap); @@ -1706,16 +1916,18 @@ mod test_map { m.insert(i, i); i += 1; } + // half full let new_cap = m.table.capacity(); assert_eq!(new_cap, cap * 2); - for _ in range(0, cap / 2) { + for _ in range(0, cap / 2 - 1) { i -= 1; m.remove(&i); assert_eq!(m.table.capacity(), new_cap); } - + // A little more than one quarter full. + // Shrinking starts as we remove more elements: for _ in range(0, cap / 2 - 1) { i -= 1; m.remove(&i); diff --git a/src/libstd/collections/hashmap/mod.rs b/src/libstd/collections/hashmap/mod.rs index f493e844526ee..b5612ce0f077d 100644 --- a/src/libstd/collections/hashmap/mod.rs +++ b/src/libstd/collections/hashmap/mod.rs @@ -12,6 +12,7 @@ pub use self::map::HashMap; pub use self::map::Entries; +pub use self::map::MutEntries; pub use self::map::MoveEntries; pub use self::map::Keys; pub use self::map::Values; diff --git a/src/libstd/collections/hashmap/set.rs b/src/libstd/collections/hashmap/set.rs index a1f71e333033f..4a2a04cbc9f66 100644 --- a/src/libstd/collections/hashmap/set.rs +++ b/src/libstd/collections/hashmap/set.rs @@ -16,8 +16,7 @@ use collections::{Collection, Mutable, Set, MutableSet, Map, MutableMap}; use default::Default; use fmt::Show; use fmt; -use RandomSipHasher; -use hash::{Hash, Hasher}; +use hash::{Hash, Hasher, RandomSipHasher}; use iter::{Iterator, FromIterator, FilterMap, Chain, Repeat, Zip, Extendable}; use iter; use option::{Some, None}; @@ -25,13 +24,13 @@ use result::{Ok, Err}; use super::{HashMap, Entries, MoveEntries, INITIAL_CAPACITY}; -/// HashSet iterator -pub type SetItems<'a, K> = - iter::Map<'static, (&'a K, &'a ()), &'a K, Entries<'a, K, ()>>; -/// HashSet move iterator -pub type SetMoveItems = - iter::Map<'static, (K, ()), K, MoveEntries>; +// Future Optimization (FIXME!) +// ============================= +// +// Iteration over zero sized values is a noop. There is no need +// for `bucket.val` in the case of HashSet. I suppose we would need HKT +// to get rid of it properly. /// An implementation of a hash set using the underlying representation of a /// HashMap where the value is (). 
As with the `HashMap` type, a `HashSet` @@ -444,6 +443,14 @@ impl, S, H: Hasher + Default> Default for HashSet { } } +/// HashSet iterator +pub type SetItems<'a, K> = + iter::Map<'static, (&'a K, &'a ()), &'a K, Entries<'a, K, ()>>; + +/// HashSet move iterator +pub type SetMoveItems = + iter::Map<'static, (K, ()), K, MoveEntries>; + // `Repeat` is used to feed the filter closure an explicit capture // of a reference to the other set /// Set operations iterator diff --git a/src/libstd/collections/hashmap/table.rs b/src/libstd/collections/hashmap/table.rs index 96d1a9ba2fbb5..54469baaef5fa 100644 --- a/src/libstd/collections/hashmap/table.rs +++ b/src/libstd/collections/hashmap/table.rs @@ -14,14 +14,13 @@ use clone::Clone; use cmp; use hash::{Hash, Hasher}; use iter::{Iterator, count}; +use kinds::marker; use mem::{min_align_of, size_of}; use mem; -use num::{CheckedMul, is_power_of_two}; +use num::{CheckedAdd, CheckedMul, is_power_of_two}; use ops::{Deref, DerefMut, Drop}; use option::{Some, None, Option}; -use ptr::RawPtr; -use ptr::set_memory; -use ptr::write; +use ptr::{RawPtr, copy_nonoverlapping_memory, zero_memory}; use ptr; use rt::heap::{allocate, deallocate}; @@ -34,17 +33,17 @@ static EMPTY_BUCKET: u64 = 0u64; /// `Vec>`, because we don't pay for the overhead of an /// option on every element, and we get a generally more cache-aware design. /// -/// Key invariants of this structure: +/// Essential invariants of this structure: /// -/// - if hashes[i] == EMPTY_BUCKET, then keys[i] and vals[i] have -/// 'undefined' contents. Don't read from them. This invariant is -/// enforced outside this module with the `EmptyIndex`, `FullIndex`, +/// - if t.hashes[i] == EMPTY_BUCKET, then `Bucket::at_index(&t, i).raw` +/// points to 'undefined' contents. Don't read from it. This invariant is +/// enforced outside this module with the `EmptyBucket`, `FullBucket`, /// and `SafeHash` types. /// -/// - An `EmptyIndex` is only constructed for a bucket at an index with +/// - An `EmptyBucket` is only constructed at an index with /// a hash of EMPTY_BUCKET. /// -/// - A `FullIndex` is only constructed for a bucket at an index with a +/// - A `FullBucket` is only constructed at an index with a /// non-EMPTY_BUCKET hash. /// /// - A `SafeHash` is only constructed for non-`EMPTY_BUCKET` hash. We get @@ -56,48 +55,21 @@ static EMPTY_BUCKET: u64 = 0u64; /// `capacity`. This is set at creation and never changes. The arrays /// are unzipped to save space (we don't have to pay for the padding /// between odd sized elements, such as in a map from u64 to u8), and -/// be more cache aware (scanning through 8 hashes brings in 2 cache -/// lines, since they're all right beside each other). +/// be more cache aware (scanning through 8 hashes brings in at most +/// 2 cache lines, since they're all right beside each other). /// /// You can kind of think of this module/data structure as a safe wrapper /// around just the "table" part of the hashtable. It enforces some /// invariants at the type level and employs some performance trickery, /// but in general is just a tricked out `Vec>`. -/// -/// FIXME(cgaebel): -/// -/// Feb 11, 2014: This hashtable was just implemented, and, hard as I tried, -/// isn't yet totally safe. There's a "known exploit" that you can create -/// multiple FullIndexes for a bucket, `take` one, and then still `take` -/// the other causing undefined behavior. Currently, there's no story -/// for how to protect against this statically. 
Therefore, there are asserts -/// on `take`, `get`, `get_mut`, and `put` which check the bucket state. -/// With time, and when we're confident this works correctly, they should -/// be removed. Also, the bounds check in `peek` is especially painful, -/// as that's called in the innermost loops of the hashtable and has the -/// potential to be a major performance drain. Remove this too. -/// -/// Or, better than remove, only enable these checks for debug builds. -/// There's currently no "debug-only" asserts in rust, so if you're reading -/// this and going "what? of course there are debug-only asserts!", then -/// please make this use them! #[unsafe_no_drop_flag] pub struct RawTable { capacity: uint, size: uint, - hashes: *mut u64 -} - -/// A bucket that holds a reference to the table -pub trait BucketWithTable { - /// A bucket that holds a reference to the table - fn table<'a>(&'a self) -> &'a M; - - /// Move out the reference to the table. - fn into_table(self) -> M; - - /// Get the raw index. - fn index(&self) -> uint; + hashes: *mut u64, + // Because K/V do not appear directly in any of the types in the struct, + // inform rustc that in fact instances of K and V are reachable from here. + marker: marker::CovariantType<(K,V)>, } struct RawBucket { @@ -124,47 +96,66 @@ pub struct FullBucket { table: M } -pub type EmptyBucketImm<'table,K,V> = EmptyBucket>; -pub type FullBucketImm<'table,K,V> = FullBucket>; +pub type EmptyBucketImm<'table, K, V> = EmptyBucket>; +pub type FullBucketImm<'table, K, V> = FullBucket>; -pub type EmptyBucketMut<'table,K,V> = EmptyBucket>; -pub type FullBucketMut<'table,K,V> = FullBucket>; +pub type EmptyBucketMut<'table, K, V> = EmptyBucket>; +pub type FullBucketMut<'table, K, V> = FullBucket>; +pub enum BucketState { + Empty(EmptyBucket), + Full(FullBucket), +} + +// A GapThenFull encapsulates the state of two consecutive buckets at once. +// The first bucket, called the gap, is known to be empty. +// The second bucket is full. struct GapThenFull { gap: EmptyBucket, - full: FullBucket + full: FullBucket, } -impl>> GapThenFull { - pub fn full<'a>(&'a self) -> &'a FullBucket { - &self.full - } - - pub fn shift(mut self) -> Option> { - unsafe { - *self.gap.raw.hash = mem::replace(&mut *self.full.raw.hash, EMPTY_BUCKET); - mem::overwrite(self.gap.raw.key, ptr::read(self.full.raw.key as *const K)); - mem::overwrite(self.gap.raw.val, ptr::read(self.full.raw.val as *const V)); - } - - let FullBucket { raw, idx, .. } = self.full; - - match self.full.next().peek() { - Empty(_) => None, - Full(bucket) => { - self.gap.raw = raw; - self.gap.idx = idx; +/// A hash that is not zero, since we use a hash of zero to represent empty +/// buckets. +#[deriving(PartialEq)] +pub struct SafeHash { + hash: u64, +} - self.full = bucket; - self.full.idx &= self.full.table.capacity - 1; +impl SafeHash { + /// Peek at the hash value, which is guaranteed to be non-zero. + #[inline(always)] + pub fn inspect(&self) -> u64 { self.hash } +} - Some(self) - } - } +/// We need to remove hashes of 0. That's reserved for empty buckets. +/// This function wraps up `hash_keyed` to be the only way outside this +/// module to generate a SafeHash. +pub fn make_hash, S, H: Hasher>(hasher: &H, t: &T) -> SafeHash { + match hasher.hash(t) { + // This constant is exceedingly likely to hash to the same + // bucket, but it won't be counted as empty! Just so we can maintain + // our precious uniform distribution of initial indexes. 
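Stated on its own, the remapping performed by `make_hash` is tiny (current Rust syntax; the sentinel and the replacement constant are the ones used in this patch):

```rust
const EMPTY_BUCKET: u64 = 0;

// A raw hash of 0 would be indistinguishable from an empty bucket, so that one
// value is redirected to a fixed non-zero hash; every other hash passes through.
fn make_safe_hash(h: u64) -> u64 {
    match h {
        EMPTY_BUCKET => 0x8000_0000_0000_0000,
        h => h,
    }
}

fn main() {
    assert_eq!(make_safe_hash(0), 0x8000_0000_0000_0000);
    assert_eq!(make_safe_hash(42), 42);
}
```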
+ EMPTY_BUCKET => SafeHash { hash: 0x8000_0000_0000_0000 }, + h => SafeHash { hash: h }, } } -impl RawPtr for RawBucket { +// `replace` casts a `*u64` to a `*SafeHash`. Since we statically +// ensure that a `FullBucket` points to an index with a non-zero hash, +// and a `SafeHash` is just a `u64` with a different name, this is +// safe. +// +// This test ensures that a `SafeHash` really IS the same size as a +// `u64`. If you need to change the size of `SafeHash` (and +// consequently made this test fail), `replace` needs to be +// modified to no longer assume this. +#[test] +fn can_alias_safehash_as_u64() { + assert_eq!(size_of::(), size_of::()) +} + +impl RawBucket { unsafe fn offset(self, count: int) -> RawBucket { RawBucket { hash: self.hash.offset(count), @@ -172,35 +163,143 @@ impl RawPtr for RawBucket { val: self.val.offset(count), } } +} - fn null() -> RawBucket { - RawBucket { - hash: RawPtr::null(), - key: RawPtr::null(), - val: RawPtr::null() +// For parameterizing over mutability. +impl<'t, K, V> Deref> for &'t RawTable { + fn deref(&self) -> &RawTable { + &**self + } +} + +impl<'t, K, V> Deref> for &'t mut RawTable { + fn deref(&self) -> &RawTable { + &**self + } +} + +impl<'t, K, V> DerefMut> for &'t mut RawTable { + fn deref_mut(&mut self) -> &mut RawTable { + &mut **self + } +} + +// Buckets hold references to the table. +impl FullBucket { + /// Borrow a reference to the table. + pub fn table(&self) -> &M { + &self.table + } + /// Move out the reference to the table. + pub fn into_table(self) -> M { + self.table + } + /// Get the raw index. + pub fn index(&self) -> uint { + self.idx + } +} + +impl EmptyBucket { + /// Borrow a reference to the table. + pub fn table(&self) -> &M { + &self.table + } + /// Move out the reference to the table. + pub fn into_table(self) -> M { + self.table + } +} + +impl Bucket { + /// Move out the reference to the table. + pub fn into_table(self) -> M { + self.table + } + /// Get the raw index. + pub fn index(&self) -> uint { + self.idx + } +} + +impl>> Bucket { + pub fn new(table: M, hash: &SafeHash) -> Bucket { + Bucket::at_index(table, hash.inspect() as uint) + } + + pub fn at_index(table: M, ib_index: uint) -> Bucket { + let ib_index = ib_index & (table.capacity() - 1); + Bucket { + raw: unsafe { + table.first_bucket_raw().offset(ib_index as int) + }, + idx: ib_index, + table: table } } - fn is_null(&self) -> bool { - self.hash.is_null() + pub fn first(table: M) -> Bucket { + Bucket { + raw: table.first_bucket_raw(), + idx: 0, + table: table + } } - fn to_uint(&self) -> uint { - self.hash.to_uint() + /// Reads a bucket at a given index, returning an enum indicating whether + /// it's initialized or not. You need to match on this enum to get + /// the appropriate types to call most of the other functions in + /// this module. + pub fn peek(self) -> BucketState { + match unsafe { *self.raw.hash } { + EMPTY_BUCKET => + Empty(EmptyBucket { + raw: self.raw, + idx: self.idx, + table: self.table + }), + _ => + Full(FullBucket { + raw: self.raw, + idx: self.idx, + table: self.table + }) + } } - unsafe fn to_option(&self) -> Option<&u64> { - self.hash.to_option() + /// Modifies the bucket pointer in place to make it point to the next slot. + pub fn next(&mut self) { + // Branchless bucket iteration step. + // As we reach the end of the table... 
+ // We take the current idx: 0111111b + // Xor it by its increment: ^ 1000000b + // ------------ + // 1111111b + // Then AND with the capacity: & 1000000b + // ------------ + // to get the backwards offset: 1000000b + // ... and it's zero at all other times. + let maybe_wraparound_dist = (self.idx ^ (self.idx + 1)) & self.table.capacity(); + // Finally, we obtain the offset 1 or the offset -cap + 1. + let dist = 1i - (maybe_wraparound_dist as int); + + self.idx += 1; + + unsafe { + self.raw = self.raw.offset(dist); + } } } -impl>> EmptyBucket { +impl>> EmptyBucket { + #[inline] pub fn next(self) -> Bucket { let mut bucket = self.into_bucket(); bucket.next(); bucket } + #[inline] pub fn into_bucket(self) -> Bucket { Bucket { raw: self.raw, @@ -217,24 +316,31 @@ impl>> EmptyBucket { }; match self.next().peek() { - Empty(_) => None, Full(bucket) => { Some(GapThenFull { gap: gap, full: bucket }) } + Empty(..) => None } } } -impl>> EmptyBucket { +impl>> EmptyBucket { + /// Puts given key and value pair, along with the key's hash, + /// into this bucket in the hashtable. Note how `self` is 'moved' into + /// this function, because this slot will no longer be empty when + /// we return! A `FullBucket` is returned for later use, pointing to + /// the newly-filled slot in the hashtable. + /// + /// Use `make_hash` to construct a `SafeHash` to pass to this function. pub fn put(mut self, hash: SafeHash, key: K, value: V) -> FullBucket { unsafe { *self.raw.hash = hash.inspect(); - write(self.raw.key, key); - write(self.raw.val, value); + ptr::write(self.raw.key, key); + ptr::write(self.raw.val, value); } self.table.size += 1; @@ -243,13 +349,15 @@ impl>> EmptyBucket { } } -impl>> FullBucket { +impl>> FullBucket { + #[inline] pub fn next(self) -> Bucket { let mut bucket = self.into_bucket(); bucket.next(); bucket } + #[inline] pub fn into_bucket(self) -> Bucket { Bucket { raw: self.raw, @@ -258,10 +366,19 @@ impl>> FullBucket { } } + /// Get the distance between this bucket and the 'ideal' location + /// as determined by the key's hash stored in it. + /// + /// In the cited blog posts above, this is called the "distance to + /// initial bucket", or DIB. Also known as "probe count". pub fn distance(&self) -> uint { + // Calculates the distance one has to travel when going from + // `hash mod capacity` onwards to `idx mod capacity`, wrapping around + // if the destination is not reached before the end of the table. (self.idx - self.hash().inspect() as uint) & (self.table.capacity() - 1) } + #[inline] pub fn hash(&self) -> SafeHash { unsafe { SafeHash { @@ -270,23 +387,20 @@ impl>> FullBucket { } } - pub fn read<'a>(&'a self) -> (&'a K, &'a V) { - unsafe { - (&*self.raw.key, - &*self.raw.val) - } - } - - pub fn into_refs(self) -> (&K, &V) { + /// Gets references to the key and value at a given index. + pub fn read(&self) -> (&K, &V) { unsafe { - // debug_assert!(*self.raw.hash != EMPTY_BUCKET); (&*self.raw.key, &*self.raw.val) } } } -impl>> FullBucket { +impl>> FullBucket { + /// Removes this bucket's key and value from the hashtable. + /// + /// This works similarly to `put`, building an `EmptyBucket` out of the + /// taken bucket. pub fn take(mut self) -> (EmptyBucket, K, V) { let key = self.raw.key as *const K; let val = self.raw.val as *const V; @@ -317,176 +431,86 @@ impl>> FullBucket { } } - pub fn read_mut<'a>(&'a self) -> (&'a mut K, &'a mut V) { + /// Gets mutable references to the key and value at a given index. 
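The wrap-around arithmetic in `Bucket::next` above can be verified in a few lines. `next_offset` below is an invented free-function restatement of the same expression (current Rust syntax):

```rust
fn next_offset(idx: usize, cap: usize) -> isize {
    // Branchless: the XOR/AND pair evaluates to `cap` exactly when `idx + 1`
    // is a multiple of `cap` (we just stepped off the last bucket), and to 0
    // at all other times.
    let maybe_wraparound_dist = (idx ^ (idx + 1)) & cap;
    1 - maybe_wraparound_dist as isize
}

fn main() {
    let cap = 8;
    assert_eq!(next_offset(0, cap), 1);   // ordinary step forward
    assert_eq!(next_offset(6, cap), 1);
    assert_eq!(next_offset(7, cap), -7);  // last bucket: jump back to index 0
    assert_eq!(next_offset(15, cap), -7); // idx keeps growing past the capacity
}
```

Keeping this step branchless matters because `next` runs in the innermost probing loops.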
+ pub fn read_mut(&mut self) -> (&mut K, &mut V) { unsafe { - // debug_assert!(*self.raw.hash != EMPTY_BUCKET); (&mut *self.raw.key, &mut *self.raw.val) } } +} - pub fn into_mut_refs(self) -> (&mut K, &mut V) { +impl<'t, K, V, M: Deref> + 't> FullBucket { + /// Exchange a bucket state for immutable references into the table. + /// Because the underlying reference to the table is also consumed, + /// no further changes to the structure of the table are possible; + /// in exchange for this, the returned references have a longer lifetime + /// than the references returned by `read()`. + pub fn into_refs(self) -> (&'t K, &'t V) { unsafe { - // debug_assert!(*self.raw.hash != EMPTY_BUCKET); - (&mut *self.raw.key, - &mut *self.raw.val) + (&*self.raw.key, + &*self.raw.val) } } } -impl>> Bucket { - pub fn new(table: M, hash: &SafeHash) -> Bucket { - let ib_index = (hash.inspect() as uint) & (table.capacity() - 1); - Bucket { - raw: unsafe { - table.as_mut_ptrs().offset(ib_index as int) - }, - idx: ib_index, - table: table - } - } - - pub fn at_index(table: M, ib_index: uint) -> Bucket { - let ib_index = ib_index & (table.capacity() - 1); - Bucket { - raw: unsafe { - table.as_mut_ptrs().offset(ib_index as int) - }, - idx: ib_index, - table: table - } - } - - pub fn first(table: M) -> Bucket { - Bucket { - raw: table.as_mut_ptrs(), - idx: 0, - table: table - } - } - - pub fn peek(self) -> BucketState { - match unsafe { *self.raw.hash } { - EMPTY_BUCKET => - Empty(EmptyBucket { - raw: self.raw, - idx: self.idx, - table: self.table - }), - _ => - Full(FullBucket { - raw: self.raw, - idx: self.idx, - table: self.table - }) - } - } - - pub fn next(&mut self) { - self.idx += 1; - - let dist = if self.idx == self.table.capacity() { - -(self.table.capacity() as int - 1) - } else { - 1i - }; - +impl<'t, K, V, M: DerefMut> + 't> FullBucket { + /// This works similarly to `into_refs`, exchanging a bucket state + /// for mutable references into the table. + pub fn into_mut_refs(self) -> (&'t mut K, &'t mut V) { unsafe { - self.raw = self.raw.offset(dist); + (&mut *self.raw.key, + &mut *self.raw.val) } } } -impl BucketWithTable for FullBucket { - fn table<'a>(&'a self) -> &'a M { - &self.table - } - - fn into_table(self) -> M { - self.table - } - - fn index(&self) -> uint { - self.idx - } -} - -impl BucketWithTable for EmptyBucket { - fn table<'a>(&'a self) -> &'a M { - &self.table - } - - fn into_table(self) -> M { - self.table - } - - fn index(&self) -> uint { - self.idx +impl BucketState { + // For convenience. + pub fn expect_full(self) -> FullBucket { + match self { + Full(full) => full, + Empty(..) => fail!("Expected full bucket") + } } } -impl BucketWithTable for Bucket { - fn table<'a>(&'a self) -> &'a M { - &self.table +impl>> GapThenFull { + #[inline] + pub fn full(&self) -> &FullBucket { + &self.full } - fn into_table(self) -> M { - self.table - } + pub fn shift(mut self) -> Option> { + unsafe { + *self.gap.raw.hash = mem::replace(&mut *self.full.raw.hash, EMPTY_BUCKET); + copy_nonoverlapping_memory(self.gap.raw.key, self.full.raw.key as *const K, 1); + copy_nonoverlapping_memory(self.gap.raw.val, self.full.raw.val as *const V, 1); + } - fn index(&self) -> uint { - self.idx - } -} + let FullBucket { raw: prev_raw, idx: prev_idx, .. 
} = self.full; -impl<'table,K,V> Deref> for &'table RawTable { - fn deref<'a>(&'a self) -> &'a RawTable { - &**self - } -} + match self.full.next().peek() { + Full(bucket) => { + self.gap.raw = prev_raw; + self.gap.idx = prev_idx; -impl<'table,K,V> Deref> for &'table mut RawTable { - fn deref<'a>(&'a self) -> &'a RawTable { - &**self - } -} + self.full = bucket; -impl<'table,K,V> DerefMut> for &'table mut RawTable { - fn deref_mut<'a>(&'a mut self) -> &'a mut RawTable { - &mut **self + Some(self) + } + Empty(..) => None + } } } -pub enum BucketState { - Empty(EmptyBucket), - Full(FullBucket), -} - -/// A hash that is not zero, since we use a hash of zero to represent empty -/// buckets. -#[deriving(PartialEq)] -pub struct SafeHash { - hash: u64, -} - -impl SafeHash { - /// Peek at the hash value, which is guaranteed to be non-zero. - #[inline(always)] - pub fn inspect(&self) -> u64 { self.hash } -} - -/// We need to remove hashes of 0. That's reserved for empty buckets. -/// This function wraps up `hash_keyed` to be the only way outside this -/// module to generate a SafeHash. -pub fn make_hash, S, H: Hasher>(hasher: &H, t: &T) -> SafeHash { - match hasher.hash(t) { - // This constant is exceedingly likely to hash to the same - // bucket, but it won't be counted as empty! - EMPTY_BUCKET => SafeHash { hash: 0x8000_0000_0000_0000 }, - h => SafeHash { hash: h }, - } -} +/// Rounds up to a multiple of a power of two. Returns the closest multiple +/// of `target_alignment` that is higher or equal to `unrounded`. +/// +/// # Failure +/// +/// Fails if `target_alignment` is not a power of two. fn round_up_to_next(unrounded: uint, target_alignment: uint) -> uint { assert!(is_power_of_two(target_alignment)); (unrounded + target_alignment - 1) & !(target_alignment - 1) @@ -531,7 +555,6 @@ fn test_offset_calculation() { } impl RawTable { - /// Does not initialize the buckets. The caller should ensure they, /// at the very least, set every hash to EMPTY_BUCKET. 
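The single-buffer layout that `round_up_to_next` supports (hashes, keys, and values packed back to back in one allocation, each array starting at a properly aligned offset) can be restated as a small calculation. In the sketch below, `offsets` is an invented helper, in current Rust syntax, mirroring the arithmetic used when locating the first bucket:

```rust
use std::mem::{align_of, size_of};

fn round_up_to_next(unrounded: usize, target_alignment: usize) -> usize {
    assert!(target_alignment.is_power_of_two());
    (unrounded + target_alignment - 1) & !(target_alignment - 1)
}

// Byte offsets of the keys array and the values array within the one buffer.
fn offsets<K, V>(capacity: usize) -> (usize, usize) {
    let hashes_size = capacity * size_of::<u64>();
    let keys_offset = round_up_to_next(hashes_size, align_of::<K>());
    let vals_offset = round_up_to_next(keys_offset + capacity * size_of::<K>(),
                                       align_of::<V>());
    (keys_offset, vals_offset)
}

fn main() {
    // For a map from u64 to u8 with capacity 8: the hashes take 64 bytes,
    // the keys another 64, and the byte-sized values start right after.
    assert_eq!(offsets::<u64, u8>(8), (64, 128));
}
```

Unzipping the three arrays this way avoids per-entry padding and keeps the hash scan dense in cache, as the module documentation above explains.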
unsafe fn new_uninitialized(capacity: uint) -> RawTable { @@ -540,6 +563,7 @@ impl RawTable { size: 0, capacity: 0, hashes: 0 as *mut u64, + marker: marker::CovariantType, }; } let hashes_size = capacity.checked_mul(&size_of::()) @@ -571,17 +595,18 @@ impl RawTable { capacity: capacity, size: 0, hashes: hashes, + marker: marker::CovariantType, } } - fn as_mut_ptrs(&self) -> RawBucket { + fn first_bucket_raw(&self) -> RawBucket { let hashes_size = self.capacity * size_of::(); let keys_size = self.capacity * size_of::(); - let keys_offset = (hashes_size + min_align_of::< K >() - 1) & !(min_align_of::< K >() - 1); + let keys_offset = (hashes_size + min_align_of::() - 1) & !(min_align_of::() - 1); let end_of_keys = keys_offset + keys_size; - let vals_offset = (end_of_keys + min_align_of::< V >() - 1) & !(min_align_of::< V >() - 1); + let vals_offset = (end_of_keys + min_align_of::() - 1) & !(min_align_of::() - 1); let buffer = self.hashes as *mut u8; @@ -600,7 +625,7 @@ impl RawTable { pub fn new(capacity: uint) -> RawTable { unsafe { let ret = RawTable::new_uninitialized(capacity); - set_memory(ret.hashes, 0u8, capacity); + zero_memory(ret.hashes, capacity); ret } } @@ -616,49 +641,51 @@ impl RawTable { self.size } - fn ptrs<'a>(&'a self) -> RawBuckets<'a, K, V> { + fn raw_buckets(&self) -> RawBuckets { RawBuckets { - raw: self.as_mut_ptrs(), + raw: self.first_bucket_raw(), hashes_end: unsafe { self.hashes.offset(self.capacity as int) } } } - pub fn iter<'a>(&'a self) -> Entries<'a, K, V> { + pub fn iter(&self) -> Entries { Entries { - iter: self.ptrs(), + iter: self.raw_buckets(), elems_left: self.size(), } } - pub fn mut_iter<'a>(&'a mut self) -> MutEntries<'a, K, V> { + pub fn mut_iter(&mut self) -> MutEntries { MutEntries { - iter: self.ptrs(), + iter: self.raw_buckets(), elems_left: self.size(), } } pub fn move_iter(self) -> MoveEntries { MoveEntries { - iter: self.ptrs(), + iter: self.raw_buckets(), table: self, } } - pub fn rev_move_buckets<'a>(&'a mut self) -> RevMoveBuckets<'a, K, V> { - let raw_bucket = self.as_mut_ptrs(); - unsafe { - RevMoveBuckets { - raw: raw_bucket.offset(self.capacity as int), - hashes_end: raw_bucket.hash, - elems_left: self.size - } + /// Returns an iterator that copies out each entry. Used while the table + /// is being dropped. + unsafe fn rev_move_buckets(&mut self) -> RevMoveBuckets { + let raw_bucket = self.first_bucket_raw(); + RevMoveBuckets { + raw: raw_bucket.offset(self.capacity as int), + hashes_end: raw_bucket.hash, + elems_left: self.size } } } -pub struct RawBuckets<'a, K, V> { +/// A raw iterator. The basis for some other iterators in this module. Although +/// this interface is safe, it's not used outside this module. +struct RawBuckets<'a, K, V> { raw: RawBucket, hashes_end: *mut u64 } @@ -667,6 +694,8 @@ impl<'a, K, V> Iterator> for RawBuckets<'a, K, V> { fn next(&mut self) -> Option> { while self.raw.hash != self.hashes_end { unsafe { + // We are swapping out the pointer to a bucket and replacing + // it with the pointer to the next one. let prev = ptr::replace(&mut self.raw, self.raw.offset(1)); if *prev.hash != EMPTY_BUCKET { return Some(prev); @@ -678,7 +707,10 @@ impl<'a, K, V> Iterator> for RawBuckets<'a, K, V> { } } -pub struct RevMoveBuckets<'a, K, V> { +/// An iterator that moves out buckets in reverse order. It leaves the table +/// in an an inconsistent state and should only be used for dropping +/// the table's remaining entries. It's used in the implementation of Drop. 
+struct RevMoveBuckets<'a, K, V> { raw: RawBucket, hashes_end: *mut u64, elems_left: uint @@ -708,43 +740,13 @@ impl<'a, K, V> Iterator<(K, V)> for RevMoveBuckets<'a, K, V> { } } -// `read_all_mut` casts a `*u64` to a `*SafeHash`. Since we statically -// ensure that a `FullIndex` points to an index with a non-zero hash, -// and a `SafeHash` is just a `u64` with a different name, this is -// safe. -// -// This test ensures that a `SafeHash` really IS the same size as a -// `u64`. If you need to change the size of `SafeHash` (and -// consequently made this test fail), `read_all_mut` needs to be -// modified to no longer assume this. -#[test] -fn can_alias_safehash_as_u64() { - assert_eq!(size_of::(), size_of::()) -} - -/// Note: stage0-specific version that lacks bound. -#[cfg(stage0)] -pub struct Entries<'a, K, V> { - iter: RawBuckets<'a, K, V>, - elems_left: uint, -} - /// Iterator over shared references to entries in a table. -#[cfg(not(stage0))] pub struct Entries<'a, K: 'a, V: 'a> { iter: RawBuckets<'a, K, V>, elems_left: uint, } -/// Note: stage0-specific version that lacks bound. -#[cfg(stage0)] -pub struct MutEntries<'a, K, V> { - iter: RawBuckets<'a, K, V>, - elems_left: uint, -} - /// Iterator over mutable references to entries in a table. -#[cfg(not(stage0))] pub struct MutEntries<'a, K: 'a, V: 'a> { iter: RawBuckets<'a, K, V>, elems_left: uint, @@ -830,14 +832,14 @@ impl Clone for RawTable { mem::overwrite(new_buckets.raw.key, k); mem::overwrite(new_buckets.raw.val, v); } - _ => { + Empty(..) => { *new_buckets.raw.hash = EMPTY_BUCKET; } } new_buckets.next(); buckets.next(); } - } + }; new_ht.size = self.size(); @@ -852,12 +854,14 @@ impl Drop for RawTable { if self.hashes.is_null() { return; } - // This is in reverse because we're likely to have partially taken + // This is done in reverse because we've likely partially taken // some elements out with `.move_iter()` from the front. // Check if the size is 0, so we don't do a useless scan when // dropping empty tables such as on resize. - // Avoid double free of elements already moved out. - for _ in self.rev_move_buckets() {} + // Also avoid double drop of elements that have been already moved out. + unsafe { + for _ in self.rev_move_buckets() {} + } let hashes_size = self.capacity * size_of::(); let keys_size = self.capacity * size_of::(); @@ -871,7 +875,5 @@ impl Drop for RawTable { // Remember how everything was allocated out of one buffer // during initialization? We only need one call to free here. } - - self.hashes = RawPtr::null(); } }