From 22d489be76271e36259ab1c7f76dbd88e6fdca2e Mon Sep 17 00:00:00 2001 From: Michael Woerister Date: Wed, 27 Jan 2021 14:28:07 +0100 Subject: [PATCH] Let a portion of DefPathHash uniquely identify the DefPath's crate. This allows to directly map from a DefPathHash to the crate it originates from, without constructing side tables to do that mapping. It also allows to reliably and cheaply check for DefPathHash collisions. --- compiler/rustc_ast/src/lib.rs | 1 - .../rustc_data_structures/src/fingerprint.rs | 15 +++- compiler/rustc_hir/src/definitions.rs | 61 +++++++++----- compiler/rustc_metadata/src/creader.rs | 30 ++++++- compiler/rustc_metadata/src/locator.rs | 8 ++ compiler/rustc_metadata/src/rmeta/decoder.rs | 4 + compiler/rustc_metadata/src/rmeta/encoder.rs | 1 + compiler/rustc_metadata/src/rmeta/mod.rs | 3 +- compiler/rustc_session/src/session.rs | 2 +- .../src/crate_disambiguator.rs | 0 compiler/rustc_span/src/def_id.rs | 81 +++++++++++++++++++ compiler/rustc_span/src/lib.rs | 2 + 12 files changed, 181 insertions(+), 27 deletions(-) rename compiler/{rustc_ast => rustc_span}/src/crate_disambiguator.rs (100%) diff --git a/compiler/rustc_ast/src/lib.rs b/compiler/rustc_ast/src/lib.rs index 08695491de756..5ddaf9c950a3b 100644 --- a/compiler/rustc_ast/src/lib.rs +++ b/compiler/rustc_ast/src/lib.rs @@ -41,7 +41,6 @@ pub mod util { pub mod ast; pub mod attr; -pub mod crate_disambiguator; pub mod entry; pub mod expand; pub mod mut_visit; diff --git a/compiler/rustc_data_structures/src/fingerprint.rs b/compiler/rustc_data_structures/src/fingerprint.rs index 08c3419a8421d..fecc51324cc67 100644 --- a/compiler/rustc_data_structures/src/fingerprint.rs +++ b/compiler/rustc_data_structures/src/fingerprint.rs @@ -7,11 +7,17 @@ use std::hash::{Hash, Hasher}; use std::mem::{self, MaybeUninit}; #[derive(Eq, PartialEq, Ord, PartialOrd, Debug, Clone, Copy)] +#[repr(C)] pub struct Fingerprint(u64, u64); impl Fingerprint { pub const ZERO: Fingerprint = Fingerprint(0, 0); + #[inline] + pub fn new(_0: u64, _1: u64) -> Fingerprint { + Fingerprint(_0, _1) + } + #[inline] pub fn from_smaller_hash(hash: u64) -> Fingerprint { Fingerprint(hash, hash) @@ -19,7 +25,12 @@ impl Fingerprint { #[inline] pub fn to_smaller_hash(&self) -> u64 { - self.0 + // Even though both halves of the fingerprint are expected to be good + // quality hash values, let's still combine the two values because the + // Fingerprints in DefPathHash have the StableCrateId portion which is + // the same for all DefPathHashes from the same crate. Combining the + // two halfs makes sure we get a good quality hash in such cases too. + self.0.wrapping_mul(3).wrapping_add(self.1) } #[inline] @@ -93,7 +104,7 @@ impl FingerprintHasher for crate::unhash::Unhasher { #[inline] fn write_fingerprint(&mut self, fingerprint: &Fingerprint) { // `Unhasher` only wants a single `u64` - self.write_u64(fingerprint.0); + self.write_u64(fingerprint.0.wrapping_add(fingerprint.1)); } } diff --git a/compiler/rustc_hir/src/definitions.rs b/compiler/rustc_hir/src/definitions.rs index 6a1b9bdbb9491..7ab55db509b02 100644 --- a/compiler/rustc_hir/src/definitions.rs +++ b/compiler/rustc_hir/src/definitions.rs @@ -5,13 +5,16 @@ //! expressions) that are mostly just leftovers. pub use crate::def_id::DefPathHash; -use crate::def_id::{CrateNum, DefId, DefIndex, LocalDefId, CRATE_DEF_INDEX, LOCAL_CRATE}; +use crate::def_id::{ + CrateNum, DefId, DefIndex, LocalDefId, StableCrateId, CRATE_DEF_INDEX, LOCAL_CRATE, +}; use crate::hir; -use rustc_ast::crate_disambiguator::CrateDisambiguator; use rustc_data_structures::fx::FxHashMap; use rustc_data_structures::stable_hasher::StableHasher; +use rustc_data_structures::unhash::UnhashMap; use rustc_index::vec::IndexVec; +use rustc_span::crate_disambiguator::CrateDisambiguator; use rustc_span::hygiene::ExpnId; use rustc_span::symbol::{kw, sym, Symbol}; @@ -27,6 +30,7 @@ use tracing::debug; pub struct DefPathTable { index_to_key: IndexVec, def_path_hashes: IndexVec, + def_path_hash_to_index: UnhashMap, } impl DefPathTable { @@ -39,6 +43,25 @@ impl DefPathTable { }; self.def_path_hashes.push(def_path_hash); debug_assert!(self.def_path_hashes.len() == self.index_to_key.len()); + + // Check for hash collisions of DefPathHashes. These should be + // exceedingly rare. + if let Some(existing) = self.def_path_hash_to_index.insert(def_path_hash, index) { + let def_path1 = DefPath::make(LOCAL_CRATE, existing, |idx| self.def_key(idx)); + let def_path2 = DefPath::make(LOCAL_CRATE, index, |idx| self.def_key(idx)); + + // Continuing with colliding DefPathHashes can lead to correctness + // issues. We must abort compilation. + panic!("Found DefPathHash collsion between {:?} and {:?}", def_path1, def_path2); + } + + // Assert that all DefPathHashes correctly contain the local crate's + // StableCrateId + #[cfg(debug_assertions)] + if let Some(root) = self.def_path_hashes.get(CRATE_DEF_INDEX) { + assert!(def_path_hash.stable_crate_id() == root.stable_crate_id()); + } + index } @@ -108,13 +131,10 @@ pub struct DefKey { } impl DefKey { - fn compute_stable_hash(&self, parent_hash: DefPathHash) -> DefPathHash { + fn compute_stable_hash(&self, parent: DefPathHash) -> DefPathHash { let mut hasher = StableHasher::new(); - // We hash a `0u8` here to disambiguate between regular `DefPath` hashes, - // and the special "root_parent" below. - 0u8.hash(&mut hasher); - parent_hash.hash(&mut hasher); + parent.hash(&mut hasher); let DisambiguatedDefPathData { ref data, disambiguator } = self.disambiguated_data; @@ -127,19 +147,13 @@ impl DefKey { disambiguator.hash(&mut hasher); - DefPathHash(hasher.finish()) - } + let local_hash: u64 = hasher.finish(); - fn root_parent_stable_hash( - crate_name: &str, - crate_disambiguator: CrateDisambiguator, - ) -> DefPathHash { - let mut hasher = StableHasher::new(); - // Disambiguate this from a regular `DefPath` hash; see `compute_stable_hash()` above. - 1u8.hash(&mut hasher); - crate_name.hash(&mut hasher); - crate_disambiguator.hash(&mut hasher); - DefPathHash(hasher.finish()) + // Construct the new DefPathHash, making sure that the `crate_id` + // portion of the hash is properly copied from the parent. This way the + // `crate_id` part will be recursively propagated from the root to all + // DefPathHashes in this DefPathTable. + DefPathHash::new(parent.stable_crate_id(), local_hash) } } @@ -295,6 +309,12 @@ impl Definitions { self.table.def_path_hash(id.local_def_index) } + #[inline] + pub fn def_path_hash_to_def_id(&self, def_path_hash: DefPathHash) -> LocalDefId { + let local_def_index = self.table.def_path_hash_to_index[&def_path_hash]; + LocalDefId { local_def_index } + } + /// Returns the path from the crate root to `index`. The root /// nodes are not included in the path (i.e., this will be an /// empty vector for the crate root). For an inlined item, this @@ -332,7 +352,8 @@ impl Definitions { }, }; - let parent_hash = DefKey::root_parent_stable_hash(crate_name, crate_disambiguator); + let stable_crate_id = StableCrateId::new(crate_name, crate_disambiguator); + let parent_hash = DefPathHash::new(stable_crate_id, 0); let def_path_hash = key.compute_stable_hash(parent_hash); // Create the root definition. diff --git a/compiler/rustc_metadata/src/creader.rs b/compiler/rustc_metadata/src/creader.rs index e3fbd1a2b29ea..42e4f1f8fff15 100644 --- a/compiler/rustc_metadata/src/creader.rs +++ b/compiler/rustc_metadata/src/creader.rs @@ -6,11 +6,11 @@ use crate::rmeta::{CrateDep, CrateMetadata, CrateNumMap, CrateRoot, MetadataBlob use rustc_ast::expand::allocator::AllocatorKind; use rustc_ast::{self as ast, *}; -use rustc_data_structures::fx::FxHashSet; +use rustc_data_structures::fx::{FxHashMap, FxHashSet}; use rustc_data_structures::svh::Svh; use rustc_data_structures::sync::Lrc; use rustc_expand::base::SyntaxExtension; -use rustc_hir::def_id::{CrateNum, LocalDefId, LOCAL_CRATE}; +use rustc_hir::def_id::{CrateNum, LocalDefId, StableCrateId, LOCAL_CRATE}; use rustc_hir::definitions::Definitions; use rustc_index::vec::IndexVec; use rustc_middle::middle::cstore::{CrateDepKind, CrateSource, ExternCrate}; @@ -40,6 +40,10 @@ pub struct CStore { allocator_kind: Option, /// This crate has a `#[global_allocator]` item. has_global_allocator: bool, + + /// This map is used to verify we get no hash conflicts between + /// `StableCrateId` values. + stable_crate_ids: FxHashMap, } pub struct CrateLoader<'a> { @@ -192,6 +196,11 @@ impl<'a> CrateLoader<'a> { metadata_loader: &'a MetadataLoaderDyn, local_crate_name: &str, ) -> Self { + let local_crate_stable_id = + StableCrateId::new(local_crate_name, sess.local_crate_disambiguator()); + let mut stable_crate_ids = FxHashMap::default(); + stable_crate_ids.insert(local_crate_stable_id, LOCAL_CRATE); + CrateLoader { sess, metadata_loader, @@ -205,6 +214,7 @@ impl<'a> CrateLoader<'a> { injected_panic_runtime: None, allocator_kind: None, has_global_allocator: false, + stable_crate_ids, }, used_extern_options: Default::default(), } @@ -311,6 +321,20 @@ impl<'a> CrateLoader<'a> { res } + fn verify_no_stable_crate_id_hash_conflicts( + &mut self, + root: &CrateRoot<'_>, + cnum: CrateNum, + ) -> Result<(), CrateError> { + if let Some(existing) = self.cstore.stable_crate_ids.insert(root.stable_crate_id(), cnum) { + let crate_name0 = root.name(); + let crate_name1 = self.cstore.get_crate_data(existing).name(); + return Err(CrateError::StableCrateIdCollision(crate_name0, crate_name1)); + } + + Ok(()) + } + fn register_crate( &mut self, host_lib: Option, @@ -332,6 +356,8 @@ impl<'a> CrateLoader<'a> { // Claim this crate number and cache it let cnum = self.cstore.alloc_new_crate_num(); + self.verify_no_stable_crate_id_hash_conflicts(&crate_root, cnum)?; + info!( "register crate `{}` (cnum = {}. private_dep = {})", crate_root.name(), diff --git a/compiler/rustc_metadata/src/locator.rs b/compiler/rustc_metadata/src/locator.rs index b66c6cffb1b26..a9f58b08f5899 100644 --- a/compiler/rustc_metadata/src/locator.rs +++ b/compiler/rustc_metadata/src/locator.rs @@ -888,6 +888,7 @@ crate enum CrateError { MultipleMatchingCrates(Symbol, FxHashMap), SymbolConflictsCurrent(Symbol), SymbolConflictsOthers(Symbol), + StableCrateIdCollision(Symbol, Symbol), DlOpen(String), DlSym(String), LocatorCombined(CombinedLocatorError), @@ -970,6 +971,13 @@ impl CrateError { `-C metadata`. This will result in symbol conflicts between the two.", root_name, ), + CrateError::StableCrateIdCollision(crate_name0, crate_name1) => { + let msg = format!( + "found crates (`{}` and `{}`) with colliding StableCrateId values.", + crate_name0, crate_name1 + ); + sess.struct_span_err(span, &msg) + } CrateError::DlOpen(s) | CrateError::DlSym(s) => sess.struct_span_err(span, &s), CrateError::LocatorCombined(locator) => { let crate_name = locator.crate_name; diff --git a/compiler/rustc_metadata/src/rmeta/decoder.rs b/compiler/rustc_metadata/src/rmeta/decoder.rs index e3c3539079857..e9b8388c1c915 100644 --- a/compiler/rustc_metadata/src/rmeta/decoder.rs +++ b/compiler/rustc_metadata/src/rmeta/decoder.rs @@ -635,6 +635,10 @@ impl CrateRoot<'_> { self.hash } + crate fn stable_crate_id(&self) -> StableCrateId { + self.stable_crate_id + } + crate fn triple(&self) -> &TargetTriple { &self.triple } diff --git a/compiler/rustc_metadata/src/rmeta/encoder.rs b/compiler/rustc_metadata/src/rmeta/encoder.rs index 3961adacecae8..25d70d9768a6a 100644 --- a/compiler/rustc_metadata/src/rmeta/encoder.rs +++ b/compiler/rustc_metadata/src/rmeta/encoder.rs @@ -651,6 +651,7 @@ impl<'a, 'tcx> EncodeContext<'a, 'tcx> { triple: tcx.sess.opts.target_triple.clone(), hash: tcx.crate_hash(LOCAL_CRATE), disambiguator: tcx.sess.local_crate_disambiguator(), + stable_crate_id: tcx.def_path_hash(LOCAL_CRATE.as_def_id()).stable_crate_id(), panic_strategy: tcx.sess.panic_strategy(), edition: tcx.sess.edition(), has_global_allocator: tcx.has_global_allocator(LOCAL_CRATE), diff --git a/compiler/rustc_metadata/src/rmeta/mod.rs b/compiler/rustc_metadata/src/rmeta/mod.rs index b44c3bfcac647..610528956d0a1 100644 --- a/compiler/rustc_metadata/src/rmeta/mod.rs +++ b/compiler/rustc_metadata/src/rmeta/mod.rs @@ -7,7 +7,7 @@ use rustc_data_structures::svh::Svh; use rustc_data_structures::sync::MetadataRef; use rustc_hir as hir; use rustc_hir::def::{CtorKind, DefKind}; -use rustc_hir::def_id::{DefId, DefIndex, DefPathHash}; +use rustc_hir::def_id::{DefId, DefIndex, DefPathHash, StableCrateId}; use rustc_hir::definitions::DefKey; use rustc_hir::lang_items; use rustc_index::{bit_set::FiniteBitSet, vec::IndexVec}; @@ -203,6 +203,7 @@ crate struct CrateRoot<'tcx> { extra_filename: String, hash: Svh, disambiguator: CrateDisambiguator, + stable_crate_id: StableCrateId, panic_strategy: PanicStrategy, edition: Edition, has_global_allocator: bool, diff --git a/compiler/rustc_session/src/session.rs b/compiler/rustc_session/src/session.rs index 69aa72d899fb3..3f690ba87ea05 100644 --- a/compiler/rustc_session/src/session.rs +++ b/compiler/rustc_session/src/session.rs @@ -8,7 +8,6 @@ use crate::parse::ParseSess; use crate::search_paths::{PathKind, SearchPath}; pub use rustc_ast::attr::MarkedAttrs; -pub use rustc_ast::crate_disambiguator::CrateDisambiguator; pub use rustc_ast::Attribute; use rustc_data_structures::flock; use rustc_data_structures::fx::{FxHashMap, FxHashSet}; @@ -23,6 +22,7 @@ use rustc_errors::json::JsonEmitter; use rustc_errors::registry::Registry; use rustc_errors::{Applicability, Diagnostic, DiagnosticBuilder, DiagnosticId, ErrorReported}; use rustc_lint_defs::FutureBreakage; +pub use rustc_span::crate_disambiguator::CrateDisambiguator; use rustc_span::edition::Edition; use rustc_span::source_map::{FileLoader, MultiSpan, RealFileLoader, SourceMap, Span}; use rustc_span::{sym, SourceFileHashAlgorithm, Symbol}; diff --git a/compiler/rustc_ast/src/crate_disambiguator.rs b/compiler/rustc_span/src/crate_disambiguator.rs similarity index 100% rename from compiler/rustc_ast/src/crate_disambiguator.rs rename to compiler/rustc_span/src/crate_disambiguator.rs diff --git a/compiler/rustc_span/src/def_id.rs b/compiler/rustc_span/src/def_id.rs index b24ede9c53aed..7c02056105e05 100644 --- a/compiler/rustc_span/src/def_id.rs +++ b/compiler/rustc_span/src/def_id.rs @@ -1,3 +1,4 @@ +use crate::crate_disambiguator::CrateDisambiguator; use crate::HashStableContext; use rustc_data_structures::fingerprint::Fingerprint; use rustc_data_structures::stable_hasher::{HashStable, StableHasher}; @@ -105,10 +106,66 @@ impl ::std::fmt::Debug for CrateNum { } } +/// A `DefPathHash` is a fixed-size representation of a `DefPath` that is +/// stable across crate and compilation session boundaries. It consists of two +/// separate 64-bit hashes. The first uniquely identifies the crate this +/// `DefPathHash` originates from (see [StableCrateId]), and the second +/// uniquely identifies the corresponding `DefPath` within that crate. Together +/// they form a unique identifier within an entire crate graph. +/// +/// There is a very small chance of hash collisions, which would mean that two +/// different `DefPath`s map to the same `DefPathHash`. Proceeding compilation +/// with such a hash collision would very probably lead to an ICE, and in the +/// worst case lead to a silent mis-compilation. The compiler therefore actively +/// and exhaustively checks for such hash collisions and aborts compilation if +/// it finds one. +/// +/// `DefPathHash` uses 64-bit hashes for both the crate-id part and the +/// crate-internal part, even though it is likely that there are many more +/// `LocalDefId`s in a single crate than there are individual crates in a crate +/// graph. Since we use the same number of bits in both cases, the collision +/// probability for the crate-local part will be quite a bit higher (though +/// still very small). +/// +/// This imbalance is not by accident: A hash collision in the +/// crate-local part of a `DefPathHash` will be detected and reported while +/// compiling the crate in question. Such a collision does not depend on +/// outside factors and can be easily fixed by the crate maintainer (e.g. by +/// renaming the item in question or by bumping the crate version in a harmless +/// way). +/// +/// A collision between crate-id hashes on the other hand is harder to fix +/// because it depends on the set of crates in the entire crate graph of a +/// compilation session. Again, using the same crate with a different version +/// number would fix the issue with a high probability -- but that might be +/// easier said then done if the crates in questions are dependencies of +/// third-party crates. +/// +/// That being said, given a high quality hash function, the collision +/// probabilities in question are very small. For example, for a big crate like +/// `rustc_middle` (with ~50000 `LocalDefId`s as of the time of writing) there +/// is a probability of roughly 1 in 14,750,000,000 of a crate-internal +/// collision occurring. For a big crate graph with 1000 crates in it, there is +/// a probability of 1 in 36,890,000,000,000 of a `StableCrateId` collision. #[derive(Copy, Clone, Hash, PartialEq, Eq, PartialOrd, Ord, Debug)] #[derive(HashStable_Generic, Encodable, Decodable)] pub struct DefPathHash(pub Fingerprint); +impl DefPathHash { + /// Returns the [StableCrateId] identifying the crate this [DefPathHash] + /// originates from. + #[inline] + pub fn stable_crate_id(&self) -> StableCrateId { + StableCrateId(self.0.as_value().0) + } + + /// Builds a new [DefPathHash] with the given [StableCrateId] and + /// `local_hash`, where `local_hash` must be unique within its crate. + pub fn new(stable_crate_id: StableCrateId, local_hash: u64) -> DefPathHash { + DefPathHash(Fingerprint::new(stable_crate_id.0, local_hash)) + } +} + impl Borrow for DefPathHash { #[inline] fn borrow(&self) -> &Fingerprint { @@ -116,6 +173,30 @@ impl Borrow for DefPathHash { } } +/// A [StableCrateId] is a 64 bit hash of `(crate-name, crate-disambiguator)`. It +/// is to [CrateNum] what [DefPathHash] is to [DefId]. It is stable across +/// compilation sessions. +/// +/// Since the ID is a hash value there is a (very small) chance that two crates +/// end up with the same [StableCrateId]. The compiler will check for such +/// collisions when loading crates and abort compilation in order to avoid +/// further trouble. +#[derive(Copy, Clone, Hash, PartialEq, Eq, PartialOrd, Ord, Debug, Encodable, Decodable)] +pub struct StableCrateId(u64); + +impl StableCrateId { + /// Computes the stable ID for a crate with the given name and + /// disambiguator. + pub fn new(crate_name: &str, crate_disambiguator: CrateDisambiguator) -> StableCrateId { + use std::hash::Hash; + + let mut hasher = StableHasher::new(); + crate_name.hash(&mut hasher); + crate_disambiguator.hash(&mut hasher); + StableCrateId(hasher.finish()) + } +} + rustc_index::newtype_index! { /// A DefIndex is an index into the hir-map for a crate, identifying a /// particular definition. It should really be considered an interned diff --git a/compiler/rustc_span/src/lib.rs b/compiler/rustc_span/src/lib.rs index f3d876a5770a8..0f14154047d6c 100644 --- a/compiler/rustc_span/src/lib.rs +++ b/compiler/rustc_span/src/lib.rs @@ -47,6 +47,8 @@ pub mod lev_distance; mod span_encoding; pub use span_encoding::{Span, DUMMY_SP}; +pub mod crate_disambiguator; + pub mod symbol; pub use symbol::{sym, Symbol};