From 8c7840e8cb700870854efa02cca817c7dd610698 Mon Sep 17 00:00:00 2001 From: bjorn3 Date: Sun, 8 Aug 2021 17:24:30 +0200 Subject: [PATCH] Use a separate interner type for UniqueTypeId Using symbol::Interner makes it very easy to mixup UniqueTypeId symbols with the global interner. In fact the Debug implementation of UniqueTypeId did exactly this. Using a separate interner type also avoids prefilling the interner with unused symbols and allow for optimizing the symbol interner for parallel access without negatively affecting the single threaded module codegen. --- Cargo.lock | 1 + compiler/rustc_codegen_llvm/Cargo.toml | 1 + .../src/debuginfo/metadata.rs | 63 ++++++++++++++++--- compiler/rustc_macros/src/symbols.rs | 2 +- compiler/rustc_span/src/symbol.rs | 5 +- 5 files changed, 61 insertions(+), 11 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 80acf227698e9..940f949cbb579 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3677,6 +3677,7 @@ dependencies = [ "libc", "measureme", "rustc-demangle", + "rustc_arena", "rustc_ast", "rustc_attr", "rustc_codegen_ssa", diff --git a/compiler/rustc_codegen_llvm/Cargo.toml b/compiler/rustc_codegen_llvm/Cargo.toml index 521ce344180a1..3c99febbd57cc 100644 --- a/compiler/rustc_codegen_llvm/Cargo.toml +++ b/compiler/rustc_codegen_llvm/Cargo.toml @@ -16,6 +16,7 @@ snap = "1" tracing = "0.1" rustc_middle = { path = "../rustc_middle" } rustc-demangle = "0.1.21" +rustc_arena = { path = "../rustc_arena" } rustc_attr = { path = "../rustc_attr" } rustc_codegen_ssa = { path = "../rustc_codegen_ssa" } rustc_data_structures = { path = "../rustc_data_structures" } diff --git a/compiler/rustc_codegen_llvm/src/debuginfo/metadata.rs b/compiler/rustc_codegen_llvm/src/debuginfo/metadata.rs index f913c3e4b703d..9272435a330a5 100644 --- a/compiler/rustc_codegen_llvm/src/debuginfo/metadata.rs +++ b/compiler/rustc_codegen_llvm/src/debuginfo/metadata.rs @@ -34,7 +34,7 @@ use rustc_middle::ty::Instance; use rustc_middle::ty::{self, AdtKind, GeneratorSubsts, ParamEnv, Ty, TyCtxt}; use rustc_middle::{bug, span_bug}; use rustc_session::config::{self, DebugInfo}; -use rustc_span::symbol::{Interner, Symbol}; +use rustc_span::symbol::Symbol; use rustc_span::FileNameDisplayPreference; use rustc_span::{self, SourceFile, SourceFileHash, Span}; use rustc_target::abi::{Abi, Align, HasDataLayout, Integer, TagEncoding}; @@ -89,8 +89,54 @@ pub const UNKNOWN_COLUMN_NUMBER: c_uint = 0; pub const NO_SCOPE_METADATA: Option<&DIScope> = None; -#[derive(Copy, Debug, Hash, Eq, PartialEq, Clone)] -pub struct UniqueTypeId(Symbol); +mod unique_type_id { + use super::*; + use rustc_arena::DroplessArena; + + #[derive(Copy, Hash, Eq, PartialEq, Clone)] + pub(super) struct UniqueTypeId(u32); + + // The `&'static str`s in this type actually point into the arena. + // + // The `FxHashMap`+`Vec` pair could be replaced by `FxIndexSet`, but #75278 + // found that to regress performance up to 2% in some cases. This might be + // revisited after further improvements to `indexmap`. + #[derive(Default)] + pub(super) struct TypeIdInterner { + arena: DroplessArena, + names: FxHashMap<&'static str, UniqueTypeId>, + strings: Vec<&'static str>, + } + + impl TypeIdInterner { + #[inline] + pub(super) fn intern(&mut self, string: &str) -> UniqueTypeId { + if let Some(&name) = self.names.get(string) { + return name; + } + + let name = UniqueTypeId(self.strings.len() as u32); + + // `from_utf8_unchecked` is safe since we just allocated a `&str` which is known to be + // UTF-8. + let string: &str = + unsafe { std::str::from_utf8_unchecked(self.arena.alloc_slice(string.as_bytes())) }; + // It is safe to extend the arena allocation to `'static` because we only access + // these while the arena is still alive. + let string: &'static str = unsafe { &*(string as *const str) }; + self.strings.push(string); + self.names.insert(string, name); + name + } + + // Get the symbol as a string. `Symbol::as_str()` should be used in + // preference to this function. + pub(super) fn get(&self, symbol: UniqueTypeId) -> &str { + self.strings[symbol.0 as usize] + } + } +} +use unique_type_id::*; /// The `TypeMap` is where the `CrateDebugContext` holds the type metadata nodes /// created so far. The metadata nodes are indexed by `UniqueTypeId`, and, for @@ -99,7 +145,7 @@ pub struct UniqueTypeId(Symbol); #[derive(Default)] pub struct TypeMap<'ll, 'tcx> { /// The `UniqueTypeId`s created so far. - unique_id_interner: Interner, + unique_id_interner: TypeIdInterner, /// A map from `UniqueTypeId` to debuginfo metadata for that type. This is a 1:1 mapping. unique_id_to_metadata: FxHashMap, /// A map from types to debuginfo metadata. This is an N:1 mapping. @@ -166,8 +212,7 @@ impl TypeMap<'ll, 'tcx> { /// Gets the string representation of a `UniqueTypeId`. This method will fail if /// the ID is unknown. fn get_unique_type_id_as_string(&self, unique_type_id: UniqueTypeId) -> &str { - let UniqueTypeId(interner_key) = unique_type_id; - self.unique_id_interner.get(interner_key) + self.unique_id_interner.get(unique_type_id) } /// Gets the `UniqueTypeId` for the given type. If the `UniqueTypeId` for the given @@ -197,9 +242,9 @@ impl TypeMap<'ll, 'tcx> { let unique_type_id = hasher.finish::().to_hex(); let key = self.unique_id_interner.intern(&unique_type_id); - self.type_to_unique_id.insert(type_, UniqueTypeId(key)); + self.type_to_unique_id.insert(type_, key); - UniqueTypeId(key) + key } /// Gets the `UniqueTypeId` for an enum variant. Enum variants are not really @@ -215,7 +260,7 @@ impl TypeMap<'ll, 'tcx> { let enum_variant_type_id = format!("{}::{}", self.get_unique_type_id_as_string(enum_type_id), variant_name); let interner_key = self.unique_id_interner.intern(&enum_variant_type_id); - UniqueTypeId(interner_key) + interner_key } /// Gets the unique type ID string for an enum variant part. diff --git a/compiler/rustc_macros/src/symbols.rs b/compiler/rustc_macros/src/symbols.rs index 85e990bde86dc..1b245f2a75060 100644 --- a/compiler/rustc_macros/src/symbols.rs +++ b/compiler/rustc_macros/src/symbols.rs @@ -215,7 +215,7 @@ fn symbols_with_errors(input: TokenStream) -> (TokenStream, Vec) { } impl Interner { - pub fn fresh() -> Self { + pub(crate) fn fresh() -> Self { Interner::prefill(&[ #prefill_stream ]) diff --git a/compiler/rustc_span/src/symbol.rs b/compiler/rustc_span/src/symbol.rs index c816d06045681..1df5b8cd2cc8d 100644 --- a/compiler/rustc_span/src/symbol.rs +++ b/compiler/rustc_span/src/symbol.rs @@ -1701,8 +1701,11 @@ impl ToStableHashKey for Symbol { // The `FxHashMap`+`Vec` pair could be replaced by `FxIndexSet`, but #75278 // found that to regress performance up to 2% in some cases. This might be // revisited after further improvements to `indexmap`. +// +// This type is private to prevent accidentally constructing more than one `Interner` on the same +// thread, which makes it easy to mixup `Symbol`s between `Interner`s. #[derive(Default)] -pub struct Interner { +pub(crate) struct Interner { arena: DroplessArena, names: FxHashMap<&'static str, Symbol>, strings: Vec<&'static str>,