Skip to content

Commit

Permalink
Auto merge of #47269 - michaelwoerister:mangled-cgu-names, r=alexcric…
Browse files Browse the repository at this point in the history
…hton

Shorten names of some compiler generated artifacts.

This PR makes the compiler mangle codegen unit names by default. The name of every codegen unit will now be a random string of 16 characters. It also makes the file extensions of some intermediate compiler products shorter. Hopefully, these changes will reduce the pressure on tools with path length restrictions, like buildbot. The change should also solve problems with case-insensitive file systems.

cc #47186 and #47222

r? @alexcrichton
  • Loading branch information
bors committed Jan 9, 2018
2 parents 2e33c89 + 94f3037 commit 61452e5
Show file tree
Hide file tree
Showing 10 changed files with 70 additions and 28 deletions.
12 changes: 12 additions & 0 deletions src/librustc/mir/mono.rs
Expand Up @@ -12,9 +12,11 @@ use syntax::ast::NodeId;
use syntax::symbol::InternedString;
use ty::Instance;
use util::nodemap::FxHashMap;
use rustc_data_structures::base_n;
use rustc_data_structures::stable_hasher::{HashStable, StableHasherResult,
StableHasher};
use ich::{Fingerprint, StableHashingContext, NodeIdHashingMode};
use std::hash::Hash;

#[derive(PartialEq, Eq, Clone, Copy, Debug, Hash)]
pub enum MonoItem<'tcx> {
Expand Down Expand Up @@ -119,6 +121,16 @@ impl<'tcx> CodegenUnit<'tcx> {
{
&mut self.items
}

/// Produces a short, filename-friendly codegen unit name from a
/// human-readable one.
///
/// The input string is fed through a `StableHasher`, the resulting 128-bit
/// value is cut down to its low 80 bits, and those bits are rendered by
/// `base_n::encode` using `base_n::CASE_INSENSITIVE` as the base.
pub fn mangle_name(human_readable_name: &str) -> String {
    // Only 80 bits of the hash are kept: that should be enough to avoid
    // collisions while still giving a reasonably short string for filenames.
    const LOW_80_BITS: u128 = (1u128 << 80) - 1;

    let mut state = StableHasher::new();
    human_readable_name.hash(&mut state);
    let digest: u128 = state.finish();

    base_n::encode(digest & LOW_80_BITS, base_n::CASE_INSENSITIVE)
}
}

impl<'tcx> HashStable<StableHashingContext<'tcx>> for CodegenUnit<'tcx> {
Expand Down
2 changes: 2 additions & 0 deletions src/librustc/session/config.rs
Expand Up @@ -1234,6 +1234,8 @@ options! {DebuggingOptions, DebuggingSetter, basic_debugging_options,
"rewrite operators on i128 and u128 into lang item calls (typically provided \
by compiler-builtins) so translation doesn't need to support them,
overriding the default for the current target"),
human_readable_cgu_names: bool = (false, parse_bool, [TRACKED],
"generate human-readable, predictable names for codegen units"),
}

pub fn default_lib_output() -> CrateType {
Expand Down
22 changes: 13 additions & 9 deletions src/librustc_data_structures/base_n.rs
Expand Up @@ -13,18 +13,21 @@

use std::str;

pub const MAX_BASE: u64 = 64;
pub const ALPHANUMERIC_ONLY: u64 = 62;
pub const MAX_BASE: usize = 64;
pub const ALPHANUMERIC_ONLY: usize = 62;
pub const CASE_INSENSITIVE: usize = 36;

const BASE_64: &'static [u8; MAX_BASE as usize] =
b"0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ@$";

#[inline]
pub fn push_str(mut n: u64, base: u64, output: &mut String) {
pub fn push_str(mut n: u128, base: usize, output: &mut String) {
debug_assert!(base >= 2 && base <= MAX_BASE);
let mut s = [0u8; 64];
let mut s = [0u8; 128];
let mut index = 0;

let base = base as u128;

loop {
s[index] = BASE_64[(n % base) as usize];
index += 1;
Expand All @@ -39,16 +42,16 @@ pub fn push_str(mut n: u64, base: u64, output: &mut String) {
}

#[inline]
pub fn encode(n: u64, base: u64) -> String {
let mut s = String::with_capacity(13);
/// Returns the representation of `n` in the given `base` as a freshly
/// allocated `String`.
///
/// This is a convenience wrapper: it starts from an empty string and lets
/// `push_str` append the digits to it.
pub fn encode(n: u128, base: usize) -> String {
    let mut encoded = String::new();
    push_str(n, base, &mut encoded);
    encoded
}

#[test]
fn test_encode() {
fn test(n: u64, base: u64) {
assert_eq!(Ok(n), u64::from_str_radix(&encode(n, base), base as u32));
fn test(n: u128, base: usize) {
assert_eq!(Ok(n), u128::from_str_radix(&encode(n, base), base as u32));
}

for base in 2..37 {
Expand All @@ -57,7 +60,8 @@ fn test_encode() {
test(35, base);
test(36, base);
test(37, base);
test(u64::max_value(), base);
test(u64::max_value() as u128, base);
test(u128::max_value(), base);

for i in 0 .. 1_000 {
test(i * 983, base);
Expand Down
11 changes: 6 additions & 5 deletions src/librustc_incremental/persist/fs.rs
Expand Up @@ -137,7 +137,7 @@ const QUERY_CACHE_FILENAME: &'static str = "query-cache.bin";
// or hexadecimal numbers (we want short file and directory names). Since these
// numbers will be used in file names, we choose an encoding that is not
// case-sensitive (as opposed to base64, for example).
const INT_ENCODE_BASE: u64 = 36;
const INT_ENCODE_BASE: usize = base_n::CASE_INSENSITIVE;

pub fn dep_graph_path(sess: &Session) -> PathBuf {
in_incr_comp_dir_sess(sess, DEP_GRAPH_FILENAME)
Expand Down Expand Up @@ -357,7 +357,7 @@ pub fn finalize_session_directory(sess: &Session, svh: Svh) {
let mut new_sub_dir_name = String::from(&old_sub_dir_name[.. dash_indices[2] + 1]);

// Append the svh
base_n::push_str(svh.as_u64(), INT_ENCODE_BASE, &mut new_sub_dir_name);
base_n::push_str(svh.as_u64() as u128, INT_ENCODE_BASE, &mut new_sub_dir_name);

// Create the full path
let new_path = incr_comp_session_dir.parent().unwrap().join(new_sub_dir_name);
Expand Down Expand Up @@ -465,7 +465,7 @@ fn generate_session_dir_path(crate_dir: &Path) -> PathBuf {

let directory_name = format!("s-{}-{}-working",
timestamp,
base_n::encode(random_number as u64,
base_n::encode(random_number as u128,
INT_ENCODE_BASE));
debug!("generate_session_dir_path: directory_name = {}", directory_name);
let directory_path = crate_dir.join(directory_name);
Expand Down Expand Up @@ -599,7 +599,7 @@ fn timestamp_to_string(timestamp: SystemTime) -> String {
let duration = timestamp.duration_since(UNIX_EPOCH).unwrap();
let micros = duration.as_secs() * 1_000_000 +
(duration.subsec_nanos() as u64) / 1000;
base_n::encode(micros, INT_ENCODE_BASE)
base_n::encode(micros as u128, INT_ENCODE_BASE)
}

fn string_to_timestamp(s: &str) -> Result<SystemTime, ()> {
Expand All @@ -626,7 +626,8 @@ fn crate_path(sess: &Session,
// The full crate disambiguator is really long. 64 bits of it should be
// sufficient.
let crate_disambiguator = crate_disambiguator.to_fingerprint().to_smaller_hash();
let crate_disambiguator = base_n::encode(crate_disambiguator, INT_ENCODE_BASE);
let crate_disambiguator = base_n::encode(crate_disambiguator as u128,
INT_ENCODE_BASE);

let crate_name = format!("{}-{}", crate_name, crate_disambiguator);
incr_dir.join(crate_name)
Expand Down
4 changes: 2 additions & 2 deletions src/librustc_incremental/persist/work_product.rs
Expand Up @@ -35,9 +35,9 @@ pub fn save_trans_partition(sess: &Session,
let extension = match kind {
WorkProductFileKind::Object => "o",
WorkProductFileKind::Bytecode => "bc",
WorkProductFileKind::BytecodeCompressed => "bc-compressed",
WorkProductFileKind::BytecodeCompressed => "bc.z",
};
let file_name = format!("cgu-{}.{}", cgu_name, extension);
let file_name = format!("{}.{}", cgu_name, extension);
let path_in_incr_dir = in_incr_comp_dir_sess(sess, &file_name);
match link_or_copy(path, &path_in_incr_dir) {
Ok(_) => Some((kind, file_name)),
Expand Down
33 changes: 24 additions & 9 deletions src/librustc_mir/monomorphize/partitioning.rs
Expand Up @@ -200,7 +200,16 @@ impl<'tcx> CodegenUnitExt<'tcx> for CodegenUnit<'tcx> {
}

// Anything we can't find a proper codegen unit for goes into this.
const FALLBACK_CODEGEN_UNIT: &'static str = "__rustc_fallback_codegen_unit";
/// Returns the interned name of the fallback codegen unit.
///
/// When the `human_readable_cgu_names` debugging option is set, the fixed
/// readable name is used as-is; otherwise that name is first passed through
/// `CodegenUnit::mangle_name`.
fn fallback_cgu_name(tcx: TyCtxt) -> InternedString {
    const FALLBACK_CODEGEN_UNIT: &'static str = "__rustc_fallback_codegen_unit";

    let keep_readable = tcx.sess.opts.debugging_opts.human_readable_cgu_names;

    let symbol = if keep_readable {
        Symbol::intern(FALLBACK_CODEGEN_UNIT)
    } else {
        let mangled = CodegenUnit::mangle_name(FALLBACK_CODEGEN_UNIT);
        Symbol::intern(&mangled)
    };

    symbol.as_str()
}


pub fn partition<'a, 'tcx, I>(tcx: TyCtxt<'a, 'tcx, 'tcx>,
trans_items: I,
Expand Down Expand Up @@ -297,7 +306,7 @@ fn place_root_translation_items<'a, 'tcx, I>(tcx: TyCtxt<'a, 'tcx, 'tcx>,

let codegen_unit_name = match characteristic_def_id {
Some(def_id) => compute_codegen_unit_name(tcx, def_id, is_volatile),
None => Symbol::intern(FALLBACK_CODEGEN_UNIT).as_str(),
None => fallback_cgu_name(tcx),
};

let make_codegen_unit = || {
Expand Down Expand Up @@ -381,7 +390,7 @@ fn place_root_translation_items<'a, 'tcx, I>(tcx: TyCtxt<'a, 'tcx, 'tcx>,
// always ensure we have at least one CGU; otherwise, if we have a
// crate with just types (for example), we could wind up with no CGU
if codegen_units.is_empty() {
let codegen_unit_name = Symbol::intern(FALLBACK_CODEGEN_UNIT).as_str();
let codegen_unit_name = fallback_cgu_name(tcx);
codegen_units.insert(codegen_unit_name.clone(),
CodegenUnit::new(codegen_unit_name.clone()));
}
Expand Down Expand Up @@ -630,10 +639,10 @@ fn compute_codegen_unit_name<'a, 'tcx>(tcx: TyCtxt<'a, 'tcx, 'tcx>,
// Unfortunately we cannot just use the `ty::item_path` infrastructure here
// because we need paths to modules and the DefIds of those are not
// available anymore for external items.
let mut mod_path = String::with_capacity(64);
let mut cgu_name = String::with_capacity(64);

let def_path = tcx.def_path(def_id);
mod_path.push_str(&tcx.crate_name(def_path.krate).as_str());
cgu_name.push_str(&tcx.crate_name(def_path.krate).as_str());

for part in tcx.def_path(def_id)
.data
Expand All @@ -644,15 +653,21 @@ fn compute_codegen_unit_name<'a, 'tcx>(tcx: TyCtxt<'a, 'tcx, 'tcx>,
_ => false,
}
}) {
mod_path.push_str("-");
mod_path.push_str(&part.data.as_interned_str());
cgu_name.push_str("-");
cgu_name.push_str(&part.data.as_interned_str());
}

if volatile {
mod_path.push_str(".volatile");
cgu_name.push_str(".volatile");
}

return Symbol::intern(&mod_path[..]).as_str();
let cgu_name = if tcx.sess.opts.debugging_opts.human_readable_cgu_names {
cgu_name
} else {
CodegenUnit::mangle_name(&cgu_name)
};

Symbol::intern(&cgu_name[..]).as_str()
}

fn numbered_codegen_unit_name(crate_name: &str, index: usize) -> InternedString {
Expand Down
6 changes: 5 additions & 1 deletion src/librustc_trans/assert_module_sources.rs
Expand Up @@ -28,8 +28,10 @@
//! perturb the reuse results.

use rustc::dep_graph::{DepNode, DepConstructor};
use rustc::mir::mono::CodegenUnit;
use rustc::ty::TyCtxt;
use syntax::ast;
use syntax_pos::symbol::Symbol;
use rustc::ich::{ATTR_PARTITION_REUSED, ATTR_PARTITION_TRANSLATED};

const MODULE: &'static str = "module";
Expand Down Expand Up @@ -71,9 +73,11 @@ impl<'a, 'tcx> AssertModuleSource<'a, 'tcx> {
}

let mname = self.field(attr, MODULE);
let mangled_cgu_name = CodegenUnit::mangle_name(&mname.as_str());
let mangled_cgu_name = Symbol::intern(&mangled_cgu_name).as_str();

let dep_node = DepNode::new(self.tcx,
DepConstructor::CompileCodegenUnit(mname.as_str()));
DepConstructor::CompileCodegenUnit(mangled_cgu_name));

if let Some(loaded_from_cache) = self.tcx.dep_graph.was_loaded_from_cache(&dep_node) {
match (disposition, loaded_from_cache) {
Expand Down
2 changes: 1 addition & 1 deletion src/librustc_trans/back/bytecode.rs
Expand Up @@ -47,7 +47,7 @@ pub const RLIB_BYTECODE_OBJECT_MAGIC: &'static [u8] = b"RUST_OBJECT";
// The version number this compiler will write to bytecode objects in rlibs
pub const RLIB_BYTECODE_OBJECT_VERSION: u8 = 2;

pub const RLIB_BYTECODE_EXTENSION: &str = "bytecode.encoded";
pub const RLIB_BYTECODE_EXTENSION: &str = "bc.z";

pub fn encode(identifier: &str, bytecode: &[u8]) -> Vec<u8> {
let mut encoded = Vec::new();
Expand Down
2 changes: 1 addition & 1 deletion src/librustc_trans/context.rs
Expand Up @@ -572,7 +572,7 @@ impl<'b, 'tcx> CrateContext<'b, 'tcx> {
let mut name = String::with_capacity(prefix.len() + 6);
name.push_str(prefix);
name.push_str(".");
base_n::push_str(idx as u64, base_n::ALPHANUMERIC_ONLY, &mut name);
base_n::push_str(idx as u128, base_n::ALPHANUMERIC_ONLY, &mut name);
name
}

Expand Down
4 changes: 4 additions & 0 deletions src/tools/compiletest/src/runtest.rs
Expand Up @@ -1520,6 +1520,10 @@ impl<'test> TestCx<'test> {
rustc.args(&["-Z", "incremental-queries"]);
}

if self.config.mode == CodegenUnits {
rustc.args(&["-Z", "human_readable_cgu_names"]);
}

match self.config.mode {
CompileFail | ParseFail | Incremental => {
// If we are extracting and matching errors in the new
Expand Down

0 comments on commit 61452e5

Please sign in to comment.