From 9e35b797b158d2e437bfee6376f852fd87861286 Mon Sep 17 00:00:00 2001 From: Alex Crichton Date: Mon, 25 Sep 2017 12:26:25 -0700 Subject: [PATCH] rustc: Default 32 codegen units at O0 This commit changes the default of rustc to use 32 codegen units when compiling in debug mode, typically an opt-level=0 compilation. Since their inception codegen units have matured quite a bit, gaining features such as: * Parallel translation and codegen enabling codegen units to get worked on even more quickly. * Deterministic and reliable partitioning through the same infrastructure as incremental compilation. * Global rate limiting through the `jobserver` crate to avoid overloading the system. The largest benefit of codegen units has forever been faster compilation through parallel processing of modules on the LLVM side of things, using all the cores available on build machines that typically have many available. Some downsides have been fixed through the features above, but the major downside remaining is that using codegen units reduces opportunities for inlining and optimization. This, however, doesn't matter much during debug builds! In this commit the default number of codegen units for debug builds has been raised from 1 to 32. This should enable most `cargo build` compiles that are bottlenecked on translation and/or code generation to immediately see speedups through parallelization on available cores. Work is being done to *always* enable multiple codegen units (and therefore parallel codegen) but it requires #44841 at least to be landed and stabilized, but stay tuned if you're interested in that aspect! --- src/librustc/session/config.rs | 97 +++++++++++++------ src/librustc_trans/back/link.rs | 2 +- src/librustc_trans/back/write.rs | 11 ++- src/librustc_trans/base.rs | 4 +- .../run-make/codegen-options-parsing/Makefile | 30 +++--- src/test/run-make/llvm-phase/test.rs | 1 + 6 files changed, 97 insertions(+), 48 deletions(-) diff --git a/src/librustc/session/config.rs b/src/librustc/session/config.rs index d3256357941f3..0390e22234865 100644 --- a/src/librustc/session/config.rs +++ b/src/librustc/session/config.rs @@ -350,6 +350,9 @@ top_level_options!( // is currently just a hack and will be removed eventually, so please // try to not rely on this too much. actually_rustdoc: bool [TRACKED], + + // Number of object files/codegen units to produce on the backend + codegen_units: usize [UNTRACKED], } ); @@ -512,6 +515,7 @@ pub fn basic_options() -> Options { unstable_features: UnstableFeatures::Disallow, debug_assertions: true, actually_rustdoc: false, + codegen_units: 1, } } @@ -529,11 +533,6 @@ impl Options { (self.debugging_opts.query_dep_graph || self.debugging_opts.incremental_info) } - pub fn single_codegen_unit(&self) -> bool { - self.incremental.is_none() || - self.cg.codegen_units == 1 - } - pub fn file_path_mapping(&self) -> FilePathMapping { FilePathMapping::new( self.debugging_opts.remap_path_prefix_from.iter().zip( @@ -791,7 +790,7 @@ macro_rules! options { fn parse_opt_uint(slot: &mut Option, v: Option<&str>) -> bool { match v { Some(s) => { *slot = s.parse().ok(); slot.is_some() } - None => { *slot = None; true } + None => { *slot = None; false } } } @@ -924,7 +923,7 @@ options! {CodegenOptions, CodegenSetter, basic_codegen_options, "metadata to mangle symbol names with"), extra_filename: String = ("".to_string(), parse_string, [UNTRACKED], "extra data to put in each output filename"), - codegen_units: usize = (1, parse_uint, [UNTRACKED], + codegen_units: Option = (None, parse_opt_uint, [UNTRACKED], "divide crate into N units to optimize in parallel"), remark: Passes = (SomePasses(Vec::new()), parse_passes, [UNTRACKED], "print remarks for these optimization passes (space separated, or \"all\")"), @@ -1521,27 +1520,35 @@ pub fn build_session_options_and_crate_config(matches: &getopts::Matches) } let mut cg = build_codegen_options(matches, error_format); + let mut codegen_units = cg.codegen_units; // Issue #30063: if user requests llvm-related output to one // particular path, disable codegen-units. - if matches.opt_present("o") && cg.codegen_units != 1 { - let incompatible: Vec<_> = output_types.iter() - .map(|ot_path| ot_path.0) - .filter(|ot| { - !ot.is_compatible_with_codegen_units_and_single_output_file() - }).collect(); - if !incompatible.is_empty() { - for ot in &incompatible { - early_warn(error_format, &format!("--emit={} with -o incompatible with \ - -C codegen-units=N for N > 1", - ot.shorthand())); + let incompatible: Vec<_> = output_types.iter() + .map(|ot_path| ot_path.0) + .filter(|ot| { + !ot.is_compatible_with_codegen_units_and_single_output_file() + }) + .map(|ot| ot.shorthand()) + .collect(); + if !incompatible.is_empty() { + match codegen_units { + Some(n) if n > 1 => { + if matches.opt_present("o") { + for ot in &incompatible { + early_warn(error_format, &format!("--emit={} with -o incompatible with \ + -C codegen-units=N for N > 1", + ot)); + } + early_warn(error_format, "resetting to default -C codegen-units=1"); + codegen_units = Some(1); + } } - early_warn(error_format, "resetting to default -C codegen-units=1"); - cg.codegen_units = 1; + _ => codegen_units = Some(1), } } - if cg.codegen_units < 1 { + if codegen_units == Some(0) { early_error(error_format, "Value for codegen units must be a positive nonzero integer"); } @@ -1550,12 +1557,17 @@ pub fn build_session_options_and_crate_config(matches: &getopts::Matches) // case, but it would be confusing to have the validity of // `-Z lto -C codegen-units=2` depend on details of the crate being // compiled, so we complain regardless. - if cg.lto && cg.codegen_units > 1 { - // This case is impossible to handle because LTO expects to be able - // to combine the entire crate and all its dependencies into a - // single compilation unit, but each codegen unit is in a separate - // LLVM context, so they can't easily be combined. - early_error(error_format, "can't perform LTO when using multiple codegen units"); + if cg.lto { + if let Some(n) = codegen_units { + if n > 1 { + // This case is impossible to handle because LTO expects to be able + // to combine the entire crate and all its dependencies into a + // single compilation unit, but each codegen unit is in a separate + // LLVM context, so they can't easily be combined. + early_error(error_format, "can't perform LTO when using multiple codegen units"); + } + } + codegen_units = Some(1); } if cg.lto && debugging_opts.incremental.is_some() { @@ -1720,6 +1732,34 @@ pub fn build_session_options_and_crate_config(matches: &getopts::Matches) let incremental = debugging_opts.incremental.as_ref().map(|m| PathBuf::from(m)); + let codegen_units = codegen_units.unwrap_or_else(|| { + match opt_level { + // If we're compiling at `-O0` then default to 32 codegen units. + // The number here shouldn't matter too too much as debug mode + // builds don't rely on performance at all, meaning that lost + // opportunities for inlining through multiple codegen units is + // a non-issue. + // + // Note that the high number here doesn't mean that we'll be + // spawning a large number of threads in parallel. The backend + // of rustc contains global rate limiting through the + // `jobserver` crate so we'll never overload the system with too + // much work, but rather we'll only be optimizing when we're + // otherwise cooperating with other instances of rustc. + // + // Rather the high number here means that we should be able to + // keep a lot of idle cpus busy. By ensuring that no codegen + // unit takes *too* long to build we'll be guaranteed that all + // cpus will finish pretty closely to one another and we should + // make relatively optimal use of system resources + OptLevel::No => 32, + + // All other optimization levels default use one codegen unit, + // the historical default in Rust for a Long Time. + _ => 1, + } + }); + (Options { crate_types, optimize: opt_level, @@ -1744,6 +1784,7 @@ pub fn build_session_options_and_crate_config(matches: &getopts::Matches) unstable_features: UnstableFeatures::from_environment(), debug_assertions, actually_rustdoc: false, + codegen_units, }, cfg) } @@ -2447,7 +2488,7 @@ mod tests { opts.cg.extra_filename = String::from("extra-filename"); assert_eq!(reference.dep_tracking_hash(), opts.dep_tracking_hash()); - opts.cg.codegen_units = 42; + opts.cg.codegen_units = Some(42); assert_eq!(reference.dep_tracking_hash(), opts.dep_tracking_hash()); opts.cg.remark = super::SomePasses(vec![String::from("pass1"), diff --git a/src/librustc_trans/back/link.rs b/src/librustc_trans/back/link.rs index 1630e7759919c..2cf0a32d5afdf 100644 --- a/src/librustc_trans/back/link.rs +++ b/src/librustc_trans/back/link.rs @@ -509,7 +509,7 @@ fn link_rlib<'a>(sess: &'a Session, // of when we do and don't keep .#module-name#.bc files around. let user_wants_numbered_bitcode = sess.opts.output_types.contains_key(&OutputType::Bitcode) && - sess.opts.cg.codegen_units > 1; + sess.opts.codegen_units > 1; if !sess.opts.cg.save_temps && !user_wants_numbered_bitcode { remove(sess, &bc_filename); } diff --git a/src/librustc_trans/back/write.rs b/src/librustc_trans/back/write.rs index ef6bf2504f312..6b980a37ac778 100644 --- a/src/librustc_trans/back/write.rs +++ b/src/librustc_trans/back/write.rs @@ -939,10 +939,10 @@ fn produce_final_output_artifacts(sess: &Session, let needs_crate_object = crate_output.outputs.contains_key(&OutputType::Exe); let keep_numbered_bitcode = needs_crate_bitcode || - (user_wants_bitcode && sess.opts.cg.codegen_units > 1); + (user_wants_bitcode && sess.opts.codegen_units > 1); let keep_numbered_objects = needs_crate_object || - (user_wants_objects && sess.opts.cg.codegen_units > 1); + (user_wants_objects && sess.opts.codegen_units > 1); for module in compiled_modules.modules.iter() { let module_name = Some(&module.name[..]); @@ -1520,6 +1520,11 @@ fn start_executing_work(tcx: TyCtxt, total_llvm_time); } + // Regardless of what order these modules completed in, report them to + // the backend in the same order every time to ensure that we're handing + // out deterministic results. + compiled_modules.sort_by(|a, b| a.name.cmp(&b.name)); + let compiled_metadata_module = compiled_metadata_module .expect("Metadata module not compiled?"); @@ -1853,7 +1858,7 @@ impl OngoingCrateTranslation { // FIXME: time_llvm_passes support - does this use a global context or // something? - if sess.opts.cg.codegen_units == 1 && sess.time_llvm_passes() { + if sess.opts.codegen_units == 1 && sess.time_llvm_passes() { unsafe { llvm::LLVMRustPrintPassTimings(); } } diff --git a/src/librustc_trans/base.rs b/src/librustc_trans/base.rs index 2d01d2947d6eb..774acc813438f 100644 --- a/src/librustc_trans/base.rs +++ b/src/librustc_trans/base.rs @@ -1162,7 +1162,7 @@ fn collect_and_partition_translation_items<'a, 'tcx>( let strategy = if tcx.sess.opts.debugging_opts.incremental.is_some() { PartitioningStrategy::PerModule } else { - PartitioningStrategy::FixedUnitCount(tcx.sess.opts.cg.codegen_units) + PartitioningStrategy::FixedUnitCount(tcx.sess.opts.codegen_units) }; let codegen_units = time(time_passes, "codegen unit partitioning", || { @@ -1175,7 +1175,7 @@ fn collect_and_partition_translation_items<'a, 'tcx>( .collect::>() }); - assert!(tcx.sess.opts.cg.codegen_units == codegen_units.len() || + assert!(tcx.sess.opts.codegen_units == codegen_units.len() || tcx.sess.opts.debugging_opts.incremental.is_some()); let translation_items: DefIdSet = items.iter().filter_map(|trans_item| { diff --git a/src/test/run-make/codegen-options-parsing/Makefile b/src/test/run-make/codegen-options-parsing/Makefile index dc46a8a04ef8c..755e211a34996 100644 --- a/src/test/run-make/codegen-options-parsing/Makefile +++ b/src/test/run-make/codegen-options-parsing/Makefile @@ -1,26 +1,28 @@ -include ../tools.mk +LOG = $(TMPDIR)/log.txt + all: #Option taking a number - $(RUSTC) -C codegen-units dummy.rs 2>&1 | \ - grep 'codegen option `codegen-units` requires a number' - $(RUSTC) -C codegen-units= dummy.rs 2>&1 | \ - grep 'incorrect value `` for codegen option `codegen-units` - a number was expected' - $(RUSTC) -C codegen-units=foo dummy.rs 2>&1 | \ - grep 'incorrect value `foo` for codegen option `codegen-units` - a number was expected' + $(RUSTC) -C codegen-units dummy.rs 2>&1 | tee $(LOG) + grep 'codegen option `codegen-units` requires a number' $(LOG) + $(RUSTC) -C codegen-units= dummy.rs 2>&1 | tee $(LOG) + grep 'incorrect value `` for codegen option `codegen-units` - a number was expected' $(LOG) + $(RUSTC) -C codegen-units=foo dummy.rs 2>&1 | tee $(LOG) + grep 'incorrect value `foo` for codegen option `codegen-units` - a number was expected' $(LOG) $(RUSTC) -C codegen-units=1 dummy.rs #Option taking a string - $(RUSTC) -C extra-filename dummy.rs 2>&1 | \ - grep 'codegen option `extra-filename` requires a string' + $(RUSTC) -C extra-filename dummy.rs 2>&1 | tee $(LOG) + grep 'codegen option `extra-filename` requires a string' $(LOG) $(RUSTC) -C extra-filename= dummy.rs 2>&1 $(RUSTC) -C extra-filename=foo dummy.rs 2>&1 #Option taking no argument - $(RUSTC) -C lto= dummy.rs 2>&1 | \ - grep 'codegen option `lto` takes no value' - $(RUSTC) -C lto=1 dummy.rs 2>&1 | \ - grep 'codegen option `lto` takes no value' - $(RUSTC) -C lto=foo dummy.rs 2>&1 | \ - grep 'codegen option `lto` takes no value' + $(RUSTC) -C lto= dummy.rs 2>&1 | tee $(LOG) + grep 'codegen option `lto` takes no value' $(LOG) + $(RUSTC) -C lto=1 dummy.rs 2>&1 | tee $(LOG) + grep 'codegen option `lto` takes no value' $(LOG) + $(RUSTC) -C lto=foo dummy.rs 2>&1 | tee $(LOG) + grep 'codegen option `lto` takes no value' $(LOG) $(RUSTC) -C lto dummy.rs # Should not link dead code... diff --git a/src/test/run-make/llvm-phase/test.rs b/src/test/run-make/llvm-phase/test.rs index 7a63871f19e38..2ff4593a801fe 100644 --- a/src/test/run-make/llvm-phase/test.rs +++ b/src/test/run-make/llvm-phase/test.rs @@ -77,6 +77,7 @@ fn main() { .split(' ').map(|s| s.to_string()).collect(); args.push("--out-dir".to_string()); args.push(env::var("TMPDIR").unwrap()); + args.push("-Ccodegen-units=1".to_string()); let (result, _) = rustc_driver::run_compiler( &args, &mut JitCalls, Some(box JitLoader), None);