Skip to content

Commit

Permalink
rustc: Default 32 codegen units at O0
Browse files Browse the repository at this point in the history
This commit changes the default of rustc to use 32 codegen units when compiling
in debug mode, typically an opt-level=0 compilation. Since their inception
codegen units have matured quite a bit, gaining features such as:

* Parallel translation and codegen enabling codegen units to get worked on even
  more quickly.
* Deterministic and reliable partitioning through the same infrastructure as
  incremental compilation.
* Global rate limiting through the `jobserver` crate to avoid overloading the
  system.

The largest benefit of codegen units has forever been faster compilation through
parallel processing of modules on the LLVM side of things, using all the cores
available on build machines that typically have many available. Some downsides
have been fixed through the features above, but the major downside remaining is
that using codegen units reduces opportunities for inlining and optimization.
This, however, doesn't matter much during debug builds!

In this commit the default number of codegen units for debug builds has been
raised from 1 to 32. This should enable most `cargo build` compiles that are
bottlenecked on translation and/or code generation to immediately see speedups
through parallelization on available cores.

Work is being done to *always* enable multiple codegen units (and therefore
parallel codegen) but it requires #44841 at least to be landed and stabilized,
but stay tuned if you're interested in that aspect!
  • Loading branch information
alexcrichton committed Sep 26, 2017
1 parent 4b8bf39 commit 9e35b79
Show file tree
Hide file tree
Showing 6 changed files with 97 additions and 48 deletions.
97 changes: 69 additions & 28 deletions src/librustc/session/config.rs
Expand Up @@ -350,6 +350,9 @@ top_level_options!(
// is currently just a hack and will be removed eventually, so please
// try to not rely on this too much.
actually_rustdoc: bool [TRACKED],

// Number of object files/codegen units to produce on the backend
codegen_units: usize [UNTRACKED],
}
);

Expand Down Expand Up @@ -512,6 +515,7 @@ pub fn basic_options() -> Options {
unstable_features: UnstableFeatures::Disallow,
debug_assertions: true,
actually_rustdoc: false,
codegen_units: 1,
}
}

Expand All @@ -529,11 +533,6 @@ impl Options {
(self.debugging_opts.query_dep_graph || self.debugging_opts.incremental_info)
}

pub fn single_codegen_unit(&self) -> bool {
self.incremental.is_none() ||
self.cg.codegen_units == 1
}

pub fn file_path_mapping(&self) -> FilePathMapping {
FilePathMapping::new(
self.debugging_opts.remap_path_prefix_from.iter().zip(
Expand Down Expand Up @@ -791,7 +790,7 @@ macro_rules! options {
fn parse_opt_uint(slot: &mut Option<usize>, v: Option<&str>) -> bool {
match v {
Some(s) => { *slot = s.parse().ok(); slot.is_some() }
None => { *slot = None; true }
None => { *slot = None; false }
}
}

Expand Down Expand Up @@ -924,7 +923,7 @@ options! {CodegenOptions, CodegenSetter, basic_codegen_options,
"metadata to mangle symbol names with"),
extra_filename: String = ("".to_string(), parse_string, [UNTRACKED],
"extra data to put in each output filename"),
codegen_units: usize = (1, parse_uint, [UNTRACKED],
codegen_units: Option<usize> = (None, parse_opt_uint, [UNTRACKED],
"divide crate into N units to optimize in parallel"),
remark: Passes = (SomePasses(Vec::new()), parse_passes, [UNTRACKED],
"print remarks for these optimization passes (space separated, or \"all\")"),
Expand Down Expand Up @@ -1521,27 +1520,35 @@ pub fn build_session_options_and_crate_config(matches: &getopts::Matches)
}

let mut cg = build_codegen_options(matches, error_format);
let mut codegen_units = cg.codegen_units;

// Issue #30063: if user requests llvm-related output to one
// particular path, disable codegen-units.
if matches.opt_present("o") && cg.codegen_units != 1 {
let incompatible: Vec<_> = output_types.iter()
.map(|ot_path| ot_path.0)
.filter(|ot| {
!ot.is_compatible_with_codegen_units_and_single_output_file()
}).collect();
if !incompatible.is_empty() {
for ot in &incompatible {
early_warn(error_format, &format!("--emit={} with -o incompatible with \
-C codegen-units=N for N > 1",
ot.shorthand()));
let incompatible: Vec<_> = output_types.iter()
.map(|ot_path| ot_path.0)
.filter(|ot| {
!ot.is_compatible_with_codegen_units_and_single_output_file()
})
.map(|ot| ot.shorthand())
.collect();
if !incompatible.is_empty() {
match codegen_units {
Some(n) if n > 1 => {
if matches.opt_present("o") {
for ot in &incompatible {
early_warn(error_format, &format!("--emit={} with -o incompatible with \
-C codegen-units=N for N > 1",
ot));
}
early_warn(error_format, "resetting to default -C codegen-units=1");
codegen_units = Some(1);
}
}
early_warn(error_format, "resetting to default -C codegen-units=1");
cg.codegen_units = 1;
_ => codegen_units = Some(1),
}
}

if cg.codegen_units < 1 {
if codegen_units == Some(0) {
early_error(error_format, "Value for codegen units must be a positive nonzero integer");
}

Expand All @@ -1550,12 +1557,17 @@ pub fn build_session_options_and_crate_config(matches: &getopts::Matches)
// case, but it would be confusing to have the validity of
// `-Z lto -C codegen-units=2` depend on details of the crate being
// compiled, so we complain regardless.
if cg.lto && cg.codegen_units > 1 {
// This case is impossible to handle because LTO expects to be able
// to combine the entire crate and all its dependencies into a
// single compilation unit, but each codegen unit is in a separate
// LLVM context, so they can't easily be combined.
early_error(error_format, "can't perform LTO when using multiple codegen units");
if cg.lto {
if let Some(n) = codegen_units {
if n > 1 {
// This case is impossible to handle because LTO expects to be able
// to combine the entire crate and all its dependencies into a
// single compilation unit, but each codegen unit is in a separate
// LLVM context, so they can't easily be combined.
early_error(error_format, "can't perform LTO when using multiple codegen units");
}
}
codegen_units = Some(1);
}

if cg.lto && debugging_opts.incremental.is_some() {
Expand Down Expand Up @@ -1720,6 +1732,34 @@ pub fn build_session_options_and_crate_config(matches: &getopts::Matches)

let incremental = debugging_opts.incremental.as_ref().map(|m| PathBuf::from(m));

let codegen_units = codegen_units.unwrap_or_else(|| {
match opt_level {
// If we're compiling at `-O0` then default to 32 codegen units.
// The number here shouldn't matter too too much as debug mode
// builds don't rely on performance at all, meaning that lost
// opportunities for inlining through multiple codegen units is
// a non-issue.
//
// Note that the high number here doesn't mean that we'll be
// spawning a large number of threads in parallel. The backend
// of rustc contains global rate limiting through the
// `jobserver` crate so we'll never overload the system with too
// much work, but rather we'll only be optimizing when we're
// otherwise cooperating with other instances of rustc.
//
// Rather the high number here means that we should be able to
// keep a lot of idle cpus busy. By ensuring that no codegen
// unit takes *too* long to build we'll be guaranteed that all
// cpus will finish pretty closely to one another and we should
// make relatively optimal use of system resources
OptLevel::No => 32,

// All other optimization levels default use one codegen unit,
// the historical default in Rust for a Long Time.
_ => 1,
}
});

(Options {
crate_types,
optimize: opt_level,
Expand All @@ -1744,6 +1784,7 @@ pub fn build_session_options_and_crate_config(matches: &getopts::Matches)
unstable_features: UnstableFeatures::from_environment(),
debug_assertions,
actually_rustdoc: false,
codegen_units,
},
cfg)
}
Expand Down Expand Up @@ -2447,7 +2488,7 @@ mod tests {
opts.cg.extra_filename = String::from("extra-filename");
assert_eq!(reference.dep_tracking_hash(), opts.dep_tracking_hash());

opts.cg.codegen_units = 42;
opts.cg.codegen_units = Some(42);
assert_eq!(reference.dep_tracking_hash(), opts.dep_tracking_hash());

opts.cg.remark = super::SomePasses(vec![String::from("pass1"),
Expand Down
2 changes: 1 addition & 1 deletion src/librustc_trans/back/link.rs
Expand Up @@ -509,7 +509,7 @@ fn link_rlib<'a>(sess: &'a Session,
// of when we do and don't keep .#module-name#.bc files around.
let user_wants_numbered_bitcode =
sess.opts.output_types.contains_key(&OutputType::Bitcode) &&
sess.opts.cg.codegen_units > 1;
sess.opts.codegen_units > 1;
if !sess.opts.cg.save_temps && !user_wants_numbered_bitcode {
remove(sess, &bc_filename);
}
Expand Down
11 changes: 8 additions & 3 deletions src/librustc_trans/back/write.rs
Expand Up @@ -939,10 +939,10 @@ fn produce_final_output_artifacts(sess: &Session,
let needs_crate_object = crate_output.outputs.contains_key(&OutputType::Exe);

let keep_numbered_bitcode = needs_crate_bitcode ||
(user_wants_bitcode && sess.opts.cg.codegen_units > 1);
(user_wants_bitcode && sess.opts.codegen_units > 1);

let keep_numbered_objects = needs_crate_object ||
(user_wants_objects && sess.opts.cg.codegen_units > 1);
(user_wants_objects && sess.opts.codegen_units > 1);

for module in compiled_modules.modules.iter() {
let module_name = Some(&module.name[..]);
Expand Down Expand Up @@ -1520,6 +1520,11 @@ fn start_executing_work(tcx: TyCtxt,
total_llvm_time);
}

// Regardless of what order these modules completed in, report them to
// the backend in the same order every time to ensure that we're handing
// out deterministic results.
compiled_modules.sort_by(|a, b| a.name.cmp(&b.name));

let compiled_metadata_module = compiled_metadata_module
.expect("Metadata module not compiled?");

Expand Down Expand Up @@ -1853,7 +1858,7 @@ impl OngoingCrateTranslation {

// FIXME: time_llvm_passes support - does this use a global context or
// something?
if sess.opts.cg.codegen_units == 1 && sess.time_llvm_passes() {
if sess.opts.codegen_units == 1 && sess.time_llvm_passes() {
unsafe { llvm::LLVMRustPrintPassTimings(); }
}

Expand Down
4 changes: 2 additions & 2 deletions src/librustc_trans/base.rs
Expand Up @@ -1162,7 +1162,7 @@ fn collect_and_partition_translation_items<'a, 'tcx>(
let strategy = if tcx.sess.opts.debugging_opts.incremental.is_some() {
PartitioningStrategy::PerModule
} else {
PartitioningStrategy::FixedUnitCount(tcx.sess.opts.cg.codegen_units)
PartitioningStrategy::FixedUnitCount(tcx.sess.opts.codegen_units)
};

let codegen_units = time(time_passes, "codegen unit partitioning", || {
Expand All @@ -1175,7 +1175,7 @@ fn collect_and_partition_translation_items<'a, 'tcx>(
.collect::<Vec<_>>()
});

assert!(tcx.sess.opts.cg.codegen_units == codegen_units.len() ||
assert!(tcx.sess.opts.codegen_units == codegen_units.len() ||
tcx.sess.opts.debugging_opts.incremental.is_some());

let translation_items: DefIdSet = items.iter().filter_map(|trans_item| {
Expand Down
30 changes: 16 additions & 14 deletions src/test/run-make/codegen-options-parsing/Makefile
@@ -1,26 +1,28 @@
-include ../tools.mk

LOG = $(TMPDIR)/log.txt

all:
#Option taking a number
$(RUSTC) -C codegen-units dummy.rs 2>&1 | \
grep 'codegen option `codegen-units` requires a number'
$(RUSTC) -C codegen-units= dummy.rs 2>&1 | \
grep 'incorrect value `` for codegen option `codegen-units` - a number was expected'
$(RUSTC) -C codegen-units=foo dummy.rs 2>&1 | \
grep 'incorrect value `foo` for codegen option `codegen-units` - a number was expected'
$(RUSTC) -C codegen-units dummy.rs 2>&1 | tee $(LOG)
grep 'codegen option `codegen-units` requires a number' $(LOG)
$(RUSTC) -C codegen-units= dummy.rs 2>&1 | tee $(LOG)
grep 'incorrect value `` for codegen option `codegen-units` - a number was expected' $(LOG)
$(RUSTC) -C codegen-units=foo dummy.rs 2>&1 | tee $(LOG)
grep 'incorrect value `foo` for codegen option `codegen-units` - a number was expected' $(LOG)
$(RUSTC) -C codegen-units=1 dummy.rs
#Option taking a string
$(RUSTC) -C extra-filename dummy.rs 2>&1 | \
grep 'codegen option `extra-filename` requires a string'
$(RUSTC) -C extra-filename dummy.rs 2>&1 | tee $(LOG)
grep 'codegen option `extra-filename` requires a string' $(LOG)
$(RUSTC) -C extra-filename= dummy.rs 2>&1
$(RUSTC) -C extra-filename=foo dummy.rs 2>&1
#Option taking no argument
$(RUSTC) -C lto= dummy.rs 2>&1 | \
grep 'codegen option `lto` takes no value'
$(RUSTC) -C lto=1 dummy.rs 2>&1 | \
grep 'codegen option `lto` takes no value'
$(RUSTC) -C lto=foo dummy.rs 2>&1 | \
grep 'codegen option `lto` takes no value'
$(RUSTC) -C lto= dummy.rs 2>&1 | tee $(LOG)
grep 'codegen option `lto` takes no value' $(LOG)
$(RUSTC) -C lto=1 dummy.rs 2>&1 | tee $(LOG)
grep 'codegen option `lto` takes no value' $(LOG)
$(RUSTC) -C lto=foo dummy.rs 2>&1 | tee $(LOG)
grep 'codegen option `lto` takes no value' $(LOG)
$(RUSTC) -C lto dummy.rs

# Should not link dead code...
Expand Down
1 change: 1 addition & 0 deletions src/test/run-make/llvm-phase/test.rs
Expand Up @@ -77,6 +77,7 @@ fn main() {
.split(' ').map(|s| s.to_string()).collect();
args.push("--out-dir".to_string());
args.push(env::var("TMPDIR").unwrap());
args.push("-Ccodegen-units=1".to_string());

let (result, _) = rustc_driver::run_compiler(
&args, &mut JitCalls, Some(box JitLoader), None);
Expand Down

0 comments on commit 9e35b79

Please sign in to comment.