Skip to content

Commit

Permalink
Lazily update all submodules that rustbuild doesn't depend on
Browse files Browse the repository at this point in the history
This only updates the submodules the first time they're needed, instead
of unconditionally the first time you run x.py.

Ideally, this would move *all* submodules and not exclude some tools and
backtrace. Unfortunately, cargo requires all `Cargo.toml` files in the
whole workspace to be present to build any crate.

On my machine, this reduces the time for an initial submodule clone (for
`x.py --help`) from 55.70 to 15.87 seconds.

This uses exactly the same logic as the LLVM update used, modulo some
minor cleanups:
- Use a local variable for `src.join(relative_path)`
- Remove unnecessary arrays for `book!` macro and make the macro simpler to use
- Add more comments
  • Loading branch information
jyn514 committed Jul 21, 2021
1 parent 89d260f commit 2ac0e9b
Show file tree
Hide file tree
Showing 8 changed files with 173 additions and 113 deletions.
27 changes: 18 additions & 9 deletions src/bootstrap/bootstrap.py
Expand Up @@ -989,21 +989,30 @@ def update_submodules(self):
slow_submodules = self.get_toml('fast-submodules') == "false"
start_time = time()
if slow_submodules:
print('Unconditionally updating all submodules')
print('Unconditionally updating submodules')
else:
print('Updating only changed submodules')
default_encoding = sys.getdefaultencoding()
submodules = [s.split(' ', 1)[1] for s in subprocess.check_output(
["git", "config", "--file",
os.path.join(self.rust_root, ".gitmodules"),
"--get-regexp", "path"]
).decode(default_encoding).splitlines()]
# Only update submodules that are needed to build bootstrap. These are needed because Cargo
# currently requires everything in a workspace to be "locally present" when starting a
# build, and will give a hard error if any Cargo.toml files are missing.
# FIXME: Is there a way to avoid cloning these eagerly? Bootstrap itself doesn't need to
# share a workspace with any tools - maybe it could be excluded from the workspace?
# That will still require cloning the submodules the second you check the standard
# library, though...
# FIXME: Is there a way to avoid hard-coding the submodules required?
# WARNING: keep this in sync with the submodules hard-coded in bootstrap/lib.rs
submodules = [
"src/tools/rust-installer",
"src/tools/cargo",
"src/tools/rls",
"src/tools/miri",
"library/backtrace",
"library/stdarch"
]
filtered_submodules = []
submodules_names = []
for module in submodules:
# This is handled by native::Llvm in rustbuild, not here
if module.endswith("llvm-project"):
continue
check = self.check_submodule(module, slow_submodules)
filtered_submodules.append((module, check))
submodules_names.append(module)
Expand Down
4 changes: 3 additions & 1 deletion src/bootstrap/check.rs
Expand Up @@ -7,7 +7,7 @@ use crate::config::TargetSelection;
use crate::tool::{prepare_tool_cargo, SourceType};
use crate::INTERNER;
use crate::{Compiler, Mode, Subcommand};
use std::path::PathBuf;
use std::path::{Path, PathBuf};

#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)]
pub struct Std {
Expand Down Expand Up @@ -72,6 +72,8 @@ impl Step for Std {
}

fn run(self, builder: &Builder<'_>) {
builder.update_submodule(&Path::new("library").join("stdarch"));

let target = self.target;
let compiler = builder.compiler(builder.top_stage, builder.config.build);

Expand Down
2 changes: 2 additions & 0 deletions src/bootstrap/compile.rs
Expand Up @@ -79,6 +79,8 @@ impl Step for Std {
return;
}

builder.update_submodule(&Path::new("library").join("stdarch"));

let mut target_deps = builder.ensure(StartupObjects { compiler, target });

let compiler_to_use = builder.compiler_for(compiler.stage, compiler.host, target);
Expand Down
39 changes: 29 additions & 10 deletions src/bootstrap/doc.rs
Expand Up @@ -22,8 +22,17 @@ use crate::config::{Config, TargetSelection};
use crate::tool::{self, prepare_tool_cargo, SourceType, Tool};
use crate::util::symlink_dir;

// Picks the submodule directory for a `book!` entry.
//
// * `submodule = "<dir>"` - the submodule lives at the explicitly given literal, which is used
//   when the book sources sit in a subdirectory of the submodule.
// * bare `submodule`      - the book path itself is the submodule directory.
macro_rules! submodule_helper {
    ($book_path:expr, submodule = $explicit:literal) => (
        $explicit
    );
    ($book_path:expr, submodule) => (
        $book_path
    );
}

macro_rules! book {
($($name:ident, $path:expr, $book_name:expr;)+) => {
($($name:ident, $path:expr, $book_name:expr $(, submodule $(= $submodule:literal)? )? ;)+) => {
$(
#[derive(Debug, Copy, Clone, Hash, PartialEq, Eq)]
pub struct $name {
Expand All @@ -46,6 +55,10 @@ macro_rules! book {
}

fn run(self, builder: &Builder<'_>) {
$(
let path = Path::new(submodule_helper!( $path, submodule $( = $submodule )? ));
builder.update_submodule(&path);
)?
builder.ensure(RustbookSrc {
target: self.target,
name: INTERNER.intern_str($book_name),
Expand All @@ -59,13 +72,16 @@ macro_rules! book {

// NOTE: When adding a book here, make sure to ALSO build the book by
// adding a build step in `src/bootstrap/builder.rs`!
// NOTE: Make sure to add the corresponding submodule when adding a new book.
// FIXME: Make checking for a submodule automatic somehow (maybe by having a list of all submodules
// and checking against it?).
book!(
CargoBook, "src/tools/cargo/src/doc", "cargo";
EditionGuide, "src/doc/edition-guide", "edition-guide";
EmbeddedBook, "src/doc/embedded-book", "embedded-book";
Nomicon, "src/doc/nomicon", "nomicon";
Reference, "src/doc/reference", "reference";
RustByExample, "src/doc/rust-by-example", "rust-by-example";
CargoBook, "src/tools/cargo/src/doc", "cargo", submodule = "src/tools/cargo";
EditionGuide, "src/doc/edition-guide", "edition-guide", submodule;
EmbeddedBook, "src/doc/embedded-book", "embedded-book", submodule;
Nomicon, "src/doc/nomicon", "nomicon", submodule;
Reference, "src/doc/reference", "reference", submodule;
RustByExample, "src/doc/rust-by-example", "rust-by-example", submodule;
RustdocBook, "src/doc/rustdoc", "rustdoc";
);

Expand Down Expand Up @@ -197,22 +213,25 @@ impl Step for TheBook {
/// * Index page
/// * Redirect pages
fn run(self, builder: &Builder<'_>) {
let relative_path = Path::new("src").join("doc").join("book");
builder.update_submodule(&relative_path);

let compiler = self.compiler;
let target = self.target;

// build book
builder.ensure(RustbookSrc {
target,
name: INTERNER.intern_str("book"),
src: INTERNER.intern_path(builder.src.join("src/doc/book")),
src: INTERNER.intern_path(builder.src.join(&relative_path)),
});

// building older edition redirects
for edition in &["first-edition", "second-edition", "2018-edition"] {
builder.ensure(RustbookSrc {
target,
name: INTERNER.intern_string(format!("book/{}", edition)),
src: INTERNER.intern_path(builder.src.join("src/doc/book").join(edition)),
src: INTERNER.intern_path(builder.src.join(&relative_path).join(edition)),
});
}

Expand All @@ -221,7 +240,7 @@ impl Step for TheBook {

// build the redirect pages
builder.info(&format!("Documenting book redirect pages ({})", target));
for file in t!(fs::read_dir(builder.src.join("src/doc/book/redirects"))) {
for file in t!(fs::read_dir(builder.src.join(&relative_path).join("redirects"))) {
let file = t!(file);
let path = file.path();
let path = path.to_str().unwrap();
Expand Down
113 changes: 108 additions & 5 deletions src/bootstrap/lib.rs
Expand Up @@ -477,17 +477,120 @@ impl Build {
slice::from_ref(&self.build.triple)
}

// modified from `check_submodule` and `update_submodule` in bootstrap.py
/// Given a path to the directory of a submodule, update it.
///
/// `relative_path` should be relative to the root of the git repository, not an absolute path.
///
/// This is a no-op when `config.submodules` is disabled, when the directory already has
/// contents but is not recognized as a git checkout, or (with `config.fast_submodules`) when
/// the checked-out commit already matches the commit recorded in the superproject's `HEAD`.
///
/// # Panics
///
/// Panics if the submodule directory cannot be read, or if `git ls-tree` prints something
/// that does not contain a hash in its third whitespace-separated field.
pub(crate) fn update_submodule(&self, relative_path: &Path) {
    fn dir_is_empty(dir: &Path) -> bool {
        t!(std::fs::read_dir(dir)).next().is_none()
    }

    if !self.config.submodules {
        return;
    }

    let absolute_path = self.config.src.join(relative_path);

    // NOTE: The check for the empty directory is here because when running x.py the first time,
    // the submodule won't be checked out. Check it out now so we can build it.
    //
    // The git probe is given the absolute path: `relative_path` is relative to the repository
    // root, so handing it over as-is would make the result depend on the directory the build
    // happened to be started from.
    if !channel::GitInfo::new(false, &absolute_path).is_git() && !dir_is_empty(&absolute_path) {
        return;
    }

    // check_submodule
    if self.config.fast_submodules {
        // Commit currently checked out inside the submodule.
        let checked_out_hash = output(
            Command::new("git").args(&["rev-parse", "HEAD"]).current_dir(&absolute_path),
        );
        // update_submodules
        // Commit the superproject records for the submodule; the hash is the third field of
        // the `git ls-tree` line.
        let recorded = output(
            Command::new("git")
                .args(&["ls-tree", "HEAD"])
                .arg(relative_path)
                .current_dir(&self.config.src),
        );
        let actual_hash = recorded
            .split_whitespace()
            .nth(2)
            .unwrap_or_else(|| panic!("unexpected output `{}`", recorded));

        // update_submodule
        if actual_hash == checked_out_hash.trim_end() {
            // already checked out
            return;
        }
    }

    println!("Updating submodule {}", relative_path.display());
    self.run(
        Command::new("git")
            .args(&["submodule", "-q", "sync"])
            .arg(relative_path)
            .current_dir(&self.config.src),
    );

    // Try passing `--progress` to start, then run git again without if that fails.
    let update = |progress: bool| {
        let mut git = Command::new("git");
        git.args(&["submodule", "update", "--init", "--recursive"]);
        if progress {
            git.arg("--progress");
        }
        git.arg(relative_path).current_dir(&self.config.src);
        git
    };
    // NOTE: doesn't use `try_run` because this shouldn't print an error if it fails.
    if !update(true).status().map_or(false, |status| status.success()) {
        self.run(&mut update(false));
    }

    // Discard any local modifications and untracked files left in the submodule checkout.
    self.run(Command::new("git").args(&["reset", "-q", "--hard"]).current_dir(&absolute_path));
    self.run(Command::new("git").args(&["clean", "-qdfx"]).current_dir(absolute_path));
}

/// If any submodule has been initialized already, sync it unconditionally.
/// This avoids contributors checking in a submodule change by accident.
///
/// The submodule paths are read from the `.gitmodules` file at the root of the source tree.
/// Paths listed in `BOOTSTRAP_SUBMODULES` are skipped, and so is every submodule whose
/// directory is not recognized as a git checkout. Does nothing when `config.submodules` is
/// disabled.
///
/// # Panics
///
/// Panics if a line printed by `git config --get-regexp` does not have the form
/// `<key> <path>`.
pub fn maybe_update_submodules(&self) {
    // WARNING: keep this in sync with the submodules hard-coded in bootstrap.py
    const BOOTSTRAP_SUBMODULES: &[&str] = &[
        "src/tools/rust-installer",
        "src/tools/cargo",
        "src/tools/rls",
        "src/tools/miri",
        "library/backtrace",
        "library/stdarch",
    ];
    // Avoid running git when there isn't a git checkout.
    if !self.config.submodules {
        return;
    }
    let gitmodules = output(
        Command::new("git")
            .args(&["config", "--file"])
            .arg(&self.config.src.join(".gitmodules"))
            .args(&["--get-regexp", "path"]),
    );
    for line in gitmodules.lines() {
        // Look for `submodule.$name.path = $path`
        // Sample output: `submodule.src/rust-installer.path src/tools/rust-installer`
        let submodule = Path::new(
            line.splitn(2, ' ')
                .nth(1)
                .unwrap_or_else(|| panic!("unexpected output `{}`", line)),
        );
        // avoid updating submodules twice
        if BOOTSTRAP_SUBMODULES.iter().any(|&p| Path::new(p) == submodule) {
            continue;
        }
        // `submodule` is relative to the repository root, so resolve it against the source
        // directory before probing; otherwise the result would depend on the directory the
        // build was started from.
        if channel::GitInfo::new(false, &self.config.src.join(submodule)).is_git() {
            self.update_submodule(submodule);
        }
    }
}

/// Executes the entire build, as configured by the flags and configuration.
pub fn build(&mut self) {
unsafe {
job::setup(self);
}

// If the LLVM submodule has been initialized already, sync it unconditionally. This avoids
// contributors checking in a submodule change by accident.
if self.in_tree_llvm_info.is_git() {
native::update_llvm_submodule(self);
}
self.maybe_update_submodules();

if let Subcommand::Format { check, paths } = &self.config.cmd {
return format::format(self, *check, &paths);
Expand Down
86 changes: 2 additions & 84 deletions src/bootstrap/native.rs
Expand Up @@ -21,7 +21,7 @@ use build_helper::{output, t};
use crate::builder::{Builder, RunConfig, ShouldRun, Step};
use crate::config::TargetSelection;
use crate::util::{self, exe};
use crate::{Build, GitRepo};
use crate::GitRepo;
use build_helper::up_to_date;

pub struct Meta {
Expand Down Expand Up @@ -91,86 +91,6 @@ pub fn prebuilt_llvm_config(
Err(Meta { stamp, build_llvm_config, out_dir, root: root.into() })
}

// modified from `check_submodule` and `update_submodule` in bootstrap.py
/// Brings the `src/llvm-project` submodule up to date with the commit recorded in the
/// superproject.
///
/// Returns without doing anything when `config.submodules` is disabled, when the directory
/// has contents but `in_tree_llvm_info` does not report a git checkout, or (with
/// `config.fast_submodules`) when the checked-out commit already matches the recorded one.
pub(crate) fn update_llvm_submodule(build: &Build) {
    fn dir_is_empty(dir: &Path) -> bool {
        t!(std::fs::read_dir(dir)).next().is_none()
    }

    let llvm_project = &Path::new("src").join("llvm-project");

    if !build.config.submodules {
        return;
    }

    let src_root = &build.config.src;
    let llvm_dir = src_root.join(llvm_project);

    // NOTE: An empty directory is let through on purpose: on the very first x.py run the llvm
    // submodule has not been checked out yet, and it has to be checked out now so it can be
    // built.
    if !(build.in_tree_llvm_info.is_git() || dir_is_empty(&llvm_dir)) {
        return;
    }

    // check_submodule
    if build.config.fast_submodules {
        let mut rev_parse = Command::new("git");
        rev_parse.args(&["rev-parse", "HEAD"]).current_dir(&llvm_dir);
        let head = output(&mut rev_parse);

        // update_submodules
        let mut ls_tree = Command::new("git");
        ls_tree.args(&["ls-tree", "HEAD"]).arg(llvm_project).current_dir(src_root);
        let tree_entry = output(&mut ls_tree);

        // The hash is the third whitespace-separated field of the `ls-tree` line.
        let expected = match tree_entry.split_whitespace().nth(2) {
            Some(hash) => hash,
            None => panic!("unexpected output `{}`", tree_entry),
        };

        // update_submodule
        if expected == head.trim_end() {
            // already checked out
            return;
        }
    }

    println!("Updating submodule {}", llvm_project.display());

    let mut sync = Command::new("git");
    sync.args(&["submodule", "-q", "sync"]).arg(llvm_project).current_dir(src_root);
    build.run(&mut sync);

    // First attempt uses `--progress`; if that attempt fails, git is run again without it.
    let submodule_update = |with_progress: bool| {
        let mut cmd = Command::new("git");
        cmd.args(&["submodule", "update", "--init", "--recursive"]);
        if with_progress {
            cmd.arg("--progress");
        }
        cmd.arg(llvm_project).current_dir(src_root);
        cmd
    };
    // NOTE: doesn't use `try_run` because this shouldn't print an error if it fails.
    let progress_ok =
        matches!(submodule_update(true).status(), Ok(status) if status.success());
    if !progress_ok {
        build.run(&mut submodule_update(false));
    }

    let mut reset = Command::new("git");
    reset.args(&["reset", "-q", "--hard"]).current_dir(&llvm_dir);
    build.run(&mut reset);

    let mut clean = Command::new("git");
    clean.args(&["clean", "-qdfx"]).current_dir(&llvm_dir);
    build.run(&mut clean);
}

#[derive(Debug, Copy, Clone, Hash, PartialEq, Eq)]
pub struct Llvm {
pub target: TargetSelection,
Expand Down Expand Up @@ -208,9 +128,7 @@ impl Step for Llvm {
Err(m) => m,
};

if !builder.config.dry_run {
update_llvm_submodule(builder);
}
builder.update_submodule(&Path::new("src").join("llvm-project"));
if builder.config.llvm_link_shared
&& (target.contains("windows") || target.contains("apple-darwin"))
{
Expand Down

0 comments on commit 2ac0e9b

Please sign in to comment.