Skip to content

Commit

Permalink
build-manifest: refactor checksum generation into a struct
Browse files Browse the repository at this point in the history
  • Loading branch information
pietroalbini committed Oct 26, 2020
1 parent 0ee1e91 commit 0cf3ce4
Show file tree
Hide file tree
Showing 2 changed files with 82 additions and 50 deletions.
75 changes: 75 additions & 0 deletions src/tools/build-manifest/src/checksum.rs
@@ -0,0 +1,75 @@
use crate::manifest::{FileHash, Manifest};
use rayon::prelude::*;
use sha2::{Digest, Sha256};
use std::collections::{HashMap, HashSet};
use std::error::Error;
use std::fs::File;
use std::io::BufReader;
use std::path::{Path, PathBuf};
use std::sync::Mutex;
use std::time::Instant;

/// Collects SHA-256 checksums of files referenced by a manifest and writes
/// them back into the manifest's `FileHash` entries.
pub(crate) struct Checksums {
    /// Hex-encoded hashes computed so far, keyed by the path of the hashed file.
    /// Kept behind a `Mutex` so the map can be read and updated through a
    /// shared borrow of `Checksums`.
    collected: Mutex<HashMap<PathBuf, String>>,
}

impl Checksums {
    /// Creates a collection with no hashes recorded yet.
    pub(crate) fn new() -> Self {
        Checksums { collected: Mutex::new(HashMap::new()) }
    }

    /// Replaces every `FileHash::Missing` entry of `manifest` with a
    /// `FileHash::Present` carrying the hex-encoded SHA-256 of the file.
    ///
    /// Hashes computed by earlier calls are reused, so only files that were
    /// never hashed before are read from disk.
    ///
    /// # Panics
    ///
    /// Panics if a missing entry has no recorded hash after collection, which
    /// happens when hashing that file failed (the failure itself is reported
    /// on stderr).
    pub(crate) fn fill_missing_checksums(&mut self, manifest: &mut Manifest) {
        let need_checksums = self.find_missing_checksums(manifest);
        if !need_checksums.is_empty() {
            self.collect_checksums(&need_checksums);
        }
        self.replace_checksums(manifest);
    }

    /// Returns the paths of all `FileHash::Missing` entries in `manifest`
    /// that do not have a hash recorded in `self.collected` yet.
    fn find_missing_checksums(&mut self, manifest: &mut Manifest) -> HashSet<PathBuf> {
        let collected = self.collected.lock().unwrap();
        let mut need_checksums = HashSet::new();
        crate::manifest::visit_file_hashes(manifest, |file_hash| {
            if let FileHash::Missing(path) = file_hash {
                // Skip files hashed by a previous call: `collected` outlives a
                // single manifest, so hashing them again is wasted work.
                if !collected.contains_key(path.as_path()) {
                    need_checksums.insert(path.clone());
                }
            }
        });
        need_checksums
    }

    /// Swaps each `FileHash::Missing` entry for a `FileHash::Present` holding
    /// the recorded hash, panicking if no hash is recorded for the path.
    fn replace_checksums(&mut self, manifest: &mut Manifest) {
        let collected = self.collected.lock().unwrap();
        crate::manifest::visit_file_hashes(manifest, |file_hash| {
            if let FileHash::Missing(path) = file_hash {
                match collected.get(path) {
                    Some(hash) => *file_hash = FileHash::Present(hash.clone()),
                    None => panic!("missing hash for file {}", path.display()),
                }
            }
        });
    }

    /// Hashes `files` in parallel and records every successful result.
    ///
    /// Files that fail to hash are reported on stderr and left unrecorded.
    fn collect_checksums(&mut self, files: &HashSet<PathBuf>) {
        let collection_start = Instant::now();
        println!(
            "collecting hashes for {} tarballs across {} threads",
            files.len(),
            rayon::current_num_threads().min(files.len()),
        );

        // Gather the results first and merge them under a single lock, rather
        // than taking the lock once per file from every worker thread.
        let hashed: Vec<(PathBuf, String)> = files
            .par_iter()
            .filter_map(|path| match hash(path) {
                Ok(digest) => Some((path.clone(), digest)),
                Err(err) => {
                    eprintln!("error while fetching the hash for {}: {}", path.display(), err);
                    None
                }
            })
            .collect();

        // Report how many hashes were actually collected, not how many files
        // were requested, so failures are visible in the summary line.
        let hashed_count = hashed.len();
        self.collected.lock().unwrap().extend(hashed);

        println!("collected {} hashes in {:.2?}", hashed_count, collection_start.elapsed());
    }
}

/// Streams the file at `path` through SHA-256 and returns the digest as a
/// hex-encoded string.
///
/// # Errors
///
/// Returns an error if the file cannot be opened or read.
fn hash(path: &Path) -> Result<String, Box<dyn Error>> {
    let source = File::open(path)?;
    let mut reader = BufReader::new(source);
    let mut hasher = Sha256::default();
    std::io::copy(&mut reader, &mut hasher)?;
    let digest = hasher.finalize();
    Ok(hex::encode(digest))
}
57 changes: 7 additions & 50 deletions src/tools/build-manifest/src/main.rs
Expand Up @@ -4,22 +4,19 @@
//! via `x.py dist hash-and-sign`; the cmdline arguments are set up
//! by rustbuild (in `src/bootstrap/dist.rs`).

mod checksum;
mod manifest;
mod versions;

use crate::manifest::{Component, FileHash, Manifest, Package, Rename, Target};
use crate::checksum::Checksums;
use crate::manifest::{Component, Manifest, Package, Rename, Target};
use crate::versions::{PkgType, Versions};
use rayon::prelude::*;
use sha2::Digest;
use std::collections::{BTreeMap, HashMap, HashSet};
use std::env;
use std::error::Error;
use std::fs::{self, File};
use std::io::{self, BufReader, Read, Write};
use std::io::{self, Read, Write};
use std::path::{Path, PathBuf};
use std::process::{Command, Stdio};
use std::sync::Mutex;
use std::time::Instant;

static HOSTS: &[&str] = &[
"aarch64-apple-darwin",
Expand Down Expand Up @@ -186,6 +183,7 @@ macro_rules! t {

struct Builder {
versions: Versions,
checksums: Checksums,
shipped_files: HashSet<String>,

input: PathBuf,
Expand Down Expand Up @@ -240,6 +238,7 @@ fn main() {

Builder {
versions: Versions::new(&channel, &input).unwrap(),
checksums: Checksums::new(),
shipped_files: HashSet::new(),

input,
Expand Down Expand Up @@ -321,7 +320,7 @@ impl Builder {
self.add_renames_to(&mut manifest);
manifest.pkg.insert("rust".to_string(), self.rust_package(&manifest));

self.fill_missing_hashes(&mut manifest);
self.checksums.fill_missing_checksums(&mut manifest);

manifest
}
Expand Down Expand Up @@ -595,41 +594,6 @@ impl Builder {
assert!(t!(child.wait()).success());
}

/// Computes the hash of every file the manifest lists as `FileHash::Missing`
/// and stores it back as `FileHash::Present`.
///
/// Hashing runs in parallel; a file that fails to hash is reported on stderr
/// and later triggers the "missing hash" panic below.
fn fill_missing_hashes(&self, manifest: &mut Manifest) {
    // Pass 1: gather the distinct paths that still need a hash.
    let mut pending = HashSet::new();
    crate::manifest::visit_file_hashes(manifest, |entry| {
        if let FileHash::Missing(path) = entry {
            pending.insert(path.clone());
        }
    });

    let started = Instant::now();
    let workers = rayon::current_num_threads().min(pending.len());
    println!("collecting hashes for {} tarballs across {} threads", pending.len(), workers);

    // Pass 2: hash in parallel, keyed by a borrow of the path in `pending`.
    let hashes = Mutex::new(HashMap::new());
    pending.par_iter().for_each(|path| match fetch_hash(path) {
        Ok(digest) => {
            hashes.lock().unwrap().insert(path, digest);
        }
        Err(err) => eprintln!("error while fetching the hash for {}: {}", path.display(), err),
    });
    let hashes = hashes.into_inner().unwrap();
    println!("collected {} hashes in {:.2?}", hashes.len(), started.elapsed());

    // Pass 3: write the hashes back; a path without one is a hard failure.
    crate::manifest::visit_file_hashes(manifest, |entry| {
        if let FileHash::Missing(path) = entry {
            let digest = hashes
                .get(path)
                .unwrap_or_else(|| panic!("missing hash for file {}", path.display()))
                .clone();
            *entry = FileHash::Present(digest);
        }
    })
}

fn write_channel_files(&mut self, channel_name: &str, manifest: &Manifest) {
self.write(&toml::to_string(&manifest).unwrap(), channel_name, ".toml");
self.write(&manifest.date, channel_name, "-date.txt");
Expand Down Expand Up @@ -660,10 +624,3 @@ impl Builder {
t!(std::fs::write(path, content.as_bytes()));
}
}

/// Reads the file at `path` and returns its SHA-256 digest as a hex-encoded
/// string.
///
/// # Errors
///
/// Returns an error if the file cannot be opened or read.
fn fetch_hash(path: &Path) -> Result<String, Box<dyn Error>> {
    let source = File::open(path)?;
    let mut reader = BufReader::new(source);
    let mut hasher = sha2::Sha256::default();
    std::io::copy(&mut reader, &mut hasher)?;
    let digest = hasher.finalize();
    Ok(hex::encode(digest))
}

0 comments on commit 0cf3ce4

Please sign in to comment.