From da8a99c456ca62c7c75787ca381f7a616091105d Mon Sep 17 00:00:00 2001 From: Camille Mougey Date: Wed, 26 Apr 2023 15:44:16 +0200 Subject: [PATCH 1/8] Bench: avoid code duplication --- mla/benches/bench_archive.rs | 112 ++++++++++++++--------------------- 1 file changed, 45 insertions(+), 67 deletions(-) diff --git a/mla/benches/bench_archive.rs b/mla/benches/bench_archive.rs index ba9ee18..2628622 100644 --- a/mla/benches/bench_archive.rs +++ b/mla/benches/bench_archive.rs @@ -23,6 +23,48 @@ const MB: usize = 1024 * KB; const SIZE_LIST: [usize; 5] = [KB, 16 * KB, 128 * KB, MB, 4 * MB]; const SAMPLE_SIZE_SMALL: usize = 10; +/// Build an archive with `iters` files of `size` bytes each and `layers` enabled +/// +/// Files names are `file_{i}` +fn build_archive<'a>( + iters: u64, + size: u64, + layers: Layers, +) -> ArchiveReader<'a, io::Cursor>> { + // Setup + let mut rng = ChaChaRng::seed_from_u64(0); + let mut bytes = [0u8; 32]; + rng.fill_bytes(&mut bytes); + let key = StaticSecret::from(bytes); + let file = Vec::new(); + + // Create the initial archive with `iters` files of `size` bytes + let mut config = ArchiveWriterConfig::new(); + config + .enable_layer(layers) + .add_public_keys(&[PublicKey::from(&key)]); + let mut mla = ArchiveWriter::from_config(file, config).expect("Writer init failed"); + for i in 0..iters { + let data: Vec = Alphanumeric + .sample_iter(&mut rng) + .take(size as usize) + .collect(); + let id = mla.start_file(&format!("file_{i}")).unwrap(); + mla.append_file_content(id, data.len() as u64, data.as_slice()) + .unwrap(); + mla.end_file(id).unwrap(); + } + mla.finalize().unwrap(); + + // Instantiate the reader + let dest = mla.into_raw(); + let buf = Cursor::new(dest); + let mut config = ArchiveReaderConfig::new(); + config.add_private_keys(std::slice::from_ref(&key)); + ArchiveReader::from_config(buf, config).unwrap() +} + + /// Benchmark with all layers' permutations different block size /// /// The archive is not reset between iterations, only between benchs. @@ -122,37 +164,11 @@ pub fn multiple_compression_quality(c: &mut Criterion) { /// creation nor file getting fn iter_decompress(iters: u64, size: u64, layers: Layers) -> Duration { // Prepare data - let mut rng = ChaChaRng::seed_from_u64(0); - let mut bytes = [0u8; 32]; - rng.fill_bytes(&mut bytes); - let key = StaticSecret::from(bytes); - let data: Vec = Alphanumeric - .sample_iter(&mut rng) - .take((size * iters) as usize) - .collect(); - - // Create an archive with one file - let file = Vec::new(); - let mut config = ArchiveWriterConfig::new(); - config - .enable_layer(layers) - .add_public_keys(&[PublicKey::from(&key)]); - let mut mla = ArchiveWriter::from_config(file, config).expect("Writer init failed"); - let id = mla.start_file("file").unwrap(); - mla.append_file_content(id, data.len() as u64, data.as_slice()) - .unwrap(); - mla.end_file(id).unwrap(); - mla.finalize().unwrap(); - - // Prepare the reader - let dest = mla.into_raw(); - let buf = Cursor::new(dest.as_slice()); - let mut config = ArchiveReaderConfig::new(); - config.add_private_keys(std::slice::from_ref(&key)); - let mut mla_read = ArchiveReader::from_config(buf, config).unwrap(); + let mut mla_read = build_archive(1, size * iters, layers); // Get the file (costly as `seek` are implied) - let subfile = mla_read.get_file("file".to_string()).unwrap().unwrap(); + let subfile = mla_read.get_file("file_0".to_string()).unwrap() + .unwrap(); // Read iters * size bytes let start = Instant::now(); @@ -187,44 +203,6 @@ pub fn multiple_layers_multiple_block_size_decompress(c: &mut Criterion) { group.finish(); } -fn build_archive<'a>( - iters: u64, - size: u64, - layers: Layers, -) -> ArchiveReader<'a, io::Cursor>> { - // Setup - let mut rng = ChaChaRng::seed_from_u64(0); - let mut bytes = [0u8; 32]; - rng.fill_bytes(&mut bytes); - let key = StaticSecret::from(bytes); - let file = Vec::new(); - - // Create the initial archive with `iters` files of `size` bytes - let mut config = ArchiveWriterConfig::new(); - config - .enable_layer(layers) - .add_public_keys(&[PublicKey::from(&key)]); - let mut mla = ArchiveWriter::from_config(file, config).expect("Writer init failed"); - for i in 0..iters { - let data: Vec = Alphanumeric - .sample_iter(&mut rng) - .take(size as usize) - .collect(); - let id = mla.start_file(&format!("file_{i}")).unwrap(); - mla.append_file_content(id, data.len() as u64, data.as_slice()) - .unwrap(); - mla.end_file(id).unwrap(); - } - mla.finalize().unwrap(); - - // Instantiate the reader - let dest = mla.into_raw(); - let buf = Cursor::new(dest); - let mut config = ArchiveReaderConfig::new(); - config.add_private_keys(std::slice::from_ref(&key)); - ArchiveReader::from_config(buf, config).unwrap() -} - /// Create an archive with a `iters` files of `size` bytes using `layers` and /// measure the time needed to read them (in a random order) /// From b0f6e44e9c2c009b66865c4e26da4fbd6d4a2aff Mon Sep 17 00:00:00 2001 From: Camille Mougey Date: Wed, 26 Apr 2023 15:52:40 +0200 Subject: [PATCH 2/8] Bench: reduce the sizes considered --- mla/benches/bench_archive.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mla/benches/bench_archive.rs b/mla/benches/bench_archive.rs index 2628622..0b20edd 100644 --- a/mla/benches/bench_archive.rs +++ b/mla/benches/bench_archive.rs @@ -20,7 +20,7 @@ use x25519_dalek::{PublicKey, StaticSecret}; const KB: usize = 1024; const MB: usize = 1024 * KB; -const SIZE_LIST: [usize; 5] = [KB, 16 * KB, 128 * KB, MB, 4 * MB]; +const SIZE_LIST: [usize; 4] = [KB, 64 * KB, MB, 16 * MB]; const SAMPLE_SIZE_SMALL: usize = 10; /// Build an archive with `iters` files of `size` bytes each and `layers` enabled From 86ffd68fd97b50e7d446fd1b113525d871bf4d69 Mon Sep 17 00:00:00 2001 From: Camille Mougey Date: Wed, 26 Apr 2023 16:03:55 +0200 Subject: [PATCH 3/8] Benches: use a compile time Layers list --- mla/benches/bench_archive.rs | 38 +++++++++++------------------------- 1 file changed, 11 insertions(+), 27 deletions(-) diff --git a/mla/benches/bench_archive.rs b/mla/benches/bench_archive.rs index 0b20edd..c8b70ff 100644 --- a/mla/benches/bench_archive.rs +++ b/mla/benches/bench_archive.rs @@ -22,6 +22,12 @@ const MB: usize = 1024 * KB; const SIZE_LIST: [usize; 4] = [KB, 64 * KB, MB, 16 * MB]; const SAMPLE_SIZE_SMALL: usize = 10; +const LAYERS_POSSIBILITIES: [Layers; 4] = [ + Layers::EMPTY, + Layers::COMPRESS, + Layers::ENCRYPT, + Layers::COMPRESS.union(Layers::ENCRYPT), +]; /// Build an archive with `iters` files of `size` bytes each and `layers` enabled /// @@ -64,7 +70,6 @@ fn build_archive<'a>( ArchiveReader::from_config(buf, config).unwrap() } - /// Benchmark with all layers' permutations different block size /// /// The archive is not reset between iterations, only between benchs. @@ -89,12 +94,7 @@ pub fn multiple_layers_multiple_block_size(c: &mut Criterion) { let data: Vec = Alphanumeric.sample_iter(&mut rng).take(*size).collect(); - for layers in &[ - Layers::EMPTY, - Layers::COMPRESS, - Layers::ENCRYPT, - Layers::COMPRESS | Layers::ENCRYPT, - ] { + for layers in &LAYERS_POSSIBILITIES { // Create an archive let file = Vec::new(); let mut config = ArchiveWriterConfig::new(); @@ -167,8 +167,7 @@ fn iter_decompress(iters: u64, size: u64, layers: Layers) -> Duration { let mut mla_read = build_archive(1, size * iters, layers); // Get the file (costly as `seek` are implied) - let subfile = mla_read.get_file("file_0".to_string()).unwrap() - .unwrap(); + let subfile = mla_read.get_file("file_0".to_string()).unwrap().unwrap(); // Read iters * size bytes let start = Instant::now(); @@ -188,12 +187,7 @@ pub fn multiple_layers_multiple_block_size_decompress(c: &mut Criterion) { for size in SIZE_LIST.iter() { group.throughput(Throughput::Bytes(*size as u64)); - for layers in &[ - Layers::EMPTY, - Layers::COMPRESS, - Layers::ENCRYPT, - Layers::COMPRESS | Layers::ENCRYPT, - ] { + for layers in &LAYERS_POSSIBILITIES { group.bench_function( BenchmarkId::new(format!("Layers {layers:?}"), size), move |b| b.iter_custom(|iters| iter_decompress(iters, *size as u64, *layers)), @@ -238,12 +232,7 @@ pub fn multiple_layers_multiple_block_size_decompress_multifiles_random(c: &mut for size in [MB, 2 * MB, 4 * MB, 16 * MB].iter() { group.throughput(Throughput::Bytes(*size as u64)); - for layers in &[ - Layers::EMPTY, - Layers::COMPRESS, - Layers::ENCRYPT, - Layers::COMPRESS | Layers::ENCRYPT, - ] { + for layers in &LAYERS_POSSIBILITIES { group.bench_function( BenchmarkId::new(format!("Layers {layers:?}"), size), move |b| { @@ -290,12 +279,7 @@ pub fn linear_vs_normal_extract(c: &mut Criterion) { for size in [MB, 2 * MB, 4 * MB, 16 * MB].iter() { group.throughput(Throughput::Bytes(*size as u64)); - for layers in &[ - Layers::EMPTY, - Layers::COMPRESS, - Layers::ENCRYPT, - Layers::COMPRESS | Layers::ENCRYPT, - ] { + for layers in &LAYERS_POSSIBILITIES { group.bench_function( BenchmarkId::new(format!("NORMAL / Layers {layers:?}"), size), move |b| { From d9f59cbb699b8295c0fed8e6b8b4b4e35007985d Mon Sep 17 00:00:00 2001 From: Camille Mougey Date: Wed, 26 Apr 2023 16:11:19 +0200 Subject: [PATCH 4/8] Benches: basic cleanup --- mla/benches/bench_archive.rs | 26 +++++++++----------------- 1 file changed, 9 insertions(+), 17 deletions(-) diff --git a/mla/benches/bench_archive.rs b/mla/benches/bench_archive.rs index c8b70ff..51ea710 100644 --- a/mla/benches/bench_archive.rs +++ b/mla/benches/bench_archive.rs @@ -162,7 +162,7 @@ pub fn multiple_compression_quality(c: &mut Criterion) { /// /// This function is used to measure only the read time without the cost of /// creation nor file getting -fn iter_decompress(iters: u64, size: u64, layers: Layers) -> Duration { +fn read_one_file_by_chunk(iters: u64, size: u64, layers: Layers) -> Duration { // Prepare data let mut mla_read = build_archive(1, size * iters, layers); @@ -190,7 +190,9 @@ pub fn multiple_layers_multiple_block_size_decompress(c: &mut Criterion) { for layers in &LAYERS_POSSIBILITIES { group.bench_function( BenchmarkId::new(format!("Layers {layers:?}"), size), - move |b| b.iter_custom(|iters| iter_decompress(iters, *size as u64, *layers)), + move |b| { + b.iter_custom(|iters| read_one_file_by_chunk(iters, *size as u64, *layers)) + }, ); } } @@ -202,7 +204,7 @@ pub fn multiple_layers_multiple_block_size_decompress(c: &mut Criterion) { /// /// This function is used to measure only the get_file + read time without the /// cost of archive creation -fn iter_decompress_multifiles_random(iters: u64, size: u64, layers: Layers) -> Duration { +fn iter_read_multifiles_random(iters: u64, size: u64, layers: Layers) -> Duration { let mut mla_read = build_archive(iters, size, layers); let mut rng = ChaChaRng::seed_from_u64(0); @@ -223,22 +225,17 @@ fn iter_decompress_multifiles_random(iters: u64, size: u64, layers: Layers) -> D /// /// This pattern should represent one of the common use of the library pub fn multiple_layers_multiple_block_size_decompress_multifiles_random(c: &mut Criterion) { - static KB: usize = 1024; - static MB: usize = 1024 * KB; - let mut group = c.benchmark_group("chunk_size_decompress_mutilfiles_random"); // Reduce the number of sample to avoid taking too much time group.sample_size(SAMPLE_SIZE_SMALL); - for size in [MB, 2 * MB, 4 * MB, 16 * MB].iter() { + for size in [MB, 4 * MB, 16 * MB].iter() { group.throughput(Throughput::Bytes(*size as u64)); for layers in &LAYERS_POSSIBILITIES { group.bench_function( BenchmarkId::new(format!("Layers {layers:?}"), size), move |b| { - b.iter_custom(|iters| { - iter_decompress_multifiles_random(iters, *size as u64, *layers) - }) + b.iter_custom(|iters| iter_read_multifiles_random(iters, *size as u64, *layers)) }, ); } @@ -270,22 +267,17 @@ fn iter_decompress_multifiles_linear(iters: u64, size: u64, layers: Layers) -> D /// The full extraction is a common pattern of use of the library. This /// benchmark helps measuring the gain of using `linear_extract`. pub fn linear_vs_normal_extract(c: &mut Criterion) { - static KB: usize = 1024; - static MB: usize = 1024 * KB; - let mut group = c.benchmark_group("linear_vs_normal_extract"); // Reduce the number of sample to avoid taking too much time group.sample_size(SAMPLE_SIZE_SMALL); - for size in [MB, 2 * MB, 4 * MB, 16 * MB].iter() { + for size in [MB, 4 * MB, 16 * MB].iter() { group.throughput(Throughput::Bytes(*size as u64)); for layers in &LAYERS_POSSIBILITIES { group.bench_function( BenchmarkId::new(format!("NORMAL / Layers {layers:?}"), size), move |b| { - b.iter_custom(|iters| { - iter_decompress_multifiles_random(iters, *size as u64, *layers) - }) + b.iter_custom(|iters| iter_read_multifiles_random(iters, *size as u64, *layers)) }, ); group.bench_function( From c87ed6332427e160988c16b0c67505d68332ae7a Mon Sep 17 00:00:00 2001 From: Camille Mougey Date: Wed, 26 Apr 2023 16:18:38 +0200 Subject: [PATCH 5/8] Benches: clearer names and avoid no-more-needed tests --- mla/benches/bench_archive.rs | 31 ++++++++++++++++++------------- 1 file changed, 18 insertions(+), 13 deletions(-) diff --git a/mla/benches/bench_archive.rs b/mla/benches/bench_archive.rs index 51ea710..88e7395 100644 --- a/mla/benches/bench_archive.rs +++ b/mla/benches/bench_archive.rs @@ -30,7 +30,7 @@ const LAYERS_POSSIBILITIES: [Layers; 4] = [ ]; /// Build an archive with `iters` files of `size` bytes each and `layers` enabled -/// +/// /// Files names are `file_{i}` fn build_archive<'a>( iters: u64, @@ -78,7 +78,7 @@ fn build_archive<'a>( /// enough samples it ends as outliers /// /// Big blocks (> 4MB) are also use to force the use of several blocks inside boundaries -pub fn multiple_layers_multiple_block_size(c: &mut Criterion) { +pub fn writer_multiple_layers_multiple_block_size(c: &mut Criterion) { // Setup // Use a deterministic RNG in tests, for reproductability. DO NOT DO THIS IS IN ANY RELEASED BINARY! let mut rng = ChaChaRng::seed_from_u64(0); @@ -86,7 +86,7 @@ pub fn multiple_layers_multiple_block_size(c: &mut Criterion) { rng.fill_bytes(&mut bytes); let key = StaticSecret::from(bytes); - let mut group = c.benchmark_group("multiple_layers_multiple_block_size"); + let mut group = c.benchmark_group("writer_multiple_layers_multiple_block_size"); group.measurement_time(Duration::from_secs(10)); group.sample_size(SAMPLE_SIZE_SMALL); for size in SIZE_LIST.iter() { @@ -179,8 +179,8 @@ fn read_one_file_by_chunk(iters: u64, size: u64, layers: Layers) -> Duration { } /// Benchmark the read speed depending on layers enabled and read size -pub fn multiple_layers_multiple_block_size_decompress(c: &mut Criterion) { - let mut group = c.benchmark_group("multiple_layers_multiple_block_size_decompress"); +pub fn reader_multiple_layers_multiple_block_size(c: &mut Criterion) { + let mut group = c.benchmark_group("reader_multiple_layers_multiple_block_size"); // Reduce the number of sample to avoid taking too much time group.sample_size(SAMPLE_SIZE_SMALL); @@ -224,7 +224,7 @@ fn iter_read_multifiles_random(iters: u64, size: u64, layers: Layers) -> Duratio /// This benchmark measures the time needed to randomly pick a file and read it /// /// This pattern should represent one of the common use of the library -pub fn multiple_layers_multiple_block_size_decompress_multifiles_random(c: &mut Criterion) { +pub fn reader_multiple_layers_multiple_block_size_multifiles_random(c: &mut Criterion) { let mut group = c.benchmark_group("chunk_size_decompress_mutilfiles_random"); // Reduce the number of sample to avoid taking too much time group.sample_size(SAMPLE_SIZE_SMALL); @@ -266,8 +266,8 @@ fn iter_decompress_multifiles_linear(iters: u64, size: u64, layers: Layers) -> D /// /// The full extraction is a common pattern of use of the library. This /// benchmark helps measuring the gain of using `linear_extract`. -pub fn linear_vs_normal_extract(c: &mut Criterion) { - let mut group = c.benchmark_group("linear_vs_normal_extract"); +pub fn reader_linear_vs_random_extract(c: &mut Criterion) { + let mut group = c.benchmark_group("reader_linear_vs_random_extract"); // Reduce the number of sample to avoid taking too much time group.sample_size(SAMPLE_SIZE_SMALL); for size in [MB, 4 * MB, 16 * MB].iter() { @@ -295,10 +295,15 @@ pub fn linear_vs_normal_extract(c: &mut Criterion) { criterion_group!( benches, - multiple_layers_multiple_block_size, - multiple_compression_quality, - multiple_layers_multiple_block_size_decompress, - multiple_layers_multiple_block_size_decompress_multifiles_random, - linear_vs_normal_extract, + writer_multiple_layers_multiple_block_size, + reader_multiple_layers_multiple_block_size, + reader_linear_vs_random_extract, + // Was used to determine the best default compression quality ratio + // + // multiple_compression_quality, + // + // Commented as these tests are already made in `reader_linear_vs_random_extract` + // + // reader_multiple_layers_multiple_block_size_multifiles_random, ); criterion_main!(benches); From 77c8481d3283e47b64c72d17dd2c6f049eda5914 Mon Sep 17 00:00:00 2001 From: Camille Mougey Date: Wed, 26 Apr 2023 16:28:12 +0200 Subject: [PATCH 6/8] Benches: explode reader_linear_vs_random_extract in two distinct --- mla/benches/bench_archive.rs | 21 ++++++--------------- 1 file changed, 6 insertions(+), 15 deletions(-) diff --git a/mla/benches/bench_archive.rs b/mla/benches/bench_archive.rs index 88e7395..4574799 100644 --- a/mla/benches/bench_archive.rs +++ b/mla/benches/bench_archive.rs @@ -261,25 +261,19 @@ fn iter_decompress_multifiles_linear(iters: u64, size: u64, layers: Layers) -> D start.elapsed() } -/// This benchmark measures the time needed to compare the extraction time -/// between the "randomly pick" and "linear extraction" +/// This benchmark measures the time needed in a "linear extraction" +/// It can be compared to the "randomly pick" extraction /// /// The full extraction is a common pattern of use of the library. This /// benchmark helps measuring the gain of using `linear_extract`. -pub fn reader_linear_vs_random_extract(c: &mut Criterion) { - let mut group = c.benchmark_group("reader_linear_vs_random_extract"); +pub fn reader_multiple_layers_multiple_block_size_multifiles_linear(c: &mut Criterion) { + let mut group = c.benchmark_group("reader_multiple_layers_multiple_block_size_multifiles_linear"); // Reduce the number of sample to avoid taking too much time group.sample_size(SAMPLE_SIZE_SMALL); for size in [MB, 4 * MB, 16 * MB].iter() { group.throughput(Throughput::Bytes(*size as u64)); for layers in &LAYERS_POSSIBILITIES { - group.bench_function( - BenchmarkId::new(format!("NORMAL / Layers {layers:?}"), size), - move |b| { - b.iter_custom(|iters| iter_read_multifiles_random(iters, *size as u64, *layers)) - }, - ); group.bench_function( BenchmarkId::new(format!("LINEAR / Layers {layers:?}"), size), move |b| { @@ -297,13 +291,10 @@ criterion_group!( benches, writer_multiple_layers_multiple_block_size, reader_multiple_layers_multiple_block_size, - reader_linear_vs_random_extract, + reader_multiple_layers_multiple_block_size_multifiles_random, + reader_multiple_layers_multiple_block_size_multifiles_linear, // Was used to determine the best default compression quality ratio // // multiple_compression_quality, - // - // Commented as these tests are already made in `reader_linear_vs_random_extract` - // - // reader_multiple_layers_multiple_block_size_multifiles_random, ); criterion_main!(benches); From f65ca2ad9f2463c519827be1ce4076ec777ae144 Mon Sep 17 00:00:00 2001 From: Camille Mougey Date: Wed, 26 Apr 2023 16:29:24 +0200 Subject: [PATCH 7/8] Benches: use the common SIZE list --- mla/benches/bench_archive.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mla/benches/bench_archive.rs b/mla/benches/bench_archive.rs index 4574799..975800d 100644 --- a/mla/benches/bench_archive.rs +++ b/mla/benches/bench_archive.rs @@ -228,7 +228,7 @@ pub fn reader_multiple_layers_multiple_block_size_multifiles_random(c: &mut Crit let mut group = c.benchmark_group("chunk_size_decompress_mutilfiles_random"); // Reduce the number of sample to avoid taking too much time group.sample_size(SAMPLE_SIZE_SMALL); - for size in [MB, 4 * MB, 16 * MB].iter() { + for size in SIZE_LIST.iter() { group.throughput(Throughput::Bytes(*size as u64)); for layers in &LAYERS_POSSIBILITIES { @@ -270,7 +270,7 @@ pub fn reader_multiple_layers_multiple_block_size_multifiles_linear(c: &mut Crit let mut group = c.benchmark_group("reader_multiple_layers_multiple_block_size_multifiles_linear"); // Reduce the number of sample to avoid taking too much time group.sample_size(SAMPLE_SIZE_SMALL); - for size in [MB, 4 * MB, 16 * MB].iter() { + for size in SIZE_LIST.iter() { group.throughput(Throughput::Bytes(*size as u64)); for layers in &LAYERS_POSSIBILITIES { From 49538afa91df6776639aaa54b5af6c306da7c7fb Mon Sep 17 00:00:00 2001 From: Camille Mougey Date: Wed, 26 Apr 2023 16:34:15 +0200 Subject: [PATCH 8/8] CI: add a criterion-based result for new PR --- .github/workflows/bench.yml | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) create mode 100644 .github/workflows/bench.yml diff --git a/.github/workflows/bench.yml b/.github/workflows/bench.yml new file mode 100644 index 0000000..6081355 --- /dev/null +++ b/.github/workflows/bench.yml @@ -0,0 +1,17 @@ +name: Benchmark PR + +on: + pull_request: + branches: [ master ] + +jobs: + runBenchmark: + name: Criterion benchmark + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 + - uses: boa-dev/criterion-compare-action@v3 + with: + branchName: ${{ github.base_ref }} + benchName: "bench_archive" + token: ${{ secrets.GITHUB_TOKEN }} \ No newline at end of file