From 8afa3f3c5cadf6bfa4c69908ab86ceb4d477d29f Mon Sep 17 00:00:00 2001 From: LucaCappelletti94 Date: Sun, 14 Apr 2024 12:19:19 +0200 Subject: [PATCH] Added benchmarks for memory and time requirements relative to building corpus --- benchmarks/README.md | 2 +- benchmarks/src/main.rs | 31 ++++++++++++++++--------------- 2 files changed, 17 insertions(+), 16 deletions(-) diff --git a/benchmarks/README.md b/benchmarks/README.md index 44d7209..53443c1 100644 --- a/benchmarks/README.md +++ b/benchmarks/README.md @@ -249,7 +249,7 @@ In the new edition we also provide a parallel version, which has the same memory | OLD | 5 | 20,336 | 9,583,720,360 | | NEW | 6 | 3,893,922 | 615,458,920 | | NEWPAR | 6 | 163,489 | 615,458,920 | -| OLD | 6 | 22,206 | 10,211,711,214| +| OLD | 6 | 22,206 | 10,211,711,214| ## Benchmarks 5 April 2024, 08:00 PM The sixth benchmark was run on a 6-core machine with 32 GBs of RAM. We loaded the entirety of the taxons dataset into memory. diff --git a/benchmarks/src/main.rs b/benchmarks/src/main.rs index f69bca7..709bd11 100644 --- a/benchmarks/src/main.rs +++ b/benchmarks/src/main.rs @@ -10,7 +10,6 @@ use core::fmt::Debug; use mem_dbg::*; use ngrammatic::prelude::*; use rayon::prelude::*; -use sux::dict::rear_coded_list::{RearCodedList, RearCodedListBuilder}; /// Returns an iterator over the taxons in the corpus. fn iter_taxons() -> impl Iterator { @@ -129,6 +128,10 @@ fn load_corpus_old(arity: usize) -> ngrammatic_old::Corpus { corpus } +/// We allow dead code here because the version of the +/// webgraph crate that is necessary for this benchmark +/// is currently in nightly. +#[allow(dead_code)] fn load_corpus_webgraph() where NG: Ngram + Debug, @@ -156,6 +159,10 @@ where ); } +/// We allow dead code here because the version of the +/// webgraph crate that is necessary for this benchmark +/// is currently in nightly.
+#[allow(dead_code)] fn load_corpus_rcl_webgraph() where NG: Ngram + Debug, @@ -199,18 +206,12 @@ where fn main() { env_logger::builder().try_init().unwrap(); - // experiment::>(); - // experiment::>(); - // experiment::>(); - // experiment::>(); - // experiment::>(); - // experiment::>(); - // experiment::>(); - // experiment::>(); - use ngrammatic::prelude::*; - let mut animals: Vec = iter_taxons().collect(); - - let corpus: Corpus, TriGram, Lowercase> = Corpus::par_from(animals); - - corpus.mem_dbg(DbgFlags::default() | DbgFlags::CAPACITY | DbgFlags::HUMANIZE).unwrap(); + experiment::>(); + experiment::>(); + experiment::>(); + experiment::>(); + experiment::>(); + experiment::>(); + experiment::>(); + experiment::>(); }