Skip to content

Commit

Permalink
feat: rayon brings us back to that 36% performance improvement we had
Browse files Browse the repository at this point in the history
originally observed, and dramatically simplifies our code
  • Loading branch information
Gnarus-G committed May 6, 2023
1 parent 31e454c commit d348803
Show file tree
Hide file tree
Showing 6 changed files with 82 additions and 104 deletions.
10 changes: 5 additions & 5 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -14,9 +14,9 @@ clap = { version = "3.2.6", features = ["derive"] }
regex = "1.5.6"
mrp = { path = "./mrp/" }
glob = "0.3.1"
num_cpus = "1.15.0"
stderrlog = "0.5.4"
log = "0.4.17"
rayon = "1.7.0"

[dev-dependencies]
criterion = "0.4"
Expand Down
4 changes: 2 additions & 2 deletions benches/bench.sh
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ time ../target/release/rn simple "g-(g:int)-a-(a:int)-al-(al:int)->artist-(a)-al
rm -r files;

echo;
echo "--- rename simple in parallel";
echo "--- rename simple";
echo "setting up ~1.5M files..."
mkdir files;
mkdir files1;
Expand Down Expand Up @@ -77,6 +77,6 @@ touch files25/g-{0001..0038}-a-{0001..0038}-al-{0001..0038}; # ~54K files
touch files26/g-{0001..0038}-a-{0001..0038}-al-{0001..0038}; # ~54K files
touch files27/g-{0001..0038}-a-{0001..0038}-al-{0001..0038}; # ~54K files
echo "running..."
time ../target/release/rn simple "g-(g:int)-a-(a:int)-al-(al:int)->artist-(a)-album-(al)-genre-(g)" --multi --glob "files*/g*";
time ../target/release/rn simple "g-(g:int)-a-(a:int)-al-(al:int)->artist-(a)-album-(al)-genre-(g)" --glob "files*/g*";
rm -r files;
rm -r files*;
55 changes: 54 additions & 1 deletion benches/bulk_renames.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
use rayon::prelude::*;
use std::{path::PathBuf, str::FromStr};

use criterion::{criterion_group, criterion_main, BenchmarkId, Criterion};
Expand Down Expand Up @@ -39,5 +40,57 @@ fn renaming_files(c: &mut Criterion) {
}
}

criterion_group!(benches, renaming_files);
/// Benchmarks `renamer.apply` over generated file paths, comparing a plain
/// sequential iterator against rayon's `par_iter`.
///
/// Each input size (2, 20, 200 and 20000 paths) is measured in both modes,
/// serial first, so the two results for a size sit next to each other in the
/// report. Benchmark IDs keep the `"<size> with <Mode>"` shape
/// (e.g. `"200 with Rayon"`) so previously saved baselines remain comparable.
///
/// `get_renamer` and `create_file_paths` are helpers defined elsewhere in this
/// file.
fn comparing_rayon_and_single_threaded(c: &mut Criterion) {
    /// Which iteration strategy a benchmark case exercises. The `Debug`
    /// rendering of the variant is part of the benchmark ID.
    #[derive(Debug, Clone, Copy)]
    enum Mode {
        Serial,
        Rayon,
    }

    let renamer = get_renamer();
    let mut group = c.benchmark_group("rayon vs serial with a few files");
    // The largest case is slow; keep the number of samples small.
    group.sample_size(10);

    for count in [2, 20, 200, 20000] {
        // Build the fixture once per size and share it between both modes, so
        // serial and parallel runs see exactly the same input.
        let files = create_file_paths(count);
        group.throughput(criterion::Throughput::Elements(count as u64));

        for mode in [Mode::Serial, Mode::Rayon] {
            group.bench_with_input(
                BenchmarkId::from_parameter(format!("{} with {:?}", count, mode)),
                &files,
                |b, files| match mode {
                    Mode::Serial => b.iter(|| {
                        files.iter().filter_map(|p| p.to_str()).for_each(|name| {
                            // The result is otherwise unused; `black_box` keeps
                            // the optimizer from discarding the measured work.
                            std::hint::black_box(renamer.apply(name));
                        });
                    }),
                    Mode::Rayon => b.iter(|| {
                        files
                            .par_iter()
                            .filter_map(|p| p.to_str())
                            .for_each(|name| {
                                // Same guard as the serial arm, so both modes
                                // are measured under identical conditions.
                                std::hint::black_box(renamer.apply(name));
                            });
                    }),
                },
            );
        }
    }

    // Explicitly close the group so its summary is generated here rather than
    // implicitly when the group is dropped.
    group.finish();
}

criterion_group!(benches, renaming_files, comparing_rayon_and_single_threaded);
criterion_main!(benches);
108 changes: 19 additions & 89 deletions src/lib.rs
Original file line number Diff line number Diff line change
@@ -1,109 +1,39 @@
use std::{path::PathBuf, thread};
use std::path::PathBuf;

use log::*;
use mrp::MatchAndReplaceStrategy;
use rayon::prelude::*;

/// Options controlling how `in_bulk` processes the paths it is given.
///
/// `Default` yields both flags as `false`.
#[derive(Default)]
pub struct BulkRenameOptions {
/// When `true`, each `from -> to` pair is printed instead of the file being
/// renamed on disk.
pub no_rename: bool,
/// When `true`, `in_bulk` considers its multithreaded path; it still falls
/// back to a single thread when there are more threads than files.
pub multi: bool,
}

pub fn in_bulk<'p: 'r, 'r, R: MatchAndReplaceStrategy<'r> + std::marker::Sync>(
paths: &'p [PathBuf],
rename: &R,
options: &BulkRenameOptions,
) {
if paths.is_empty() {
return;
}

if options.multi {
let thread_count = num_cpus::get();

if thread_count > paths.len() {
warn!("there are more threads that files to rename, so single threaded it is");
} else if thread_count * 500 > paths.len() {
warn!("probably too few files to warrant multithreading, but here we go...");
return in_bulk_multithreaded(paths, rename, thread_count, options.no_rename);
} else {
return in_bulk_multithreaded(paths, rename, thread_count, options.no_rename);
}
}
return in_bulk_single_thread(paths, rename, options.no_rename);
}

fn in_bulk_single_thread<'p: 'r, 'r, R: MatchAndReplaceStrategy<'r>>(
paths: &'p [PathBuf],
rename: &R,
no_rename: bool,
) {
let values = paths.iter().filter_map(|p| {
let str = p.to_str();

if str.is_none() {
error!("Path is invalid unicode: {:?}", p);
}

return str;
});

for from in values {
if let Some(to) = rename.apply(from) {
if no_rename {
paths
.par_iter()
.filter_map(|p| {
let path_string = p.to_str();

if path_string.is_none() {
error!("Path is invalid unicode: {:?}", p);
}

return match path_string {
Some(s) => rename.apply(s).map(|renamed| (s, renamed)),
None => None,
};
})
.for_each(|(from, to)| {
if options.no_rename {
println!("{:?} -> {:?}", from, to);
} else {
if let Err(err) = std::fs::rename(from, to.to_string()) {
error!("{:?}: {}", from, err);
}
};
}
}
}

fn in_bulk_multithreaded<'p: 'r, 'r, R: MatchAndReplaceStrategy<'r> + std::marker::Sync>(
paths: &'p [PathBuf],
rename: &R,
thread_count: usize,
no_rename: bool,
) {
debug!("found {} threads available on this machine", thread_count);
let max_chunk_size = paths.len() / (thread_count - 1);

debug!(
"chunking work, to handle {} files in each of {} threads",
max_chunk_size, thread_count
);

let chunks = paths.chunks(max_chunk_size);

thread::scope(|s| {
let mut join_handles = vec![];

for (id, path_chunk) in chunks.enumerate() {
if let Ok(handle) = thread::Builder::new().spawn_scoped(s, || {
in_bulk_single_thread(path_chunk, rename, no_rename);
}) {
debug!(
"spawned thread {} with {} file to rename",
id,
path_chunk.len()
);
join_handles.push(handle);
} else {
error!(
"failed to spawn thread {}, renaming the next {} files in the main thread",
id,
path_chunk.len()
);
in_bulk_single_thread(path_chunk, rename, no_rename);
};
}

for handle in join_handles {
handle
.join()
.expect("Couldn't join on the associated thread")
}
})
})
}
7 changes: 1 addition & 6 deletions src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,10 +14,6 @@ struct RenameArgs {
#[clap(global = true, long, conflicts_with = "paths")]
glob: Option<String>,

/// Enable multi-threading to process more files at a time
#[clap(global = true, short, long = "multi")]
multithreading: bool,

/// Prevent diagnostic logging
#[clap(global = true, short, long)]
quiet: bool,
Expand Down Expand Up @@ -65,15 +61,14 @@ fn main() -> ExitCode {

let options = &rename::BulkRenameOptions {
no_rename: base_args.dry_run,
multi: base_args.multithreading,
};

match base_args.command {
Command::REGEX(args) => rename::in_bulk(&paths, &args, options),
Command::SIMPLE(args) => {
let mut replacer = MatchAndReplacer::new(args.expression);
replacer.set_strip(args.strip);
rename::in_bulk(&paths, &mut replacer, options);
rename::in_bulk(&paths, &replacer, options);
}
};

Expand Down

0 comments on commit d348803

Please sign in to comment.