diff --git a/Cargo.lock b/Cargo.lock index f4cb84b..fed8919 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -11,6 +11,15 @@ dependencies = [ "memchr", ] +[[package]] +name = "android_system_properties" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "819e7219dbd41043ac279b19830f2efc897156490d7fd6ea916720117ee66311" +dependencies = [ + "libc", +] + [[package]] name = "anes" version = "0.1.6" @@ -52,12 +61,32 @@ version = "0.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "37b2a672a2cb129a2e41c10b1224bb368f9f37a2b16b612598138befd7b37eb5" +[[package]] +name = "cc" +version = "1.0.79" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "50d30906286121d95be3d479533b458f87493b30a4b5f79a607db8f5d11aa91f" + [[package]] name = "cfg-if" version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" +[[package]] +name = "chrono" +version = "0.4.24" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4e3c5919066adf22df73762e50cffcde3a758f2a848b113b586d1f86728b673b" +dependencies = [ + "iana-time-zone", + "js-sys", + "num-integer", + "num-traits", + "wasm-bindgen", + "winapi", +] + [[package]] name = "ciborium" version = "0.2.0" @@ -124,6 +153,16 @@ dependencies = [ "os_str_bytes", ] +[[package]] +name = "codespan-reporting" +version = "0.11.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3538270d33cc669650c4b093848450d380def10c331d38c768e34cac80576e6e" +dependencies = [ + "termcolor", + "unicode-width", +] + [[package]] name = "colored" version = "2.0.0" @@ -135,6 +174,12 @@ dependencies = [ "winapi", ] +[[package]] +name = "core-foundation-sys" +version = "0.8.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e496a50fda8aacccc86d7529e2c1e0892dbd0f898a6b5645b5561b89c3210efa" + [[package]] name = "criterion" version = "0.4.0" @@ -214,6 +259,50 @@ dependencies = [ "cfg-if", ] +[[package]] +name = "cxx" +version = "1.0.92" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9a140f260e6f3f79013b8bfc65e7ce630c9ab4388c6a89c71e07226f49487b72" +dependencies = [ + "cc", + "cxxbridge-flags", + "cxxbridge-macro", + "link-cplusplus", +] + +[[package]] +name = "cxx-build" +version = "1.0.92" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "da6383f459341ea689374bf0a42979739dc421874f112ff26f829b8040b8e613" +dependencies = [ + "cc", + "codespan-reporting", + "once_cell", + "proc-macro2", + "quote", + "scratch", + "syn", +] + +[[package]] +name = "cxxbridge-flags" +version = "1.0.92" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "90201c1a650e95ccff1c8c0bb5a343213bdd317c6e600a93075bca2eff54ec97" + +[[package]] +name = "cxxbridge-macro" +version = "1.0.92" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0b75aed41bb2e6367cae39e6326ef817a851db13c13e4f3263714ca3cfb8de56" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + [[package]] name = "either" version = "1.8.1" @@ -262,6 +351,30 @@ dependencies = [ "libc", ] +[[package]] +name = "iana-time-zone" +version = "0.1.56" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0722cd7114b7de04316e7ea5456a0bbb20e4adb46fd27a3697adb812cff0f37c" +dependencies = [ + "android_system_properties", + "core-foundation-sys", + "iana-time-zone-haiku", + "js-sys", + "wasm-bindgen", + "windows", +] + +[[package]] +name = "iana-time-zone-haiku" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0703ae284fc167426161c2e3f1da3ea71d94b21bedbcc9494e92b28e334e3dca" +dependencies = [ + "cxx", + "cxx-build", +] + [[package]] name = "indexmap" version = "1.9.1" @@ -308,6 +421,15 @@ version = "0.2.126" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "349d5a591cd28b49e1d1037471617a32ddcda5731b99419008085f72d5a53836" +[[package]] +name = "link-cplusplus" +version = "1.0.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ecd207c9c713c34f95a097a5b029ac2ce6010530c7b49d7fea24d977dede04f5" +dependencies = [ + "cc", +] + [[package]] name = "log" version = "0.4.17" @@ -341,6 +463,16 @@ dependencies = [ "regex", ] +[[package]] +name = "num-integer" +version = "0.1.45" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "225d3389fb3509a24c93f5c29eb6bde2586b98d9f016636dff58d7c6f7569cd9" +dependencies = [ + "autocfg", + "num-traits", +] + [[package]] name = "num-traits" version = "0.2.15" @@ -450,9 +582,9 @@ dependencies = [ [[package]] name = "rayon" -version = "1.6.1" +version = "1.7.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6db3a213adf02b3bcfd2d3846bb41cb22857d131789e01df434fb7e7bc0759b7" +checksum = "1d2df5196e37bcc87abebc0053e20787d73847bb33134a69841207dd0a47f03b" dependencies = [ "either", "rayon-core", @@ -460,9 +592,9 @@ dependencies = [ [[package]] name = "rayon-core" -version = "1.10.2" +version = "1.11.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "356a0625f1954f730c0201cdab48611198dc6ce21f4acff55089b5a78e6e835b" +checksum = "4b8f95bd6966f5c87776639160a66bd8ab9895d9d4ab01ddba9fc60661aebe8d" dependencies = [ "crossbeam-channel", "crossbeam-deque", @@ -492,9 +624,13 @@ name = "rename" version = "0.4.0" dependencies = [ "clap", + "criterion", "glob", + "log", "mrp", + "rayon", "regex", + "stderrlog", ] [[package]] @@ -518,6 +654,12 @@ version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d29ab0c6d3fc0ee92fe66e2d99f700eab17a8d57d1c1d3b748380fb20baa78cd" +[[package]] +name = "scratch" +version = "1.0.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1792db035ce95be60c3f8853017b3999209281c24e2ba5bc8e59bf97a0c590c1" + [[package]] name = "serde" version = "1.0.147" @@ -549,6 +691,19 @@ dependencies = [ "serde", ] +[[package]] +name = "stderrlog" +version = "0.5.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "69a26bbf6de627d389164afa9783739b56746c6c72c4ed16539f4ff54170327b" +dependencies = [ + "atty", + "chrono", + "log", + "termcolor", + "thread_local", +] + [[package]] name = "strsim" version = "0.10.0" @@ -581,6 +736,16 @@ version = "0.15.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b1141d4d61095b28419e22cb0bbf02755f5e54e0526f97f1e3d1d160e60885fb" +[[package]] +name = "thread_local" +version = "1.1.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3fdd6f064ccff2d6567adcb3873ca630700f00b5ad3f060c25b5dcfd9a4ce152" +dependencies = [ + "cfg-if", + "once_cell", +] + [[package]] name = "tinytemplate" version = "1.2.1" @@ -597,6 +762,12 @@ version = "1.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5bd2fe26506023ed7b5e1e315add59d6f584c621d037f9368fea9cfb988f368c" +[[package]] +name = "unicode-width" +version = "0.1.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c0edd1e5b14653f783770bce4a4dabb4a5108a5370a5f5d8cfe8710c361f6c8b" + [[package]] name = "version_check" version = "0.9.4" @@ -708,3 +879,69 @@ name = "winapi-x86_64-pc-windows-gnu" version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" + +[[package]] +name = "windows" +version = "0.48.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e686886bc078bc1b0b600cac0147aadb815089b6e4da64016cbd754b6342700f" +dependencies = [ + "windows-targets", +] + +[[package]] +name = "windows-targets" +version = "0.48.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7b1eb6f0cd7c80c79759c929114ef071b87354ce476d9d94271031c0497adfd5" +dependencies = [ + "windows_aarch64_gnullvm", + "windows_aarch64_msvc", + "windows_i686_gnu", + "windows_i686_msvc", + "windows_x86_64_gnu", + "windows_x86_64_gnullvm", + "windows_x86_64_msvc", +] + +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.48.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "91ae572e1b79dba883e0d315474df7305d12f569b400fcf90581b06062f7e1bc" + +[[package]] +name = "windows_aarch64_msvc" +version = "0.48.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b2ef27e0d7bdfcfc7b868b317c1d32c641a6fe4629c171b8928c7b08d98d7cf3" + +[[package]] +name = "windows_i686_gnu" +version = "0.48.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "622a1962a7db830d6fd0a69683c80a18fda201879f0f447f065a3b7467daa241" + +[[package]] +name = "windows_i686_msvc" +version = "0.48.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4542c6e364ce21bf45d69fdd2a8e455fa38d316158cfd43b3ac1c5b1b19f8e00" + +[[package]] +name = "windows_x86_64_gnu" +version = "0.48.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ca2b8a661f7628cbd23440e50b05d705db3686f894fc9580820623656af974b1" + +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.48.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7896dbc1f41e08872e9d5e8f8baa8fdd2677f29468c4e156210174edc7f7b953" + +[[package]] +name = "windows_x86_64_msvc" +version = "0.48.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1a515f5799fe4961cb532f983ce2b23082366b898e52ffbce459c86f67c8378a" diff --git a/Cargo.toml b/Cargo.toml index b6a0008..bfb2d68 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -14,6 +14,16 @@ clap = { version = "3.2.6", features = ["derive"] } regex = "1.5.6" mrp = { path = "./mrp/" } glob = "0.3.1" +stderrlog = "0.5.4" +log = "0.4.17" +rayon = "1.7.0" + +[dev-dependencies] +criterion = "0.4" + +[[bench]] +name = "bulk_renames" +harness = false [workspace] members = ["mrp"] diff --git a/benchmarking/bench.sh b/benches/bench.sh similarity index 86% rename from benchmarking/bench.sh rename to benches/bench.sh index e968521..abfbfdb 100755 --- a/benchmarking/bench.sh +++ b/benches/bench.sh @@ -1,3 +1,5 @@ +cargo build --release; + echo "--- perl-rename"; echo "setting up ~54K files..." mkdir files; @@ -12,7 +14,7 @@ echo "setting up ~54K files..." mkdir files; touch files/g-{0001..0038}-a-{0001..0038}-al-{0001..0038}; # ~54K files echo "running..." -time rn simple "g-(g:int)-a-(a:int)-al-(al:int)->artist-(a)-album-(al)-genre-(g)" files/g*; +time ../target/release/rn simple "g-(g:int)-a-(a:int)-al-(al:int)->artist-(a)-album-(al)-genre-(g)" files/g*; rm -r files; echo; @@ -77,30 +79,4 @@ touch files27/g-{0001..0038}-a-{0001..0038}-al-{0001..0038}; # ~54K files echo "running..." time ../target/release/rn simple "g-(g:int)-a-(a:int)-al-(al:int)->artist-(a)-album-(al)-genre-(g)" --glob "files*/g*"; rm -r files; -rm -r files1; -rm -r files2; -rm -r files3; -rm -r files4; -rm -r files5; -rm -r files6; -rm -r files7; -rm -r files8; -rm -r files9; -rm -r files10; -rm -r files11; -rm -r files12; -rm -r files13; -rm -r files14; -rm -r files15; -rm -r files16; -rm -r files17; -rm -r files18; -rm -r files19; -rm -r files20; -rm -r files21; -rm -r files22; -rm -r files23; -rm -r files24; -rm -r files25; -rm -r files26; -rm -r files27; +rm -r files*; diff --git a/benches/bulk_renames.rs b/benches/bulk_renames.rs new file mode 100644 index 0000000..c091967 --- /dev/null +++ b/benches/bulk_renames.rs @@ -0,0 +1,96 @@ +use rayon::prelude::*; +use std::{path::PathBuf, str::FromStr}; + +use criterion::{criterion_group, criterion_main, BenchmarkId, Criterion}; +use mrp::{MatchAndReplaceStrategy, MatchAndReplacer}; + +fn get_renamer() -> MatchAndReplacer<'static> { + let expr = mrp::parser::MatchAndReplaceExpression::from_str( + "g-(g:int)-a-(a:int)-al-(al:int)->artist-(a)-album-(al)-genre-(g)", + ) + .unwrap(); + + return MatchAndReplacer::new(expr); +} + +fn create_file_paths(count: usize) -> Vec { + let paths = (0..count) + .map(|i| PathBuf::from(format!("./files/g-{i}-a-{i}-al-{i}"))) + .collect::>(); + + return paths; +} + +fn renaming_files(c: &mut Criterion) { + let renamer = get_renamer(); + let mut group = c.benchmark_group("renames"); + group.sample_size(10); + + for size in [10, 100, 1000, 10000, 100000, 1000000].iter() { + let files = create_file_paths(*size); + group.throughput(criterion::Throughput::Elements(*size as u64)); + + group.bench_with_input(BenchmarkId::from_parameter(size), &files, |b, files| { + b.iter(|| { + files.iter().filter_map(|p| p.to_str()).for_each(|name| { + renamer.apply(name); + }); + }); + }); + } +} + +fn comparing_rayon_and_single_threaded(c: &mut Criterion) { + let renamer = get_renamer(); + let mut group = c.benchmark_group("rayon vs serial with a few files"); + group.sample_size(10); + + #[derive(Debug, Clone, Copy)] + enum VS { + Serial, + Rayon, + } + + for size in [ + (2, VS::Serial), + (2, VS::Rayon), + (20, VS::Serial), + (20, VS::Rayon), + (200, VS::Serial), + (200, VS::Rayon), + (20000, VS::Serial), + (20000, VS::Rayon), + ] + .iter() + { + let files = create_file_paths(size.0); + group.throughput(criterion::Throughput::Elements(size.0 as u64)); + + group.bench_with_input( + BenchmarkId::from_parameter(format!("{} with {:?}", size.0, size.1)), + &(files, size.1), + |b, (files, choice)| match choice { + VS::Serial => { + b.iter(|| { + files.iter().filter_map(|p| p.to_str()).for_each(|name| { + renamer.apply(name); + }); + }); + } + VS::Rayon => { + b.iter(|| { + files + .par_iter() + .filter_map(|p| p.to_str()) + .for_each(|name| { + renamer.apply(name); + }); + }); + } + }, + ); + } +} + +criterion_group!(benches, renaming_files, comparing_rayon_and_single_threaded); +criterion_main!(benches); diff --git a/mrp/benches/mrp_vs_regex.rs b/mrp/benches/mrp_vs_regex.rs index 391be09..0119e4e 100644 --- a/mrp/benches/mrp_vs_regex.rs +++ b/mrp/benches/mrp_vs_regex.rs @@ -1,5 +1,7 @@ +use std::str::FromStr; + use criterion::{criterion_group, criterion_main, Criterion}; -use mrp::{parser::Parser, MatchAndReplaceStrategy, MatchAndReplacer}; +use mrp::{parser::MatchAndReplaceExpression, MatchAndReplaceStrategy, MatchAndReplacer}; use regex::Regex; const EXP: &str = "(num:int)asdf->lul(num)"; @@ -11,8 +13,8 @@ fn regex_benchmark(c: &mut Criterion) { } fn mrp_benchmark(c: &mut Criterion) { - let exp = Parser::from(EXP).parse().unwrap(); - let r = MatchAndReplacer::new(&exp); + let exp = MatchAndReplaceExpression::from_str(EXP).unwrap(); + let r = MatchAndReplacer::new(exp); c.bench_function("mrp strat", |b| { b.iter(|| { r.apply(INPUT); diff --git a/mrp/src/captures.rs b/mrp/src/captures.rs new file mode 100644 index 0000000..56bdda4 --- /dev/null +++ b/mrp/src/captures.rs @@ -0,0 +1,22 @@ +#[derive(Debug, PartialEq)] +struct Capture<'source, 'input> { + name: &'source str, + value: &'input str, +} + +#[derive(Debug, PartialEq)] +pub struct Captures<'source, 'input> { + inner: Vec>, +} + +impl<'source, 'input> Captures<'source, 'input> { + pub fn new() -> Self { + Self { inner: vec![] } + } + pub fn put(&mut self, name: &'source str, value: &'input str) { + self.inner.push(Capture { name, value }); + } + pub fn get(&self, name: &str) -> Option<&str> { + self.inner.iter().find(|c| c.name == name).map(|c| c.value) + } +} diff --git a/mrp/src/error.rs b/mrp/src/error.rs index 99aebca..d7cfc73 100644 --- a/mrp/src/error.rs +++ b/mrp/src/error.rs @@ -2,25 +2,25 @@ use colored::Colorize; use crate::lexer::{Token, TokenKind}; -pub type Result<'s, T> = std::result::Result>; +pub type Result<'source, T> = std::result::Result>; #[derive(Debug, PartialEq)] -pub enum ParseErrorKind<'t> { +pub enum ParseErrorKind<'source> { ExpectedToken { expected: TokenKind, found: TokenKind, - text: &'t str, + text: &'source str, position: usize, }, - UnsupportedToken(Token<'t>), + UnsupportedToken(Token<'source>), UnexpectedToken { unexpected: TokenKind, previous: TokenKind, position: usize, }, UndeclaredIdentifier { - ident: &'t str, - declared: Vec<&'t str>, + ident: &'source str, + declared: Vec<&'source str>, position: usize, }, } @@ -41,9 +41,9 @@ impl TokenKind { } #[derive(Debug, PartialEq)] -pub struct ParseError<'t> { - pub(crate) input: &'t str, - pub(crate) kind: ParseErrorKind<'t>, +pub struct ParseError<'source> { + pub(crate) source: &'source str, + pub(crate) kind: ParseErrorKind<'source>, } impl<'t> ParseError<'t> { @@ -63,7 +63,7 @@ impl<'t> std::fmt::Display for ParseError<'t> { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { use ParseErrorKind::*; - writeln!(f, "{}", self.input.yellow())?; + writeln!(f, "\n{}", self.source.yellow())?; let location = self.error_location(); @@ -145,20 +145,22 @@ impl<'t> std::fmt::Display for ParseError<'t> { #[cfg(test)] mod tests { + use std::str::FromStr; + use super::*; - use crate::parser::Parser; + use crate::parser::MatchAndReplaceExpression; use ParseErrorKind::*; use TokenKind::*; macro_rules! assert_error { ($input:literal, $error_kind:expr) => { let input = $input; - let err = Parser::from(input).parse().unwrap_err(); + let err = MatchAndReplaceExpression::from_str(input).unwrap_err(); assert_eq!( err, ParseError { - input: $input, + source: $input, kind: $error_kind } ); diff --git a/mrp/src/lexer.rs b/mrp/src/lexer.rs index b251336..831e163 100644 --- a/mrp/src/lexer.rs +++ b/mrp/src/lexer.rs @@ -16,13 +16,13 @@ pub enum TokenKind { } #[derive(Debug, PartialEq)] -pub enum TokenText<'t> { - Slice(&'t str), +pub enum TokenText<'source> { + Slice(&'source str), Empty, } -impl<'t> Deref for TokenText<'t> { - type Target = &'t str; +impl<'source> Deref for TokenText<'source> { + type Target = &'source str; fn deref(&self) -> &Self::Target { match self { @@ -32,7 +32,7 @@ impl<'t> Deref for TokenText<'t> { } } -impl<'a> TokenText<'a> { +impl<'source> TokenText<'source> { pub fn len(&self) -> usize { match self { TokenText::Slice(s) => s.len(), @@ -41,7 +41,7 @@ impl<'a> TokenText<'a> { } } -impl<'t> Display for TokenText<'t> { +impl<'source> Display for TokenText<'source> { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { write!( f, @@ -55,31 +55,31 @@ impl<'t> Display for TokenText<'t> { } #[derive(Debug, PartialEq)] -pub struct Token<'t> { +pub struct Token<'source> { pub kind: TokenKind, - pub text: TokenText<'t>, + pub text: TokenText<'source>, pub start: usize, } #[derive(Debug)] -pub struct Lexer<'a> { - input: &'a [u8], +pub struct Lexer<'source> { + input: &'source [u8], position: usize, } -impl<'a> Lexer<'a> { - pub fn new(input: &'a str) -> Self { +impl<'source> Lexer<'source> { + pub fn new(input: &'source str) -> Self { Self { input: input.as_bytes(), position: 0, } } - pub fn input(&self) -> &'a str { + pub fn input(&self) -> &'source str { std::str::from_utf8(&self.input).expect("input should only contain utf-8 characters") } - fn input_slice(&self, range: Range) -> &'a str { + fn input_slice(&self, range: Range) -> &'source str { std::str::from_utf8(&self.input[range]).expect("input should only contain utf-8 characters") } @@ -134,7 +134,7 @@ impl<'a> Lexer<'a> { return (start_pos, self.position + 1); } - pub fn next_token(&mut self) -> Token<'a> { + pub fn next_token(&mut self) -> Token<'source> { let t = match self.ch() { Some(ch) => match ch { b'(' => self.char_token(TokenKind::Lparen), @@ -165,7 +165,7 @@ impl<'a> Lexer<'a> { t } - fn type_token(&mut self) -> Token<'a> { + fn type_token(&mut self) -> Token<'source> { let start = self.position; let (s, e) = self.read_while(|c| c.is_ascii_alphabetic()); let slice = self.input_slice(s..e); @@ -176,7 +176,7 @@ impl<'a> Lexer<'a> { } } - fn identifier_token(&mut self) -> Token<'a> { + fn identifier_token(&mut self) -> Token<'source> { let start = self.position; let (s, e) = self.read_while(|c| c.is_ascii_alphabetic()); let slice = self.input_slice(s..e); @@ -188,7 +188,7 @@ impl<'a> Lexer<'a> { } } - fn literal(&mut self) -> Token<'a> { + fn literal(&mut self) -> Token<'source> { let start = self.position; let (s, e) = self.read_while(|c| match c { b'(' | b')' | b':' | b'-' => false, @@ -201,7 +201,7 @@ impl<'a> Lexer<'a> { } } - fn char_token(&self, kind: TokenKind) -> Token<'a> { + fn char_token(&self, kind: TokenKind) -> Token<'source> { Token { kind, text: TokenText::Slice(self.input_slice(self.position..self.position + 1)), diff --git a/mrp/src/lib.rs b/mrp/src/lib.rs index 1a80fcc..f110510 100644 --- a/mrp/src/lib.rs +++ b/mrp/src/lib.rs @@ -1,3 +1,4 @@ +mod captures; mod error; pub mod lexer; mod matcher; @@ -5,24 +6,29 @@ pub mod parser; use std::borrow::Cow; -use parser::{AbstractReplaceExpression, MatchAndReplaceExpression}; +use parser::{AbstractReplaceExpression, MatchAndReplaceExpression, MatchExpression}; /// Representing a stragety by which to match and replace on a `string` value -pub trait MatchAndReplaceStrategy<'m> { +pub trait MatchAndReplaceStrategy<'input> { /// Match and replace - fn apply<'sf, 's: 'sf + 'm>(&'sf self, value: &'s str) -> Option>; + fn apply(&self, value: &'input str) -> Option>; } -pub struct MatchAndReplacer<'m> { - mrex: &'m MatchAndReplaceExpression<'m>, +pub struct MatchAndReplacer<'source> { + mex: MatchExpression<'source>, + exprs: Vec>, /// When true, this strategy will replace the matching range found, and strip everything else /// off. strip: bool, } -impl<'m> MatchAndReplacer<'m> { - pub fn new(mrex: &'m MatchAndReplaceExpression<'m>) -> Self { - Self { mrex, strip: false } +impl<'source> MatchAndReplacer<'source> { + pub fn new(mrex: MatchAndReplaceExpression<'source>) -> Self { + Self { + mex: mrex.mex, + exprs: mrex.rex.expressions, + strip: false, + } } pub fn set_strip(&mut self, s: bool) { @@ -30,23 +36,19 @@ impl<'m> MatchAndReplacer<'m> { } } -impl<'m> MatchAndReplaceStrategy<'m> for MatchAndReplacer<'m> { - fn apply<'sf, 's: 'sf + 'm>(&'sf self, value: &'s str) -> Option> { - match self.mrex.mex.find_at(value, 0) { - None => None, - Some(m) => { +impl<'input> MatchAndReplaceStrategy<'input> for MatchAndReplacer<'input> { + fn apply(&self, value: &'input str) -> Option> { + match self.mex.find_at_capturing(value, 0) { + (None, _) => None, + (Some(m), captures) => { let mut new = Cow::from(value); let replacement_str: String = self - .mrex - .rex - .expressions + .exprs .iter() .map(|e| match e { AbstractReplaceExpression::Literal(l) => *l, - AbstractReplaceExpression::Identifier(i) => self - .mrex - .mex - .get_capture(i) + AbstractReplaceExpression::Identifier(i) => captures + .get(i) .expect(&format!("'{i}' should have been captured")), }) .collect(); @@ -65,24 +67,28 @@ impl<'m> MatchAndReplaceStrategy<'m> for MatchAndReplacer<'m> { #[cfg(test)] mod tests { - use crate::parser::Parser; + use std::str::FromStr; use super::*; - impl<'m> MatchAndReplacer<'m> { - fn apply_all<'sf, 's: 'sf + 'm>( - &'s self, - values: Vec<&'s str>, - ) -> Vec> { - return values.iter().filter_map(|s| self.apply(s)).collect(); + impl<'source> MatchAndReplacer<'source> { + fn apply_all(&mut self, values: Vec<&'source str>) -> Vec { + let mut replaced = vec![]; + for value in values { + if let Some(v) = self.apply(value) { + replaced.push(v.to_string()) + } + } + + return replaced; } } #[test] fn one_literal_and_int_capture() { let input = "lit(num:int)->lul(num)"; - let expression = Parser::from(input).parse().unwrap(); - let strat = MatchAndReplacer::new(&expression); + let expression = MatchAndReplaceExpression::from_str(input).unwrap(); + let strat = MatchAndReplacer::new(expression); assert_eq!(strat.apply("lit12").unwrap(), "lul12"); } @@ -90,16 +96,16 @@ mod tests { #[test] fn test_mrp_application() { let input = "(num:int)asdf->lul(num)"; - let expression = Parser::from(input).parse().unwrap(); - let strat = MatchAndReplacer::new(&expression); + let expression = MatchAndReplaceExpression::from_str(input).unwrap(); + let mut strat = MatchAndReplacer::new(expression); let treated = strat.apply_all(vec!["124asdf", "3asdfwery", "lk234asdfas"]); assert_eq!(treated, vec!["lul124", "lul3wery", "lklul234as"]); - let expression = Parser::from("hello(as:dig)->oh(as)hi").parse().unwrap(); + let expression = MatchAndReplaceExpression::from_str("hello(as:dig)->oh(as)hi").unwrap(); - let strat = MatchAndReplacer::new(&expression); + let mut strat = MatchAndReplacer::new(expression); let treated = strat.apply_all(vec!["hello5", "ashello090", "hello345hello"]); @@ -108,8 +114,9 @@ mod tests { #[test] fn test_mrp_application_stripping() { - let expression = Parser::from("hello(as:dig)->oh(as)hi").parse().unwrap(); - let mut strat = MatchAndReplacer::new(&expression); + let expression = MatchAndReplaceExpression::from_str("hello(as:dig)->oh(as)hi").unwrap(); + + let mut strat = MatchAndReplacer::new(expression); strat.set_strip(true); @@ -120,8 +127,8 @@ mod tests { #[test] fn test_mrp_application_with_multi_digits_and_stripping() { - let expression = Parser::from("(n:int)->step(n)").parse().unwrap(); - let mut strat = MatchAndReplacer::new(&expression); + let expression = MatchAndReplaceExpression::from_str("(n:int)->step(n)").unwrap(); + let mut strat = MatchAndReplacer::new(expression); strat.set_strip(true); diff --git a/mrp/src/matcher.rs b/mrp/src/matcher.rs index a9a3264..68a75af 100644 --- a/mrp/src/matcher.rs +++ b/mrp/src/matcher.rs @@ -1,30 +1,36 @@ -use crate::parser::{AbstractMatchingExpression, CaptureType, MatchExpression}; +use crate::{ + captures::Captures, + parser::{AbstractMatchingExpression, CaptureType, MatchExpression}, +}; -pub struct Match<'t> { - text: &'t str, +pub struct Match<'input> { + input: &'input str, pub start: usize, pub end: usize, } -impl<'t> Match<'t> { +impl<'input> Match<'input> { pub fn as_str(&self) -> &str { - &self.text[self.start..self.end] + &self.input[self.start..self.end] } } -impl<'a> MatchExpression<'a> { - /// Find the leftmost-first match in the input starting at the given position - pub fn find_at<'t: 'a, 's: 'a>(&'s self, input: &'t str, start: usize) -> Option> { +impl<'source> MatchExpression<'source> { + pub fn find_at_capturing<'input>( + &self, + input: &'input str, + start: usize, + ) -> (Option>, Captures<'source, 'input>) { let mut curr_position = start; let mut legit_start = start; let mut state = 0; let mut capture_slice_start = None; let mut capture_candidate_found = None; - let mut captures_map = self.captures.borrow_mut(); + let mut captures = Captures::new(); while state < self.expressions.len() && curr_position < input.len() { - let e = self.expressions.get(state).unwrap(); + let e = self.get_expression(state).unwrap(); match e { AbstractMatchingExpression::Literal(literal) => { @@ -43,7 +49,7 @@ impl<'a> MatchExpression<'a> { let slice = &input[slice_range]; - let is_match = slice == *literal; + let is_match = slice == literal; if is_match { state += 1; @@ -64,7 +70,7 @@ impl<'a> MatchExpression<'a> { if ch.is_ascii_digit() { curr_position += 1; state += 1; - captures_map.insert(identifier.as_ref(), ch_str); + captures.put(identifier.as_ref(), ch_str); } else { curr_position += 1; state = 0; @@ -74,7 +80,7 @@ impl<'a> MatchExpression<'a> { let ch = input.as_bytes()[curr_position] as char; let mut capture = |start: usize, curr_position: usize| { - captures_map.insert(identifier.as_ref(), &input[start..curr_position]); + captures.put(identifier.as_ref(), &input[start..curr_position]); }; if ch.is_ascii_digit() { @@ -107,47 +113,55 @@ impl<'a> MatchExpression<'a> { } if state == self.expressions.len() { - return Some(Match { - text: input, - start: legit_start, - end: curr_position, - }); + return ( + Some(Match { + input, + start: legit_start, + end: curr_position, + }), + captures, + ); } - None + (None, captures) + } + + /// Find the leftmost-first match in the input starting at the given position + pub fn find_at<'input>(&self, input: &'input str, start: usize) -> Option> { + self.find_at_capturing(input, start).0 } - pub fn find_iter<'m: 'a, 't>(&'m self, text: &'t str) -> Matches<'t, 'm> { - Matches::new(self, text) + pub fn find_iter<'input>(self, input: &'input str) -> Matches<'input, 'source> { + Matches::new(self, input) } } #[derive(Debug)] -pub struct Matches<'t, 'm> { - pub(crate) text: &'t str, - pub(crate) mex: &'m MatchExpression<'m>, +pub struct Matches<'input, 'source> { + pub(crate) input: &'input str, + pub(crate) mex: MatchExpression<'source>, last_end: usize, } -impl<'t, 'm> Matches<'t, 'm> { - pub fn new(mex: &'m MatchExpression<'m>, text: &'t str) -> Self { +impl<'input, 'source> Matches<'input, 'source> { + pub fn new(mex: MatchExpression<'source>, input: &'input str) -> Self { Self { - text, + input, mex, last_end: 0, } } } -impl<'t: 'm, 'm> Iterator for Matches<'t, 'm> { - type Item = Match<'t>; +impl<'input, 'source> Iterator for Matches<'input, 'source> { + type Item = Match<'input>; fn next(&mut self) -> Option { - if self.last_end >= self.text.len() { + if self.last_end >= self.input.len() { return None; } - let m = match self.mex.find_at(self.text, self.last_end) { + let m = match self.mex.find_at(self.input, self.last_end) { None => return None, Some(m) => m, }; @@ -161,6 +175,8 @@ impl<'t: 'm, 'm> Iterator for Matches<'t, 'm> { #[cfg(test)] mod tests { + use std::str::FromStr; + use crate::{lexer::Lexer, parser::Parser}; use super::*; @@ -170,11 +186,11 @@ mod tests { macro_rules! assert_match_on { ($pattern:literal, $input:literal) => { let exp = Parser::new(Lexer::new($pattern)).parse_match_exp().unwrap(); - assert!(Matches::new(&exp, $input).count() > 0); + assert!(Matches::new(exp, $input).count() > 0); }; ($pattern:literal, $input:literal, $boolean:literal) => { let exp = Parser::new(Lexer::new($pattern)).parse_match_exp().unwrap(); - assert_eq!(Matches::new(&exp, $input).count() > 0, $boolean); + assert_eq!(Matches::new(exp, $input).count() > 0, $boolean); }; } @@ -194,8 +210,11 @@ mod tests { let text = "ab321love78"; assert_eq!(exp.find_at(text, 0).unwrap().as_str(), text); - assert_eq!(exp.get_capture("n").unwrap(), "321"); - assert_eq!(exp.get_capture("i").unwrap(), "78"); + + let cap = exp.find_at_capturing(text, 0).1; + + assert_eq!(cap.get("n").unwrap(), "321"); + assert_eq!(cap.get("i").unwrap(), "78"); } #[test] @@ -206,7 +225,8 @@ mod tests { let text = "aewrdigit276yoypa"; assert_eq!(exp.find_at(text, 0).unwrap().as_str(), "digit2"); - assert_eq!(exp.get_capture("d").unwrap(), "2"); + let cap = exp.find_at_capturing(text, 0).1; + assert_eq!(cap.get("d").unwrap(), "2"); } #[test] @@ -217,9 +237,10 @@ mod tests { let text = "ab321love78ly8"; assert_eq!(exp.find_at(text, 0).unwrap().as_str(), text); - assert_eq!(exp.get_capture("n").unwrap(), "321"); - assert_eq!(exp.get_capture("i").unwrap(), "78"); - assert_eq!(exp.get_capture("d").unwrap(), "8"); + let cap = exp.find_at_capturing(text, 0).1; + assert_eq!(cap.get("n").unwrap(), "321"); + assert_eq!(cap.get("i").unwrap(), "78"); + assert_eq!(cap.get("d").unwrap(), "8"); } #[test] @@ -230,24 +251,24 @@ mod tests { let text = "ab321love78ly8"; assert_eq!(exp.find_at(text, 0).unwrap().as_str(), &text[2..]); - assert_eq!(exp.get_capture("n").unwrap(), "321"); - assert_eq!(exp.get_capture("i").unwrap(), "78"); - assert_eq!(exp.get_capture("d").unwrap(), "8"); + + let cap = exp.find_at_capturing(text, 0).1; + assert_eq!(cap.get("n").unwrap(), "321"); + assert_eq!(cap.get("i").unwrap(), "78"); + assert_eq!(cap.get("d").unwrap(), "8"); } #[test] fn special() { - let mut parser = Parser::from("hello(as:dig)->oh(as)hi"); - let exp = parser.parse_match_exp().unwrap(); + let exp = MatchExpression::from_str("hello(as:dig)->oh(as)hi").unwrap(); assert_eq!(exp.find_at("ashello090", 0).unwrap().as_str(), "hello0"); } #[test] fn muliple_matches() { - let mut parser = Parser::from("xy(n:int)"); - let pattern = parser.parse_match_exp().unwrap(); + let pattern = MatchExpression::from_str("xy(n:int)").unwrap(); let text = "wxy10xy33asdfxy81"; - let mut matches = Matches::new(&pattern, text); + let mut matches = Matches::new(pattern, text); assert_eq!(matches.next().unwrap().as_str(), "xy10"); assert_eq!(matches.next().unwrap().as_str(), "xy33"); diff --git a/mrp/src/parser.rs b/mrp/src/parser.rs index 01aea96..f486fa1 100644 --- a/mrp/src/parser.rs +++ b/mrp/src/parser.rs @@ -1,94 +1,96 @@ -use std::{cell::RefCell, collections::HashMap}; +use std::str::FromStr; use crate::{ error::{ParseError, ParseErrorKind, Result}, lexer::{Lexer, Token, TokenKind}, }; -#[derive(Debug, PartialEq)] +#[derive(Debug, PartialEq, Clone)] pub enum CaptureType { Int, Digit, } -#[derive(Debug, PartialEq)] -pub enum AbstractMatchingExpression<'a> { - Literal(&'a str), +#[derive(Debug, PartialEq, Clone)] +pub enum AbstractMatchingExpression<'source> { + Literal(&'source str), Capture { - identifier: &'a str, + identifier: &'source str, identifier_type: CaptureType, }, } #[derive(Debug, PartialEq, Clone)] -pub enum AbstractReplaceExpression<'a> { - Literal(&'a str), - Identifier(&'a str), +pub enum AbstractReplaceExpression<'source> { + Literal(&'source str), + Identifier(&'source str), } #[derive(Debug, PartialEq)] -pub struct MatchExpression<'a> { - pub expressions: Vec>, - pub captures: RefCell>, +pub struct MatchExpression<'source> { + pub expressions: Vec>, } -impl<'a> MatchExpression<'a> { - pub fn new(expressions: Vec>) -> Self { - Self { - expressions, - captures: RefCell::new(HashMap::new()), - } +impl FromStr for MatchExpression<'static> { + type Err = ParseError<'static>; + + fn from_str(s: &str) -> std::result::Result { + let input = Box::leak(s.into()); + Parser::new(Lexer::new(input)).parse_match_exp() + } +} + +impl<'source> MatchExpression<'source> { + pub fn new(expressions: Vec>) -> Self { + Self { expressions } } - pub fn get_capture(&self, name: &str) -> Option<&str> { - self.captures.borrow().get(name).map(|s| *s) + pub fn get_expression(&self, idx: usize) -> Option> { + self.expressions.get(idx).map(|exp| exp.clone()) } } #[derive(Debug, PartialEq)] -pub struct ReplaceExpression<'a> { - pub expressions: Vec>, +pub struct ReplaceExpression<'source> { + pub expressions: Vec>, } #[derive(Debug, PartialEq)] -pub struct MatchAndReplaceExpression<'a> { - pub mex: MatchExpression<'a>, - pub rex: ReplaceExpression<'a>, +pub struct MatchAndReplaceExpression<'source> { + pub mex: MatchExpression<'source>, + pub rex: ReplaceExpression<'source>, } -pub struct Parser<'a> { - lexer: Lexer<'a>, - peeked: Option>, -} +impl FromStr for MatchAndReplaceExpression<'static> { + type Err = ParseError<'static>; -impl<'a> From<&'a str> for Parser<'a> { - fn from(input: &'a str) -> Self { - Self::new(Lexer::new(input)) + fn from_str(s: &str) -> std::result::Result { + let input = Box::leak(s.into()); + Parser::new(Lexer::new(input)).parse() } } -impl<'a> From<&'a String> for Parser<'a> { - fn from(input: &'a String) -> Self { - Self::new(Lexer::new(&input)) - } +pub struct Parser<'source> { + lexer: Lexer<'source>, + peeked: Option>, } -impl<'a> Parser<'a> { - pub fn new(lexer: Lexer<'a>) -> Self { +impl<'source> Parser<'source> { + pub fn new(lexer: Lexer<'source>) -> Self { Self { lexer, peeked: None, } } - fn token(&mut self) -> Token<'a> { + fn token(&mut self) -> Token<'source> { match self.peeked.take() { Some(t) => t, None => self.lexer.next_token(), } } - fn peek_token(&mut self) -> &Token<'a> { + fn peek_token(&mut self) -> &Token<'source> { self.peeked.get_or_insert_with(|| self.lexer.next_token()) } @@ -96,7 +98,7 @@ impl<'a> Parser<'a> { self.token(); } - pub(crate) fn parse_match_exp(&mut self) -> Result<'a, MatchExpression<'a>> { + pub(crate) fn parse_match_exp(&mut self) -> Result<'source, MatchExpression<'source>> { let mut expressions = vec![]; let mut token = self.token(); @@ -133,7 +135,10 @@ impl<'a> Parser<'a> { Ok(MatchExpression::new(expressions)) } - fn parse_capture(&mut self, identifier: &'a str) -> Result<'a, AbstractMatchingExpression<'a>> { + fn parse_capture( + &mut self, + identifier: &'source str, + ) -> Result<'source, AbstractMatchingExpression<'source>> { self.eat_token(); self.expect(TokenKind::Type)?; @@ -146,7 +151,7 @@ impl<'a> Parser<'a> { "dig" => CaptureType::Digit, _ => { return Err(ParseError { - input: self.lexer.input(), + source: self.lexer.input(), kind: ParseErrorKind::UnsupportedToken(t), }) } @@ -156,7 +161,7 @@ impl<'a> Parser<'a> { }) } - fn expect(&mut self, token_kind: TokenKind) -> Result<'a, ()> { + fn expect(&mut self, token_kind: TokenKind) -> Result<'source, ()> { let error_kind = match self.peek_token() { t if t.kind == token_kind => return Ok(()), t => ParseErrorKind::ExpectedToken { @@ -168,12 +173,12 @@ impl<'a> Parser<'a> { }; Err(ParseError { - input: self.lexer.input(), + source: self.lexer.input(), kind: error_kind, }) } - fn expect_not(&mut self, token_kind: TokenKind, current: TokenKind) -> Result<'a, ()> { + fn expect_not(&mut self, token_kind: TokenKind, current: TokenKind) -> Result<'source, ()> { let error_kind = match self.peek_token() { t if t.kind == token_kind => ParseErrorKind::UnexpectedToken { unexpected: token_kind, @@ -184,15 +189,15 @@ impl<'a> Parser<'a> { }; Err(ParseError { - input: self.lexer.input(), + source: self.lexer.input(), kind: error_kind, }) } pub(crate) fn parse_replacement_exp( &mut self, - declared_idents: Vec<&'a str>, - ) -> Result<'a, ReplaceExpression<'a>> { + declared_idents: Vec<&'source str>, + ) -> Result<'source, ReplaceExpression<'source>> { let mut expressions = vec![]; let mut token = self.token(); @@ -208,7 +213,7 @@ impl<'a> Parser<'a> { Ident => { if !declared_idents.contains(&token.text) { return Err(ParseError { - input: self.lexer.input(), + source: self.lexer.input(), kind: ParseErrorKind::UndeclaredIdentifier { ident: &token.text, declared: declared_idents, @@ -233,7 +238,7 @@ impl<'a> Parser<'a> { Ok(ReplaceExpression { expressions }) } - pub fn parse(&mut self) -> Result<'a, MatchAndReplaceExpression<'a>> { + pub fn parse(&mut self) -> Result<'source, MatchAndReplaceExpression<'source>> { let mex = self.parse_match_exp()?; let declared_idents = mex .expressions @@ -338,12 +343,12 @@ mod tests { #[test] fn test_wrong_capture_syntax() { - let input = "(ident:)"; - let mut p = Parser::new(Lexer::new(input)); + let source = "(ident:)"; + let mut p = Parser::new(Lexer::new(source)); assert_eq!( p.parse_match_exp().unwrap_err(), ParseError { - input, + source, kind: ParseErrorKind::ExpectedToken { expected: TokenKind::Type, found: TokenKind::Rparen, diff --git a/src/lib.rs b/src/lib.rs new file mode 100644 index 0000000..c143b7c --- /dev/null +++ b/src/lib.rs @@ -0,0 +1,39 @@ +use std::path::PathBuf; + +use log::*; +use mrp::MatchAndReplaceStrategy; +use rayon::prelude::*; + +pub struct BulkRenameOptions { + pub no_rename: bool, +} + +pub fn in_bulk<'p: 'r, 'r, R: MatchAndReplaceStrategy<'r> + std::marker::Sync>( + paths: &'p [PathBuf], + rename: &R, + options: &BulkRenameOptions, +) { + paths + .par_iter() + .filter_map(|p| { + let path_string = p.to_str(); + + if path_string.is_none() { + error!("Path is invalid unicode: {:?}", p); + } + + return match path_string { + Some(s) => rename.apply(s).map(|renamed| (s, renamed)), + None => None, + }; + }) + .for_each(|(from, to)| { + if options.no_rename { + println!("{:?} -> {:?}", from, to); + } else { + if let Err(err) = std::fs::rename(from, to.to_string()) { + error!("{:?}: {}", from, err); + } + }; + }) +} diff --git a/src/main.rs b/src/main.rs index a04cf28..fa3ce71 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,7 +1,7 @@ -use std::{path::PathBuf, process::ExitCode}; +use std::process::ExitCode; use clap::{Args, Parser, Subcommand}; -use mrp::{MatchAndReplaceStrategy, MatchAndReplacer}; +use mrp::{parser::MatchAndReplaceExpression, MatchAndReplaceStrategy, MatchAndReplacer}; #[derive(Parser, Debug)] #[clap(author, version, about, setting = clap::AppSettings::DeriveDisplayOrder)] @@ -10,10 +10,18 @@ struct RenameArgs { #[clap(subcommand)] command: Command, - /// pattern for the paths to rename. + /// Pattern for the paths to rename. #[clap(global = true, long, conflicts_with = "paths")] glob: Option, + /// Prevent diagnostic logging + #[clap(global = true, short, long)] + quiet: bool, + + /// Determine diagnostic log level + #[clap(global = true, short, long = "verbose", parse(from_occurrences))] + verbosity: usize, + /// One or more paths to rename. #[clap(global = true)] paths: Vec, @@ -34,6 +42,14 @@ enum Command { fn main() -> ExitCode { let base_args = RenameArgs::parse(); + stderrlog::new() + .module("rename") + .quiet(base_args.quiet) + .verbosity(base_args.verbosity) + .timestamp(stderrlog::Timestamp::Millisecond) + .init() + .unwrap(); + let paths = if let Some(aw) = &base_args.glob { glob::glob(aw) .expect("invalid glob pattern") @@ -43,19 +59,17 @@ fn main() -> ExitCode { base_args.paths }; + let options = &rename::BulkRenameOptions { + no_rename: base_args.dry_run, + }; + match base_args.command { - Command::REGEX(ref args) => handle_regex_replacement(&args, &paths, base_args.dry_run), - Command::SIMPLE(ref args) => match mrp::parser::Parser::from(&args.expression).parse() { - Ok(ref e) => { - let mut replacer = MatchAndReplacer::new(e); - replacer.set_strip(args.strip); - handle_mrp_replacement(&paths, replacer, base_args.dry_run); - } - Err(e) => { - eprintln!("{e}"); - return ExitCode::FAILURE; - } - }, + Command::REGEX(args) => rename::in_bulk(&paths, &args, options), + Command::SIMPLE(args) => { + let mut replacer = MatchAndReplacer::new(args.expression); + replacer.set_strip(args.strip); + rename::in_bulk(&paths, &replacer, options); + } }; ExitCode::SUCCESS @@ -64,37 +78,12 @@ fn main() -> ExitCode { #[derive(Debug, Args)] struct SimpleArgs { /// A Match & Replace expression in the custom MRP syntax. - expression: String, + expression: MatchAndReplaceExpression<'static>, /// Strip off anything not explicitly matched for while replacting. #[clap(short, long)] strip: bool, } -fn handle_mrp_replacement<'e>(paths: &'e [PathBuf], replacer: MatchAndReplacer<'e>, dry_run: bool) { - paths - .iter() - .filter_map(|p| { - let str = p.to_str(); - - if str.is_none() { - eprintln!("Path is invalid unicode: {:?}", p); - } - - return str; - }) - .map(|p| (p, replacer.apply(p))) - .filter_map(|(from, to)| to.map(|t| (from, t))) - .for_each(|(from, to)| { - if dry_run { - println!("{:?} -> {:?}", from, to); - } else { - if let Err(err) = std::fs::rename(from, to.to_string()) { - eprintln!("{:?}: {}", from, err); - } - } - }); -} - #[derive(Debug, Args, Clone)] struct RegexArgs { /// The regex pattern with which to search. @@ -103,26 +92,8 @@ struct RegexArgs { replacement: String, } -fn handle_regex_replacement(args: &RegexArgs, paths: &[PathBuf], dry_run: bool) { - let transform = |name| { - return ( - name, - args.pattern.replace(name, &args.replacement).to_string(), - ); - }; - - paths - .iter() - .for_each(|path| match path.to_str().map(transform) { - None => eprintln!("Path is invalid unicode: {:?}", path), - Some((from, to)) => { - if dry_run { - println!("Rename {:?} to {:?}", path, to); - } else { - if let Err(err) = std::fs::rename(from, to) { - eprintln!("{}: {}", from, err); - } - } - } - }) +impl<'s> MatchAndReplaceStrategy<'s> for RegexArgs { + fn apply(&self, value: &'s str) -> Option> { + Some(self.pattern.replace(value, self.replacement.clone())) + } }