diff --git a/CHANGELOG.md b/CHANGELOG.md index 1efe33f..ce4257e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,7 @@ +### next +- `regex_if!` and `bytes_regex_if!` +- `regex_switch!` and `bytes_regex_switch!` + ### v3.1.0 - 2023-11-09 - bytes_ prefixed macros create instances of `bytes::Regex` - Fix #30 diff --git a/README.md b/README.md index 60cc6ee..56f0d50 100644 --- a/README.md +++ b/README.md @@ -31,10 +31,11 @@ Other macros are specialized for testing a match, replacing with concise closure * `regex_captures!` * `regex_replace!` * `regex_replace_all!` +* `regex_switch!` They support the `B` flag for the `regex::bytes::Regex` variant. -All macros exist with a `bytes_` prefix for building `bytes::Regex`, so you also have `bytes_regex!`, `bytes_regex_is_match!`, `bytes_regex_find!`, `bytes_regex_captures!`, `bytes_regex_replace!`, and `bytes_regex_replace_all!`. +All macros exist with a `bytes_` prefix for building `bytes::Regex`, so you also have `bytes_regex!`, `bytes_regex_is_match!`, `bytes_regex_find!`, `bytes_regex_captures!`, `bytes_regex_replace!`, `bytes_regex_replace_all!`, and `bytes_regex_switch!`. Some structs of the regex crate are reexported to ease dependency managment. The regex crate itself is also reexported, to avoid the need to synchronize the versions/flavor (see [Features](#features_and_reexport) below) @@ -130,7 +131,7 @@ You receive `""` for optional groups with no value. The [regex_replace!] and [regex_replace_all!] macros bring once compilation and compilation time checks to the `replace` and `replace_all` functions. -## Replacing with a closure +## Replace with a closure ```rust use lazy_regex::regex_replace_all; @@ -147,7 +148,7 @@ The number of arguments given to the closure is checked at compilation time to m If it doesn't match you get, at compilation time, a clear error message. 
-## Replacing with another kind of Replacer +## Replace with another kind of Replacer ```rust use lazy_regex::regex_replace_all; @@ -156,6 +157,31 @@ let output = regex_replace_all!("U", text, "O"); assert_eq!(&output, "OwO"); ``` +# Switch over regexes + +Execute the expression bound to the first matching regex, with named captured groups declared as variables: + +```rust +use lazy_regex::regex_switch; +pub enum ScrollCommand { + Top, + Bottom, + Lines(i32), + Pages(i32), +} +impl std::str::FromStr for ScrollCommand { + type Err = (); + fn from_str(s: &str) -> Result<Self, Self::Err> { + regex_switch!(s, + "^scroll-to-top$" => Self::Top, + "^scroll-to-bottom$" => Self::Bottom, + r#"^scroll-lines?\((?<n>[+-]?\d{1,4})\)$"# => Self::Lines(n.parse().unwrap()), + r#"^scroll-pages?\((?<n>[+-]?\d{1,4})\)$"# => Self::Pages(n.parse().unwrap()), + ).ok_or(()) + } +} +``` + # Shared lazy static When a regular expression is used in several functions, you sometimes don't want diff --git a/src/lib.rs b/src/lib.rs index 578528b..ccb9481 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -16,10 +16,11 @@ But most often, you won't even use the `regex!` macro but the other macros which * [regex_captures!] * [regex_replace!] * [regex_replace_all!] +* [regex_switch!] They support the `B` flag for the `regex::bytes::Regex` variant. -All macros exist with a `bytes_` prefix for building `bytes::Regex`, so you also have [bytes_regex!], [bytes_regex_is_match!], [bytes_regex_find!], [bytes_regex_captures!], [bytes_regex_replace!], and [bytes_regex_replace_all!]. +All macros exist with a `bytes_` prefix for building `bytes::Regex`, so you also have [bytes_regex!], [bytes_regex_is_match!], [bytes_regex_find!], [bytes_regex_captures!], [bytes_regex_replace!], [bytes_regex_replace_all!], and [bytes_regex_switch!]. Some structs of the regex crate are reexported to ease dependency managment. @@ -129,7 +130,7 @@ doc: [regex_captures!] The [regex_replace!] and [regex_replace_all!] 
macros bring once compilation and compilation time checks to the `replace` and `replace_all` functions. -## Replacing with a closure +## Replace with a closure ```rust use lazy_regex::regex_replace_all; @@ -146,7 +147,7 @@ The number of arguments given to the closure is checked at compilation time to m If it doesn't match you get, at compilation time, a clear error message. -## Replacing with another kind of Replacer +## Replace with another kind of Replacer ```rust use lazy_regex::regex_replace_all; @@ -155,6 +156,32 @@ let output = regex_replace_all!("U", text, "O"); assert_eq!(&output, "OwO"); ``` +# Switch over regexes + +Execute the expression bound to the first matching regex, with named captured groups declared as variables: + +```rust +use lazy_regex::regex_switch; +pub enum ScrollCommand { + Top, + Bottom, + Lines(i32), + Pages(i32), +} +impl std::str::FromStr for ScrollCommand { + type Err = (); + fn from_str(s: &str) -> Result<Self, Self::Err> { + regex_switch!(s, + "^scroll-to-top$" => Self::Top, + "^scroll-to-bottom$" => Self::Bottom, + r#"^scroll-lines?\((?<n>[+-]?\d{1,4})\)$"# => Self::Lines(n.parse().unwrap()), + r#"^scroll-pages?\((?<n>[+-]?\d{1,4})\)$"# => Self::Pages(n.parse().unwrap()), + ).ok_or(()) + } +} +``` + +doc: [regex_switch!] 
# Shared lazy static @@ -187,16 +214,20 @@ pub use { regex, regex_captures, regex_find, + regex_if, regex_is_match, regex_replace, regex_replace_all, + regex_switch, bytes_lazy_regex, bytes_regex, bytes_regex_captures, bytes_regex_find, + bytes_regex_if, bytes_regex_is_match, bytes_regex_replace, bytes_regex_replace_all, + bytes_regex_switch, }, once_cell::sync::Lazy, }; diff --git a/src/proc_macros/args.rs b/src/proc_macros/args.rs index f2f15be..c51ddc6 100644 --- a/src/proc_macros/args.rs +++ b/src/proc_macros/args.rs @@ -1,6 +1,13 @@ use syn::{ - parse::{Parse, ParseStream, Result}, - Expr, ExprClosure, LitStr, Token, + parse::{ + Parse, + ParseStream, + Result, + }, + Expr, + ExprClosure, + LitStr, + Token, }; /// Wrapping of the two arguments given to one of the @@ -56,3 +63,68 @@ impl Parse for ReplaceArgs { } } +/// Wrapping of the arguments given to a regex_if macro +pub(crate) struct RexIfArgs { + pub regex_str: LitStr, + pub value: Expr, // this expression is (or produces) the text to search or check + pub then: Expr, +} + +impl Parse for RexIfArgs { + fn parse(input: ParseStream<'_>) -> Result { + let regex_str = input.parse::()?; + input.parse::()?; + let value = input.parse::()?; + input.parse::()?; + let then = input.parse::()?; + let _ = input.parse::(); // allow a trailing comma + Ok(Self { + regex_str, + value, + then, + }) + } +} + +/// Wrapping of the arguments given to a regex_switch macro +pub(crate) struct RexSwitchArgs { + pub value: Expr, // this expression is (or produces) the text to search or check + pub arms: Vec, +} +pub(crate) struct RexSwitchArmArgs { + pub regex_str: LitStr, + pub then: Expr, +} + +impl Parse for RexSwitchArgs { + fn parse(input: ParseStream<'_>) -> Result { + let value = input.parse::()?; + input.parse::()?; + let mut arms = Vec::new(); + loop { + let lookahead = input.lookahead1(); + if lookahead.peek(LitStr) { + let arm = input.parse::()?; + arms.push(arm); + } else { + break; + } + } + Ok(Self { + value, + arms, 
+ }) + } +} +impl Parse for RexSwitchArmArgs { + fn parse(input: ParseStream<'_>) -> Result { + let regex_str = input.parse::()?; + input.parse::]>()?; + let then = input.parse::()?; + let _ = input.parse::(); // allow a trailing comma + Ok(Self { + regex_str, + then, + }) + } +} diff --git a/src/proc_macros/mod.rs b/src/proc_macros/mod.rs index ccaabc4..3d01c68 100644 --- a/src/proc_macros/mod.rs +++ b/src/proc_macros/mod.rs @@ -436,3 +436,219 @@ pub fn regex_replace_all(input: TokenStream) -> TokenStream { pub fn bytes_regex_replace_all(input: TokenStream) -> TokenStream { bytes_replacen(input, 0) } + +/// Return an Option<T>, with T being the type returned by the block or expression +/// given as third argument. +/// +/// If the regex matches, execute the expression and return it as Some. +/// Return None if the regex doesn't match. +/// +/// ``` +/// let grey = regex_if!(r#"^gr(a|e)y\((?<level>\d{1,2})\)$"#, "grey(22)", { +/// level.parse().unwrap() +/// }); +/// assert_eq!(grey, Some(22)); +/// ``` +#[proc_macro] +pub fn regex_if(input: TokenStream) -> TokenStream { + let RexIfArgs { + regex_str, + value, + then, + } = parse_macro_input!(input as RexIfArgs); + let regex_code = match RegexCode::from_lit_str(regex_str, false) { + Ok(r) => r, + Err(e) => { + return e.to_compile_error().into(); + } + }; + let statick = regex_code.statick(); + let assigns = regex_code.named_groups().into_iter().map(|(idx, name)| { + let var_name = syn::Ident::new(name, proc_macro2::Span::call_site()); + quote! { + let #var_name: &str = caps.get(#idx).map_or("", |c| c.as_str()); + } + }); + quote! 
{{ + #statick; + match RE.captures(#value) { + Some(caps) => { + #(#assigns);* + Some(#then) + } + None => None, + } + }}.into() +} + +#[proc_macro] +pub fn bytes_regex_if(input: TokenStream) -> TokenStream { + let RexIfArgs { + regex_str, + value, + then, + } = parse_macro_input!(input as RexIfArgs); + let regex_code = match RegexCode::from_lit_str(regex_str, true) { + Ok(r) => r, + Err(e) => { + return e.to_compile_error().into(); + } + }; + let statick = regex_code.statick(); + let assigns = regex_code.named_groups().into_iter().map(|(idx, name)| { + let var_name = syn::Ident::new(name, proc_macro2::Span::call_site()); + quote! { + let #var_name: &[u8] = caps.get(#idx).map_or(&b""[..], |c| c.as_bytes()); + } + }); + quote! {{ + #statick; + match RE.captures(#value) { + Some(caps) => { + #(#assigns);* + Some(#then) + } + None => None, + } + }}.into() +} + +/// Define a set of lazy static statically compiled regexes, with a block +/// or expression for each one. The first matching expression is computed +/// with the named capture groups declaring `&str` variables available for this +/// computation. +/// If no regex matches, return `None`. +/// +/// Example: +/// ``` +/// #[derive(Debug, PartialEq)] +/// enum Color { +/// Grey(u8), +/// Pink, +/// Rgb(u8, u8, u8), +/// } +/// +/// let input = "rgb(1, 2, 3)"; +/// let color = regex_switch!(input, +/// r#"^gr(a|e)y\((?\d{1,2})\)$"#i => { +/// Color::Grey(level.parse()?) 
+/// } +/// "^pink"i => Color::Pink, +/// r#"^rgb\((?<r>\d+),\s*(?<g>\d+),\s*(?<b>\d+),?\)$"#i => Color::Rgb ( +/// r.parse()?, +/// g.parse()?, +/// b.parse()?, +/// ), +/// ); +/// assert_eq!(color, Some(Color::Rgb(1, 2, 3))); +/// +/// ``` +#[proc_macro] +pub fn regex_switch(input: TokenStream) -> TokenStream { + let RexSwitchArgs { + value, + arms, + } = parse_macro_input!(input as RexSwitchArgs); + let mut q_arms = Vec::new(); + for RexSwitchArmArgs { regex_str, then } in arms.into_iter() { + let regex_code = match RegexCode::from_lit_str(regex_str, false) { + Ok(r) => r, + Err(e) => { + return e.to_compile_error().into(); + } + }; + let statick = regex_code.statick(); + let assigns = regex_code.named_groups().into_iter().map(|(idx, name)| { + let var_name = syn::Ident::new(name, proc_macro2::Span::call_site()); + quote! { + let #var_name: &str = caps.get(#idx).map_or("", |c| c.as_str()); + } + }); + q_arms.push( + quote! {{ + #statick; + if let Some(caps) = RE.captures(#value) { + #(#assigns);* + let output = Some(#then); + break 'switch output; + } + }} + ); + } + quote! {{ + 'switch: { + #(#q_arms)* + None + } + }}.into() +} + +/// Define a set of lazy static statically compiled regexes, with a block +/// or expression for each one. The first matching expression is computed +/// with the named capture groups declaring `&[u8]` variables available for this +/// computation. +/// If no regex matches, return `None`. +/// +/// Example: +/// ``` +/// #[derive(Debug, PartialEq)] +/// enum Color { +/// Grey(u8), +/// Pink, +/// Rgb(u8, u8, u8), +/// } +/// +/// let input = "rgb(1, 2, 3)"; +/// let color = regex_switch!(input, +/// r#"^gr(a|e)y\((?<level>\d{1,2})\)$"#i => { +/// Color::Grey(level.parse()?) 
+/// } +/// "^pink"i => Color::Pink, +/// r#"^rgb\((?\d+),\s*(?\d+),\s*(?\d+),?\)$"#i => Color::Rgb ( +/// r.parse()?, +/// g.parse()?, +/// b.parse()?, +/// ), +/// ); +/// assert_eq!(color, Some(Color::Rgb(1, 2, 3))); +/// +/// ``` +#[proc_macro] +pub fn bytes_regex_switch(input: TokenStream) -> TokenStream { + let RexSwitchArgs { + value, + arms, + } = parse_macro_input!(input as RexSwitchArgs); + let mut q_arms = Vec::new(); + for RexSwitchArmArgs { regex_str, then } in arms.into_iter() { + let regex_code = match RegexCode::from_lit_str(regex_str, true) { + Ok(r) => r, + Err(e) => { + return e.to_compile_error().into(); + } + }; + let statick = regex_code.statick(); + let assigns = regex_code.named_groups().into_iter().map(|(idx, name)| { + let var_name = syn::Ident::new(name, proc_macro2::Span::call_site()); + quote! { + let #var_name: &[u8] = caps.get(#idx).map_or(&b""[..], |c| c.as_bytes()); + } + }); + q_arms.push( + quote! {{ + #statick; + if let Some(caps) = RE.captures(#value) { + #(#assigns);* + let output = Some(#then); + break 'switch output; + } + }} + ); + } + quote! 
{{ + 'switch: { + #(#q_arms)* + None + } + }}.into() +} diff --git a/src/proc_macros/regex_code.rs b/src/proc_macros/regex_code.rs index 917cdff..e06b02a 100644 --- a/src/proc_macros/regex_code.rs +++ b/src/proc_macros/regex_code.rs @@ -103,4 +103,18 @@ impl RegexCode { RegexInstance::Bytes(regex) => regex.captures_len(), } } + pub fn named_groups(&self) -> Vec<(usize, &str)> { + match &self.regex { + RegexInstance::Regex(regex) => regex + .capture_names() + .enumerate() + .filter_map(|(i, n)| Some((i, n?))) + .collect(), + RegexInstance::Bytes(regex) => regex + .capture_names() + .enumerate() + .filter_map(|(i, n)| Some((i, n?))) + .collect(), + } + } } diff --git a/tests/regex_if.rs b/tests/regex_if.rs new file mode 100644 index 0000000..87fde3a --- /dev/null +++ b/tests/regex_if.rs @@ -0,0 +1,49 @@ +use { + lazy_regex::{ + bytes_regex_if, + regex_if, + }, + std::num::ParseIntError, +}; + +#[test] +fn test_regex_if() { + fn extract_grey_level(s: &str) -> Option { + regex_if!( + r#"^gr(a|e)y\((?\d{1,2})\)$"#, + s, + level.parse().unwrap(), + ) + } + assert_eq!(extract_grey_level("gray(15)"), Some(15)); + assert_eq!(extract_grey_level("grey(22)"), Some(22)); + assert_eq!(extract_grey_level("grey(268)"), None); + assert_eq!(extract_grey_level("red"), None); +} + +#[test] +fn test_regex_if_with_error_handling() { + fn extract_grey_level(s: &str) -> Result, ParseIntError> { + let v = regex_if!(r#"^gr(a|e)y\((?\d{1,3})\)$"#, s, level.parse()?); + Ok(v) + } + assert_eq!(extract_grey_level("gray(15)"), Ok(Some(15))); + assert!(extract_grey_level("grey(268)").is_err()); + assert_eq!(extract_grey_level("red"), Ok(None)); +} + +#[test] +fn test_bytes_regex_if() { + fn extract_grey_level(s: &[u8]) -> Option { + bytes_regex_if!( + r#"^gr(a|e)y\((?\d{1,2})\)$"#, + s, + std::str::from_utf8(level).unwrap().parse().unwrap() + ) + } + assert_eq!(extract_grey_level(b"gray(15)"), Some(15)); + assert_eq!(extract_grey_level(b"grey(22)"), Some(22)); + 
assert_eq!(extract_grey_level(b"grey(268)"), None); + assert_eq!(extract_grey_level(b"red"), None); +} + diff --git a/tests/regex_switch.rs b/tests/regex_switch.rs new file mode 100644 index 0000000..b2c8881 --- /dev/null +++ b/tests/regex_switch.rs @@ -0,0 +1,84 @@ +use { + lazy_regex::*, + std::num::ParseIntError, +}; + +#[test] +fn test_regex_switch() { + #[derive(Debug, PartialEq, Eq)] + enum Color { + Grey(u8), + Pink, + Rgb(u8, u8, u8), + } + fn read(s: &str) -> Option { + regex_switch!(s, + r#"^gr(a|e)y\((?\d{1,2})\)$"#i => { + Color::Grey(level.parse().unwrap()) + } + "^pink"i => Color::Pink, + r#"^rgb\((?\d+),\s*(?\d+),\s*(?\d+),?\)$"#i => Color::Rgb ( + r.parse().unwrap(), + g.parse().unwrap(), + b.parse().unwrap(), + ), + ) + } + assert_eq!(read("gray(15)"), Some(Color::Grey(15))); + assert_eq!(read("pInk"), Some(Color::Pink)); + assert_eq!(read("pinkie"), Some(Color::Pink)); + assert_eq!(read("red"), None); + assert_eq!(read("rgb(1,2,3)"), Some(Color::Rgb(1, 2, 3))); +} + +#[test] +fn test_regex_switch_with_error_handling() -> Result<(), ParseIntError> { + #[derive(Debug, PartialEq)] + enum Color { + Grey(u8), + Pink, + Rgb(u8, u8, u8), + } + let input = "RGB(1, 2, 3)"; + let color = regex_switch!(input, + r#"^gr(a|e)y\((?\d{1,2})\)$"#i => { + Color::Grey(level.parse()?) 
+ } + "^pink"i => Color::Pink, + r#"^rgb\((?\d+),\s*(?\d+),\s*(?\d+),?\)$"#i => Color::Rgb ( + r.parse()?, + g.parse()?, + b.parse()?, + ), + ); + assert_eq!(color, Some(Color::Rgb(1, 2, 3))); + Ok(()) +} + +#[test] +fn test_bytes_regex_switch() { + #[derive(Debug, PartialEq, Eq)] + enum Color { + Grey(u8), + Pink, + Rgb(u8, u8, u8), + } + fn read(s: &[u8]) -> Option { + bytes_regex_switch!(s, + r#"^gr(a|e)y\((?\d{1,2})\)$"#i => { + Color::Grey(std::str::from_utf8(level).unwrap().parse().unwrap()) + } + "^pink"i => Color::Pink, + r#"^rgb\((?\d+),\s*(?\d+),\s*(?\d+),?\)$"#i => Color::Rgb ( + std::str::from_utf8(r).unwrap().parse().unwrap(), + std::str::from_utf8(g).unwrap().parse().unwrap(), + std::str::from_utf8(b).unwrap().parse().unwrap(), + ), + ) + } + assert_eq!(read(b"gray(15)"), Some(Color::Grey(15))); + assert_eq!(read(b"pInk"), Some(Color::Pink)); + assert_eq!(read(b"pinkie"), Some(Color::Pink)); + assert_eq!(read(b"red"), None); + assert_eq!(read(b"rgb(1,2,3)"), Some(Color::Rgb(1, 2, 3))); +}