diff --git a/Cargo.lock b/Cargo.lock index 9f7cd78dd5..324b645471 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1399,6 +1399,17 @@ dependencies = [ "num-traits", ] +[[package]] +name = "fancy-regex" +version = "0.18.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e1e1dacd0d2082dfcf1351c4bdd566bbe89a2b263235a2b50058f1e130a47277" +dependencies = [ + "bit-set", + "regex-automata", + "regex-syntax", +] + [[package]] name = "fastnoise-lite" version = "1.1.1" @@ -4452,9 +4463,9 @@ dependencies = [ [[package]] name = "regex-automata" -version = "0.4.10" +version = "0.4.14" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6b9458fa0bfeeac22b5ca447c63aaf45f28439a709ccd244698632f9aa6394d6" +checksum = "6e1dd4122fc1595e8162618945476892eefca7b88c52820e74af6262213cae8f" dependencies = [ "aho-corasick", "memchr", @@ -5500,6 +5511,7 @@ dependencies = [ "convert_case 0.8.0", "core-types", "dyn-any", + "fancy-regex", "glam", "log", "node-macro", diff --git a/Cargo.toml b/Cargo.toml index de565064cb..802a5acce1 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -107,6 +107,7 @@ bitflags = { version = "2.4", features = ["serde"] } ctor = "0.2" convert_case = "0.8" titlecase = "3.6" +fancy-regex = "0.18.0" unicode-segmentation = "1.13.2" indoc = "2.0.5" derivative = "2.2" diff --git a/editor/src/messages/portfolio/document/node_graph/document_node_definitions.rs b/editor/src/messages/portfolio/document/node_graph/document_node_definitions.rs index 33d6a90571..beebac4cbb 100644 --- a/editor/src/messages/portfolio/document/node_graph/document_node_definitions.rs +++ b/editor/src/messages/portfolio/document/node_graph/document_node_definitions.rs @@ -1489,6 +1489,113 @@ fn document_node_definitions() -> HashMap) + NodeInput::node(NodeId(2), 0), + ], + nodes: [ + // Node 0: regex_find proto node — returns Vec of [whole_match, ...capture_groups] + DocumentNode { + inputs: vec![ + NodeInput::import(concrete!(String), 0), + NodeInput::import(concrete!(String), 1), + NodeInput::import(concrete!(f64), 2), + NodeInput::import(concrete!(bool), 3), + NodeInput::import(concrete!(bool), 4), + ], + implementation: DocumentNodeImplementation::ProtoNode(text_nodes::regex::regex_find::IDENTIFIER), + ..Default::default() + }, + // Node 1: index_elements at index 0 — extracts the whole match as a String + DocumentNode { + inputs: vec![NodeInput::node(NodeId(0), 0), NodeInput::value(TaggedValue::F64(0.), false)], + implementation: DocumentNodeImplementation::ProtoNode(graphic::index_elements::IDENTIFIER), + ..Default::default() + }, + // Node 2: omit_element at index 0 — returns capture groups as Vec + DocumentNode { + inputs: vec![NodeInput::node(NodeId(0), 0), NodeInput::value(TaggedValue::F64(0.), false)], + implementation: DocumentNodeImplementation::ProtoNode(graphic::omit_element::IDENTIFIER), + ..Default::default() + }, + ] + .into_iter() + .enumerate() + .map(|(id, node)| (NodeId(id as u64), node)) + .collect(), + ..Default::default() + }), + inputs: vec![ + NodeInput::value(TaggedValue::String(String::new()), true), + NodeInput::value(TaggedValue::String(String::new()), false), + NodeInput::value(TaggedValue::F64(0.), false), + NodeInput::value(TaggedValue::Bool(false), false), + NodeInput::value(TaggedValue::Bool(false), false), + ], + ..Default::default() + }, + persistent_node_metadata: DocumentNodePersistentMetadata { + input_metadata: vec![ + ("String", "The string to search within.").into(), + ("Pattern", "The regular expression pattern to search for.").into(), + ( + "Match Index", + "Which non-overlapping occurrence of the pattern to return, starting from 0 for the first match. Negative indices count backwards from the last match.", + ) + .into(), + ("Case Insensitive", "Match letters regardless of case.").into(), + ("Multiline", "Make `^` and `$` match the start and end of each line, not just the whole string.").into(), + ], + output_names: vec!["Match".to_string(), "Captures".to_string()], + network_metadata: Some(NodeNetworkMetadata { + persistent_metadata: NodeNetworkPersistentMetadata { + node_metadata: [ + DocumentNodeMetadata { + persistent_metadata: DocumentNodePersistentMetadata { + node_type_metadata: NodeTypePersistentMetadata::node(IVec2::new(0, 0)), + ..Default::default() + }, + ..Default::default() + }, + DocumentNodeMetadata { + persistent_metadata: DocumentNodePersistentMetadata { + node_type_metadata: NodeTypePersistentMetadata::node(IVec2::new(8, 0)), + ..Default::default() + }, + ..Default::default() + }, + DocumentNodeMetadata { + persistent_metadata: DocumentNodePersistentMetadata { + node_type_metadata: NodeTypePersistentMetadata::node(IVec2::new(8, 2)), + ..Default::default() + }, + ..Default::default() + }, + ] + .into_iter() + .enumerate() + .map(|(id, node)| (NodeId(id as u64), node)) + .collect(), + ..Default::default() + }, + ..Default::default() + }), + ..Default::default() + }, + }, + description: Cow::Borrowed( + r#"Finds a portion of the string matching a regular expression pattern. With "Match Index" at its default 0, it selects the first non-overlapping occurrence, but others may be selected. Capture groups, if any, are produced as a list in the "Captures" output."#, + ), + properties: None, + }, // Aims for interoperable compatibility with: // https://www.adobe.com/devnet-apps/photoshop/fileformatashtml/#:~:text=levl%27%20%3D%20Levels-,%27curv%27%20%3D%20Curves,-%27expA%27%20%3D%20Exposure // https://www.adobe.com/devnet-apps/photoshop/fileformatashtml/#:~:text=Max%20input%20range-,Curves,-Curves%20settings%20files diff --git a/node-graph/libraries/graphic-types/src/graphic.rs b/node-graph/libraries/graphic-types/src/graphic.rs index 001c0c33a2..878d702fdf 100644 --- a/node-graph/libraries/graphic-types/src/graphic.rs +++ b/node-graph/libraries/graphic-types/src/graphic.rs @@ -397,6 +397,41 @@ impl AtIndex for Table { } } +pub trait OmitIndex { + fn omit_index(&self, index: usize) -> Self; + fn omit_index_from_end(&self, index: usize) -> Self; +} +impl OmitIndex for Vec { + fn omit_index(&self, index: usize) -> Self { + self.iter().enumerate().filter(|(i, _)| *i != index).map(|(_, v)| v.clone()).collect() + } + + fn omit_index_from_end(&self, index: usize) -> Self { + if index == 0 || index > self.len() { + return self.clone(); + } + self.omit_index(self.len() - index) + } +} +impl OmitIndex for Table { + fn omit_index(&self, index: usize) -> Self { + let mut result = Self::default(); + for (i, row) in self.iter().enumerate() { + if i != index { + result.push(row.into_cloned()); + } + } + result + } + + fn omit_index_from_end(&self, index: usize) -> Self { + if index == 0 || index > self.len() { + return self.clone(); + } + self.omit_index(self.len() - index) + } +} + // TODO: Eventually remove this migration document upgrade code pub fn migrate_graphic<'de, D: serde::Deserializer<'de>>(deserializer: D) -> Result, D::Error> { use serde::Deserialize; diff --git a/node-graph/nodes/graphic/src/graphic.rs b/node-graph/nodes/graphic/src/graphic.rs index f939df8ee8..c370b25a5f 100644 --- a/node-graph/nodes/graphic/src/graphic.rs +++ b/node-graph/nodes/graphic/src/graphic.rs @@ -46,6 +46,39 @@ where .unwrap_or_default() } +/// Returns the collection with the element at the specified index removed. +/// If no value exists at that index, the collection is returned unchanged. +#[node_macro::node(category("General"))] +pub fn omit_element( + _: impl Ctx, + /// The collection of data, such as a list or table. + #[implementations( + Vec, + Vec, + Vec, + Vec, + Vec, + Table, + Table, + Table, + Table>, + Table>, + Table, + Table, + )] + collection: T, + /// The index of the item to remove, starting from 0 for the first item. Negative indices count backwards from the end of the collection, starting from -1 for the last item. + index: SignedInteger, +) -> T { + let index = index as i32; + + if index < 0 { + collection.omit_index_from_end(-index as usize) + } else { + collection.omit_index(index as usize) + } +} + #[node_macro::node(category("General"))] async fn map( ctx: impl Ctx + CloneVarArgs + ExtractAll, diff --git a/node-graph/nodes/text/Cargo.toml b/node-graph/nodes/text/Cargo.toml index 2216c488e9..e5558c741d 100644 --- a/node-graph/nodes/text/Cargo.toml +++ b/node-graph/nodes/text/Cargo.toml @@ -26,6 +26,7 @@ log = { workspace = true } serde_json = { workspace = true } convert_case = { workspace = true } titlecase = { workspace = true } +fancy-regex = { workspace = true } unicode-segmentation = { workspace = true } # Optional workspace dependencies diff --git a/node-graph/nodes/text/src/lib.rs b/node-graph/nodes/text/src/lib.rs index ad808ac7c1..50f14851c3 100644 --- a/node-graph/nodes/text/src/lib.rs +++ b/node-graph/nodes/text/src/lib.rs @@ -1,6 +1,7 @@ mod font_cache; pub mod json; mod path_builder; +pub mod regex; mod text_context; mod to_path; diff --git a/node-graph/nodes/text/src/regex.rs b/node-graph/nodes/text/src/regex.rs new file mode 100644 index 0000000000..4fe1373ade --- /dev/null +++ b/node-graph/nodes/text/src/regex.rs @@ -0,0 +1,201 @@ +use core_types::Ctx; +use core_types::registry::types::SignedInteger; + +/// Checks whether the string contains a match for the given regular expression pattern. Optionally restricts the match to only the start and/or end of the string. +#[node_macro::node(category("Text: Regex"))] +fn regex_contains( + _: impl Ctx, + /// The string to search within. + string: String, + /// The regular expression pattern to search for. + pattern: String, + /// Match letters regardless of case. + case_insensitive: bool, + /// Make `^` and `$` match the start and end of each line, not just the whole string. + multiline: bool, + /// Only match if the pattern appears at the start of the string. + at_start: bool, + /// Only match if the pattern appears at the end of the string. + at_end: bool, +) -> bool { + let flags = match (case_insensitive, multiline) { + (false, false) => "", + (true, false) => "(?i)", + (false, true) => "(?m)", + (true, true) => "(?im)", + }; + let anchored_pattern = match (at_start, at_end) { + (true, true) => format!("{flags}\\A(?:{pattern})\\z"), + (true, false) => format!("{flags}\\A(?:{pattern})"), + (false, true) => format!("{flags}(?:{pattern})\\z"), + (false, false) => format!("{flags}{pattern}"), + }; + + let Ok(regex) = fancy_regex::Regex::new(&anchored_pattern) else { + log::error!("Invalid regex pattern: {pattern}"); + return false; + }; + + regex.is_match(&string).unwrap_or(false) +} + +/// Replaces matches of a regular expression pattern in the string. The replacement string can reference captures: `$0` for the whole match and `$1`, `$2`, etc. for capture groups. +#[node_macro::node(category("Text: Regex"))] +fn regex_replace( + _: impl Ctx, + string: String, + /// The regular expression pattern to search for. + pattern: String, + /// The replacement string. Use `$0` for the whole match and `$1`, `$2`, etc. for capture groups. + replacement: String, + /// Replace all matches. When disabled, only the first match is replaced. + #[default(true)] + replace_all: bool, + /// Match letters regardless of case. + case_insensitive: bool, + /// Make `^` and `$` match the start and end of each line, not just the whole string. + multiline: bool, +) -> String { + let flags = match (case_insensitive, multiline) { + (false, false) => "", + (true, false) => "(?i)", + (false, true) => "(?m)", + (true, true) => "(?im)", + }; + let full_pattern = format!("{flags}{pattern}"); + + let Ok(regex) = fancy_regex::Regex::new(&full_pattern) else { + log::warn!("Invalid regex pattern: {pattern}"); + return string; + }; + + if replace_all { + regex.replace_all(&string, replacement.as_str()).into_owned() + } else { + regex.replace(&string, replacement.as_str()).into_owned() + } +} + +/// Finds a regex match in the string and returns its components. The result is a list where the first element is the whole match (`$0`) and subsequent elements are the capture groups (`$1`, `$2`, etc., if any). +/// +/// The match index selects which non-overlapping occurrence to return (0 for the first match). Returns an empty list if no match is found at the given index. +#[node_macro::node(category(""))] +fn regex_find( + _: impl Ctx, + /// The string to search within. + string: String, + /// The regular expression pattern to search for. + pattern: String, + /// Which non-overlapping occurrence of the pattern to return, starting from 0 for the first match. Negative indices count backwards from the last match. + match_index: SignedInteger, + /// Match letters regardless of case. + case_insensitive: bool, + /// Make `^` and `$` match the start and end of each line, not just the whole string. + multiline: bool, +) -> Vec { + if pattern.is_empty() { + return Vec::new(); + } + + let flags = match (case_insensitive, multiline) { + (false, false) => "", + (true, false) => "(?i)", + (false, true) => "(?m)", + (true, true) => "(?im)", + }; + let full_pattern = format!("{flags}{pattern}"); + + let Ok(regex) = fancy_regex::Regex::new(&full_pattern) else { + log::error!("Invalid regex pattern: {pattern}"); + return Vec::new(); + }; + + // Collect all matches since we need to support negative indexing + let matches: Vec<_> = regex.captures_iter(&string).filter_map(|c| c.ok()).collect(); + + let match_index = match_index as i32; + let resolved_index = if match_index < 0 { + let from_end = (-match_index) as usize; + if from_end > matches.len() { + return Vec::new(); + } + matches.len() - from_end + } else { + match_index as usize + }; + + let Some(captures) = matches.get(resolved_index) else { + return Vec::new(); + }; + + // Index 0 is the whole match, 1+ are capture groups + (0..captures.len()).map(|i| captures.get(i).map_or(String::new(), |m| m.as_str().to_string())).collect() +} + +/// Finds all non-overlapping matches of a regular expression pattern in the string, returning a list of the matched substrings. +#[node_macro::node(category("Text: Regex"))] +fn regex_find_all( + _: impl Ctx, + /// The string to search within. + string: String, + /// The regular expression pattern to search for. + pattern: String, + /// Match letters regardless of case. + case_insensitive: bool, + /// Make `^` and `$` match the start and end of each line, not just the whole string. + multiline: bool, +) -> Vec { + if pattern.is_empty() { + return Vec::new(); + } + + let flags = match (case_insensitive, multiline) { + (false, false) => "", + (true, false) => "(?i)", + (false, true) => "(?m)", + (true, true) => "(?im)", + }; + let full_pattern = format!("{flags}{pattern}"); + + let Ok(regex) = fancy_regex::Regex::new(&full_pattern) else { + log::error!("Invalid regex pattern: {pattern}"); + return Vec::new(); + }; + + regex.find_iter(&string).filter_map(|m| m.ok()).map(|m| m.as_str().to_string()).collect() +} + +/// Splits a string into a list of substrings pulled from between separator characters as matched by a regular expression. +/// +/// For example, splitting "Three, two, one... LIFTOFF" with pattern `\W+` (non-word characters) produces `["Three", "two", "one", "LIFTOFF"]`. +#[node_macro::node(category("Text: Regex"))] +fn regex_split( + _: impl Ctx, + /// The string to split into substrings. + string: String, + /// The regular expression pattern to split on. Matches are consumed and not included in the output. + pattern: String, + /// Match letters regardless of case. + case_insensitive: bool, + /// Make `^` and `$` match the start and end of each line, not just the whole string. + multiline: bool, +) -> Vec { + if pattern.is_empty() { + return vec![string]; + } + + let flags = match (case_insensitive, multiline) { + (false, false) => "", + (true, false) => "(?i)", + (false, true) => "(?m)", + (true, true) => "(?im)", + }; + let full_pattern = format!("{flags}{pattern}"); + + let Ok(regex) = fancy_regex::Regex::new(&full_pattern) else { + log::error!("Invalid regex pattern: {pattern}"); + return vec![string]; + }; + + regex.split(&string).filter_map(|s| s.ok()).map(|s| s.to_string()).collect() +} diff --git a/tools/node-docs/src/utility.rs b/tools/node-docs/src/utility.rs index 18d5755cc1..27fd1caaa6 100644 --- a/tools/node-docs/src/utility.rs +++ b/tools/node-docs/src/utility.rs @@ -34,6 +34,7 @@ pub fn category_description(category: &str) -> &str { "Raster: Pattern" => "Nodes in this category generate procedural raster patterns, fractals, textures, and noise.", "Raster" => "Nodes in this category deal with fundamental raster image operations.", "Text" => "Nodes in this category support the manipulation, formatting, and rendering of text strings.", + "Text: Regex" => "Nodes in this category perform string operations involving regular expressions, such as pattern matching and replacement.", "Text: JSON" => "Nodes in this category perform string operations involving JSON data, such as parsing and stringifying.", "Value" => "Nodes in this category supply data values of common types such as numbers, colors, booleans, and strings.", "Vector: Measure" => "Nodes in this category perform measurements and analysis on vector graphics, such as length/area calculations, path traversal, and hit testing.",