-
Notifications
You must be signed in to change notification settings - Fork 376
Add support for captured groups in Find & Replace #222
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 1 commit
37a2faf
86e3c03
1f1da92
c39628b
104a762
bf471dd
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -1048,7 +1048,8 @@ impl TextBuffer { | |
if let (Some(search), Some(..)) = (&mut self.search, &self.selection) { | ||
let search = search.get_mut(); | ||
if search.selection_generation == self.selection_generation { | ||
self.write(replacement.as_bytes(), true); | ||
let processed_replacement = self.get_regex_replacement(replacement); | ||
self.write(processed_replacement.as_bytes(), true); | ||
} | ||
} | ||
|
||
|
@@ -1062,7 +1063,6 @@ impl TextBuffer { | |
options: SearchOptions, | ||
replacement: &str, | ||
) -> apperr::Result<()> { | ||
let replacement = replacement.as_bytes(); | ||
let mut search = self.find_construct_search(pattern, options)?; | ||
let mut offset = 0; | ||
|
||
|
@@ -1071,7 +1071,9 @@ impl TextBuffer { | |
if !self.has_selection() { | ||
break; | ||
} | ||
self.write(replacement, true); | ||
|
||
let processed_replacement = self.get_regex_replacement(replacement); | ||
self.write(processed_replacement.as_bytes(), true); | ||
offset = self.cursor.offset; | ||
} | ||
|
||
|
@@ -2378,6 +2380,61 @@ impl TextBuffer { | |
pub fn read_forward(&self, off: usize) -> &[u8] { | ||
self.buffer.read_forward(off) | ||
} | ||
|
||
/// Processes the replacement string when using regex for capture groups. | ||
fn get_regex_replacement<'a>(&mut self, replacement: &'a str) -> Cow<'a, str> { | ||
let search = if let Some(search) = &mut self.search { | ||
search.get_mut() | ||
} else { | ||
return Cow::Borrowed(replacement); | ||
}; | ||
|
||
if !search.options.use_regex || !replacement.contains('$') { | ||
return Cow::Borrowed(replacement); | ||
} | ||
|
||
let scratch = scratch_arena(None); | ||
let mut result = String::with_capacity(replacement.len()); | ||
let mut chars = replacement.chars().peekable(); | ||
|
||
while let Some(ch) = chars.next() { | ||
match ch { | ||
'$' => { | ||
let mut digits = ArenaString::new_in(&scratch); | ||
|
||
while let Some(&next_ch) = chars.peek() { | ||
if digits.is_empty() && next_ch == '$' { | ||
// Consume the escaped dollar sign. | ||
chars.next(); | ||
break; | ||
} | ||
|
||
if !next_ch.is_ascii_digit() { | ||
break; | ||
} | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. One piece of feedback I can give early is that this is not quite correct, I believe. Try something like "$12345" in VS Code, for instance. It'll only recognize "$1" as the replacement but then use "2345" as a literal string. I believe this requires us to recognize here how many capture groups we have and then replace those. This may be outside the scope of what you have time to implement (it probably requires adding new ICU FFIs!), so please feel free to say so and we'll merge it as is for now. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. |
||
|
||
digits.push(next_ch); | ||
chars.next(); | ||
} | ||
|
||
if !digits.is_empty() { | ||
if let Ok(group_num) = digits.parse::<i32>() { | ||
if let Some(range) = search.regex.get_captured_group_range(group_num) { | ||
let mut out = Vec::new(); | ||
self.buffer.extract_raw(range.start, range.end, &mut out, 0); | ||
result.push_str(&String::from_utf8_lossy(&out)); | ||
} | ||
} | ||
} else { | ||
result.push(ch); | ||
} | ||
} | ||
_ => result.push(ch), | ||
} | ||
} | ||
|
||
Cow::Owned(result) | ||
} | ||
} | ||
|
||
pub enum Bom { | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -634,6 +634,22 @@ impl Regex { | |
let mut status = icu_ffi::U_ZERO_ERROR; | ||
unsafe { (f.uregex_reset64)(self.0, index as i64, &mut status) }; | ||
} | ||
|
||
/// Gets the text range of a captured group by index. | ||
pub fn get_captured_group_range(&mut self, group: i32) -> Option<Range<usize>> { | ||
let f = assume_loaded(); | ||
|
||
let mut status = icu_ffi::U_ZERO_ERROR; | ||
let start = unsafe { (f.uregex_start64)(self.0, group, &mut status) }; | ||
let end = unsafe { (f.uregex_end64)(self.0, group, &mut status) }; | ||
if status.is_failure() { | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. @lhecker can […] There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Yes, but the nice property of ICU is that its functions short-circuit if you pass in an error status. So, if […] |
||
return None; | ||
} | ||
|
||
let start = start.max(0); | ||
let end = end.max(start); | ||
Some(start as usize..end as usize) | ||
} | ||
} | ||
|
||
impl Iterator for Regex { | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
You can feel free to return an `ArenaString` or, better yet, a `Vec<u8, &Arena>` here. You can find various places in this code that demonstrate how to do that. Allocating in the arena is very cheap and copying memory is as well. 🙂 This may help you below because then you won't need `String::from_utf8_lossy`.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I've changed the function to return `ArenaString`, but I couldn't find a way to remove `String::from_utf8_lossy` without maybe changing too much other code, sorry. 😅