-
Notifications
You must be signed in to change notification settings - Fork 378
Add support for captured groups in Find & Replace #222
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 5 commits
37a2faf
86e3c03
1f1da92
c39628b
104a762
bf471dd
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -677,6 +677,31 @@ impl Regex { | |
let mut status = icu_ffi::U_ZERO_ERROR; | ||
unsafe { (f.uregex_reset64)(self.0, offset as i64, &mut status) }; | ||
} | ||
|
||
/// Gets captured group count. | ||
pub fn group_count(&mut self) -> i32 { | ||
let f = assume_loaded(); | ||
|
||
let mut status = icu_ffi::U_ZERO_ERROR; | ||
let count = unsafe { (f.uregex_groupCount)(self.0, &mut status) }; | ||
if status.is_failure() { 0 } else { count } | ||
} | ||
|
||
/// Gets the text range of a captured group by index. | ||
pub fn group(&mut self, group: i32) -> Option<Range<usize>> { | ||
let f = assume_loaded(); | ||
|
||
let mut status = icu_ffi::U_ZERO_ERROR; | ||
let start = unsafe { (f.uregex_start64)(self.0, group, &mut status) }; | ||
let end = unsafe { (f.uregex_end64)(self.0, group, &mut status) }; | ||
if status.is_failure() { | ||
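There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. @lhecker can `uregex_start64` fail and leave `status` set before `uregex_end64` is called? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Yes, but the nice property of ICU is that its functions short-circuit if you pass in an error status. So, if `uregex_start64` fails, `uregex_end64` returns immediately without doing any work, and the single `status.is_failure()` check afterwards covers both calls. |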
||
None | ||
} else { | ||
let start = start.max(0); | ||
let end = end.max(start); | ||
Some(start as usize..end as usize) | ||
} | ||
} | ||
} | ||
|
||
impl Iterator for Regex { | ||
|
@@ -691,15 +716,7 @@ impl Iterator for Regex { | |
return None; | ||
} | ||
|
||
let start = unsafe { (f.uregex_start64)(self.0, 0, &mut status) }; | ||
let end = unsafe { (f.uregex_end64)(self.0, 0, &mut status) }; | ||
if status.is_failure() { | ||
return None; | ||
} | ||
|
||
let start = start.max(0); | ||
let end = end.max(start); | ||
Some(start as usize..end as usize) | ||
self.group(0) | ||
} | ||
} | ||
|
||
|
@@ -900,6 +917,7 @@ struct LibraryFunctions { | |
uregex_setUText: icu_ffi::uregex_setUText, | ||
uregex_reset64: icu_ffi::uregex_reset64, | ||
uregex_findNext: icu_ffi::uregex_findNext, | ||
uregex_groupCount: icu_ffi::uregex_groupCount, | ||
uregex_start64: icu_ffi::uregex_start64, | ||
uregex_end64: icu_ffi::uregex_end64, | ||
} | ||
|
@@ -919,7 +937,7 @@ const LIBICUUC_PROC_NAMES: [&CStr; 10] = [ | |
]; | ||
|
||
// Found in libicui18n.so on UNIX, icuin.dll/icu.dll on Windows. | ||
const LIBICUI18N_PROC_NAMES: [&CStr; 10] = [ | ||
const LIBICUI18N_PROC_NAMES: [&CStr; 11] = [ | ||
c"ucol_open", | ||
c"ucol_strcollUTF8", | ||
c"uregex_open", | ||
|
@@ -928,6 +946,7 @@ const LIBICUI18N_PROC_NAMES: [&CStr; 10] = [ | |
c"uregex_setUText", | ||
c"uregex_reset64", | ||
c"uregex_findNext", | ||
c"uregex_groupCount", | ||
c"uregex_start64", | ||
c"uregex_end64", | ||
]; | ||
|
@@ -1277,6 +1296,8 @@ mod icu_ffi { | |
unsafe extern "C" fn(regexp: *mut URegularExpression, index: i64, status: &mut UErrorCode); | ||
pub type uregex_findNext = | ||
unsafe extern "C" fn(regexp: *mut URegularExpression, status: &mut UErrorCode) -> bool; | ||
pub type uregex_groupCount = | ||
unsafe extern "C" fn(regexp: *mut URegularExpression, status: &mut UErrorCode) -> i32; | ||
pub type uregex_start64 = unsafe extern "C" fn( | ||
regexp: *mut URegularExpression, | ||
group_num: i32, | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@lhecker we didn't check whether `replacement` contained two characters, only that it contained at least one. What happens if it only contains one?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I can leave a comment there to make that logic a bit easier to understand. The tl;dr is that this should then result in `map_or` returning `b'\\'`, because `off + 2 - 1` will be out of bounds.