Permalink
Browse files

regcomp.c: Skip UTF-8 decoding for invariants

By adding two branches, we can avoid the expensive UTF-8 decode step for
the common case of the input being an ASCII character.
  • Loading branch information...
khwilliamson committed Oct 27, 2017
1 parent e1342f6 commit 70a863550b5f1b4bda7681cfac2a1e59c242b1e2
Showing with 2 additions and 2 deletions.
  1. +2 −2 regcomp.c
View
@@ -13559,7 +13559,7 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
* character we are appending, hence we can delay getting
* its representation until PASS2. */
if (SIZE_ONLY) {
- if (UTF) {
+ if (UTF && ! UVCHR_IS_INVARIANT(ender)) {
const STRLEN unilen = UVCHR_SKIP(ender);
s += unilen;
@@ -13577,7 +13577,7 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
}
} else { /* PASS2 */
not_fold_common:
- if (UTF) {
+ if (UTF && ! UVCHR_IS_INVARIANT(ender)) {
U8 * new_s = uvchr_to_utf8((U8*)s, ender);
len += (char *) new_s - s - 1;
s = (char *) new_s;

0 comments on commit 70a8635

Please sign in to comment.