From c6a115c72452887ae40e8b8660517420d9327a85 Mon Sep 17 00:00:00 2001
From: Karl Williamson <khw@cpan.org>
Date: Mon, 31 May 2021 15:38:21 -0600
Subject: [PATCH 1/8] regcomp.c: White-space only

My attempt to insulate the year-old commits, finally pushed as
77a6d54c0deb1165b37dcf11c21cd334ae2579bb and
403d7eb3e4320188571cf61b9dab62ff10799f49, from the leading-tab removal
failed miserably.

I think it is some bug in git.  Seemingly random groups of lines were
indented differently than adjacent ones.

Anyway, I spent a bunch of time sorting it all out, and this is the
result.
---
 regcomp.c | 1024 ++++++++++++++++++++++++++---------------------------
 1 file changed, 512 insertions(+), 512 deletions(-)

diff --git a/regcomp.c b/regcomp.c
index 512e6a165b3a..ed8143818275 100644
--- a/regcomp.c
+++ b/regcomp.c
@@ -19312,77 +19312,77 @@ S_optimize_regclass(pTHX_
      * Certain of the parameters may be updated as a result of the changes
      * herein */
 
-        U8 op = ANYOF; /* The returned node-type, initialized to the unoptimized
-                        one. */
-        UV value;
-        PERL_UINT_FAST8_T i;
-        UV partial_cp_count = 0;
-        UV start[MAX_FOLD_FROMS+1] = { 0 }; /* +1 for the folded-to char */
-        UV   end[MAX_FOLD_FROMS+1] = { 0 };
-        bool single_range = FALSE;
+    U8 op = ANYOF; /* The returned node-type, initialized to the unoptimized
+                      one. */
+    UV value;
+    PERL_UINT_FAST8_T i;
+    UV partial_cp_count = 0;
+    UV start[MAX_FOLD_FROMS+1] = { 0 }; /* +1 for the folded-to char */
+    UV   end[MAX_FOLD_FROMS+1] = { 0 };
+    bool single_range = FALSE;
 
-        PERL_ARGS_ASSERT_OPTIMIZE_REGCLASS;
+    PERL_ARGS_ASSERT_OPTIMIZE_REGCLASS;
 
     if (cp_list) { /* Count the code points in enough ranges that we would see
                       all the ones possible in any fold in this version of
                       Unicode */
 
-            invlist_iterinit(cp_list);
-            for (i = 0; i <= MAX_FOLD_FROMS; i++) {
-                if (! invlist_iternext(cp_list, &start[i], &end[i])) {
-                    break;
-                }
-                partial_cp_count += end[i] - start[i] + 1;
+        invlist_iterinit(cp_list);
+        for (i = 0; i <= MAX_FOLD_FROMS; i++) {
+            if (! invlist_iternext(cp_list, &start[i], &end[i])) {
+                break;
             }
+            partial_cp_count += end[i] - start[i] + 1;
+        }
 
-            if (i == 1) {
-                single_range = TRUE;
-            }
-            invlist_iterfinish(cp_list);
+        if (i == 1) {
+            single_range = TRUE;
         }
+        invlist_iterfinish(cp_list);
+    }
 
     /* If we know at compile time that this matches every possible code point,
      * any run-time dependencies don't matter */
-        if (start[0] == 0 && end[0] == UV_MAX) {
-            if (*invert) {
-                op = OPFAIL;
-                *ret = reganode(pRExC_state, op, 0);
-            }
-            else {
-                op = SANY;
-                *ret = reg_node(pRExC_state, op);
-                MARK_NAUGHTY(1);
-            }
-            return op;
+    if (start[0] == 0 && end[0] == UV_MAX) {
+        if (*invert) {
+            op = OPFAIL;
+            *ret = reganode(pRExC_state, op, 0);
         }
+        else {
+            op = SANY;
+            *ret = reg_node(pRExC_state, op);
+            MARK_NAUGHTY(1);
+        }
+        return op;
+    }
 
     /* Similarly, for /l posix classes, if both a class and its complement
      * match, any run-time dependencies don't matter */
-        if (posixl) {
-            int namedclass;
+    if (posixl) {
+        int namedclass;
         for (namedclass = 0; namedclass < ANYOF_POSIXL_MAX; namedclass += 2) {
-                if (   POSIXL_TEST(posixl, namedclass)      /* class */
-                    && POSIXL_TEST(posixl, namedclass + 1)) /* its complement */
-                {
-                    if (*invert) {
-                        op = OPFAIL;
-                        *ret = reganode(pRExC_state, op, 0);
-                    }
-                    else {
-                        op = SANY;
-                        *ret = reg_node(pRExC_state, op);
-                        MARK_NAUGHTY(1);
-                    }
-                    return op;
+            if (   POSIXL_TEST(posixl, namedclass)      /* class */
+                && POSIXL_TEST(posixl, namedclass + 1)) /* its complement */
+            {
+                if (*invert) {
+                    op = OPFAIL;
+                    *ret = reganode(pRExC_state, op, 0);
                 }
+                else {
+                    op = SANY;
+                    *ret = reg_node(pRExC_state, op);
+                    MARK_NAUGHTY(1);
+                }
+                return op;
             }
+        }
 
         /* For well-behaved locales, some classes are subsets of others, so
          * complementing the subset and including the non-complemented superset
          * should match everything, like [\D[:alnum:]], and
-             * [[:^alpha:][:alnum:]], but some implementations of locales are
-             * buggy, and khw thinks its a bad idea to have optimization change
-             * behavior, even if it avoids an OS bug in a given case */
+         * [[:^alpha:][:alnum:]], but some implementations of locales are
+         * buggy, and khw thinks its a bad idea to have optimization change
+         * behavior, even if it avoids an OS bug in a given case */
 
 #define isSINGLE_BIT_SET(n) isPOWER_OF_2(n)
 
@@ -19391,98 +19391,98 @@ S_optimize_regclass(pTHX_
          * determinable until runtime, but will match whatever the class does
          * outside that range.  (Note that some classes won't match anything
          * outside the range, like [:ascii:]) */
-            if (    isSINGLE_BIT_SET(posixl)
-                && (partial_cp_count == 0 || start[0] > 255))
-            {
-                U8 classnum;
-                SV * class_above_latin1 = NULL;
-                bool already_inverted;
-                bool are_equivalent;
-
-                /* Compute which bit is set, which is the same thing as, e.g.,
-                 * ANYOF_CNTRL.  From
-                 * https://graphics.stanford.edu/~seander/bithacks.html#IntegerLogDeBruijn
-                 * */
+        if (   isSINGLE_BIT_SET(posixl)
+            && (partial_cp_count == 0 || start[0] > 255))
+        {
+            U8 classnum;
+            SV * class_above_latin1 = NULL;
+            bool already_inverted;
+            bool are_equivalent;
+
+            /* Compute which bit is set, which is the same thing as, e.g.,
+             * ANYOF_CNTRL.  From
+             * https://graphics.stanford.edu/~seander/bithacks.html#IntegerLogDeBruijn
+             * */
             static const int MultiplyDeBruijnBitPosition2[32] = {
-                    0, 1, 28, 2, 29, 14, 24, 3, 30, 22, 20, 15, 25, 17, 4, 8,
-                    31, 27, 13, 23, 21, 19, 16, 7, 26, 12, 18, 6, 11, 5, 10, 9
-                    };
+                0, 1, 28, 2, 29, 14, 24, 3, 30, 22, 20, 15, 25, 17, 4, 8,
+                31, 27, 13, 23, 21, 19, 16, 7, 26, 12, 18, 6, 11, 5, 10, 9
+                };
 
-                namedclass = MultiplyDeBruijnBitPosition2[(posixl
-                                                          * 0x077CB531U) >> 27];
-                classnum = namedclass_to_classnum(namedclass);
+            namedclass = MultiplyDeBruijnBitPosition2[(posixl
+                                                      * 0x077CB531U) >> 27];
+            classnum = namedclass_to_classnum(namedclass);
 
-                /* The named classes are such that the inverted number is one
-                 * larger than the non-inverted one */
+            /* The named classes are such that the inverted number is one
+             * larger than the non-inverted one */
             already_inverted = namedclass - classnum_to_namedclass(classnum);
 
             /* Create an inversion list of the official property, inverted if
              * the constructed node list is inverted, and restricted to only
              * the above latin1 code points, which are the only ones known at
              * compile time */
-                _invlist_intersection_maybe_complement_2nd(
-                                                    PL_AboveLatin1,
-                                                    PL_XPosix_ptrs[classnum],
-                                                    already_inverted,
-                                                    &class_above_latin1);
+            _invlist_intersection_maybe_complement_2nd(
+                                                PL_AboveLatin1,
+                                                PL_XPosix_ptrs[classnum],
+                                                already_inverted,
+                                                &class_above_latin1);
             are_equivalent = _invlistEQ(class_above_latin1, cp_list, FALSE);
-                SvREFCNT_dec_NN(class_above_latin1);
+            SvREFCNT_dec_NN(class_above_latin1);
 
-                if (are_equivalent) {
+            if (are_equivalent) {
 
-                    /* Resolve the run-time inversion flag with this possibly
-                     * inverted class */
-                    *invert = *invert ^ already_inverted;
+                /* Resolve the run-time inversion flag with this possibly
+                 * inverted class */
+                *invert = *invert ^ already_inverted;
 
-                    op = POSIXL + *invert * (NPOSIXL - POSIXL);
-                    *ret = reg_node(pRExC_state, op);
-                    FLAGS(REGNODE_p(*ret)) = classnum;
-                    return op;
-                }
+                op = POSIXL + *invert * (NPOSIXL - POSIXL);
+                *ret = reg_node(pRExC_state, op);
+                FLAGS(REGNODE_p(*ret)) = classnum;
+                return op;
             }
         }
+    }
 
     /* khw can't think of any other possible transformation involving these. */
-        if (has_runtime_dependency & HAS_USER_DEFINED_PROPERTY) {
-            return op;
-        }
+    if (has_runtime_dependency & HAS_USER_DEFINED_PROPERTY) {
+        return op;
+    }
 
-        if (! has_runtime_dependency) {
+    if (! has_runtime_dependency) {
 
         /* If the list is empty, nothing matches.  This happens, for example,
          * when a Unicode property that doesn't match anything is the only
          * element in the character class (perluniprops.pod notes such
          * properties). */
-            if (partial_cp_count == 0) {
-                if (*invert) {
-                    op = SANY;
-                    *ret = reg_node(pRExC_state, op);
-                }
-                else {
-                    op = OPFAIL;
-                    *ret = reganode(pRExC_state, op, 0);
-                }
-
-                return op;
-            }
-
-            /* If matches everything but \n */
-            if (   start[0] == 0 && end[0] == '\n' - 1
-                && start[1] == '\n' + 1 && end[1] == UV_MAX)
-            {
-                assert (! *invert);
-                op = REG_ANY;
+        if (partial_cp_count == 0) {
+            if (*invert) {
+                op = SANY;
                 *ret = reg_node(pRExC_state, op);
-                MARK_NAUGHTY(1);
-                return op;
             }
+            else {
+                op = OPFAIL;
+                *ret = reganode(pRExC_state, op, 0);
+            }
+
+            return op;
+        }
+
+        /* If matches everything but \n */
+        if (   start[0] == 0 && end[0] == '\n' - 1
+            && start[1] == '\n' + 1 && end[1] == UV_MAX)
+        {
+            assert (! *invert);
+            op = REG_ANY;
+            *ret = reg_node(pRExC_state, op);
+            MARK_NAUGHTY(1);
+            return op;
         }
+    }
 
-        /* Next see if can optimize classes that contain just a few code points
+    /* Next see if can optimize classes that contain just a few code points
      * into an EXACTish node.  The reason to do this is to let the optimizer
      * join this node with adjacent EXACTish ones, and ANYOF nodes require
      * runtime conversion to code point from UTF-8.
-         *
+     *
      * An EXACTFish node can be generated even if not under /i, and vice versa.
      * But care must be taken.  An EXACTFish node has to be such that it only
      * matches precisely the code points in the class, but we want to generate
@@ -19499,57 +19499,57 @@ S_optimize_regclass(pTHX_
      * colon participates in no fold whatsoever, and having it EXACT tells the
      * optimizer the target string cannot match unless it has a colon in it.
          */
-        if (   ! posixl
-            && ! *invert
+    if (   ! posixl
+        && ! *invert
 
-        /* Only try if there are no more code points in the class than in
-         * the max possible fold */
-            &&   inRANGE(partial_cp_count, 1, MAX_FOLD_FROMS + 1))
-        {
+            /* Only try if there are no more code points in the class than in
+             * the max possible fold */
+        &&   inRANGE(partial_cp_count, 1, MAX_FOLD_FROMS + 1))
+    {
         /* We can always make a single code point class into an EXACTish node.
          * */
         if (partial_cp_count == 1 && ! upper_latin1_only_utf8_matches) {
-                if (LOC) {
-
-            /* Here is /l:  Use EXACTL, except if there is a fold not known
-             * until runtime so shows as only a single code point here.
-             * For code points above 255, we know which can cause problems
-             * by having a potential fold to the Latin1 range. */
-                    if (  ! FOLD
-                        || (     start[0] > 255
-                            && ! is_PROBLEMATIC_LOCALE_FOLD_cp(start[0])))
-                    {
-                        op = EXACTL;
-                    }
-                    else {
-                        op = EXACTFL;
-                    }
+            if (LOC) {
+
+                /* Here is /l:  Use EXACTL, except if there is a fold not known
+                 * until runtime so shows as only a single code point here.
+                 * For code points above 255, we know which can cause problems
+                 * by having a potential fold to the Latin1 range. */
+                if (  ! FOLD
+                    || (     start[0] > 255
+                        && ! is_PROBLEMATIC_LOCALE_FOLD_cp(start[0])))
+                {
+                    op = EXACTL;
                 }
-                else if (! FOLD) { /* Not /l and not /i */
-                    op = (start[0] < 256) ? EXACT : EXACT_REQ8;
+                else {
+                    op = EXACTFL;
                 }
-                else if (start[0] < 256) { /* /i, not /l, and the code point is
-                                              small */
+            }
+            else if (! FOLD) { /* Not /l and not /i */
+                op = (start[0] < 256) ? EXACT : EXACT_REQ8;
+            }
+            else if (start[0] < 256) { /* /i, not /l, and the code point is
+                                          small */
 
-                    /* Under /i, it gets a little tricky.  A code point that
+                /* Under /i, it gets a little tricky.  A code point that
                  * doesn't participate in a fold should be an EXACT node.  We
                  * know this one isn't the result of a simple fold, or there'd
                  * be more than one code point in the list, but it could be
-                 * part of a multi- character fold.  In that case we better not
+                 * part of a multi-character fold.  In that case we better not
                  * create an EXACT node, as we would wrongly be telling the
                  * optimizer that this code point must be in the target string,
                  * and that is wrong.  This is because if the sequence around
                  * this code point forms a multi-char fold, what needs to be in
                  * the string could be the code point that folds to the
                  * sequence.
-                     *
+                 *
                  * This handles the case of below-255 code points, as we have
                  * an easy look up for those.  The next clause handles the
                  * above-256 one */
-                    op = IS_IN_SOME_FOLD_L1(start[0])
-                         ? EXACTFU
-                         : EXACT;
-                }
+                op = IS_IN_SOME_FOLD_L1(start[0])
+                     ? EXACTFU
+                     : EXACT;
+            }
             else {  /* /i, larger code point.  Since we are under /i, and have
                        just this code point, we know that it can't fold to
                        something else, so PL_InMultiCharFold applies to it */
@@ -19559,10 +19559,10 @@ S_optimize_regclass(pTHX_
                 }
 
                 value = start[0];
-            }
-            else if (  ! (has_runtime_dependency & ~HAS_D_RUNTIME_DEPENDENCY)
-                     && _invlist_contains_cp(PL_in_some_fold, start[0]))
-            {
+        }
+        else if (  ! (has_runtime_dependency & ~HAS_D_RUNTIME_DEPENDENCY)
+                 && _invlist_contains_cp(PL_in_some_fold, start[0]))
+        {
             /* Here, the only runtime dependency, if any, is from /d, and the
              * class matches more than one code point, and the lowest code
              * point participates in some fold.  It might be that the other
@@ -19570,56 +19570,56 @@ S_optimize_regclass(pTHX_
              * representable by an EXACTFish node.  Above, we eliminated
              * classes that contain too many code points to be EXACTFish, with
              * the test for MAX_FOLD_FROMS
-                 *
+             *
              * First, special case the ASCII fold pairs, like 'B' and 'b'.  We
              * do this because we have EXACTFAA at our disposal for the ASCII
              * range */
-                if (partial_cp_count == 2 && isASCII(start[0])) {
-
-                    /* The only ASCII characters that participate in folds are
-                     * alphabetics */
-                    assert(isALPHA(start[0]));
-                    if (   end[0] == start[0]   /* First range is a single
-                                                   character, so 2nd exists */
-                        && isALPHA_FOLD_EQ(start[0], start[1]))
-                    {
+            if (partial_cp_count == 2 && isASCII(start[0])) {
+
+                /* The only ASCII characters that participate in folds are
+                 * alphabetics */
+                assert(isALPHA(start[0]));
+                if (   end[0] == start[0]   /* First range is a single
+                                               character, so 2nd exists */
+                    && isALPHA_FOLD_EQ(start[0], start[1]))
+                {
 
-                        /* Here, is part of an ASCII fold pair */
+                    /* Here, is part of an ASCII fold pair */
 
-                        if (   ASCII_FOLD_RESTRICTED
-                            || HAS_NONLATIN1_SIMPLE_FOLD_CLOSURE(start[0]))
-                        {
-                    /* If the second clause just above was true, it means
-                     * we can't be under /i, or else the list would have
-                     * included more than this fold pair.  Therefore we
-                     * have to exclude the possibility of whatever else it
-                     * is that folds to these, by using EXACTFAA */
-                            op = EXACTFAA;
-                        }
-                        else if (HAS_NONLATIN1_FOLD_CLOSURE(start[0])) {
+                    if (   ASCII_FOLD_RESTRICTED
+                        || HAS_NONLATIN1_SIMPLE_FOLD_CLOSURE(start[0]))
+                    {
+                        /* If the second clause just above was true, it means
+                         * we can't be under /i, or else the list would have
+                         * included more than this fold pair.  Therefore we
+                         * have to exclude the possibility of whatever else it
+                         * is that folds to these, by using EXACTFAA */
+                        op = EXACTFAA;
+                    }
+                    else if (HAS_NONLATIN1_FOLD_CLOSURE(start[0])) {
 
-                            /* Here, there's no simple fold that start[0] is part
+                        /* Here, there's no simple fold that start[0] is part
                          * of, but there is a multi-character one.  If we are
                          * not under /i, we want to exclude that possibility;
                          * if under /i, we want to include it */
-                            op = (FOLD) ? EXACTFU : EXACTFAA;
-                        }
-                        else {
-
-                            /* Here, the only possible fold start[0] particpates in
-                             * is with start[1].  /i or not isn't relevant */
-                            op = EXACTFU;
-                        }
+                        op = (FOLD) ? EXACTFU : EXACTFAA;
+                    }
+                    else {
 
-                        value = toFOLD(start[0]);
+                        /* Here, the only possible fold start[0] particpates in
+                         * is with start[1].  /i or not isn't relevant */
+                        op = EXACTFU;
                     }
+
+                    value = toFOLD(start[0]);
                 }
-                else if (  ! upper_latin1_only_utf8_matches
+            }
+            else if (  ! upper_latin1_only_utf8_matches
                      || (   _invlist_len(upper_latin1_only_utf8_matches) == 2
-                             && PL_fold_latin1[
-                               invlist_highest(upper_latin1_only_utf8_matches)]
-                             == start[0]))
-                {
+                         && PL_fold_latin1[
+                           invlist_highest(upper_latin1_only_utf8_matches)]
+                         == start[0]))
+            {
                 /* Here, the smallest character is non-ascii or there are more
                  * than 2 code points matched by this node.  Also, we either
                  * don't have /d UTF-8 dependent matches, or if we do, they
@@ -19632,9 +19632,9 @@ S_optimize_regclass(pTHX_
                  * above about exceeding the array bounds of PL_fold_latin1[]
                  * because any code point in 'upper_latin1_only_utf8_matches'
                  * is below 256.)
-                     *
-                     * EXACTFAA would apply only to pairs (hence exactly 2 code
-                     * points) in the ASCII range, so we can't use it here to
+                 *
+                 * EXACTFAA would apply only to pairs (hence exactly 2 code
+                 * points) in the ASCII range, so we can't use it here to
                  * artificially restrict the fold domain, so we check if the
                  * class does or does not match some EXACTFish node.  Further,
                  * if we aren't under /i, and and the folded-to character is
@@ -19643,57 +19643,57 @@ S_optimize_regclass(pTHX_
                  * multi-character fold, and we don't here know the context, so
                  * we have to assume it is that multi-char fold, to prevent
                  * potential bugs.
-                     *
+                 *
                  * To do the general case, we first find the fold of the lowest
                  * code point (which may be higher than the lowest one), then
                  * find everything that folds to it.  (The data structure we
                  * have only maps from the folded code points, so we have to do
                  * the earlier step.) */
 
-                    Size_t foldlen;
-                    U8 foldbuf[UTF8_MAXBYTES_CASE];
+                Size_t foldlen;
+                U8 foldbuf[UTF8_MAXBYTES_CASE];
                 UV folded = _to_uni_fold_flags(start[0], foldbuf, &foldlen, 0);
-                    U32 first_fold;
-                    const U32 * remaining_folds;
-                    Size_t folds_to_this_cp_count = _inverse_folds(
+                U32 first_fold;
+                const U32 * remaining_folds;
+                Size_t folds_to_this_cp_count = _inverse_folds(
                                                             folded,
                                                             &first_fold,
                                                             &remaining_folds);
-                    Size_t folds_count = folds_to_this_cp_count + 1;
-                    SV * fold_list = _new_invlist(folds_count);
-                    unsigned int i;
-
-                    /* If there are UTF-8 dependent matches, create a temporary
-                     * list of what this node matches, including them. */
-                    SV * all_cp_list = NULL;
-                    SV ** use_this_list = &cp_list;
-
-                    if (upper_latin1_only_utf8_matches) {
-                        all_cp_list = _new_invlist(0);
-                        use_this_list = &all_cp_list;
-                        _invlist_union(cp_list,
-                                       upper_latin1_only_utf8_matches,
-                                       use_this_list);
-                    }
+                Size_t folds_count = folds_to_this_cp_count + 1;
+                SV * fold_list = _new_invlist(folds_count);
+                unsigned int i;
 
-                    /* Having gotten everything that participates in the fold
-                     * containing the lowest code point, we turn that into an
-                     * inversion list, making sure everything is included. */
-                    fold_list = add_cp_to_invlist(fold_list, start[0]);
-                    fold_list = add_cp_to_invlist(fold_list, folded);
-                    if (folds_to_this_cp_count > 0) {
-                        fold_list = add_cp_to_invlist(fold_list, first_fold);
-                        for (i = 0; i + 1 < folds_to_this_cp_count; i++) {
-                            fold_list = add_cp_to_invlist(fold_list,
-                                                        remaining_folds[i]);
-                        }
+                /* If there are UTF-8 dependent matches, create a temporary
+                 * list of what this node matches, including them. */
+                SV * all_cp_list = NULL;
+                SV ** use_this_list = &cp_list;
+
+                if (upper_latin1_only_utf8_matches) {
+                    all_cp_list = _new_invlist(0);
+                    use_this_list = &all_cp_list;
+                    _invlist_union(cp_list,
+                                   upper_latin1_only_utf8_matches,
+                                   use_this_list);
+                }
+
+                /* Having gotten everything that participates in the fold
+                 * containing the lowest code point, we turn that into an
+                 * inversion list, making sure everything is included. */
+                fold_list = add_cp_to_invlist(fold_list, start[0]);
+                fold_list = add_cp_to_invlist(fold_list, folded);
+                if (folds_to_this_cp_count > 0) {
+                    fold_list = add_cp_to_invlist(fold_list, first_fold);
+                    for (i = 0; i + 1 < folds_to_this_cp_count; i++) {
+                        fold_list = add_cp_to_invlist(fold_list,
+                                                    remaining_folds[i]);
                     }
+                }
 
                 /* If the fold list is identical to what's in this ANYOF node,
                  * the node can be represented by an EXACTFish one instead */
-                    if (_invlistEQ(*use_this_list, fold_list,
-                                   0 /* Don't complement */ )
-                    ) {
+                if (_invlistEQ(*use_this_list, fold_list,
+                               0 /* Don't complement */ )
+                ) {
 
                     /* But, we have to be careful, as mentioned above.  Just
                      * the right sequence of characters could match this if it
@@ -19705,105 +19705,105 @@ S_optimize_regclass(pTHX_
                      * node.  So, for each case below we have to check if we
                      * are folding and if not, if it is not part of a
                      * multi-char fold.  */
-                        if (start[0] > 255) {    /* Highish code point */
-                            if (FOLD || ! _invlist_contains_cp(
-                                            PL_InMultiCharFold, folded))
-                            {
-                                op = (LOC)
-                                     ? EXACTFLU8
-                                     : (ASCII_FOLD_RESTRICTED)
-                                       ? EXACTFAA
-                                       : EXACTFU_REQ8;
-                                value = folded;
-                            }
-                        }   /* Below, the lowest code point < 256 */
-                        else if (    FOLD
-                                 &&  folded == 's'
-                                 &&  DEPENDS_SEMANTICS)
+                    if (start[0] > 255) {    /* Highish code point */
+                        if (FOLD || ! _invlist_contains_cp(
+                                                   PL_InMultiCharFold, folded))
+                        {
+                            op = (LOC)
+                                 ? EXACTFLU8
+                                 : (ASCII_FOLD_RESTRICTED)
+                                   ? EXACTFAA
+                                   : EXACTFU_REQ8;
+                            value = folded;
+                        }
+                    }   /* Below, the lowest code point < 256 */
+                    else if (    FOLD
+                             &&  folded == 's'
+                             &&  DEPENDS_SEMANTICS)
                     {   /* An EXACTF node containing a single character 's',
                            can be an EXACTFU if it doesn't get joined with an
                            adjacent 's' */
-                            op = EXACTFU_S_EDGE;
-                            value = folded;
-                        }
-                        else if (    FOLD
-                                || ! HAS_NONLATIN1_FOLD_CLOSURE(start[0]))
-                        {
-                            if (upper_latin1_only_utf8_matches) {
-                                op = EXACTF;
+                        op = EXACTFU_S_EDGE;
+                        value = folded;
+                    }
+                    else if (     FOLD
+                             || ! HAS_NONLATIN1_FOLD_CLOSURE(start[0]))
+                    {
+                        if (upper_latin1_only_utf8_matches) {
+                            op = EXACTF;
 
-                                /* We can't use the fold, as that only matches
-                                 * under UTF-8 */
-                                value = start[0];
-                            }
-                            else if (     UNLIKELY(start[0] == MICRO_SIGN)
-                                     && ! UTF)
-                    {   /* EXACTFUP is a special node for this character */
-                                op = (ASCII_FOLD_RESTRICTED)
-                                     ? EXACTFAA
-                                     : EXACTFUP;
-                                value = MICRO_SIGN;
-                            }
-                            else if (     ASCII_FOLD_RESTRICTED
-                                     && ! isASCII(start[0]))
+                            /* We can't use the fold, as that only matches
+                             * under UTF-8 */
+                            value = start[0];
+                        }
+                        else if (     UNLIKELY(start[0] == MICRO_SIGN)
+                                 && ! UTF)
+                        {   /* EXACTFUP is a special node for this character */
+                            op = (ASCII_FOLD_RESTRICTED)
+                                 ? EXACTFAA
+                                 : EXACTFUP;
+                            value = MICRO_SIGN;
+                        }
+                        else if (     ASCII_FOLD_RESTRICTED
+                                 && ! isASCII(start[0]))
                         {   /* For ASCII under /iaa, we can use EXACTFU below
                              */
-                                op = EXACTFAA;
-                                value = folded;
-                            }
-                            else {
-                                op = EXACTFU;
-                                value = folded;
-                            }
+                            op = EXACTFAA;
+                            value = folded;
+                        }
+                        else {
+                            op = EXACTFU;
+                            value = folded;
                         }
                     }
-
-                    SvREFCNT_dec_NN(fold_list);
-                    SvREFCNT_dec(all_cp_list);
                 }
+
+                SvREFCNT_dec_NN(fold_list);
+                SvREFCNT_dec(all_cp_list);
             }
+        }
 
-            if (op != ANYOF) {
-                U8 len;
+        if (op != ANYOF) {
+            U8 len;
 
-                /* Here, we have calculated what EXACTish node to use.  Have to
-                 * convert to UTF-8 if not already there */
-                if (value > 255) {
-                    if (! UTF) {
-                        SvREFCNT_dec(cp_list);;
-                        REQUIRE_UTF8(flagp);
-                    }
+            /* Here, we have calculated what EXACTish node to use.  Have to
+             * convert to UTF-8 if not already there */
+            if (value > 255) {
+                if (! UTF) {
+                    SvREFCNT_dec(cp_list);;
+                    REQUIRE_UTF8(flagp);
+                }
 
-                    /* This is a kludge to the special casing issues with this
+                /* This is a kludge to the special casing issues with this
                  * ligature under /aa.  FB05 should fold to FB06, but the call
                  * above to _to_uni_fold_flags() didn't find this, as it didn't
                  * use the /aa restriction in order to not miss other folds
                  * that would be affected.  This is the only instance likely to
                  * ever be a problem in all of Unicode.  So special case it. */
-                    if (   value == LATIN_SMALL_LIGATURE_LONG_S_T
-                        && ASCII_FOLD_RESTRICTED)
-                    {
-                        value = LATIN_SMALL_LIGATURE_ST;
-                    }
+                if (   value == LATIN_SMALL_LIGATURE_LONG_S_T
+                    && ASCII_FOLD_RESTRICTED)
+                {
+                    value = LATIN_SMALL_LIGATURE_ST;
                 }
+            }
 
-                len = (UTF) ? UVCHR_SKIP(value) : 1;
+            len = (UTF) ? UVCHR_SKIP(value) : 1;
 
-                *ret = regnode_guts(pRExC_state, op, len, "exact");
-                FILL_NODE(*ret, op);
-                RExC_emit += 1 + STR_SZ(len);
-                setSTR_LEN(REGNODE_p(*ret), len);
-                if (len == 1) {
-                    *STRINGs(REGNODE_p(*ret)) = (U8) value;
-                }
-                else {
-                    uvchr_to_utf8((U8 *) STRINGs(REGNODE_p(*ret)), value);
-                }
-                return op;
+            *ret = regnode_guts(pRExC_state, op, len, "exact");
+            FILL_NODE(*ret, op);
+            RExC_emit += 1 + STR_SZ(len);
+            setSTR_LEN(REGNODE_p(*ret), len);
+            if (len == 1) {
+                *STRINGs(REGNODE_p(*ret)) = (U8) value;
+            }
+            else {
+                uvchr_to_utf8((U8 *) STRINGs(REGNODE_p(*ret)), value);
             }
+            return op;
         }
+    }
 
-        if (! has_runtime_dependency) {
+    if (! has_runtime_dependency) {
 
         /* See if this can be turned into an ANYOFM node.  Think about the bit
          * patterns in two different bytes.  In some positions, the bits in
@@ -19825,59 +19825,59 @@ S_optimize_regclass(pTHX_
          * can benefit from the speed up.  We can only do this on UTF-8
          * invariant bytes, because they have the same bit patterns under UTF-8
          * as not. */
-            PERL_UINT_FAST8_T inverted = 0;
+        PERL_UINT_FAST8_T inverted = 0;
 #ifdef EBCDIC
-            const PERL_UINT_FAST8_T max_permissible = 0xFF;
+        const PERL_UINT_FAST8_T max_permissible = 0xFF;
 #else
-            const PERL_UINT_FAST8_T max_permissible = 0x7F;
+        const PERL_UINT_FAST8_T max_permissible = 0x7F;
 #endif
         /* If doesn't fit the criteria for ANYOFM, invert and try again.  If
          * that works we will instead later generate an NANYOFM, and invert
          * back when through */
-            if (invlist_highest(cp_list) > max_permissible) {
-                _invlist_invert(cp_list);
-                inverted = 1;
-            }
+        if (invlist_highest(cp_list) > max_permissible) {
+            _invlist_invert(cp_list);
+            inverted = 1;
+        }
 
-            if (invlist_highest(cp_list) <= max_permissible) {
-                UV this_start, this_end;
-                UV lowest_cp = UV_MAX;  /* init'ed to suppress compiler warn */
-                U8 bits_differing = 0;
-                Size_t full_cp_count = 0;
-                bool first_time = TRUE;
+        if (invlist_highest(cp_list) <= max_permissible) {
+            UV this_start, this_end;
+            UV lowest_cp = UV_MAX;  /* init'ed to suppress compiler warn */
+            U8 bits_differing = 0;
+            Size_t full_cp_count = 0;
+            bool first_time = TRUE;
 
             /* Go through the bytes and find the bit positions that differ */
-                invlist_iterinit(cp_list);
-                while (invlist_iternext(cp_list, &this_start, &this_end)) {
-                    unsigned int i = this_start;
+            invlist_iterinit(cp_list);
+            while (invlist_iternext(cp_list, &this_start, &this_end)) {
+                unsigned int i = this_start;
 
-                    if (first_time) {
-                        if (! UVCHR_IS_INVARIANT(i)) {
-                            goto done_anyofm;
-                        }
+                if (first_time) {
+                    if (! UVCHR_IS_INVARIANT(i)) {
+                        goto done_anyofm;
+                    }
 
-                        first_time = FALSE;
-                        lowest_cp = this_start;
+                    first_time = FALSE;
+                    lowest_cp = this_start;
 
                     /* We have set up the code point to compare with.  Don't
                      * compare it with itself */
-                        i++;
-                    }
-
-                    /* Find the bit positions that differ from the lowest code
-                     * point in the node.  Keep track of all such positions by
-                     * OR'ing */
-                    for (; i <= this_end; i++) {
-                        if (! UVCHR_IS_INVARIANT(i)) {
-                            goto done_anyofm;
-                        }
+                    i++;
+                }
 
-                        bits_differing  |= i ^ lowest_cp;
+                /* Find the bit positions that differ from the lowest code
+                 * point in the node.  Keep track of all such positions by
+                 * OR'ing */
+                for (; i <= this_end; i++) {
+                    if (! UVCHR_IS_INVARIANT(i)) {
+                        goto done_anyofm;
                     }
 
-                    full_cp_count += this_end - this_start + 1;
+                    bits_differing  |= i ^ lowest_cp;
                 }
 
+                full_cp_count += this_end - this_start + 1;
+            }
+
             /* At the end of the loop, we count how many bits differ from the
              * bits in lowest code point, call the count 'd'.  If the set we
              * found contains 2**d elements, it is the closure of all code
@@ -19891,32 +19891,32 @@ S_optimize_regclass(pTHX_
              * has a 0.  But that would mean that one of them differs from the
              * lowest code point in that position, which possibility we've
              * already excluded.  */
-                if (  (inverted || full_cp_count > 1)
-                    && full_cp_count == 1U << PL_bitcount[bits_differing])
-                {
-                    U8 ANYOFM_mask;
+            if (  (inverted || full_cp_count > 1)
+                && full_cp_count == 1U << PL_bitcount[bits_differing])
+            {
+                U8 ANYOFM_mask;
 
-                    op = ANYOFM + inverted;;
+                op = ANYOFM + inverted;;
 
-                    /* We need to make the bits that differ be 0's */
-                    ANYOFM_mask = ~ bits_differing; /* This goes into FLAGS */
+                /* We need to make the bits that differ be 0's */
+                ANYOFM_mask = ~ bits_differing; /* This goes into FLAGS */
 
-                    /* The argument is the lowest code point */
-                    *ret = reganode(pRExC_state, op, lowest_cp);
-                    FLAGS(REGNODE_p(*ret)) = ANYOFM_mask;
-                }
-
-              done_anyofm:
-                invlist_iterfinish(cp_list);
+                /* The argument is the lowest code point */
+                *ret = reganode(pRExC_state, op, lowest_cp);
+                FLAGS(REGNODE_p(*ret)) = ANYOFM_mask;
             }
 
-            if (inverted) {
-                _invlist_invert(cp_list);
-            }
+          done_anyofm:
+            invlist_iterfinish(cp_list);
+        }
 
-            if (op != ANYOF) {
-                return op;
-            }
+        if (inverted) {
+            _invlist_invert(cp_list);
+        }
+
+        if (op != ANYOF) {
+            return op;
+        }
 
         /* XXX We could create an ANYOFR_LOW node here if we saved above if all
          * were invariants, it wasn't inverted, and there is a single range.
@@ -19924,121 +19924,121 @@ S_optimize_regclass(pTHX_
          * like /\d/a, but would be twice the size.  Without having actually
          * measured the gain, khw doesn't think the tradeoff is really worth it
          * */
-        }
+    }
 
-        if (! (*anyof_flags & ANYOF_LOCALE_FLAGS)) {
-            PERL_UINT_FAST8_T type;
-            SV * intersection = NULL;
-            SV* d_invlist = NULL;
+    if (! (*anyof_flags & ANYOF_LOCALE_FLAGS)) {
+        PERL_UINT_FAST8_T type;
+        SV * intersection = NULL;
+        SV* d_invlist = NULL;
 
         /* See if this matches any of the POSIX classes.  The POSIXA and POSIXD
          * ones are about the same speed as ANYOF ops, but take less room; the
          * ones that have above-Latin1 code point matches are somewhat faster
-         * than ANYOF.  */
+         * than ANYOF. */
 
-            for (type = POSIXA; type >= POSIXD; type--) {
-                int posix_class;
+        for (type = POSIXA; type >= POSIXD; type--) {
+            int posix_class;
 
-                if (type == POSIXL) {   /* But not /l posix classes */
-                    continue;
-                }
+            if (type == POSIXL) {   /* But not /l posix classes */
+                continue;
+            }
 
-                for (posix_class = 0;
-                     posix_class <= _HIGHEST_REGCOMP_DOT_H_SYNC;
-                     posix_class++)
-                {
-                    SV** our_code_points = &cp_list;
-                    SV** official_code_points;
-                    int try_inverted;
+            for (posix_class = 0;
+                 posix_class <= _HIGHEST_REGCOMP_DOT_H_SYNC;
+                 posix_class++)
+            {
+                SV** our_code_points = &cp_list;
+                SV** official_code_points;
+                int try_inverted;
 
-                    if (type == POSIXA) {
-                        official_code_points = &PL_Posix_ptrs[posix_class];
-                    }
-                    else {
-                        official_code_points = &PL_XPosix_ptrs[posix_class];
-                    }
+                if (type == POSIXA) {
+                    official_code_points = &PL_Posix_ptrs[posix_class];
+                }
+                else {
+                    official_code_points = &PL_XPosix_ptrs[posix_class];
+                }
 
                 /* Skip non-existent classes of this type.  e.g. \v only has an
                  * entry in PL_XPosix_ptrs */
-                    if (! *official_code_points) {
-                        continue;
-                    }
+                if (! *official_code_points) {
+                    continue;
+                }
 
-                    /* Try both the regular class, and its inversion */
-                    for (try_inverted = 0; try_inverted < 2; try_inverted++) {
-                        bool this_inverted = *invert ^ try_inverted;
+                /* Try both the regular class, and its inversion */
+                for (try_inverted = 0; try_inverted < 2; try_inverted++) {
+                    bool this_inverted = *invert ^ try_inverted;
 
-                        if (type != POSIXD) {
+                    if (type != POSIXD) {
 
                         /* This class that isn't /d can't match if we have /d
                          * dependencies */
-                            if (has_runtime_dependency
-                                                    & HAS_D_RUNTIME_DEPENDENCY)
-                            {
-                                continue;
-                            }
+                        if (has_runtime_dependency
+                                                & HAS_D_RUNTIME_DEPENDENCY)
+                        {
+                            continue;
                         }
-                        else /* is /d */ if (! this_inverted) {
+                    }
+                    else /* is /d */ if (! this_inverted) {
 
                         /* /d classes don't match anything non-ASCII below 256
                          * unconditionally (which cp_list contains) */
-                            _invlist_intersection(cp_list, PL_UpperLatin1,
-                                                           &intersection);
-                            if (_invlist_len(intersection) != 0) {
-                                continue;
-                            }
+                        _invlist_intersection(cp_list, PL_UpperLatin1,
+                                                       &intersection);
+                        if (_invlist_len(intersection) != 0) {
+                            continue;
+                        }
 
-                            SvREFCNT_dec(d_invlist);
-                            d_invlist = invlist_clone(cp_list, NULL);
+                        SvREFCNT_dec(d_invlist);
+                        d_invlist = invlist_clone(cp_list, NULL);
 
                         /* But under UTF-8 it turns into using /u rules.  Add
                          * the things it matches under these conditions so that
                          * we check below that these are identical to what the
                          * tested class should match */
-                            if (upper_latin1_only_utf8_matches) {
-                                _invlist_union(
-                                            d_invlist,
-                                            upper_latin1_only_utf8_matches,
-                                            &d_invlist);
-                            }
-                            our_code_points = &d_invlist;
+                        if (upper_latin1_only_utf8_matches) {
+                            _invlist_union(
+                                        d_invlist,
+                                        upper_latin1_only_utf8_matches,
+                                        &d_invlist);
                         }
-                        else {  /* POSIXD, inverted.  If this doesn't have this
-                                   flag set, it isn't /d. */
-                            if (! (*anyof_flags & ANYOF_SHARED_d_MATCHES_ALL_NON_UTF8_NON_ASCII_non_d_WARN_SUPER))
-                            {
-                                continue;
-                            }
-                            our_code_points = &cp_list;
+                        our_code_points = &d_invlist;
+                    }
+                    else {  /* POSIXD, inverted.  If this doesn't have this
+                               flag set, it isn't /d. */
+                        if (! (*anyof_flags & ANYOF_SHARED_d_MATCHES_ALL_NON_UTF8_NON_ASCII_non_d_WARN_SUPER))
+                        {
+                            continue;
                         }
+                        our_code_points = &cp_list;
+                    }
 
                     /* Here, have weeded out some things.  We want to see if
                      * the list of characters this node contains
-                         * ('*our_code_points') precisely matches those of the
-                         * class we are currently checking against
-                         * ('*official_code_points'). */
-                        if (_invlistEQ(*our_code_points,
-                                       *official_code_points,
-                                       try_inverted))
-                        {
-                            /* Here, they precisely match.  Optimize this ANYOF
+                     * ('*our_code_points') precisely matches those of the
+                     * class we are currently checking against
+                     * ('*official_code_points'). */
+                    if (_invlistEQ(*our_code_points,
+                                   *official_code_points,
+                                   try_inverted))
+                    {
+                        /* Here, they precisely match.  Optimize this ANYOF
                          * node into its equivalent POSIX one of the correct
                          * type, possibly inverted */
-                            op = (try_inverted)
-                                ? type + NPOSIXA - POSIXA
-                                : type;
-                            *ret = reg_node(pRExC_state, op);
-                            FLAGS(REGNODE_p(*ret)) = posix_class;
-                            SvREFCNT_dec(d_invlist);
-                            SvREFCNT_dec(intersection);
-                            return op;
-                        }
+                        op = (try_inverted)
+                            ? type + NPOSIXA - POSIXA
+                            : type;
+                        *ret = reg_node(pRExC_state, op);
+                        FLAGS(REGNODE_p(*ret)) = posix_class;
+                        SvREFCNT_dec(d_invlist);
+                        SvREFCNT_dec(intersection);
+                        return op;
                     }
                 }
             }
-            SvREFCNT_dec(d_invlist);
-            SvREFCNT_dec(intersection);
         }
+        SvREFCNT_dec(d_invlist);
+        SvREFCNT_dec(intersection);
+    }
 
     /* If it is a single contiguous range, ANYOFR is an efficient regnode, both
      * in size and speed.  Currently, a 20 bit range base (smallest code point
@@ -20046,110 +20046,110 @@ S_optimize_regclass(pTHX_
      * This allows for using it on all of the Unicode code points except for
      * the highest plane, which is only for private use code points.  khw
      * doubts that a bigger delta is likely in real world applications */
-        if (     single_range
-            && ! has_runtime_dependency
-            &&   *anyof_flags == 0
-            &&   start[0] < (1 << ANYOFR_BASE_BITS)
-            &&   end[0] - start[0]
-                    < ((1U << (sizeof(((struct regnode_1 *)NULL)->arg1)
-                                   * CHARBITS - ANYOFR_BASE_BITS))))
+    if (     single_range
+        && ! has_runtime_dependency
+        &&   *anyof_flags == 0
+        &&   start[0] < (1 << ANYOFR_BASE_BITS)
+        &&   end[0] - start[0]
+                < ((1U << (sizeof(((struct regnode_1 *)NULL)->arg1)
+                               * CHARBITS - ANYOFR_BASE_BITS))))
 
-        {
-            U8 low_utf8[UTF8_MAXBYTES+1];
-            U8 high_utf8[UTF8_MAXBYTES+1];
+    {
+        U8 low_utf8[UTF8_MAXBYTES+1];
+        U8 high_utf8[UTF8_MAXBYTES+1];
 
-            op = ANYOFR;
-            *ret = reganode(pRExC_state, op,
+        op = ANYOFR;
+        *ret = reganode(pRExC_state, op,
                         (start[0] | (end[0] - start[0]) << ANYOFR_BASE_BITS));
 
         /* Place the lowest UTF-8 start byte in the flags field, so as to allow
          * efficient ruling out at run time of many possible inputs.  */
-            (void) uvchr_to_utf8(low_utf8, start[0]);
-            (void) uvchr_to_utf8(high_utf8, end[0]);
+        (void) uvchr_to_utf8(low_utf8, start[0]);
+        (void) uvchr_to_utf8(high_utf8, end[0]);
 
-            /* If all code points share the same first byte, this can be an
-             * ANYOFRb.  Otherwise store the lowest UTF-8 start byte which can
+        /* If all code points share the same first byte, this can be an
+         * ANYOFRb.  Otherwise store the lowest UTF-8 start byte which can
          * quickly rule out many inputs at run-time without having to compute
          * the code point from UTF-8.  For EBCDIC, we use I8, as not doing that
          * transformation would not rule out nearly so many things */
-            if (low_utf8[0] == high_utf8[0]) {
-                op = ANYOFRb;
-                OP(REGNODE_p(*ret)) = op;
-                ANYOF_FLAGS(REGNODE_p(*ret)) = low_utf8[0];
-            }
-            else {
+        if (low_utf8[0] == high_utf8[0]) {
+            op = ANYOFRb;
+            OP(REGNODE_p(*ret)) = op;
+            ANYOF_FLAGS(REGNODE_p(*ret)) = low_utf8[0];
+        }
+        else {
             ANYOF_FLAGS(REGNODE_p(*ret)) = NATIVE_UTF8_TO_I8(low_utf8[0]);
-            }
-
-            return op;
         }
 
-        /* If didn't find an optimization and there is no need for a bitmap,
-         * optimize to indicate that */
-        if (     start[0] >= NUM_ANYOF_CODE_POINTS
-            && ! LOC
-            && ! upper_latin1_only_utf8_matches
-            &&   *anyof_flags == 0)
-        {
-            U8 low_utf8[UTF8_MAXBYTES+1];
-            UV highest_cp = invlist_highest(cp_list);
+        return op;
+    }
+
+    /* If didn't find an optimization and there is no need for a bitmap,
+     * optimize to indicate that */
+    if (     start[0] >= NUM_ANYOF_CODE_POINTS
+        && ! LOC
+        && ! upper_latin1_only_utf8_matches
+        &&   *anyof_flags == 0)
+    {
+        U8 low_utf8[UTF8_MAXBYTES+1];
+        UV highest_cp = invlist_highest(cp_list);
 
         /* Currently the maximum allowed code point by the system is IV_MAX.
          * Higher ones are reserved for future internal use.  This particular
          * regnode can be used for higher ones, but we can't calculate the code
          * point of those.  IV_MAX suffices though, as it will be a large first
          * byte */
-            Size_t low_len = uvchr_to_utf8(low_utf8, MIN(start[0], IV_MAX))
-                           - low_utf8;
+        Size_t low_len = uvchr_to_utf8(low_utf8, MIN(start[0], IV_MAX))
+                       - low_utf8;
 
         /* We store the lowest possible first byte of the UTF-8 representation,
          * using the flags field.  This allows for quick ruling out of some
          * inputs without having to convert from UTF-8 to code point.  For
          * EBCDIC, we use I8, as not doing that transformation would not rule
          * out nearly so many things */
-            *anyof_flags = NATIVE_UTF8_TO_I8(low_utf8[0]);
+        *anyof_flags = NATIVE_UTF8_TO_I8(low_utf8[0]);
 
-            op = ANYOFH;
+        op = ANYOFH;
 
-            /* If the first UTF-8 start byte for the highest code point in the
-             * range is suitably small, we may be able to get an upper bound as
-             * well */
-            if (highest_cp <= IV_MAX) {
-                U8 high_utf8[UTF8_MAXBYTES+1];
+        /* If the first UTF-8 start byte for the highest code point in the
+         * range is suitably small, we may be able to get an upper bound as
+         * well */
+        if (highest_cp <= IV_MAX) {
+            U8 high_utf8[UTF8_MAXBYTES+1];
             Size_t high_len = uvchr_to_utf8(high_utf8, highest_cp) - high_utf8;
 
-                /* If the lowest and highest are the same, we can get an exact
+            /* If the lowest and highest are the same, we can get an exact
              * first byte instead of a just minimum or even a sequence of exact
              * leading bytes.  We signal these with different regnodes */
-                if (low_utf8[0] == high_utf8[0]) {
-                    Size_t len = find_first_differing_byte_pos(low_utf8,
-                                                               high_utf8,
-                                                       MIN(low_len, high_len));
+            if (low_utf8[0] == high_utf8[0]) {
+                Size_t len = find_first_differing_byte_pos(low_utf8,
+                                                           high_utf8,
+                                                   MIN(low_len, high_len));
 
-                    if (len == 1) {
+                if (len == 1) {
 
                     /* No need to convert to I8 for EBCDIC as this is an exact
                      * match */
-                        *anyof_flags = low_utf8[0];
-                        op = ANYOFHb;
-                    }
-                    else {
-                        op = ANYOFHs;
-                        *ret = regnode_guts(pRExC_state, op,
-                                           regarglen[op] + STR_SZ(len),
-                                           "anyofhs");
-                        FILL_NODE(*ret, op);
-                        ((struct regnode_anyofhs *) REGNODE_p(*ret))->str_len
-                                                                        = len;
-                        Copy(low_utf8,  /* Add the common bytes */
-                        ((struct regnode_anyofhs *) REGNODE_p(*ret))->string,
-                           len, U8);
-                        RExC_emit += NODE_SZ_STR(REGNODE_p(*ret));
-                        set_ANYOF_arg(pRExC_state, REGNODE_p(*ret), cp_list,
-                                                  NULL, only_utf8_locale_list);
-                        return op;
-                    }
+                    *anyof_flags = low_utf8[0];
+                    op = ANYOFHb;
+                }
+                else {
+                    op = ANYOFHs;
+                    *ret = regnode_guts(pRExC_state, op,
+                                       regarglen[op] + STR_SZ(len),
+                                       "anyofhs");
+                    FILL_NODE(*ret, op);
+                    ((struct regnode_anyofhs *) REGNODE_p(*ret))->str_len
+                                                                    = len;
+                    Copy(low_utf8,  /* Add the common bytes */
+                    ((struct regnode_anyofhs *) REGNODE_p(*ret))->string,
+                       len, U8);
+                    RExC_emit += NODE_SZ_STR(REGNODE_p(*ret));
+                    set_ANYOF_arg(pRExC_state, REGNODE_p(*ret), cp_list,
+                                              NULL, only_utf8_locale_list);
+                    return op;
                 }
+            }
             else if (NATIVE_UTF8_TO_I8(high_utf8[0]) <= MAX_ANYOF_HRx_BYTE) {
 
                 /* Here, the high byte is not the same as the low, but is small
@@ -20159,27 +20159,27 @@ S_optimize_regclass(pTHX_
                  * platforms, I8 is used.  On ASCII platforms I8 is the same
                  * thing as UTF-8 */
 
-                    U8 bits = 0;
-                    U8 max_range_diff = MAX_ANYOF_HRx_BYTE - *anyof_flags;
-                    U8 range_diff = NATIVE_UTF8_TO_I8(high_utf8[0])
-                                - *anyof_flags;
+                U8 bits = 0;
+                U8 max_range_diff = MAX_ANYOF_HRx_BYTE - *anyof_flags;
+                U8 range_diff = NATIVE_UTF8_TO_I8(high_utf8[0])
+                            - *anyof_flags;
 
-                    if (range_diff <= max_range_diff / 8) {
-                        bits = 3;
-                    }
-                    else if (range_diff <= max_range_diff / 4) {
-                        bits = 2;
-                    }
-                    else if (range_diff <= max_range_diff / 2) {
-                        bits = 1;
-                    }
-                    *anyof_flags = (*anyof_flags - 0xC0) << 2 | bits;
-                    op = ANYOFHr;
+                if (range_diff <= max_range_diff / 8) {
+                    bits = 3;
                 }
+                else if (range_diff <= max_range_diff / 4) {
+                    bits = 2;
+                }
+                else if (range_diff <= max_range_diff / 2) {
+                    bits = 1;
+                }
+                *anyof_flags = (*anyof_flags - 0xC0) << 2 | bits;
+                op = ANYOFHr;
             }
         }
+    }
 
-        return op;
+    return op;
 }
 
 #undef HAS_NONLOCALE_RUNTIME_PROPERTY_DEFINITION

From bb5a3bb755b90e0adfe5375d0626fa3415ff17c4 Mon Sep 17 00:00:00 2001
From: Hugo van der Sanden <hv@crypt.org>
Date: Tue, 1 Jun 2021 15:06:04 +0100
Subject: [PATCH 2/8] regcomp.c: comments

Comment change suggestions from @hvds in PR #18835.
---
 regcomp.c | 30 ++++++++++++++----------------
 1 file changed, 14 insertions(+), 16 deletions(-)

diff --git a/regcomp.c b/regcomp.c
index ed8143818275..156825b224a3 100644
--- a/regcomp.c
+++ b/regcomp.c
@@ -17552,8 +17552,8 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
 
     AV* posix_warnings = NULL;
     const bool do_posix_warnings = ckWARN(WARN_REGEXP);
-    U8 op = ANYOF;    /* The returned node-type, initialized the expected type.
-                       */
+    U8 op = ANYOF;    /* The returned node-type, initialized to the expected
+                         type. */
     U8 anyof_flags = 0;   /* flag bits if the node is an ANYOF-type */
     U32 posixl = 0;       /* bit field of posix classes matched under /l */
 
@@ -19212,7 +19212,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
                                             &anyof_flags, &invert, &ret, flagp);
         RETURN_FAIL_ON_RESTART_FLAGP(flagp);
 
-        /* If optimized to something else, finish up and return */
+        /* If optimized to something else and emitted, clean up and return */
         if (ret >= 0) {
             Set_Node_Offset_Length(REGNODE_p(ret), orig_parse - RExC_start,
                                                    RExC_parse - orig_parse);;
@@ -19223,8 +19223,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
         }
     }
 
-    /* Here didn't optimize, or optimized to a specialized ANYOF node.  If the
-     * former, set the particular type */
+    /* Here are going to emit an ANYOF; set the particular type */
     if (op == ANYOF) {
         if (has_runtime_dependency & HAS_D_RUNTIME_DEPENDENCY) {
             op = ANYOFD;
@@ -19481,7 +19480,7 @@ S_optimize_regclass(pTHX_
     /* Next see if can optimize classes that contain just a few code points
      * into an EXACTish node.  The reason to do this is to let the optimizer
      * join this node with adjacent EXACTish ones, and ANYOF nodes require
-     * runtime conversion to code point from UTF-8.
+     * runtime conversion to code point from UTF-8, which we'd like to avoid.
      *
      * An EXACTFish node can be generated even if not under /i, and vice versa.
      * But care must be taken.  An EXACTFish node has to be such that it only
@@ -19496,9 +19495,9 @@ S_optimize_regclass(pTHX_
      * is no simple fold that includes \X{02BC}, there is a multi-char fold
      * that does, and so the node generated for it must be an EXACTFish one.
      * On the other hand qr/:/i should generate a plain EXACT node since the
-     * colon participates in no fold whatsoever, and having it EXACT tells the
-     * optimizer the target string cannot match unless it has a colon in it.
-         */
+     * colon participates in no fold whatsoever, and having it be EXACT tells
+     * the optimizer the target string cannot match unless it has a colon in
+     * it. */
     if (   ! posixl
         && ! *invert
 
@@ -19567,7 +19566,7 @@ S_optimize_regclass(pTHX_
              * class matches more than one code point, and the lowest code
              * point participates in some fold.  It might be that the other
              * code points are /i equivalent to this one, and hence they would
-             * representable by an EXACTFish node.  Above, we eliminated
+             * be representable by an EXACTFish node.  Above, we eliminated
              * classes that contain too many code points to be EXACTFish, with
              * the test for MAX_FOLD_FROMS
              *
@@ -19583,7 +19582,6 @@ S_optimize_regclass(pTHX_
                                                character, so 2nd exists */
                     && isALPHA_FOLD_EQ(start[0], start[1]))
                 {
-
                     /* Here, is part of an ASCII fold pair */
 
                     if (   ASCII_FOLD_RESTRICTED
@@ -19645,10 +19643,10 @@ S_optimize_regclass(pTHX_
                  * potential bugs.
                  *
                  * To do the general case, we first find the fold of the lowest
-                 * code point (which may be higher than the lowest one), then
-                 * find everything that folds to it.  (The data structure we
-                 * have only maps from the folded code points, so we have to do
-                 * the earlier step.) */
+                 * code point (which may be higher than that lowest unfolded
+                 * one), then find everything that folds to it.  (The data
+                 * structure we have only maps from the folded code points, so
+                 * we have to do the earlier step.) */
 
                 Size_t foldlen;
                 U8 foldbuf[UTF8_MAXBYTES_CASE];
@@ -19703,7 +19701,7 @@ S_optimize_regclass(pTHX_
                      * we aren't under /i and this character participates in a
                      * multi-char fold, we don't optimize into an EXACTFish
                      * node.  So, for each case below we have to check if we
-                     * are folding and if not, if it is not part of a
+                     * are folding, and if not, if it is not part of a
                      * multi-char fold.  */
                     if (start[0] > 255) {    /* Highish code point */
                         if (FOLD || ! _invlist_contains_cp(

From 02815c28e0a4b7c759adaec46add09a213d32014 Mon Sep 17 00:00:00 2001
From: Karl Williamson <khw@cpan.org>
Date: Mon, 31 May 2021 17:12:21 -0600
Subject: [PATCH 3/8] regcomp.c: S_optimize_regclass() return 0 if fail

Based on a comment from @hvds, I think it is better for this function to
return an impossible node value (END) if it didn't find a node to use.
---
 regcomp.c | 22 ++++++++++++++--------
 1 file changed, 14 insertions(+), 8 deletions(-)

diff --git a/regcomp.c b/regcomp.c
index 156825b224a3..06f01e9ab83d 100644
--- a/regcomp.c
+++ b/regcomp.c
@@ -19221,6 +19221,12 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
             SvREFCNT_dec(upper_latin1_only_utf8_matches);
             return ret;
         }
+
+        /* If no optimization was found, an END was returned and we will now
+         * emit an ANYOF */
+        if (op == END) {
+            op = ANYOF;
+        }
     }
 
     /* Here are going to emit an ANYOF; set the particular type */
@@ -19304,15 +19310,15 @@ S_optimize_regclass(pTHX_
      * ANYOF node.  The parameter names are the same as the corresponding
      * variables in S_regclass.
      *
-     * It returns the new op (ANYOF if no optimization found) and sets *ret to
-     * any created regnode.  If the new op is sufficiently like plain ANYOF, it
-     * leaves *ret unchanged for allocation in S_regclass.
+     * It returns the new op (the impossible END one if no optimization found)
+     * and sets *ret to any created regnode.  If the new op is sufficiently
+     * like plain ANYOF, it leaves *ret unchanged for allocation in S_regclass.
      *
      * Certain of the parameters may be updated as a result of the changes
      * herein */
 
-    U8 op = ANYOF; /* The returned node-type, initialized to the unoptimized
-                      one. */
+    U8 op = END;    /* The returned node-type, initialized to an impossible
+                       one.  */
     UV value;
     PERL_UINT_FAST8_T i;
     UV partial_cp_count = 0;
@@ -19443,7 +19449,7 @@ S_optimize_regclass(pTHX_
 
     /* khw can't think of any other possible transformation involving these. */
     if (has_runtime_dependency & HAS_USER_DEFINED_PROPERTY) {
-        return op;
+        return END;
     }
 
     if (! has_runtime_dependency) {
@@ -19761,7 +19767,7 @@ S_optimize_regclass(pTHX_
             }
         }
 
-        if (op != ANYOF) {
+        if (op != END) {
             U8 len;
 
             /* Here, we have calculated what EXACTish node to use.  Have to
@@ -19912,7 +19918,7 @@ S_optimize_regclass(pTHX_
             _invlist_invert(cp_list);
         }
 
-        if (op != ANYOF) {
+        if (op != END) {
             return op;
         }
 

From 45bf64dc4d1c770425c6b07476c5d7536d8cd5fb Mon Sep 17 00:00:00 2001
From: Karl Williamson <khw@cpan.org>
Date: Sat, 2 May 2020 07:06:57 -0600
Subject: [PATCH 4/8] regcomp.c: Consolidate duplicate code

---
 regcomp.c | 34 +++++++++++++++++-----------------
 1 file changed, 17 insertions(+), 17 deletions(-)

diff --git a/regcomp.c b/regcomp.c
index 06f01e9ab83d..e51dd52948a7 100644
--- a/regcomp.c
+++ b/regcomp.c
@@ -19350,15 +19350,11 @@ S_optimize_regclass(pTHX_
      * any run-time dependencies don't matter */
     if (start[0] == 0 && end[0] == UV_MAX) {
         if (*invert) {
-            op = OPFAIL;
-            *ret = reganode(pRExC_state, op, 0);
+            goto return_OPFAIL;
         }
         else {
-            op = SANY;
-            *ret = reg_node(pRExC_state, op);
-            MARK_NAUGHTY(1);
+            goto return_SANY;
         }
-        return op;
     }
 
     /* Similarly, for /l posix classes, if both a class and its complement
@@ -19370,13 +19366,10 @@ S_optimize_regclass(pTHX_
                 && POSIXL_TEST(posixl, namedclass + 1)) /* its complement */
             {
                 if (*invert) {
-                    op = OPFAIL;
-                    *ret = reganode(pRExC_state, op, 0);
+                    goto return_OPFAIL;
                 }
                 else {
-                    op = SANY;
-                    *ret = reg_node(pRExC_state, op);
-                    MARK_NAUGHTY(1);
+                    goto return_SANY;
                 }
                 return op;
             }
@@ -19460,15 +19453,11 @@ S_optimize_regclass(pTHX_
          * properties). */
         if (partial_cp_count == 0) {
             if (*invert) {
-                op = SANY;
-                *ret = reg_node(pRExC_state, op);
+                goto return_SANY;
             }
             else {
-                op = OPFAIL;
-                *ret = reganode(pRExC_state, op, 0);
+                goto return_OPFAIL;
             }
-
-            return op;
         }
 
         /* If matches everything but \n */
@@ -20184,6 +20173,17 @@ S_optimize_regclass(pTHX_
     }
 
     return op;
+
+  return_OPFAIL:
+    op = OPFAIL;
+    *ret = reganode(pRExC_state, op, 0);
+    return op;
+
+  return_SANY:
+    op = SANY;
+    *ret = reg_node(pRExC_state, op);
+    MARK_NAUGHTY(1);
+    return op;
 }
 
 #undef HAS_NONLOCALE_RUNTIME_PROPERTY_DEFINITION

From f974d37db4bb733964fa4bf9cd31c8b4209dbb18 Mon Sep 17 00:00:00 2001
From: Karl Williamson <khw@cpan.org>
Date: Thu, 21 May 2020 11:30:13 -0600
Subject: [PATCH 5/8] regcomp.c: Move some code to within a block

This code is irrelevant unless the condition of the block immediately
before it is TRUE, so move it to within that block.
---
 regcomp.c | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/regcomp.c b/regcomp.c
index e51dd52948a7..6a498751e6c5 100644
--- a/regcomp.c
+++ b/regcomp.c
@@ -19344,16 +19344,16 @@ S_optimize_regclass(pTHX_
             single_range = TRUE;
         }
         invlist_iterfinish(cp_list);
-    }
 
-    /* If we know at compile time that this matches every possible code point,
-     * any run-time dependencies don't matter */
-    if (start[0] == 0 && end[0] == UV_MAX) {
-        if (*invert) {
-            goto return_OPFAIL;
-        }
-        else {
-            goto return_SANY;
+        /* If we know at compile time that this matches every possible code
+         * point, any run-time dependencies don't matter */
+        if (start[0] == 0 && end[0] == UV_MAX) {
+            if (*invert) {
+                goto return_OPFAIL;
+            }
+            else {
+                goto return_SANY;
+            }
         }
     }
 

From d070adf5e69da6b7d28b4aa73421640c9e4b2f45 Mon Sep 17 00:00:00 2001
From: Karl Williamson <khw@cpan.org>
Date: Thu, 21 May 2020 11:39:48 -0600
Subject: [PATCH 6/8] regcomp.c: Add a clearer mnemonic

---
 regcomp.c | 56 +++++++++++++++++++++++++++++--------------------------
 1 file changed, 30 insertions(+), 26 deletions(-)

diff --git a/regcomp.c b/regcomp.c
index 6a498751e6c5..bd6a6fe978c4 100644
--- a/regcomp.c
+++ b/regcomp.c
@@ -19325,6 +19325,7 @@ S_optimize_regclass(pTHX_
     UV start[MAX_FOLD_FROMS+1] = { 0 }; /* +1 for the folded-to char */
     UV   end[MAX_FOLD_FROMS+1] = { 0 };
     bool single_range = FALSE;
+    UV lowest_cp = 0;
 
     PERL_ARGS_ASSERT_OPTIMIZE_REGCLASS;
 
@@ -19355,6 +19356,9 @@ S_optimize_regclass(pTHX_
                 goto return_SANY;
             }
         }
+
+        /* Use a clearer mnemonic for below */
+        lowest_cp = start[0];
     }
 
     /* Similarly, for /l posix classes, if both a class and its complement
@@ -19390,7 +19394,7 @@ S_optimize_regclass(pTHX_
          * outside that range.  (Note that some classes won't match anything
          * outside the range, like [:ascii:]) */
         if (   isSINGLE_BIT_SET(posixl)
-            && (partial_cp_count == 0 || start[0] > 255))
+            && (partial_cp_count == 0 || lowest_cp > 255))
         {
             U8 classnum;
             SV * class_above_latin1 = NULL;
@@ -19510,8 +19514,8 @@ S_optimize_regclass(pTHX_
                  * For code points above 255, we know which can cause problems
                  * by having a potential fold to the Latin1 range. */
                 if (  ! FOLD
-                    || (     start[0] > 255
-                        && ! is_PROBLEMATIC_LOCALE_FOLD_cp(start[0])))
+                    || (     lowest_cp > 255
+                        && ! is_PROBLEMATIC_LOCALE_FOLD_cp(lowest_cp)))
                 {
                     op = EXACTL;
                 }
@@ -19520,9 +19524,9 @@ S_optimize_regclass(pTHX_
                 }
             }
             else if (! FOLD) { /* Not /l and not /i */
-                op = (start[0] < 256) ? EXACT : EXACT_REQ8;
+                op = (lowest_cp < 256) ? EXACT : EXACT_REQ8;
             }
-            else if (start[0] < 256) { /* /i, not /l, and the code point is
+            else if (lowest_cp < 256) { /* /i, not /l, and the code point is
                                           small */
 
                 /* Under /i, it gets a little tricky.  A code point that
@@ -19540,22 +19544,22 @@ S_optimize_regclass(pTHX_
                  * This handles the case of below-255 code points, as we have
                  * an easy look up for those.  The next clause handles the
                  * above-256 one */
-                op = IS_IN_SOME_FOLD_L1(start[0])
+                op = IS_IN_SOME_FOLD_L1(lowest_cp)
                      ? EXACTFU
                      : EXACT;
             }
             else {  /* /i, larger code point.  Since we are under /i, and have
                        just this code point, we know that it can't fold to
                        something else, so PL_InMultiCharFold applies to it */
-                op = (_invlist_contains_cp(PL_InMultiCharFold, start[0]))
+                op = (_invlist_contains_cp(PL_InMultiCharFold, lowest_cp))
                          ? EXACTFU_REQ8
                          : EXACT_REQ8;
                 }
 
-                value = start[0];
+                value = lowest_cp;
         }
         else if (  ! (has_runtime_dependency & ~HAS_D_RUNTIME_DEPENDENCY)
-                 && _invlist_contains_cp(PL_in_some_fold, start[0]))
+                 && _invlist_contains_cp(PL_in_some_fold, lowest_cp))
         {
             /* Here, the only runtime dependency, if any, is from /d, and the
              * class matches more than one code point, and the lowest code
@@ -19568,11 +19572,11 @@ S_optimize_regclass(pTHX_
              * First, special case the ASCII fold pairs, like 'B' and 'b'.  We
              * do this because we have EXACTFAA at our disposal for the ASCII
              * range */
-            if (partial_cp_count == 2 && isASCII(start[0])) {
+            if (partial_cp_count == 2 && isASCII(lowest_cp)) {
 
                 /* The only ASCII characters that participate in folds are
                  * alphabetics */
-                assert(isALPHA(start[0]));
+                assert(isALPHA(lowest_cp));
                 if (   end[0] == start[0]   /* First range is a single
                                                character, so 2nd exists */
                     && isALPHA_FOLD_EQ(start[0], start[1]))
@@ -19580,7 +19584,7 @@ S_optimize_regclass(pTHX_
                     /* Here, is part of an ASCII fold pair */
 
                     if (   ASCII_FOLD_RESTRICTED
-                        || HAS_NONLATIN1_SIMPLE_FOLD_CLOSURE(start[0]))
+                        || HAS_NONLATIN1_SIMPLE_FOLD_CLOSURE(lowest_cp))
                     {
                         /* If the second clause just above was true, it means
                          * we can't be under /i, or else the list would have
@@ -19589,9 +19593,9 @@ S_optimize_regclass(pTHX_
                          * is that folds to these, by using EXACTFAA */
                         op = EXACTFAA;
                     }
-                    else if (HAS_NONLATIN1_FOLD_CLOSURE(start[0])) {
+                    else if (HAS_NONLATIN1_FOLD_CLOSURE(lowest_cp)) {
 
-                        /* Here, there's no simple fold that start[0] is part
+                        /* Here, there's no simple fold that lowest_cp is part
                          * of, but there is a multi-character one.  If we are
                          * not under /i, we want to exclude that possibility;
                          * if under /i, we want to include it */
@@ -19599,19 +19603,19 @@ S_optimize_regclass(pTHX_
                     }
                     else {
 
-                        /* Here, the only possible fold start[0] particpates in
+                        /* Here, the only possible fold lowest_cp particpates in
                          * is with start[1].  /i or not isn't relevant */
                         op = EXACTFU;
                     }
 
-                    value = toFOLD(start[0]);
+                    value = toFOLD(lowest_cp);
                 }
             }
             else if (  ! upper_latin1_only_utf8_matches
                      || (   _invlist_len(upper_latin1_only_utf8_matches) == 2
                          && PL_fold_latin1[
                            invlist_highest(upper_latin1_only_utf8_matches)]
-                         == start[0]))
+                         == lowest_cp))
             {
                 /* Here, the smallest character is non-ascii or there are more
                  * than 2 code points matched by this node.  Also, we either
@@ -19645,7 +19649,7 @@ S_optimize_regclass(pTHX_
 
                 Size_t foldlen;
                 U8 foldbuf[UTF8_MAXBYTES_CASE];
-                UV folded = _to_uni_fold_flags(start[0], foldbuf, &foldlen, 0);
+                UV folded = _to_uni_fold_flags(lowest_cp, foldbuf, &foldlen, 0);
                 U32 first_fold;
                 const U32 * remaining_folds;
                 Size_t folds_to_this_cp_count = _inverse_folds(
@@ -19672,7 +19676,7 @@ S_optimize_regclass(pTHX_
                 /* Having gotten everything that participates in the fold
                  * containing the lowest code point, we turn that into an
                  * inversion list, making sure everything is included. */
-                fold_list = add_cp_to_invlist(fold_list, start[0]);
+                fold_list = add_cp_to_invlist(fold_list, lowest_cp);
                 fold_list = add_cp_to_invlist(fold_list, folded);
                 if (folds_to_this_cp_count > 0) {
                     fold_list = add_cp_to_invlist(fold_list, first_fold);
@@ -19698,7 +19702,7 @@ S_optimize_regclass(pTHX_
                      * node.  So, for each case below we have to check if we
                      * are folding, and if not, if it is not part of a
                      * multi-char fold.  */
-                    if (start[0] > 255) {    /* Highish code point */
+                    if (lowest_cp > 255) {    /* Highish code point */
                         if (FOLD || ! _invlist_contains_cp(
                                                    PL_InMultiCharFold, folded))
                         {
@@ -19720,16 +19724,16 @@ S_optimize_regclass(pTHX_
                         value = folded;
                     }
                     else if (     FOLD
-                             || ! HAS_NONLATIN1_FOLD_CLOSURE(start[0]))
+                             || ! HAS_NONLATIN1_FOLD_CLOSURE(lowest_cp))
                     {
                         if (upper_latin1_only_utf8_matches) {
                             op = EXACTF;
 
                             /* We can't use the fold, as that only matches
                              * under UTF-8 */
-                            value = start[0];
+                            value = lowest_cp;
                         }
-                        else if (     UNLIKELY(start[0] == MICRO_SIGN)
+                        else if (     UNLIKELY(lowest_cp == MICRO_SIGN)
                                  && ! UTF)
                         {   /* EXACTFUP is a special node for this character */
                             op = (ASCII_FOLD_RESTRICTED)
@@ -19738,7 +19742,7 @@ S_optimize_regclass(pTHX_
                             value = MICRO_SIGN;
                         }
                         else if (     ASCII_FOLD_RESTRICTED
-                                 && ! isASCII(start[0]))
+                                 && ! isASCII(lowest_cp))
                         {   /* For ASCII under /iaa, we can use EXACTFU below
                              */
                             op = EXACTFAA;
@@ -20079,7 +20083,7 @@ S_optimize_regclass(pTHX_
 
     /* If didn't find an optimization and there is no need for a bitmap,
      * optimize to indicate that */
-    if (     start[0] >= NUM_ANYOF_CODE_POINTS
+    if (     lowest_cp >= NUM_ANYOF_CODE_POINTS
         && ! LOC
         && ! upper_latin1_only_utf8_matches
         &&   *anyof_flags == 0)
@@ -20092,7 +20096,7 @@ S_optimize_regclass(pTHX_
          * regnode can be used for higher ones, but we can't calculate the code
          * point of those.  IV_MAX suffices though, as it will be a large first
          * byte */
-        Size_t low_len = uvchr_to_utf8(low_utf8, MIN(start[0], IV_MAX))
+        Size_t low_len = uvchr_to_utf8(low_utf8, MIN(lowest_cp, IV_MAX))
                        - low_utf8;
 
         /* We store the lowest possible first byte of the UTF-8 representation,

From 1402c65391ff1641498225dd569d0468e8ae8f86 Mon Sep 17 00:00:00 2001
From: Karl Williamson <khw@cpan.org>
Date: Thu, 21 May 2020 11:40:24 -0600
Subject: [PATCH 7/8] regcomp.c: Save a value instead of re-calling fcn

This variable will be used in future commits in more places, so compute
it just once.
---
 regcomp.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/regcomp.c b/regcomp.c
index bd6a6fe978c4..9b07fb9d0d09 100644
--- a/regcomp.c
+++ b/regcomp.c
@@ -19325,7 +19325,7 @@ S_optimize_regclass(pTHX_
     UV start[MAX_FOLD_FROMS+1] = { 0 }; /* +1 for the folded-to char */
     UV   end[MAX_FOLD_FROMS+1] = { 0 };
     bool single_range = FALSE;
-    UV lowest_cp = 0;
+    UV lowest_cp = 0, highest_cp = 0;
 
     PERL_ARGS_ASSERT_OPTIMIZE_REGCLASS;
 
@@ -19359,6 +19359,8 @@ S_optimize_regclass(pTHX_
 
         /* Use a clearer mnemonic for below */
         lowest_cp = start[0];
+
+        highest_cp = invlist_highest(cp_list);
     }
 
     /* Similarly, for /l posix classes, if both a class and its complement
@@ -19831,7 +19833,7 @@ S_optimize_regclass(pTHX_
         /* If doesn't fit the criteria for ANYOFM, invert and try again.  If
          * that works we will instead later generate an NANYOFM, and invert
          * back when through */
-        if (invlist_highest(cp_list) > max_permissible) {
+        if (highest_cp > max_permissible) {
             _invlist_invert(cp_list);
             inverted = 1;
         }

From 998c08b5c864bcd2da30c5a7fda3c4be3ae1cc2e Mon Sep 17 00:00:00 2001
From: Karl Williamson <khw@cpan.org>
Date: Mon, 31 May 2021 19:44:03 -0600
Subject: [PATCH 8/8] regcomp.c: Initialize a variable

to silence some compilers that were warning
---
 regcomp.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/regcomp.c b/regcomp.c
index 9b07fb9d0d09..e6b2f2f0a759 100644
--- a/regcomp.c
+++ b/regcomp.c
@@ -19319,7 +19319,7 @@ S_optimize_regclass(pTHX_
 
     U8 op = END;    /* The returned node-type, initialized to an impossible
                        one.  */
-    UV value;
+    UV value = 0;
     PERL_UINT_FAST8_T i;
     UV partial_cp_count = 0;
     UV start[MAX_FOLD_FROMS+1] = { 0 }; /* +1 for the folded-to char */