From 70ffa98e0137be702201fac7cd12e41a44425b07 Mon Sep 17 00:00:00 2001 From: Karl Williamson Date: Fri, 26 Sep 2025 04:08:34 -0600 Subject: [PATCH 1/9] toke.c: Change S_scan_word parameter Change it from being an array length, to being a pointer to the ending position of the array. This makes this function consistent with most of the others in this file. It allows us to usually use C_ARRAY_END() to wrap the parameter, which simplifies some expressions. --- embed.fnc | 2 +- proto.h | 4 ++-- toke.c | 47 ++++++++++++++++++++++++++++++----------------- 3 files changed, 33 insertions(+), 20 deletions(-) diff --git a/embed.fnc b/embed.fnc index c5e6b04a5431..0186f1650c1e 100644 --- a/embed.fnc +++ b/embed.fnc @@ -3072,7 +3072,7 @@ Adp |char * |scan_vstring |NN const char *s \ |NN SV *sv EXpx |char * |scan_word |NN char *s \ |NN char *dest \ - |STRLEN destlen \ + |NN char *dest_end \ |int allow_package \ |NN STRLEN *slp Cp |U32 |seed diff --git a/proto.h b/proto.h index ceb3022b65e2..2cc8727a2f45 100644 --- a/proto.h +++ b/proto.h @@ -4247,7 +4247,7 @@ Perl_scan_vstring(pTHX_ const char *s, const char * const e, SV *sv); assert(s); assert(e); assert(sv) #define PERL_ARGS_ASSERT_SCAN_WORD \ - assert(s); assert(dest); assert(slp) + assert(s); assert(dest); assert(dest_end); assert(slp) PERL_CALLCONV U32 Perl_seed(pTHX); @@ -6139,7 +6139,7 @@ PERL_CALLCONV char * Perl_scan_str(pTHX_ char *start, int keep_quoted, int keep_delims, int re_reparse, char **delimp) __attribute__warn_unused_result__; PERL_CALLCONV char * -Perl_scan_word(pTHX_ char *s, char *dest, STRLEN destlen, int allow_package, STRLEN *slp); +Perl_scan_word(pTHX_ char *s, char *dest, char *dest_end, int allow_package, STRLEN *slp); PERL_CALLCONV char * Perl_skipspace_flags(pTHX_ char *s, U32 flags) __attribute__warn_unused_result__; diff --git a/toke.c b/toke.c index 8c7567580841..cb14fcfc4fa4 100644 --- a/toke.c +++ b/toke.c @@ -2277,7 +2277,9 @@ S_force_word(pTHX_ char *start, int token, int check_keyword, int 
allow_pack) if ( isIDFIRST_lazy_if_safe(s, PL_bufend, UTF) || (allow_pack && *s == ':' && s[1] == ':') ) { - s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, allow_pack, &len); + s = scan_word(s, + PL_tokenbuf, C_ARRAY_END(PL_tokenbuf), + allow_pack, &len); if (check_keyword) { char *s2 = PL_tokenbuf; STRLEN len2 = len; @@ -4815,7 +4817,7 @@ S_intuit_method(pTHX_ char *start, SV *ioname, CV *cv) return *s == '(' ? METHCALL : METHCALL0; } - s = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len); + s = scan_word(s, tmpbuf, C_ARRAY_END(tmpbuf), TRUE, &len); /* start is the beginning of the possible filehandle/object, * and s is the end of it * tmpbuf is a copy of it (but with single quotes as double colons) @@ -5453,7 +5455,7 @@ yyl_dollar(pTHX_ char *s) } while (isSPACE(*t)); if (isIDFIRST_lazy_if_safe(t, PL_bufend, UTF)) { STRLEN len; - t = scan_word(t, tmpbuf, sizeof tmpbuf, TRUE, &len); + t = scan_word(t, tmpbuf, C_ARRAY_END(tmpbuf), TRUE, &len); while (isSPACE(*t)) t++; if ( *t == ';' @@ -5486,7 +5488,7 @@ yyl_dollar(pTHX_ char *s) char tmpbuf[sizeof PL_tokenbuf]; int t2; STRLEN len; - scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len); + scan_word(s, tmpbuf, C_ARRAY_END(tmpbuf), TRUE, &len); if ((t2 = keyword(tmpbuf, len, 0))) { /* binary operators exclude handle interpretations */ switch (t2) { @@ -5557,7 +5559,7 @@ yyl_sub(pTHX_ char *s, const int key) { PL_expect = XATTRBLOCK; - d = scan_word(s, tmpbuf, sizeof PL_tokenbuf - 1, TRUE, &len); + d = scan_word(s, tmpbuf, C_ARRAY_END(PL_tokenbuf), TRUE, &len); if (key == KEY_format) format_name = S_newSV_maybe_utf8(aTHX_ s, d - s); *PL_tokenbuf = '&'; @@ -6164,7 +6166,9 @@ yyl_colon(pTHX_ char *s) I32 tmp; SV *sv; STRLEN len; - char *d = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len); + char *d = scan_word(s, + PL_tokenbuf, C_ARRAY_END(PL_tokenbuf), + FALSE, &len); if (isLOWER(*s) && (tmp = keyword(PL_tokenbuf, len, 0))) { if (tmp < 0) tmp = -tmp; switch (tmp) { @@ -6343,7 +6347,8 @@ yyl_leftcurly(pTHX_ char 
*s, const U8 formbrack) } if (d < PL_bufend && isIDFIRST_lazy_if_safe(d, PL_bufend, UTF)) { STRLEN len; - d = scan_word(d, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, + d = scan_word(d, + PL_tokenbuf + 1, C_ARRAY_END(PL_tokenbuf), FALSE, &len); while (d < PL_bufend && SPACE_OR_TAB(*d)) d++; @@ -7161,7 +7166,9 @@ yyl_foreach(pTHX_ char *s) /* skip optional package name, as in "for my abc $x (..)" */ if (UNLIKELY(isIDFIRST_lazy_if_safe(p, PL_bufend, UTF))) { STRLEN len; - p = scan_word(p, PL_tokenbuf, sizeof PL_tokenbuf, TRUE, &len); + p = scan_word(p, + PL_tokenbuf, C_ARRAY_END(PL_tokenbuf), + TRUE, &len); p = skipspace(p); paren_is_valid = FALSE; } @@ -7190,7 +7197,8 @@ yyl_do(pTHX_ char *s, I32 orig_keyword) char *d; STRLEN len; *PL_tokenbuf = '&'; - d = scan_word(s, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, + d = scan_word(s, + PL_tokenbuf + 1, C_ARRAY_END(PL_tokenbuf), 1, &len); if (len && memNEs(PL_tokenbuf+1, len, "CORE") && !keyword(PL_tokenbuf + 1, len, 0)) { @@ -7246,7 +7254,9 @@ yyl_my(pTHX_ char *s, I32 my) s = skipspace(s); if (isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)) { STRLEN len; - s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, TRUE, &len); + s = scan_word(s, + PL_tokenbuf, C_ARRAY_END(PL_tokenbuf), + TRUE, &len); if (memEQs(PL_tokenbuf, len, "sub")) /* my sub ... 
*/ return yyl_sub(aTHX_ s, my); @@ -7720,7 +7730,8 @@ yyl_just_a_word(pTHX_ char *s, STRLEN len, I32 orig_keyword, struct code c) if ((*s == '\'' && FEATURE_APOS_AS_NAME_SEP_IS_ENABLED) || (*s == ':' && s[1] == ':')) { STRLEN morelen; - s = scan_word(s, PL_tokenbuf + len, sizeof PL_tokenbuf - len, + s = scan_word(s, + PL_tokenbuf + len, C_ARRAY_END(PL_tokenbuf), TRUE, &morelen); if (no_op_error) { S_warn_expect_operator(aTHX_ "Bareword",s,FALSE); @@ -8472,7 +8483,9 @@ yyl_word_or_keyword(pTHX_ char *s, STRLEN len, I32 key, I32 orig_keyword, struct s = skipspace(s); if (isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)) { const char *t; - char *d = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len); + char *d = scan_word(s, + PL_tokenbuf, C_ARRAY_END(PL_tokenbuf), + FALSE, &len); for (t=d; isSPACE(*t);) t++; if ( *t && memCHRs("|&*+-=!?:.", *t) && ckWARN_d(WARN_PRECEDENCE) @@ -8905,7 +8918,7 @@ yyl_key_core(pTHX_ char *s, STRLEN len, struct code c) STRLEN olen = len; char *d = s; s += 2; - s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len); + s = scan_word(s, PL_tokenbuf, C_ARRAY_END(PL_tokenbuf), FALSE, &len); if ((*s == ':' && s[1] == ':') || (!(key = keyword(PL_tokenbuf, len, 1)) && *s == '\'' && FEATURE_APOS_AS_NAME_SEP_IS_ENABLED)) @@ -8985,7 +8998,7 @@ yyl_keylookup(pTHX_ char *s, GV *gv) c.gv = gv; PL_bufptr = s; - s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len); + s = scan_word(s, PL_tokenbuf, C_ARRAY_END(PL_tokenbuf), FALSE, &len); /* Some keywords can be followed by any delimiter, including ':' */ anydelim = word_takes_any_delimiter(PL_tokenbuf, len); @@ -10341,12 +10354,12 @@ S_parse_ident(pTHX_ char **s, char **d, char * const e, int allow_package, } char * -Perl_scan_word(pTHX_ char *s, char *dest, STRLEN destlen, int allow_package, STRLEN *slp) +Perl_scan_word(pTHX_ char *s, char *dest, char * dest_end, int allow_package, STRLEN *slp) { PERL_ARGS_ASSERT_SCAN_WORD; char *d = dest; - char * const e = d + destlen - 3; /* 
two-character token, ending NUL */ + char * const e = dest_end - 3; /* two-character token, ending NUL */ bool is_utf8 = cBOOL(UTF); parse_ident(&s, &d, e, allow_package, is_utf8, TRUE); @@ -13815,7 +13828,7 @@ Perl_parse_label(pTHX_ U32 flags) t = s = PL_bufptr; if (!isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)) goto no_label; - t = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &wlen); + t = scan_word(s, PL_tokenbuf, C_ARRAY_END(PL_tokenbuf), FALSE, &wlen); if (word_takes_any_delimiter(s, wlen)) goto no_label; bufptr_pos = s - SvPVX(PL_linestr); From cf8f3804635b6b20a6f40ace88eea62a7700700a Mon Sep 17 00:00:00 2001 From: Karl Williamson Date: Sun, 28 Sep 2025 08:59:42 -0600 Subject: [PATCH 2/9] toke.c: Change S_scan_ident parameter Change it from being an array length, to being a pointer to the ending position of the array. This makes this function consistent with most of the others in this file. It allows us to usually use C_ARRAY_END() to wrap the parameter, which simplifies some expressions. --- embed.fnc | 2 +- proto.h | 4 ++-- toke.c | 20 ++++++++++---------- 3 files changed, 13 insertions(+), 13 deletions(-) diff --git a/embed.fnc b/embed.fnc index 0186f1650c1e..93648c2a90c6 100644 --- a/embed.fnc +++ b/embed.fnc @@ -6110,7 +6110,7 @@ RS |char * |scan_formline |NN char *s RS |char * |scan_heredoc |NN char *s S |char * |scan_ident |NN char *s \ |NN char *dest \ - |STRLEN destlen \ + |NN char *dest_end \ |I32 ck_uni RS |char * |scan_inputsymbol \ |NN char *start diff --git a/proto.h b/proto.h index 2cc8727a2f45..7adfcdc701e9 100644 --- a/proto.h +++ b/proto.h @@ -9474,9 +9474,9 @@ S_scan_heredoc(pTHX_ char *s) assert(s) STATIC char * -S_scan_ident(pTHX_ char *s, char *dest, STRLEN destlen, I32 ck_uni); +S_scan_ident(pTHX_ char *s, char *dest, char *dest_end, I32 ck_uni); # define PERL_ARGS_ASSERT_SCAN_IDENT \ - assert(s); assert(dest) + assert(s); assert(dest); assert(dest_end) STATIC char * S_scan_inputsymbol(pTHX_ char *start) diff --git a/toke.c 
b/toke.c index cb14fcfc4fa4..834f8cc5405b 100644 --- a/toke.c +++ b/toke.c @@ -4617,7 +4617,7 @@ S_intuit_more(pTHX_ char *s, char *e) if (isWORDCHAR_lazy_if_safe(s+1, PL_bufend, UTF)) { int len; char tmpbuf[sizeof PL_tokenbuf * 4]; - scan_ident(s, tmpbuf, sizeof tmpbuf, FALSE); + scan_ident(s, tmpbuf, C_ARRAY_END(tmpbuf), FALSE); len = (int)strlen(tmpbuf); if ( len > 1 && gv_fetchpvn_flags(tmpbuf, @@ -5364,8 +5364,8 @@ yyl_dollar(pTHX_ char *s) || memCHRs("{$:+-@", s[2]))) { PL_tokenbuf[0] = '@'; - s = scan_ident(s + 1, PL_tokenbuf + 1, - sizeof PL_tokenbuf - 1, FALSE); + s = scan_ident(s + 1, PL_tokenbuf + 1, C_ARRAY_END(PL_tokenbuf), + FALSE); S_warn_expect_operator(aTHX_ "Array length", s, POP_OLDBUFPTR); if (!PL_tokenbuf[1]) PREREF(DOLSHARP); @@ -5375,7 +5375,7 @@ yyl_dollar(pTHX_ char *s) } PL_tokenbuf[0] = '$'; - s = scan_ident(s, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, FALSE); + s = scan_ident(s, PL_tokenbuf + 1, C_ARRAY_END(PL_tokenbuf), FALSE); S_warn_expect_operator(aTHX_ "Scalar", s, POP_OLDBUFPTR); if (!PL_tokenbuf[1]) { if (s == PL_bufend) @@ -6038,7 +6038,7 @@ yyl_star(pTHX_ char *s) POSTDEREF(PERLY_STAR); if (PL_expect != XOPERATOR) { - s = scan_ident(s, PL_tokenbuf, sizeof PL_tokenbuf, TRUE); + s = scan_ident(s, PL_tokenbuf, C_ARRAY_END(PL_tokenbuf), TRUE); PL_expect = XOPERATOR; force_ident(PL_tokenbuf, PERLY_STAR); if (!*PL_tokenbuf) @@ -6086,7 +6086,7 @@ yyl_percent(pTHX_ char *s) POSTDEREF(PERLY_PERCENT_SIGN); PL_tokenbuf[0] = '%'; - s = scan_ident(s, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, FALSE); + s = scan_ident(s, PL_tokenbuf + 1, C_ARRAY_END(PL_tokenbuf), FALSE); pl_yylval.ival = 0; if (!PL_tokenbuf[1]) { PREREF(PERLY_PERCENT_SIGN); @@ -6623,7 +6623,7 @@ yyl_ampersand(pTHX_ char *s) } PL_tokenbuf[0] = '&'; - s = scan_ident(s - 1, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, TRUE); + s = scan_ident(s - 1, PL_tokenbuf + 1, C_ARRAY_END(PL_tokenbuf), TRUE); pl_yylval.ival = (OPpENTERSUB_AMPER<<8); if (PL_tokenbuf[1]) @@ -6708,7 +6708,7 @@ 
yyl_snail(pTHX_ char *s) if (PL_expect == XPOSTDEREF) POSTDEREF(PERLY_SNAIL); PL_tokenbuf[0] = '@'; - s = scan_ident(s, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, FALSE); + s = scan_ident(s, PL_tokenbuf + 1, C_ARRAY_END(PL_tokenbuf), FALSE); S_warn_expect_operator(aTHX_ "Array", s, POP_OLDBUFPTR); pl_yylval.ival = 0; if (!PL_tokenbuf[1]) { @@ -10376,13 +10376,13 @@ Perl_scan_word(pTHX_ char *s, char *dest, char * dest_end, int allow_package, ST * specific variable name. */ STATIC char * -S_scan_ident(pTHX_ char *s, char *dest, STRLEN destlen, I32 ck_uni) +S_scan_ident(pTHX_ char *s, char *dest, char *dest_end, I32 ck_uni) { I32 herelines = PL_parser->herelines; SSize_t bracket = -1; char funny = *s++; char *d = dest; - char * const e = d + destlen - 3; /* two-character token, ending NUL */ + char * const e = dest_end - 3; /* two-character token, ending NUL */ bool is_utf8 = cBOOL(UTF); line_t orig_copline = 0, tmp_copline = 0; From 883081749c5212ef4099226e4d8ae0a27b328cc3 Mon Sep 17 00:00:00 2001 From: Karl Williamson Date: Fri, 26 Sep 2025 03:35:44 -0600 Subject: [PATCH 3/9] toke.c: Convert sizeof() calls to use more mnemonic macros The length of a constant literal is STRLENs(). The end position of a C array is C_ARRAY_END(). The number of elements in a C array is C_ARRAY_LENGTH(). --- toke.c | 56 +++++++++++++++++++++++++++++--------------------------- 1 file changed, 29 insertions(+), 27 deletions(-) diff --git a/toke.c b/toke.c index 834f8cc5405b..15b18fd1d794 100644 --- a/toke.c +++ b/toke.c @@ -1908,7 +1908,7 @@ S_incline(pTHX_ const char *s, const char *end) while (SPACE_OR_TAB(*s)) s++; if (memBEGINs(s, (STRLEN) (end - s), "line")) - s += sizeof("line") - 1; + s += STRLENs("line"); else return; if (SPACE_OR_TAB(*s)) @@ -1961,7 +1961,7 @@ S_incline(pTHX_ const char *s, const char *end) char *tmpbuf2; GV *gv2; - if (tmplen2 + 2 <= sizeof smallbuf) + if (tmplen2 + 2 <= C_ARRAY_LENGTH(smallbuf)) tmpbuf2 = smallbuf; else Newx(tmpbuf2, tmplen2 + 2, char); @@ -2284,8 +2284,8 @@ 
S_force_word(pTHX_ char *start, int token, int check_keyword, int allow_pack) char *s2 = PL_tokenbuf; STRLEN len2 = len; if (allow_pack && memBEGINPs(s2, len, "CORE::")) { - s2 += sizeof("CORE::") - 1; - len2 -= sizeof("CORE::") - 1; + s2 += STRLENs("CORE::"); + len2 -= STRLENs("CORE::"); } if (keyword(s2, len2, 0)) return start; @@ -4134,7 +4134,7 @@ S_scan_const(pTHX_ char *start) char hex_string[4]; int len = my_snprintf(hex_string, - sizeof(hex_string), + C_ARRAY_LENGTH(hex_string), "%02X.", /* The regex compiler is @@ -4142,7 +4142,7 @@ S_scan_const(pTHX_ char *start) * native */ NATIVE_TO_LATIN1(*str)); PERL_MY_SNPRINTF_POST_GUARD(len, - sizeof(hex_string)); + C_ARRAY_LENGTH(hex_string)); Copy(hex_string, d, 3, char); d += 3; str++; @@ -4169,7 +4169,8 @@ S_scan_const(pTHX_ char *start) /* Convert first code point to Unicode hex, * including the boiler plate before it. */ output_length = - my_snprintf(hex_string, sizeof(hex_string), + my_snprintf(hex_string, + C_ARRAY_LENGTH(hex_string), "\\N{U+%X", (unsigned int) NATIVE_TO_UNI(uv)); @@ -4192,7 +4193,7 @@ S_scan_const(pTHX_ char *start) &char_length); output_length = my_snprintf(hex_string, - sizeof(hex_string), + C_ARRAY_LENGTH(hex_string), ".%X", (unsigned int) NATIVE_TO_UNI(uv)); @@ -4616,7 +4617,7 @@ S_intuit_more(pTHX_ char *s, char *e) * strongly suspect this isn't a character class */ if (isWORDCHAR_lazy_if_safe(s+1, PL_bufend, UTF)) { int len; - char tmpbuf[sizeof PL_tokenbuf * 4]; + char tmpbuf[ C_ARRAY_LENGTH(PL_tokenbuf) * 4 ]; scan_ident(s, tmpbuf, C_ARRAY_END(tmpbuf), FALSE); len = (int)strlen(tmpbuf); if ( len > 1 @@ -4782,7 +4783,7 @@ S_intuit_method(pTHX_ char *start, SV *ioname, CV *cv) return 0; char *s = start + (*start == '$'); - char tmpbuf[sizeof PL_tokenbuf]; + char tmpbuf[C_ARRAY_LENGTH(PL_tokenbuf)]; STRLEN len; GV* indirgv; /* Mustn't actually add anything to a symbol table. 
@@ -5261,7 +5262,7 @@ yyl_sigvar(pTHX_ char *s) char *dest = PL_tokenbuf + 1; /* read var name, including sigil, into PL_tokenbuf */ PL_tokenbuf[0] = sigil; - parse_ident(&s, &dest, dest + sizeof(PL_tokenbuf) - 1, + parse_ident(&s, &dest, C_ARRAY_END(PL_tokenbuf), 0, cBOOL(UTF), FALSE); *dest = '\0'; assert(PL_tokenbuf[1]); /* we have a variable name */ @@ -5449,7 +5450,7 @@ yyl_dollar(pTHX_ char *s) && (t = (char *) memchr(s, '}', PL_bufend - s)) && (t = (char *) memchr(t, '=', PL_bufend - t))) { - char tmpbuf[sizeof PL_tokenbuf]; + char tmpbuf[C_ARRAY_LENGTH(PL_tokenbuf)]; do { t++; } while (isSPACE(*t)); @@ -5485,7 +5486,7 @@ yyl_dollar(pTHX_ char *s) PL_expect = XTERM; /* e.g. print $fh &sub */ } else if (isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)) { - char tmpbuf[sizeof PL_tokenbuf]; + char tmpbuf[C_ARRAY_LENGTH(PL_tokenbuf)]; int t2; STRLEN len; scan_word(s, tmpbuf, C_ARRAY_END(tmpbuf), TRUE, &len); @@ -7086,7 +7087,7 @@ yyl_require(pTHX_ char *s, I32 orig_keyword) *PL_tokenbuf = '\0'; s = force_word(s,BAREWORD,TRUE,TRUE); if (isIDFIRST_lazy_if_safe(PL_tokenbuf, - PL_tokenbuf + sizeof(PL_tokenbuf), + C_ARRAY_END(PL_tokenbuf), UTF)) { gv_stashpvn(PL_tokenbuf, strlen(PL_tokenbuf), @@ -7268,8 +7269,9 @@ yyl_my(pTHX_ char *s, I32 my) char tmpbuf[1024]; int i; PL_bufptr = s; - i = my_snprintf(tmpbuf, sizeof(tmpbuf), "No such class %.1000s", PL_tokenbuf); - PERL_MY_SNPRINTF_POST_GUARD(i, sizeof(tmpbuf)); + i = my_snprintf(tmpbuf, C_ARRAY_LENGTH(tmpbuf), + "No such class %.1000s", PL_tokenbuf); + PERL_MY_SNPRINTF_POST_GUARD(i, C_ARRAY_LENGTH(tmpbuf)); yyerror_pv(tmpbuf, UTF ? 
SVf_UTF8 : 0); } } @@ -7404,8 +7406,8 @@ yyl_fake_eof(pTHX_ U32 fake_eof, bool bof, char *s) #ifdef ALTERNATE_SHEBANG else { static char const as[] = ALTERNATE_SHEBANG; - if (*s == as[0] && strnEQ(s, as, sizeof(as) - 1)) - d = s + (sizeof(as) - 1); + if (*s == as[0] && strnEQ(s, as, C_ARRAY_LENGTH(as) - 1)) + d = s + (C_ARRAY_LENGTH(as) - 1); } #endif /* ALTERNATE_SHEBANG */ } @@ -9082,7 +9084,7 @@ yyl_keylookup(pTHX_ char *s, GV *gv) /* Check for lexical sub */ if (PL_expect != XOPERATOR) { - char tmpbuf[sizeof PL_tokenbuf + 1]; + char tmpbuf[C_ARRAY_LENGTH(PL_tokenbuf) + 1]; *tmpbuf = '&'; Copy(PL_tokenbuf, tmpbuf+1, len, char); c.off = pad_findmy_pvn(tmpbuf, len+1, 0); @@ -9155,7 +9157,7 @@ yyl_try(pTHX_ char *s) STRLEN len; /* Copy the longest sequence of isPLUGINFIX() chars into PL_tokenbuf */ - while(s_end < PL_bufend && d < PL_tokenbuf+sizeof(PL_tokenbuf)-1 && isPLUGINFIX(*s_end)) + while(s_end < PL_bufend && d < C_ARRAY_END(PL_tokenbuf)-1 && isPLUGINFIX(*s_end)) *d++ = *s_end++; *d = '\0'; @@ -10993,7 +10995,7 @@ S_scan_heredoc(pTHX_ char *s) s += 2; d = PL_tokenbuf + 1; - e = PL_tokenbuf + sizeof PL_tokenbuf - 1; + e = C_ARRAY_END(PL_tokenbuf) - 1; *PL_tokenbuf = '\n'; peek = s; @@ -11037,7 +11039,7 @@ S_scan_heredoc(pTHX_ char *s) d += len; } - if (d >= PL_tokenbuf + sizeof PL_tokenbuf - 1) + if (d >= C_ARRAY_END(PL_tokenbuf) - 1) croak("Delimiter for here document is too long"); *d++ = '\n'; @@ -11404,7 +11406,7 @@ S_scan_heredoc(pTHX_ char *s) Safefree(indent); SvREFCNT_dec(tmpstr); CopLINE_set(PL_curcop, origline); - missingterm(PL_tokenbuf + 1, sizeof(PL_tokenbuf) - 1); + missingterm(PL_tokenbuf + 1, C_ARRAY_LENGTH(PL_tokenbuf) - 1); } @@ -11434,7 +11436,7 @@ S_scan_inputsymbol(pTHX_ char *start) I32 len; bool nomagicopen = FALSE; char *d = PL_tokenbuf; /* start of temp holding space */ - const char * const e = PL_tokenbuf + sizeof PL_tokenbuf; /* end of temp holding space 
*/ PERL_ARGS_ASSERT_SCAN_INPUTSYMBOL; @@ -11454,7 +11456,7 @@ S_scan_inputsymbol(pTHX_ char *start) or if it didn't end, or if we see a newline */ - if (len >= (I32)sizeof PL_tokenbuf) + if (len >= (I32) C_ARRAY_LENGTH(PL_tokenbuf)) croak("Excessively long <> operator"); if (s >= end) croak("Unterminated <> operator"); @@ -12394,7 +12396,7 @@ Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp) case '6': case '7': case '8': case '9': case '.': decimal: d = PL_tokenbuf; - e = PL_tokenbuf + sizeof PL_tokenbuf - 6; /* room for various punctuation */ + e = C_ARRAY_END(PL_tokenbuf) - 6; /* room for various punctuation */ floatit = FALSE; if (hexfp) { floatit = TRUE; @@ -13095,7 +13097,7 @@ S_swallow_bom(pTHX_ U8 *s) #ifdef DEBUGGING if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-8 script encoding (BOM)\n"); #endif - s += sizeof(BOM_UTF8) - 1; /* UTF-8 */ + s += C_ARRAY_LENGTH(BOM_UTF8) - 1; /* UTF-8 */ } break; } From 78c18854562ec4d1721b80463520c1ee83830f23 Mon Sep 17 00:00:00 2001 From: Karl Williamson Date: Tue, 30 Sep 2025 06:54:12 -0600 Subject: [PATCH 4/9] toke.c: Change name of static function 'uni' commonly is short for Unicode. Here, it was short for 'unary', which I found highly confusing. This also changes the name of a formal parameter to also not be 'uni' when 'unary' is meant. --- toke.c | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/toke.c b/toke.c index 15b18fd1d794..74fab905be3a 100644 --- a/toke.c +++ b/toke.c @@ -2070,7 +2070,7 @@ Perl_skipspace_flags(pTHX_ char *s, U32 flags) } /* - * S_check_uni + * S_check_unary * Check the unary operators to ensure there's no ambiguity in how they're * used. 
An ambiguous piece of code would be: * rand + 5 @@ -2079,7 +2079,7 @@ Perl_skipspace_flags(pTHX_ char *s, U32 flags) */ STATIC void -S_check_uni(pTHX) +S_check_unary(pTHX) { const char *s; @@ -5998,7 +5998,7 @@ yyl_hyphen(pTHX_ char *s) } else { if (isSPACE(*s) || !isSPACE(*PL_bufptr)) - check_uni(); + check_unary(); OPERATOR(PERLY_MINUS); /* unary minus */ } } @@ -6027,7 +6027,7 @@ yyl_plus(pTHX_ char *s) } else { if (isSPACE(*s) || !isSPACE(*PL_bufptr)) - check_uni(); + check_unary(); OPERATOR(PERLY_PLUS); } } @@ -6762,7 +6762,7 @@ yyl_slash(pTHX_ char *s) || memNE(PL_last_uni, "study", 5) || isWORDCHAR_lazy_if_safe(PL_last_uni+5, PL_bufend, UTF) )) - check_uni(); + check_unary(); s = scan_pat(s,OP_MATCH); TERM(sublex_start()); } @@ -6853,7 +6853,7 @@ yyl_leftpointy(pTHX_ char *s) if (PL_expect != XOPERATOR) { if (s[1] != '<' && !memchr(s,'>', PL_bufend - s)) - check_uni(); + check_unary(); if (s[1] == '<' && s[2] != '>') s = scan_heredoc(s); else @@ -8901,7 +8901,7 @@ yyl_word_or_keyword(pTHX_ char *s, STRLEN len, I32 key, I32 orig_keyword, struct } Mop(OP_REPEAT); } - check_uni(); + check_unary(); return yyl_just_a_word(aTHX_ s, len, orig_keyword, c); case KEY_xor: @@ -10378,7 +10378,7 @@ Perl_scan_word(pTHX_ char *s, char *dest, char * dest_end, int allow_package, ST * specific variable name. */ STATIC char * -S_scan_ident(pTHX_ char *s, char *dest, char *dest_end, I32 ck_uni) +S_scan_ident(pTHX_ char *s, char *dest, char *dest_end, I32 chk_unary) { I32 herelines = PL_parser->herelines; SSize_t bracket = -1; @@ -10511,8 +10511,8 @@ S_scan_ident(pTHX_ char *s, char *dest, char *dest_end, I32 ck_uni) /* Warn about ambiguous code after unary operators if {...} notation isn't used. There's no difference in ambiguity; it's merely a heuristic about when not to warn. 
*/ - else if (ck_uni && bracket == -1) - check_uni(); + else if (chk_unary && bracket == -1) + check_unary(); if (bracket != -1) { bool skip; From 163e52c6715a9801119022ec660871fddebed13b Mon Sep 17 00:00:00 2001 From: Karl Williamson Date: Mon, 29 Sep 2025 09:35:51 -0600 Subject: [PATCH 5/9] S_scan_ident: Convert parameter to bool All calls to it set it to TRUE or FALSE --- embed.fnc | 4 ++-- embed.h | 2 +- proto.h | 6 +++--- toke.c | 6 +++--- 4 files changed, 9 insertions(+), 9 deletions(-) diff --git a/embed.fnc b/embed.fnc index 93648c2a90c6..d55ad054a0d3 100644 --- a/embed.fnc +++ b/embed.fnc @@ -6054,7 +6054,7 @@ S |int |ao |int toketype S |void |checkcomma |NN const char *s \ |NN const char *name \ |NN const char *what -S |void |check_uni +S |void |check_unary RS |char * |filter_gets |NN SV *sv \ |STRLEN append RS |HV * |find_in_my_stash \ @@ -6111,7 +6111,7 @@ RS |char * |scan_heredoc |NN char *s S |char * |scan_ident |NN char *s \ |NN char *dest \ |NN char *dest_end \ - |I32 ck_uni + |bool chk_unary RS |char * |scan_inputsymbol \ |NN char *start RS |char * |scan_pat |NN char *start \ diff --git a/embed.h b/embed.h index 8ac7210a5d76..2e59991a8f65 100644 --- a/embed.h +++ b/embed.h @@ -1671,7 +1671,7 @@ # endif # if defined(PERL_IN_TOKE_C) # define ao(a) S_ao(aTHX_ a) -# define check_uni() S_check_uni(aTHX) +# define check_unary() S_check_unary(aTHX) # define checkcomma(a,b,c) S_checkcomma(aTHX_ a,b,c) # define filter_gets(a,b) S_filter_gets(aTHX_ a,b) # define find_in_my_stash(a,b) S_find_in_my_stash(aTHX_ a,b) diff --git a/proto.h b/proto.h index 7adfcdc701e9..f1b5f7ca7df8 100644 --- a/proto.h +++ b/proto.h @@ -9362,8 +9362,8 @@ S_ao(pTHX_ int toketype); # define PERL_ARGS_ASSERT_AO STATIC void -S_check_uni(pTHX); -# define PERL_ARGS_ASSERT_CHECK_UNI +S_check_unary(pTHX); +# define PERL_ARGS_ASSERT_CHECK_UNARY STATIC void S_checkcomma(pTHX_ const char *s, const char *name, const char *what); @@ -9474,7 +9474,7 @@ S_scan_heredoc(pTHX_ char *s) assert(s) 
STATIC char * -S_scan_ident(pTHX_ char *s, char *dest, char *dest_end, I32 ck_uni); +S_scan_ident(pTHX_ char *s, char *dest, char *dest_end, bool chk_unary); # define PERL_ARGS_ASSERT_SCAN_IDENT \ assert(s); assert(dest); assert(dest_end) diff --git a/toke.c b/toke.c index 74fab905be3a..86588ad73e46 100644 --- a/toke.c +++ b/toke.c @@ -10378,8 +10378,10 @@ Perl_scan_word(pTHX_ char *s, char *dest, char * dest_end, int allow_package, ST * specific variable name. */ STATIC char * -S_scan_ident(pTHX_ char *s, char *dest, char *dest_end, I32 chk_unary) +S_scan_ident(pTHX_ char *s, char *dest, char *dest_end, bool chk_unary) { + PERL_ARGS_ASSERT_SCAN_IDENT; + I32 herelines = PL_parser->herelines; SSize_t bracket = -1; char funny = *s++; @@ -10388,8 +10390,6 @@ S_scan_ident(pTHX_ char *s, char *dest, char *dest_end, I32 chk_unary) bool is_utf8 = cBOOL(UTF); line_t orig_copline = 0, tmp_copline = 0; - PERL_ARGS_ASSERT_SCAN_IDENT; - if (isSPACE(*s) || !*s) s = skipspace(s); if (isDIGIT(*s)) { /* handle $0 and $1 $2 and $10 and etc */ From e5e52a1d48aa6a770f7d4beacf9930228a27d777 Mon Sep 17 00:00:00 2001 From: Karl Williamson Date: Thu, 2 Oct 2025 14:58:14 -0600 Subject: [PATCH 6/9] toke.c: strlen returns size_t, not int --- toke.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/toke.c b/toke.c index 86588ad73e46..c3824345c8f0 100644 --- a/toke.c +++ b/toke.c @@ -4616,10 +4616,10 @@ S_intuit_more(pTHX_ char *s, char *e) * identifier already found in the program by that name. 
If so, * strongly suspect this isn't a character class */ if (isWORDCHAR_lazy_if_safe(s+1, PL_bufend, UTF)) { - int len; + Size_t len; char tmpbuf[ C_ARRAY_LENGTH(PL_tokenbuf) * 4 ]; scan_ident(s, tmpbuf, C_ARRAY_END(tmpbuf), FALSE); - len = (int)strlen(tmpbuf); + len = strlen(tmpbuf); if ( len > 1 && gv_fetchpvn_flags(tmpbuf, len, From dfb62284df24a5ddb1b9b5351a478e94123a8023 Mon Sep 17 00:00:00 2001 From: Karl Williamson Date: Thu, 2 Oct 2025 15:36:49 -0600 Subject: [PATCH 7/9] toke.c: Convert for(;;) to simpler while() isSPACE only matches single-byte characters; no need to be concerned with UTF-8ness. --- toke.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/toke.c b/toke.c index c3824345c8f0..21d1ceab005a 100644 --- a/toke.c +++ b/toke.c @@ -701,13 +701,11 @@ S_warn_expect_operator(pTHX_ const char *const what, char *s, I32 pop_oldbufptr) } else if (PL_oldoldbufptr) { /* yyerror (via yywarn) would do this itself, so we should too */ - const char *t; - for (t = PL_oldoldbufptr; - t < PL_bufptr && isSPACE(*t); - t += UTF ? UTF8SKIP(t) : 1) - { - NOOP; + const char *t = PL_oldoldbufptr; + while (t < PL_bufptr && isSPACE(*t)) { + t++; } + /* see if we can identify the cause of the warning */ if (isIDFIRST_lazy_if_safe(t,PL_bufend,UTF)) { From 87570a95a77af2031a4d4414548a6f4fbeec8059 Mon Sep 17 00:00:00 2001 From: Karl Williamson Date: Thu, 2 Oct 2025 16:06:49 -0600 Subject: [PATCH 8/9] toke.c: Avoid a loop iteration By using the output of the first macro, and skipping ahead in the parse string, we avoid reparsing the same bytes. 
--- toke.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/toke.c b/toke.c index 21d1ceab005a..b58817103ebe 100644 --- a/toke.c +++ b/toke.c @@ -707,9 +707,10 @@ S_warn_expect_operator(pTHX_ const char *const what, char *s, I32 pop_oldbufptr) } /* see if we can identify the cause of the warning */ - if (isIDFIRST_lazy_if_safe(t,PL_bufend,UTF)) - { - const char *t_start= t; + Size_t advance; + if ((advance = isIDFIRST_lazy_if_safe(t, PL_bufend, UTF))) { + const char *t_start = t; + t += advance; for ( ; (isWORDCHAR_lazy_if_safe(t, PL_bufend, UTF) || *t == ':'); t += UTF ? UTF8SKIP(t) : 1) From b96b189421d2f37c1b7160b2096214337966a49c Mon Sep 17 00:00:00 2001 From: Karl Williamson Date: Thu, 2 Oct 2025 18:11:57 -0600 Subject: [PATCH 9/9] toke.c: Replace for() with simpler do while() It is a bit simpler --- toke.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/toke.c b/toke.c index b58817103ebe..439a4154ffd6 100644 --- a/toke.c +++ b/toke.c @@ -710,13 +710,14 @@ S_warn_expect_operator(pTHX_ const char *const what, char *s, I32 pop_oldbufptr) Size_t advance; if ((advance = isIDFIRST_lazy_if_safe(t, PL_bufend, UTF))) { const char *t_start = t; - t += advance; - for ( ; - (isWORDCHAR_lazy_if_safe(t, PL_bufend, UTF) || *t == ':'); - t += UTF ? UTF8SKIP(t) : 1) - { - NOOP; + do { + t += advance; } + while ( (advance = (*t == ':')) + || (advance = isWORDCHAR_lazy_if_safe((U8 *) t, + (U8 *) PL_bufend, + UTF))); + if (t < PL_bufptr && isSPACE(*t)) { has_more = TRUE; sv_catpvf( message,