Skip to content

Commit

Permalink
handy.h: Set macro to false if can't ever be true
Browse files Browse the repository at this point in the history
It's unlikely that perl will be compiled without the LC_CTYPE locale
category being enabled.  But if it isn't, there is no sense in having
per-interpreter variables for various conditions in it, and no sense
having code that tests those variables.

This commit changes a macro to always yield 'false' when this is
disabled, adds a new similar macro, and changes some occurrences that
test for a variable to use the macros instead of the variables.  That
way the compiler knows these two conditions can never be true.
  • Loading branch information
khwilliamson committed Oct 10, 2022
1 parent 7a615b7 commit f41910b
Show file tree
Hide file tree
Showing 7 changed files with 26 additions and 19 deletions.
8 changes: 7 additions & 1 deletion handy.h
Original file line number Diff line number Diff line change
Expand Up @@ -1880,7 +1880,13 @@ END_EXTERN_C
#define toUPPER_LATIN1_MOD(c) ((! FITS_IN_8_BITS(c)) \
? (c) \
: PL_mod_latin1_uc[ (U8) (c) ])
#define IN_UTF8_CTYPE_LOCALE PL_in_utf8_CTYPE_locale
#ifdef USE_LOCALE_CTYPE
# define IN_UTF8_CTYPE_LOCALE PL_in_utf8_CTYPE_locale
# define IN_UTF8_TURKIC_LOCALE PL_in_utf8_turkic_locale
#else
# define IN_UTF8_CTYPE_LOCALE false
# define IN_UTF8_TURKIC_LOCALE false
#endif

/* Use foo_LC_uvchr() instead of these for beyond the Latin1 range */

Expand Down
3 changes: 1 addition & 2 deletions intrpvar.h
Original file line number Diff line number Diff line change
Expand Up @@ -391,10 +391,9 @@ PERLVARI(I, locale_mutex_depth, int, 0) /* Emulate general semaphore */

#ifdef USE_LOCALE_CTYPE
PERLVAR(I, warn_locale, SV *)
#endif

PERLVAR(I, in_utf8_CTYPE_locale, bool)
PERLVAR(I, in_utf8_turkic_locale, bool)
#endif

PERLVARA(I, colors,6, char *) /* values from PERL_RE_COLORS env var */

Expand Down
2 changes: 2 additions & 0 deletions makedef.pl
Original file line number Diff line number Diff line change
Expand Up @@ -557,6 +557,8 @@ sub readvar {
unless ($define{USE_LOCALE_CTYPE}) {
++$skip{$_} foreach qw(
PL_ctype_name
PL_in_utf8_CTYPE_locale
PL_in_utf8_turkic_locale
);
}

Expand Down
4 changes: 2 additions & 2 deletions perl.h
Original file line number Diff line number Diff line change
Expand Up @@ -6918,7 +6918,7 @@ the plain locale pragma without a parameter (S<C<use locale>>) is in effect.
* string, and an end position which it won't try to read past */
# define _CHECK_AND_OUTPUT_WIDE_LOCALE_CP_MSG(cp) \
STMT_START { \
if (! PL_in_utf8_CTYPE_locale && ckWARN(WARN_LOCALE)) { \
if (! IN_UTF8_CTYPE_LOCALE && ckWARN(WARN_LOCALE)) { \
Perl_warner(aTHX_ packWARN(WARN_LOCALE), \
"Wide character (U+%" UVXf ") in %s",\
(UV) cp, OP_DESC(PL_op)); \
Expand All @@ -6927,7 +6927,7 @@ the plain locale pragma without a parameter (S<C<use locale>>) is in effect.

# define _CHECK_AND_OUTPUT_WIDE_LOCALE_UTF8_MSG(s, send) \
STMT_START { /* Check if to warn before doing the conversion work */\
if (! PL_in_utf8_CTYPE_locale && ckWARN(WARN_LOCALE)) { \
if (! IN_UTF8_CTYPE_LOCALE && ckWARN(WARN_LOCALE)) { \
UV cp = utf8_to_uvchr_buf((U8 *) (s), (U8 *) (send), NULL); \
Perl_warner(aTHX_ packWARN(WARN_LOCALE), \
"Wide character (U+%" UVXf ") in %s", \
Expand Down
16 changes: 8 additions & 8 deletions pp.c
Original file line number Diff line number Diff line change
Expand Up @@ -3846,7 +3846,7 @@ PP(pp_ucfirst)
* call to lowercase above has handled this. But SpecialCasing.txt
* says we are supposed to remove the COMBINING DOT ABOVE. We can
* tell if we have this situation if I ==> i in a turkic locale. */
if ( UNLIKELY(PL_in_utf8_turkic_locale)
if ( UNLIKELY(IN_UTF8_TURKIC_LOCALE)
&& IN_LC_RUNTIME(LC_CTYPE)
&& (UNLIKELY(*s == 'I' && tmpbuf[0] == 'i')))
{
Expand Down Expand Up @@ -3890,7 +3890,7 @@ PP(pp_ucfirst)
#ifdef USE_LOCALE_CTYPE

if (IN_LC_RUNTIME(LC_CTYPE)) {
if ( UNLIKELY(PL_in_utf8_turkic_locale)
if ( UNLIKELY(IN_UTF8_TURKIC_LOCALE)
&& ( (op_type == OP_LCFIRST && UNLIKELY(*s == 'I'))
|| (op_type == OP_UCFIRST && UNLIKELY(*s == 'i'))))
{
Expand Down Expand Up @@ -4292,7 +4292,7 @@ PP(pp_uc)

#ifdef USE_LOCALE_CTYPE

&& (LIKELY( ! PL_in_utf8_turkic_locale
&& (LIKELY( ! IN_UTF8_TURKIC_LOCALE
|| ! IN_LC_RUNTIME(LC_CTYPE))
|| *s != 'i')
#endif
Expand Down Expand Up @@ -4398,7 +4398,7 @@ PP(pp_uc)
* its own loop */

#ifdef USE_LOCALE_CTYPE
if ( UNLIKELY(PL_in_utf8_turkic_locale)
if ( UNLIKELY(IN_UTF8_TURKIC_LOCALE)
&& UNLIKELY(IN_LC_RUNTIME(LC_CTYPE)))
{
for (; s < send; s++) {
Expand Down Expand Up @@ -4464,7 +4464,7 @@ PP(pp_lc)
#ifdef USE_LOCALE_CTYPE

&& ( LIKELY(! IN_LC_RUNTIME(LC_CTYPE))
|| LIKELY(! PL_in_utf8_turkic_locale))
|| LIKELY(! IN_UTF8_TURKIC_LOCALE))

#endif

Expand Down Expand Up @@ -4500,7 +4500,7 @@ PP(pp_lc)

/* Lowercasing in a Turkic locale can cause non-UTF-8 to need to become
* UTF-8 for the single case of the character 'I' */
if ( UNLIKELY(PL_in_utf8_turkic_locale)
if ( UNLIKELY(IN_UTF8_TURKIC_LOCALE)
&& ! DO_UTF8(source)
&& (next_I = (U8 *) memchr(s, 'I', len)))
{
Expand Down Expand Up @@ -4557,7 +4557,7 @@ PP(pp_lc)
* and if so, do it. We know that there is a DOT because
* _toLOWER_utf8_flags() wouldn't have returned 'i' unless there
* was one in a proper position. */
if ( UNLIKELY(PL_in_utf8_turkic_locale)
if ( UNLIKELY(IN_UTF8_TURKIC_LOCALE)
&& IN_LC_RUNTIME(LC_CTYPE))
{
if ( UNLIKELY(remove_dot_above)
Expand Down Expand Up @@ -4849,7 +4849,7 @@ PP(pp_fc)
for (; s < send; d++, s++) {
if ( UNLIKELY(*s == MICRO_SIGN)
#ifdef USE_LOCALE_CTYPE
|| ( UNLIKELY(PL_in_utf8_turkic_locale)
|| ( UNLIKELY(IN_UTF8_TURKIC_LOCALE)
&& UNLIKELY(IN_LC_RUNTIME(LC_CTYPE))
&& UNLIKELY(*s == 'I'))
#endif
Expand Down
6 changes: 3 additions & 3 deletions regexec.c
Original file line number Diff line number Diff line change
Expand Up @@ -4623,7 +4623,7 @@ S_setup_EXACTISH_ST(pTHX_ const regnode * const text_node,
if ( (op == EXACTF && utf8_target)
|| (op == EXACTFL && IN_UTF8_CTYPE_LOCALE))
{
if (op == EXACTFL && PL_in_utf8_turkic_locale) {
if (op == EXACTFL && IN_UTF8_TURKIC_LOCALE) {
op = TURKISH;
}
else {
Expand Down Expand Up @@ -10823,7 +10823,7 @@ S_reginclass(pTHX_ regexp * const prog, const regnode * const n, const U8* const
}
else /* Failing that, hardcode the two tests for a Turkic
locale */
if ( UNLIKELY(PL_in_utf8_turkic_locale)
if ( UNLIKELY(IN_UTF8_TURKIC_LOCALE)
&& isALPHA_FOLD_EQ(*p, 'i'))
{
/* Turkish locales have these hard-coded rules
Expand Down Expand Up @@ -10854,7 +10854,7 @@ S_reginclass(pTHX_ regexp * const prog, const regnode * const n, const U8* const
/* In a Turkic locale under folding, hard-code the I i case pair
* matches; these wouldn't have the ANYOF_HAS_EXTRA_RUNTIME_MATCHES
* flag set unless [Ii] were match possibilities */
if (UNLIKELY(PL_in_utf8_turkic_locale) && ! match) {
if (UNLIKELY(IN_UTF8_TURKIC_LOCALE) && ! match) {
if (utf8_target) {
if (c == LATIN_CAPITAL_LETTER_I_WITH_DOT_ABOVE) {
if (ANYOF_BITMAP_TEST(n, 'i')) {
Expand Down
6 changes: 3 additions & 3 deletions utf8.c
Original file line number Diff line number Diff line change
Expand Up @@ -3167,7 +3167,7 @@ Perl__to_uni_fold_flags(pTHX_ UV c, U8* p, STRLEN *lenp, U8 flags)
/* Treat a non-Turkic UTF-8 locale as not being in locale at all,
* except for potentially warning */
CHECK_AND_WARN_PROBLEMATIC_LOCALE_;
if (IN_UTF8_CTYPE_LOCALE && ! PL_in_utf8_turkic_locale) {
if (IN_UTF8_CTYPE_LOCALE && ! IN_UTF8_TURKIC_LOCALE) {
flags &= ~FOLD_FLAGS_LOCALE;
}
else {
Expand Down Expand Up @@ -3720,7 +3720,7 @@ S_turkic_uc(pTHX_ const U8 * const p, const U8 * const e,
if (flags & (locale_flags)) { \
CHECK_AND_WARN_PROBLEMATIC_LOCALE_; \
if (IN_UTF8_CTYPE_LOCALE) { \
if (UNLIKELY(PL_in_utf8_turkic_locale)) { \
if (UNLIKELY(IN_UTF8_TURKIC_LOCALE)) { \
UV ret = turkic(p, e, ustrp, lenp); \
if (ret) return ret; \
} \
Expand Down Expand Up @@ -4299,7 +4299,7 @@ Perl_foldEQ_utf8_flags(pTHX_ const char *s1, char **pe1, UV l1, bool u1,

if (flags & FOLDEQ_LOCALE) {
if (IN_UTF8_CTYPE_LOCALE) {
if (UNLIKELY(PL_in_utf8_turkic_locale)) {
if (UNLIKELY(IN_UTF8_TURKIC_LOCALE)) {
flags_for_folder |= FOLD_FLAGS_LOCALE;
}
else {
Expand Down

0 comments on commit f41910b

Please sign in to comment.