From 25f56ee4cb4a318d7e23d5665e3ce3045f9be7be Mon Sep 17 00:00:00 2001 From: Karl Williamson Date: Mon, 22 Feb 2021 19:16:39 -0700 Subject: [PATCH] Avoid mojibake in "$!" In stress testing, I discovered that the LC_CTYPE and LC_MESSAGES locales need to be the same locale, or strerror() can return question marks or mojibake instead of the proper message. This commit refactors the handling of stringifying "$!" to make the locales of both categories the same during the stringification. Actually, I suspect it isn't the locale, but the codeset of the locale that needs to be the same. I suspect that if the categories were both in different UTF-8 locales, or both in single-byte locales, that things would work fine. But it's cheaper to find the locale rather than the locale's codeset, so that is what is done. --- locale.c | 272 +++++++++++++++++++++++++++++-------------------------- 1 file changed, 144 insertions(+), 128 deletions(-) diff --git a/locale.c b/locale.c index 7003e5411385..296562ac507b 100644 --- a/locale.c +++ b/locale.c @@ -6491,73 +6491,122 @@ Perl__is_in_locale_category(pTHX_ const bool compiling, const int category) /* Used to shorten the definitions of the following implementations of * my_strerror() */ +#define DEBUG_STRERROR_ENTER(errnum, in_locale) \ + DEBUG_Lv(PerlIO_printf(Perl_debug_log, \ + "%s: %d: my_strerror called with errnum %d;" \ + " Within locale scope=%d\n", \ + __FILE__, __LINE__, errnum, in_locale)) + #define DEBUG_STRERROR_RETURN(errstr) \ - DEBUG_Lv((PerlIO_printf(Perl_debug_log, \ - "Strerror returned; saving a copy: '"), \ - print_bytes_for_locale(errstr, errstr + strlen(errstr), 0), \ - PerlIO_printf(Perl_debug_log, "'\n"))); + DEBUG_Lv(PerlIO_printf(Perl_debug_log, \ + "%s:%d Strerror returned; saving a copy: '", __FILE__, __LINE__); \ + print_bytes_for_locale(errstr, errstr + strlen(errstr), 0)); + +/* On platforms that have precisely one of these categories (Windows + * qualifies), these yield the correct one */ +#if defined(USE_LOCALE_CTYPE) +# define WHICH_LC_INDEX LC_CTYPE_INDEX_ +#elif defined(USE_LOCALE_MESSAGES) +# define WHICH_LC_INDEX LC_MESSAGES_INDEX_ +#endif /* my_strerror() returns a mortalized copy of the text of the error message - * associated with 'errnum'. It uses the current locale's text unless the - * platform doesn't have the LC_MESSAGES category or we are not being called - * from within the scope of 'use locale'. In the former case, it uses whatever - * strerror returns; in the latter case it uses the text from the C locale. - * - * The function just calls strerror(), but temporarily switches, if needed, to - * the C locale. + * associated with 'errnum'. If not called from within the scope of 'use + * locale', it uses the text from the C locale. If Perl is compiled to + * not pay attention to LC_CTYPE nor LC_MESSAGES, it uses whatever strerror() + * returns. Otherwise the text is derived from the locale, LC_MESSAGES if we + * have that; LC_CTYPE if not. + * + * The function just calls strerror(), but temporarily switches locales, if + * needed. Many platforms require LC_CTYPE and LC_MESSAGES to be in the same + * CODESET in order for the return from strerror() to not contain '?' symbols, + * or worse, mojibaked. It's cheaper to just use the stricter criteria of + * being in the same locale. So the code below uses a common locale for both + * categories. Again, that is C if not within 'use locale' scope; or the + * LC_MESSAGES locale if in scope and we have that category; and LC_CTYPE if we + * don't have LC_MESSAGES; and whatever strerror returns if we don't have + * either category. + * + * There are two sets of implementations. The first below is if we have + * strerror_l(). This is the simpler. We just use the already-built C locale + * object if not in locale scope, or build up a custom one otherwise. * - * There are several implementations, depending on the capabilities of the - * platform. The preprocessing directives obscured the logic; so they are now - * each shown in whole. */ + * When strerror_l() is not available, we may have to swap locales temporarily + * to bring the two categories into sync with each other, and possibly to the C + * locale. + * + * Because the prepropessing directives to conditionally compile this function + * would greatly obscure the logic of the implementation various + * implementations, the whole function is repeated for each configuration, with + * some common macros. */ + +#if defined(USE_POSIX_2008_LOCALE) && defined(HAS_STRERROR_L) +# if ! defined(USE_LOCALE_CTYPE) && ! defined(USE_LOCALE_MESSAGES) + +/* Here, neither category is defined: use the C locale */ +char * +Perl_my_strerror(pTHX_ const int errnum) +{ + char *errstr = savepv(strerror_l(errnum, PL_C_locale_obj)); + + DEBUG_STRERROR_ENTER(errnum, 0); + DEBUG_STRERROR_RETURN(errstr); + + SAVEFREEPV(errstr); + return errstr; +} + +# elif ! defined(USE_LOCALE_CTYPE) || ! defined(USE_LOCALE_MESSAGES) /*--------------------------------------------------------------------------*/ -#ifndef USE_LOCALE_MESSAGES + +/* Here one or the other of CTYPE or MESSAGES is defined, but not both. If we + * are not within 'use locale' scope of the only one defined, we use the C + * locale; otherwise use the current locale object */ char * Perl_my_strerror(pTHX_ const int errnum) { char *errstr; - DEBUG_Lv(PerlIO_printf(Perl_debug_log, - "%s: %d: my_strerror called with errnum %d;" - " Within locale scope is immaterial\n", - __FILE__, __LINE__, errnum)); + /* Use C if not within locale scope; Otherwise, use current locale */ + const locale_t which_obj = (IN_LC(categories[WHICH_LC_INDEX])) + ? PL_C_locale_obj + : use_curlocale_scratch(); - errstr = savepv(Strerror(errnum)); + PERL_ARGS_ASSERT_MY_STRERROR; + + DEBUG_STRERROR_ENTER(errnum, IN_LC(categories[WHICH_LC_INDEX])); + errstr = savepv(strerror_l(errnum, which_obj)); DEBUG_STRERROR_RETURN(errstr); SAVEFREEPV(errstr); return errstr; } + /*--------------------------------------------------------------------------*/ -#else -/* The rest of the invocations all share the same beginning, so show that: */ +# else /* Are using both categories. Place them in the same CODESET, + * either C or the LC_MESSAGES locale */ char * Perl_my_strerror(pTHX_ const int errnum) { char *errstr; - const bool within_locale_scope = IN_LC(LC_MESSAGES); - DEBUG_Lv(PerlIO_printf(Perl_debug_log, - "%s: %d: my_strerror called with errnum %d; Within locale scope=%d\n", - __FILE__, __LINE__, errnum, within_locale_scope)); -/*--------------------------------------------------------------------------*/ -# if ! defined(USE_LOCALE_THREADS) + DEBUG_STRERROR_ENTER(errnum, IN_LC(LC_MESSAGES)); - /* This function is also pretty trivial without threads. */ - if (within_locale_scope) { - errstr = savepv(Strerror(errnum)); + if (! IN_LC(LC_MESSAGES)) { /* Use C if not within locale scope */ + errstr = savepv(strerror_l(errnum, PL_C_locale_obj)); } - else { - const char * save_locale = savepv(querylocale_c(LC_MESSAGES)); + else { /* Otherwise, use the LC_MESSAGES locale, making sure LC_CTYPE + matches */ + locale_t cur = duplocale(use_curlocale_scratch()); - void_setlocale_c(LC_MESSAGES, "C"); - errstr = savepv(Strerror(errnum)); - void_setlocale_c(LC_MESSAGES, save_locale); - Safefree(save_locale); + cur = newlocale(LC_CTYPE_MASK, querylocale_c(LC_MESSAGES), cur); + errstr = savepv(strerror_l(errnum, cur)); + freelocale(cur); } DEBUG_STRERROR_RETURN(errstr); @@ -6565,49 +6614,54 @@ Perl_my_strerror(pTHX_ const int errnum) SAVEFREEPV(errstr); return errstr; } -/*--------------------------------------------------------------------------*/ -# elif defined(USE_POSIX_2008_LOCALE) \ - && defined(HAS_STRERROR_L) \ - && defined(HAS_STRERROR_R) - - /* This function is also trivial if we don't have to worry about thread - * safety and have strerror_l(), as it handles the switch of locales so we - * don't have to deal with that. We don't have to worry about thread - * safety if strerror_r() is also available. Both it and strerror_l() are - * thread-safe. Plain strerror() isn't thread safe. But on threaded - * builds when strerror_r() is available, the apparent call to strerror() - * below is actually a macro that behind-the-scenes calls strerror_r(). */ - - if (within_locale_scope) { +# endif /* Above is using strerror_l */ +/*==========================================================================*/ +#else /* Below is not using strerror_l */ +# if ! defined(USE_LOCALE_CTYPE) && ! defined(USE_LOCALE_MESSAGES) + +/* If not using using either of the categories, return plain, unadorned + * strerror */ + +char * +Perl_my_strerror(pTHX_ const int errnum) +{ + char *errstr; + + DEBUG_STRERROR_ENTER(errnum, 0); + errstr = savepv(Strerror(errnum)); - } - else { - errstr = savepv(strerror_l(errnum, PL_C_locale_obj)); - } DEBUG_STRERROR_RETURN(errstr); SAVEFREEPV(errstr); return errstr; } + /*--------------------------------------------------------------------------*/ -# elif defined(USE_POSIX_2008_LOCALE) && defined(HAS_STRERROR_L) +# elif ! defined(USE_LOCALE_CTYPE) || ! defined(USE_LOCALE_MESSAGES) - /* It's a little more complicated with strerror_l() but strerror_r() is not - * available. We use strerror_l() for everything, constructing a locale to - * pass to it if necessary */ +/* Here one or the other of CTYPE or MESSAGES is defined, but not both. If we + * are not within 'use locale' scope of the only one defined, we use the C + * locale; otherwise use the current locale */ - { - locale_t locale_to_use; +char * +Perl_my_strerror(pTHX_ const int errnum) +{ + char *errstr; - if (within_locale_scope) { - locale_to_use = use_curlocale_scratch(); - } - else { /* Use C locale if not within 'use locale' scope */ - locale_to_use = PL_C_locale_obj; + PERL_ARGS_ASSERT_MY_STRERROR; + + DEBUG_STRERROR_ENTER(errnum, IN_LC(categories[WHICH_LC_INDEX])); + + if (IN_LC(categories[WHICH_LC_INDEX])) { + errstr = savepv(Strerror(errnum)); } + else { + const char * orig_locale = toggle_locale_i(WHICH_LC_INDEX, "C"); + + errstr = savepv(Strerror(errnum)); - errstr = savepv(strerror_l(errnum, locale_to_use)); + restore_toggled_locale_i(WHICH_LC_INDEX, orig_locale); } DEBUG_STRERROR_RETURN(errstr); @@ -6615,81 +6669,43 @@ Perl_my_strerror(pTHX_ const int errnum) SAVEFREEPV(errstr); return errstr; } + /*--------------------------------------------------------------------------*/ # else - /* And most complicated of all is without strerror_l(). We have a critical - * section to prevent another thread from executing this same code at the - * same time. (On thread-safe perls, the LOCK is a no-op.) */ +/* Below, have both LC_CTYPE and LC_MESSAGES. Place them in the same CODESET, + * either C or the LC_MESSAGES locale */ - { - const char * save_locale = NULL; - bool locale_is_C = FALSE; - - /* We have a critical section to prevent another thread from executing this - * same code at the same time. (On thread-safe perls, the LOCK is a - * no-op.) Since this is the only place in core that changes LC_MESSAGES - * (unless the user has called setlocale(), this works to prevent races. */ - - if (! within_locale_scope) { - SETLOCALE_LOCK; - save_locale = querylocale_c(LC_MESSAGES); - if (! save_locale) { - SETLOCALE_UNLOCK; - Perl_croak(aTHX_ - "panic: %s: %d: Could not find current LC_MESSAGES locale," - " errno=%d\n", __FILE__, __LINE__, errno); - } - else { - locale_is_C = isNAME_C_OR_POSIX(save_locale); - - /* Switch to the C locale if not already in it */ - if (! locale_is_C) { - - /* The setlocale() just below likely will zap 'save_locale', so - * create a copy. */ - save_locale = savepv(save_locale); - if (! bool_setlocale_c(LC_MESSAGES, "C")) { - - /* If, for some reason, the locale change failed, we - * soldier on as best as possible under the circumstances, - * using the current locale, and clear save_locale, so we - * don't try to change back. On z/0S, all setlocale() - * calls fail after you've created a thread. This is their - * way of making sure the entire process is always a single - * locale. This means that 'use locale' is always in place - * for messages under these circumstances. */ - Safefree(save_locale); - save_locale = NULL; - } - } - } - } +char * +Perl_my_strerror(pTHX_ const int errnum) +{ + char *errstr; + const char * desired_locale = savepv((IN_LC(LC_MESSAGES)) + ? querylocale_c(LC_MESSAGES) + : "C"); + const char * orig_CTYPE_locale; + const char * orig_MESSAGES_locale; + /* XXX Can fail on z/OS */ + + DEBUG_STRERROR_ENTER(errnum, IN_LC(LC_MESSAGES)); + + orig_CTYPE_locale = toggle_locale_c(LC_CTYPE, desired_locale); + orig_MESSAGES_locale = toggle_locale_c(LC_MESSAGES, desired_locale); - DEBUG_Lv(PerlIO_printf(Perl_debug_log, - "Any locale change has been done; about to call Strerror\n")); errstr = savepv(Strerror(errnum)); - if (! within_locale_scope) { - if (save_locale && ! locale_is_C) { - if (! bool_setlocale_c(LC_MESSAGES, save_locale)) { - SETLOCALE_UNLOCK; - Perl_croak(aTHX_ - "panic: %s: %d: setlocale restore to '%s' failed, errno=%d\n", - __FILE__, __LINE__, save_locale, errno); - } - Safefree(save_locale); - } - } - } + restore_toggled_locale_c(LC_MESSAGES, orig_MESSAGES_locale); + restore_toggled_locale_c(LC_CTYPE, orig_CTYPE_locale); DEBUG_STRERROR_RETURN(errstr); + Safefree(desired_locale); SAVEFREEPV(errstr); return errstr; } -# endif +/*--------------------------------------------------------------------------*/ +# endif /* end of not using strerror_l() */ #endif /* end of all the my_strerror() implementations */ /*