From e4072a83eac14631fe4ca5d14905adac58ec6977 Mon Sep 17 00:00:00 2001 From: Karl Williamson Date: Fri, 3 Nov 2023 08:54:48 -0600 Subject: [PATCH] Add ability to emulate thread-safe locale operations Locale information was originally global for an entire process. Later, it was realized that different threads could want to be running in different locales. Windows added this ability, and POSIX 2008 followed suit (though using a completely different API). When available, perl automatically uses these capabilities. But many platforms have neither, or their implementation, such as on Darwin, is buggy. This commit adds the capability for Perl programs to operate as if the platform were thread-safe. This implementation is based on the observation that the underlying locale matters only to relatively few libc calls, and only during their execution. It can be anything at all at any other time. perl keeps what the proper locale should be for each category in a a per-thread array. Each locale-dependent operation must be wrapped in mutex lock/unlock operations. The lock additionally compares what libc knows the locale to be, and what it should be for this thread at this time, and changes the actual locale to the proper value if necessary. That's all that is needed. This commit adds macros to perl.h, for example "MBTOWC_LOCK_", that expand to do the mutex lock, and change the global locale to the expected value. On perls built without this emulation capability, they are no-ops. All code in the perl core (unless I've missed something), are changed to use these macros (there weren't actually many places that needed this). Thus, any pure perl program will automatically become locale-thread-safe under this Configuration. In order for XS code to also become locale-thread-safe, it must use these macros to wrap calls to locale-dependent functions. Relatively few modules call such functions. For example, the only one I found that ships with the perl core is Time::Piece, and it has more fundamental issues with running under threads than this. I am preparing pull requests for it. Thus, this is not completely transparent to code like native-thread-safe locale handling is. Therefore ${^SAFE_LOCALES} returns 2 (instead of 1) for this type of thread-safety. Another deficiency compared to the native thread safety is when a thread calls a non-perl library that accesses the locale. The typical example is Gtk (though this particular application can be configured to not be problematic). With the native safe threads, everything works as long as only one such thread is used per Perl program. That thread would then be the only one operating in the global locale, hence there are no conflicts. With this emulation, all threads are operating in the global locale, and mutexes would have to be used to prevent conflicts. To minimize those, the code added in this commit restores the global locale when through to the state it was in when started. A major concern is the performance impact. This is after all trading speed for accuracy. lib/locale_threads.t is noticeably slower when this is being used. But that is doing multiple threads constantly using locale-dependent operations. I don't notice any change with the rest of the test suite. In pure perl, this only comes into play while in the scope of 'use locale' or when using some of the few POSIX:: functions that are locale-dependent. And to some extent when formatting, but the regular overhead there should dwarf what this adds. This commit leaves this feature off by default. The next commit changes that for the next few 5.39 development releases, so we can see if there is actually an issue. --- embed.fnc | 28 ++- embed.h | 17 +- embedvar.h | 3 + handy.h | 21 +- inline.h | 43 ++++ intrpvar.h | 7 + locale.c | 616 ++++++++++++++++++++++++++++++++++++++++++++---- makedef.pl | 23 +- mg.c | 5 +- perl.c | 9 +- perl.h | 111 ++++++++- pod/perlvar.pod | 9 +- proto.h | 45 +++- sv.c | 10 + 14 files changed, 848 insertions(+), 99 deletions(-) diff --git a/embed.fnc b/embed.fnc index def504177a7a..e36912cc8f67 100644 --- a/embed.fnc +++ b/embed.fnc @@ -3779,6 +3779,17 @@ p |void |dump_sv_child |NN SV *sv CRTip |unsigned int|variant_byte_number \ |PERL_UINTMAX_T word #endif +#if defined(EMULATE_THREAD_SAFE_LOCALES) +Cp |void |category_lock_i|const locale_category_index index \ + |NN const char *file \ + |const line_t caller_line +Cp |void |category_unlock_i \ + |const locale_category_index index \ + |NN const char *file \ + |const line_t caller_line +Cip |int |posix_LC_foo_ |const int c \ + |const U8 classnum +#endif #if defined(F_FREESP) && !defined(HAS_CHSIZE) && !defined(HAS_TRUNCATE) ARdp |I32 |my_chsize |int fd \ |Off_t length @@ -4457,6 +4468,13 @@ RS |char * |my_setlocale_debug_string_i \ |NULLOK const char *retval \ |const line_t line # endif +# if defined(EMULATE_THREAD_SAFE_LOCALES) || \ + ( defined(USE_POSIX_2008_LOCALE) && !defined(USE_QUERYLOCALE) ) +S |void |update_PL_curlocales_i \ + |const locale_category_index index \ + |NN const char *new_locale \ + |const line_t caller_line +# endif # if defined(HAS_NL_LANGINFO) || defined(HAS_NL_LANGINFO_L) S |const char *|my_langinfo_i \ |const nl_item item \ @@ -4517,14 +4535,8 @@ S |const char *|querylocale_2008_i \ |const locale_category_index index \ |const line_t line S |locale_t|use_curlocale_scratch -# if !defined(USE_QUERYLOCALE) -S |void |update_PL_curlocales_i \ - |const locale_category_index index \ - |NN const char *new_locale \ - |const line_t caller_line -# endif -# elif defined(USE_LOCALE_THREADS) && !defined(USE_THREAD_SAFE_LOCALE) && \ - !defined(USE_THREAD_SAFE_LOCALE_EMULATION) +# elif !defined(EMULATE_THREAD_SAFE_LOCALES) && \ + defined(USE_LOCALE_THREADS) && !defined(USE_THREAD_SAFE_LOCALE) S |bool |less_dicey_bool_setlocale_r \ |const int cat \ |NN const char *locale diff --git a/embed.h b/embed.h index 8f5e05b257ef..b3fa97a54745 100644 --- a/embed.h +++ b/embed.h @@ -810,6 +810,11 @@ # if !defined(EBCDIC) # define variant_byte_number Perl_variant_byte_number # endif +# if defined(EMULATE_THREAD_SAFE_LOCALES) +# define category_lock_i(a,b,c) Perl_category_lock_i(aTHX_ a,b,c) +# define category_unlock_i(a,b,c) Perl_category_unlock_i(aTHX_ a,b,c) +# define posix_LC_foo_(a,b) Perl_posix_LC_foo_(aTHX_ a,b) +# endif # if defined(F_FREESP) && !defined(HAS_CHSIZE) && !defined(HAS_TRUNCATE) # define my_chsize(a,b) Perl_my_chsize(aTHX_ a,b) # endif @@ -1313,6 +1318,10 @@ # if defined(DEBUGGING) # define my_setlocale_debug_string_i(a,b,c,d) S_my_setlocale_debug_string_i(aTHX_ a,b,c,d) # endif +# if defined(EMULATE_THREAD_SAFE_LOCALES) || \ + ( defined(USE_POSIX_2008_LOCALE) && !defined(USE_QUERYLOCALE) ) +# define update_PL_curlocales_i(a,b,c) S_update_PL_curlocales_i(aTHX_ a,b,c) +# endif # if defined(HAS_NL_LANGINFO) || defined(HAS_NL_LANGINFO_L) # define my_langinfo_i(a,b,c,d,e,f) S_my_langinfo_i(aTHX_ a,b,c,d,e,f) # else @@ -1343,12 +1352,8 @@ # define bool_setlocale_2008_i(a,b,c) S_bool_setlocale_2008_i(aTHX_ a,b,c) # define querylocale_2008_i(a,b) S_querylocale_2008_i(aTHX_ a,b) # define use_curlocale_scratch() S_use_curlocale_scratch(aTHX) -# if !defined(USE_QUERYLOCALE) -# define update_PL_curlocales_i(a,b,c) S_update_PL_curlocales_i(aTHX_ a,b,c) -# endif -# elif defined(USE_LOCALE_THREADS) && \ - !defined(USE_THREAD_SAFE_LOCALE) && \ - !defined(USE_THREAD_SAFE_LOCALE_EMULATION) +# elif !defined(EMULATE_THREAD_SAFE_LOCALES) && \ + defined(USE_LOCALE_THREADS) && !defined(USE_THREAD_SAFE_LOCALE) # define less_dicey_bool_setlocale_r(a,b) S_less_dicey_bool_setlocale_r(aTHX_ a,b) # define less_dicey_setlocale_r(a,b) S_less_dicey_setlocale_r(aTHX_ a,b) # endif diff --git a/embedvar.h b/embedvar.h index 3b8bff0a6bb1..849a5ccfbafd 100644 --- a/embedvar.h +++ b/embedvar.h @@ -224,6 +224,7 @@ # define PL_parser (vTHX->Iparser) # define PL_patchlevel (vTHX->Ipatchlevel) # define PL_peepp (vTHX->Ipeepp) +# define PL_perl_controls_locale (vTHX->Iperl_controls_locale) # define PL_perl_destruct_level (vTHX->Iperl_destruct_level) # define PL_perldb (vTHX->Iperldb) # define PL_perlio (vTHX->Iperlio) @@ -251,6 +252,8 @@ # define PL_replgv (vTHX->Ireplgv) # define PL_restartjmpenv (vTHX->Irestartjmpenv) # define PL_restartop (vTHX->Irestartop) +# define PL_restore_locale (vTHX->Irestore_locale) +# define PL_restore_locale_depth (vTHX->Irestore_locale_depth) # define PL_rpeepp (vTHX->Irpeepp) # define PL_rs (vTHX->Irs) # define PL_runops (vTHX->Irunops) diff --git a/handy.h b/handy.h index f2837a3bf45a..e4e461dbacf4 100644 --- a/handy.h +++ b/handy.h @@ -1555,9 +1555,16 @@ or casts # define HIGHEST_REGCOMP_DOT_H_SYNC_ CC_VERTSPACE_ -/* The members of the third group below do not need to be coordinated with data - * structures in regcomp.[ch] and regexec.c. */ -# define CC_IDFIRST_ 16 +/* These three follow immediately after the final function that has a version + * defined by C, like isascii(), so they overlap with anything else. They are + * used in the 'PL_libc_char_fcns' data structure, along with the ones above + * them */ +# define CC_IDFIRST_ 16 +# define CC_TOLOWER_ (CC_IDFIRST_ + 1) +# define CC_TOUPPER_ (CC_TOLOWER_ + 1) + +/* The members of the fourth group below do not need to be coordinated with + * data structures in regcomp.[ch] and regexec.c. */ # define CC_CHARNAME_CONT_ 17 # define CC_NONLATIN1_FOLD_ 18 # define CC_NONLATIN1_SIMPLE_FOLD_ 19 @@ -2025,7 +2032,7 @@ END_EXTERN_C # define is_posix_XDIGIT(c) isxdigit((U8) (c)) #endif -/* Below is the next level up, which currently expands to nothing more +/* Below is the next level up, which on most platforms expands to nothing more * than the previous layer. These are the macros to use if you really need * something whose input domain is a byte, and the locale isn't UTF-8; that is, * where you normally would have to use things like bare isalnum(). @@ -2037,7 +2044,13 @@ END_EXTERN_C * (Note, proper general operation of the bare libc functions requires you to * cast to U8. These do that for you automatically.) */ +/* In this one circumstance, the macro is implemented with a lock; otherwise it + * expands to just the layer below */ +#ifdef EMULATE_THREAD_SAFE_LOCALES +# define WRAP_U8_LC_(c, classnum, posix) posix_LC_foo_((c), (classnum)) +#else # define WRAP_U8_LC_(c, classnum, posix) posix(c) +#endif #define isU8_ALPHANUMERIC_LC(c) \ WRAP_U8_LC_((c), CC_ALPHANUMERIC_, is_posix_ALPHANUMERIC) diff --git a/inline.h b/inline.h index 59d16645b235..619020190b0b 100644 --- a/inline.h +++ b/inline.h @@ -318,6 +318,49 @@ S_PerlEnv_putenv(pTHX_ char * str) #endif +/* ------------------------------- handy.h ------------------------------- */ + +#ifdef EMULATE_THREAD_SAFE_LOCALES + +PERL_STATIC_INLINE int +Perl_posix_LC_foo_(pTHX_ const int c, const U8 classnum) { + int result; + + LC_CTYPE_LOCK; + + /* All calls to this (so far) are with a 'classnum' known at compile time, + * so the compiler should constant fold this down to a single assignment */ + switch (classnum) { + case CC_ALPHANUMERIC_:result = (bool) is_posix_ALPHANUMERIC(c); break; + case CC_ALPHA_: result = (bool) is_posix_ALPHA(c); break; + case CC_ASCII_: result = (bool) is_posix_ASCII(c); break; + case CC_BLANK_: result = (bool) is_posix_BLANK(c); break; + case CC_CASED_: result = (bool) is_posix_CASED(c); break; + case CC_CNTRL_: result = (bool) is_posix_CNTRL(c); break; + case CC_DIGIT_: result = (bool) is_posix_DIGIT(c); break; + case CC_GRAPH_: result = (bool) is_posix_GRAPH(c); break; + case CC_LOWER_: result = (bool) is_posix_LOWER(c); break; + case CC_PRINT_: result = (bool) is_posix_PRINT(c); break; + case CC_PUNCT_: result = (bool) is_posix_PUNCT(c); break; + case CC_SPACE_: result = (bool) is_posix_SPACE(c); break; + case CC_UPPER_: result = (bool) is_posix_UPPER(c); break; + case CC_WORDCHAR_: result = (bool) is_posix_WORDCHAR(c); break; + case CC_XDIGIT_: result = (bool) is_posix_XDIGIT(c); break; + case CC_IDFIRST_: result = (bool) is_posix_IDFIRST(c); break; + case CC_TOLOWER_: result = to_posix_LOWER(c); break; + case CC_TOUPPER_: result = to_posix_UPPER(c); break; + + default: + LC_CTYPE_UNLOCK; + locale_panic_(Perl_form(aTHX_ "Unknown charclass %d", classnum)); + } + + LC_CTYPE_UNLOCK; + return result; +} + +#endif + /* ------------------------------- mg.h ------------------------------- */ #if defined(PERL_CORE) || defined(PERL_EXT) diff --git a/intrpvar.h b/intrpvar.h index 4d5786aad958..0e6f5118643d 100644 --- a/intrpvar.h +++ b/intrpvar.h @@ -757,7 +757,14 @@ PERLVARI(I, cur_locale_obj, locale_t, LC_GLOBAL_LOCALE) * is almost always toggled into the C locale, and the locale it nominally is * is stored as PL_numeric_name. */ PERLVARA(I, curlocales, LOCALE_CATEGORIES_COUNT_ + 1, const char *) +#endif +#ifdef EMULATE_THREAD_SAFE_LOCALES +PERLVARA(I, restore_locale, LOCALE_CATEGORIES_COUNT_, const char *) +PERLVARA(I, restore_locale_depth, LOCALE_CATEGORIES_COUNT_, Size_t) +#endif +#if defined(USE_LOCALE) && (defined(WIN32) || ! defined(USE_THREAD_SAFE_LOCALE)) +PERLVARI(I, perl_controls_locale, bool, true) #endif #ifdef USE_PL_CUR_LC_ALL PERLVARI(I, cur_LC_ALL, const char *, NULL) diff --git a/locale.c b/locale.c index ad3b3c1b033c..21a0cc930e09 100644 --- a/locale.c +++ b/locale.c @@ -42,13 +42,6 @@ * platform than it actually is. This allows you to make changes and catch * some errors without having access to those other platforms. * - * This code now has multi-thread-safe locale handling on systems that support - * that. This is completely transparent to most XS code. On earlier systems, - * it would be possible to emulate thread-safe locales, but this likely would - * involve a lot of locale switching, and would require XS code changes. - * Macros could be written so that the code wouldn't have to know which type of - * system is being used. - * * Table-driven code is used for simplicity and clarity, as many operations * differ only in which category is being worked on. However the system * categories need not be small contiguous integers, so do not lend themselves @@ -69,6 +62,16 @@ * bool_setlocale_2008_i() function is used to hide the different API from the * outside. This makes it completely transparent to most XS code. * + * On other threaded-systems, the code here, in conjunction with other code in + * the system, emulates thread-safe locales by using mutexes to lock other + * threads out, and change the global locale to the desired per-thread value + * just before operations that care about it. All such operations must declare + * their need before executing, or it won't work. All of the Perl core does + * this, which makes pure Perl code locale thread-safe. XS code can be + * extended to work by using the macros for the purpose in perl.h. The need + * for mutexes means that in these platforms, much of the code in this file + * must be done while in critical sections. + * * A huge complicating factor is that the LC_NUMERIC category is normally held * in the C locale, except during those relatively rare times when it needs to * be in the underlying locale. There is a bunch of code to accomplish this, @@ -79,7 +82,7 @@ * opportunities for avoiding work. We don't have to necessarily create a safe * copy to return if no return is desired. * - * There are 3.5 major implementations here; which one chosen depends on what + * There are 4.5 major implementations here; which one chosen depends on what * the platform has available, and Configuration options. * * 1) Raw posix_setlocale(). This implementation is basically the libc @@ -102,7 +105,9 @@ * * 2) An implementation that adds a minimal layer above implementation 1), * making that implementation uninterruptible and returning a - * per-thread/per-category value. + * per-thread/per-category value. Currently, this is for threaded perls + * on platforms where layers 3a and 3b are not available, and where layer 4 + * has not been selected. * * 3a and 3b) An implementation of POSIX 2008 thread-safe locale handling, * hiding from the programmer the completely different API for this. @@ -119,7 +124,32 @@ * are buggy, in one way or another. There are workarounds encoded here, * where feasible, for platforms where the bugs are amenable to that * (glibc, for example). But other platforms instead don't use this - * implementation. + * implementation, but the next one below. + * + * 4) A thread-safe emulation implementation that, in conjunction with changes + * to C code, makes locale handling thread-safe. Those changes are simply + * to wrap locale-dependent system calls with macros that delimit a critical + * section in which they change the global locale to the one the thread + * expects. The perl core has made those changes, so pure perl programs + * become thread-safe. Well-behaved XS code also keeps things thread-safe, + * either by not using locale-dependent system calls, or by changing to use + * the wrapper macros. This layer is not chosen if the platform has native + * thread-safe locale handling. Also, currently perl must have been + * Configured with "-Accflags=-DEMULATE_THREAD_SAFE_LOCALES". + * + * This implementation is based on the observation that the underlying + * locale matters only to relatively few libc calls, and only during their + * execution. It can be anything at all at any other time. What the proper + * locale should be for each category is kept in the array PL_curlocales[]. + * Each locale-dependent operation must be wrapped in mutex lock/unlock + * operations. The lock additionally compares what libc knows the locale to + * be, and what it should be for this thread at this time, and changes the + * actual locale to the proper value if necessary. That's all that is + * needed. However additionally, the unlock restores the locale to what it + * was at the time of the lock. This improves the chances that a thread not + * under perl's control (such as Gtk) will still work. (If mutex calls are + * added to lock out that thread from running when the other threads are + * using locale-dependent functions, then things should completely work.) * * z/OS (os390) is an outlier. Locales really don't work under threads when * either the radix character isn't a dot, or attempts are made to change @@ -227,10 +257,11 @@ * crippled locale implementation. * * -Accflags=-DNO_THREAD_SAFE_LOCALE - * Even if thread-safe operations are available on this platform and - * would otherwise be used (because this is a perl with multiplicity), - * perl is compiled to not use them. This could be useful on - * platforms where the libc is buggy. + * Don't use the thread-safe operations on this platform (should they + * be available) nor try to emulate them (if they are not available) + * even on a perl with multiplicity. This could be useful on + * platforms where the libc is buggy, or the emulation runs into + * problems. * * -Accflags=-DNO_POSIX_2008_LOCALE * Even if the libc locale operations specified by the Posix 2008 @@ -271,6 +302,13 @@ * these have no effect. Otherwise they cause perl to be compiled to * always keep the named category(ies) in the C locale. * + * -Accflags=-DEMULATE_THREAD_SAFE_LOCALES + * This has no effect on unthreaded perls, nor when perl thinks that + * the platform has thread-safe locale handling. But otherwise, it + * enables the code to emulate thread-safe locale handling. + * Effectively it chooses implementation 4) instead of implementation + * 2) from the list above. + * * -Accflags=-DHAS_BROKEN_SETLOCALE_QUERY_LC_ALL * This would be set in a hints file to tell perl that doing a libc * setlocale(LC_ALL, NULL) @@ -1896,7 +1934,8 @@ S_setlocale_i(pTHX_ const int category, const char * locale) /*===========================================================================*/ #elif defined(USE_LOCALE_THREADS) \ - && ! defined(USE_THREAD_SAFE_LOCALE) + && ! defined(USE_THREAD_SAFE_LOCALE) \ + && ! defined(EMULATE_THREAD_SAFE_LOCALES) /* Here, there are threads, and there is no support for thread-safe * operation. This is a dangerous situation, which perl is documented as @@ -1980,6 +2019,306 @@ S_less_dicey_bool_setlocale_r(pTHX_ const int cat, const char * locale) * some shortcuts */ # define setlocale_i(i, locale) less_dicey_setlocale_r(categories[i], locale) +/*===========================================================================*/ +#elif defined(EMULATE_THREAD_SAFE_LOCALES) + +/* Here, use our emulation of thread safe locales. PL_curlocales[] keeps what + * the name of the locale should be for each category in the current thread. + * And so, S_bool_setlocale_emulate_safe_i() wraps each call to the system's + * setlocale() with saving the return into PL_curlocales. + * + * The locale is changed to the one specified by PL_curlocales[] just before + * any libc call affected by it, and restored just afterwards. */ + +# define querylocale_i(i) S_querylocale_emulate_safe_i(aTHX_ i, __LINE__) +# define querylocale_c(cat) querylocale_i(cat##_INDEX_) +# define querylocale_r(cat) querylocale_i(get_category_index(cat)) + +STATIC const char * +S_querylocale_emulate_safe_i(pTHX_ const unsigned int cat_index, + const line_t caller_line) +{ + assert(cat_index <= LOCALE_CATEGORIES_COUNT_ + 1); + + DEBUG_Lv(PerlIO_printf(Perl_debug_log, + " querylocale_emulate_safe_i(%u: %s);" + " called from %" LINE_Tf "\n", + cat_index, category_names[cat_index], caller_line)); + +# ifdef LC_ALL + + /* It can be somewhat expensive to calculate LC_ALL from its constituent + * categories, and the value might change many times before it is actually + * used. Therefore, it is only done as needed. This is such a place */ + if ( cat_index == LC_ALL_INDEX_ + && PL_curlocales[LC_ALL_INDEX_] == NULL) + { + /* Call just for its side effect */ + (void) calculate_LC_ALL_string(PL_curlocales, INTERNAL_FORMAT, + WANT_TEMP_PV, + caller_line); + } + +# endif + + return mortalized_pv_copy(PL_curlocales[cat_index]); +} + +/*---------------------------------------------------------------------------*/ + +# define bool_setlocale_r(cat, locale) \ + S_bool_setlocale_emulate_safe_r(aTHX_ cat, locale, __LINE__) +# define bool_setlocale_i(i, locale) \ + bool_setlocale_r(categories[i], locale) +# define bool_setlocale_c(cat, locale) bool_setlocale_r(cat, locale) + +STATIC bool +S_bool_setlocale_emulate_safe_r(pTHX_ + const int category, + const char * wanted_locale, + const line_t caller_line) +{ + /* Set the locale to 'wanted_locale' for the category given by our internal + * index number, and save the result for later use. */ + + assert(wanted_locale); + + STDIZED_SETLOCALE_LOCK; + const char * new_locale = savepv(stdized_setlocale(category, + wanted_locale)); + STDIZED_SETLOCALE_UNLOCK; + + if (! new_locale) { + SET_EINVAL; + return false; + } + + update_PL_curlocales_i(get_category_index(category), + new_locale, caller_line); + Safefree(new_locale); + return true; +} +/*---------------------------------------------------------------------------*/ + +# define void_setlocale_r_with_caller(cat, locale, file, line) \ + STMT_START { \ + if (! bool_setlocale_r(cat, locale)) \ + setlocale_failure_panic_via_i(get_category_index(cat), \ + NULL, locale, __LINE__, 0, \ + file, line); \ + } STMT_END + +# define void_setlocale_c_with_caller(cat, locale, file, line) \ + void_setlocale_r_with_caller(cat, locale, file, line) + +# define void_setlocale_i_with_caller(i, locale, file, line) \ + void_setlocale_r_with_caller(categories[i], locale, file, line) + +# define void_setlocale_r(cat, locale) \ + void_setlocale_r_with_caller(cat, locale, __FILE__, __LINE__) +# define void_setlocale_c(cat, locale) void_setlocale_r(cat, locale) +# define void_setlocale_i(i, locale) void_setlocale_r(categories[i], locale) + +/*---------------------------------------------------------------------------*/ +/* utility functions for emulating thread-safe locales. + * + * When emulating thread-safe locales, our per-thread data structures get set + * up as normal, but the actual locale is global to all threads. All functions + * that depend on the locale need to be protected by critical sections + * surrounded by these two functions that lock, and then unlock after the + * operation is completed. The first function does a lock and then changes the + * locale to the desired one for this thread, based on the per-thread data + * structures. The restore function restores to what the locale on original + * entry was, and unlocks. This is effectively a just-in-time locale setting + * scheme. + * + * In order to accommodate the need for more than one category being used by a + * function, this implements a stack. The lock is called for each needed + * category. Since the locks are general semaphores, only the first call + * results in an actual lock. Each call thus changes the locale for its + * category to the desired one, pushing onto the stack what it should be + * restored to afterwards. The paired unlock calls unwind the stack until the + * final one causes the mutex to be released. Most libc call require one or + * two categories. + * + * One could argue that there is no reason to restore afterwards, that the next + * just-in-time call will set the locale to the correct one. But doing the + * restore allows this scheme to work like truly thread-safe implementations + * when one thread is in the global locale. By restoring, we leave code not + * under this scheme to have the global thread for itself. There is a big + * caveat here, though. That thread must run in a critical section. This + * isn't the case with the other thread-safe implementations. */ + +void +Perl_category_lock_i(pTHX_ const locale_category_index cat_index, + const char * file, + const line_t caller_line) +{ + dSAVE_ERRNO; + +# ifndef DEBUGGING + PERL_UNUSED_ARG(file); +# endif + + DEBUG_Lv(PerlIO_printf(Perl_debug_log, + "Entering category_lock_i %s;" + " called from %s: %d\n", + category_names[cat_index], file, caller_line)); + + PERL_ARGS_ASSERT_CATEGORY_LOCK_I; + assert(cat_index < LC_ALL_INDEX_); + + LOCALE_LOCK; + + if (LIKELY(PL_perl_controls_locale)) { + + /* What locale we're supposed to be in */ + +# ifdef USE_LOCALE_NUMERIC + + /* For all categories except LC_NUMERIC, PL_curlocales[] contains the + * correct value to set to. LC_NUMERIC is more complicated. If it is + * known that we should be in the C locale, use C. If we are in the + * underlying locale, use that. Otherwise use the saved value */ + const char * wanted = (cat_index != LC_NUMERIC_INDEX_) + ? PL_curlocales[cat_index] + : ((PL_numeric_standard) + ? "C" + : ((NOT_IN_NUMERIC_UNDERLYING_) + ? PL_curlocales[cat_index] + : PL_numeric_name)); +# else + const char * wanted = PL_curlocales[cat_index]; +# endif + assert(wanted); + + DEBUG_Lv(PerlIO_printf(Perl_debug_log, + "%s: wanted =%s\n", + category_names[cat_index], wanted)); + + /* Get the category desired, and what its current locale is */ + const int cat = categories[cat_index]; + const char * currently = stdized_setlocale(cat, NULL); + + /* If we aren't in the desired locale, change to it, saving a copy of + * the one we actually are in before the change */ + if (strNE(currently, wanted)) { + DEBUG_Lv(PerlIO_printf(Perl_debug_log, + "%s:%d: Calling setlocale(%d, %s)\n", file, caller_line, + cat, wanted)); + if (stdized_setlocale(cat, wanted) == NULL) { + setlocale_failure_panic_i(cat_index, currently, wanted, + __LINE__, caller_line); + NOT_REACHED; /* NOTREACHED */ + } + } + else { + DEBUG_Lv(PerlIO_printf(Perl_debug_log, + "%s: %d: Category %d already was %s\n", + file, caller_line, cat, wanted)); + } + + /* Here, we have toggled to the desired locale, so 'currently' eq + * 'wanted' + * + * This may be a recursive call. Everything remains locked during the + * recursion. We restore to the original locale after the recursion + * gets unwound. The intermediate values aren't needed. */ + if (PL_restore_locale_depth[cat_index] == 0) { + + /* Only need to change what's there if no current value or differs + * from the new one */ + if ( PL_restore_locale[cat_index] == NULL + || strNE(wanted, PL_restore_locale[cat_index])) + { + Safefree(PL_restore_locale[cat_index]); + PL_restore_locale[cat_index] = savepv(wanted); + } + } + + /* Indicate our new recursion depth */ + PL_restore_locale_depth[cat_index]++; + + DEBUG_Lv(PerlIO_printf(Perl_debug_log, + "%s:%d: PL_restore is now %s," + " recursion depth=%zu\n", + file, caller_line, PL_restore_locale[cat_index], + PL_restore_locale_depth[cat_index])); + } + + DEBUG_Lv(PerlIO_printf(Perl_debug_log, + "Leaving category_lock_i: %s\n", + category_names[cat_index])); + + RESTORE_ERRNO; +} + +void +Perl_category_unlock_i(pTHX_ const locale_category_index cat_index, + const char * file, + const line_t caller_line) +{ + PERL_ARGS_ASSERT_CATEGORY_UNLOCK_I; + assert(cat_index < LC_ALL_INDEX_); + dSAVE_ERRNO; + + /* Undoes a matching category_lock(). Note that must be locked on input. + * Will unlock when recursion entirely gets unwound */ + + DEBUG_Lv(PerlIO_printf(Perl_debug_log, + "Entering category_unlock_i %s;" + " called from %s: %d\n", + category_names[cat_index], file, caller_line)); + + if (LIKELY(PL_perl_controls_locale)) { + const int cat = categories[cat_index]; + + /* Un-recursing */ + PL_restore_locale_depth[cat_index]--; + + /* Only restore when the depth gets back to 0 */ + if (PL_restore_locale_depth[cat_index] == 0) { + + /* What we currently are */ + const char * currently = stdized_setlocale(cat, NULL); + + /* And what we need to be changed to */ + const char * wanted = PL_restore_locale[cat_index]; + + /* If we need to change, do it */ + if (strNE(currently, wanted)) { + DEBUG_Lv(PerlIO_printf(Perl_debug_log, + "%s:%d: Calling setlocale(%d, %s)\n", + file, caller_line, cat, wanted)); + if (stdized_setlocale(cat, wanted) == NULL) { + setlocale_failure_panic_via_i(cat_index, + currently, + wanted, + __LINE__, 0, + file, caller_line); + } + } + else { + DEBUG_Lv(PerlIO_printf(Perl_debug_log, + "%s: %d: Category %d already was %s\n", + file, caller_line, cat, wanted)); + } + + Safefree(wanted); + PL_restore_locale[cat_index] = NULL; + } + } + + /* Doesn't actually unlock until recursion fully unwound */ + LOCALE_UNLOCK; + + DEBUG_Lv(PerlIO_printf(Perl_debug_log, + "Leaving category_unlock_i: %s\n", + category_names[cat_index])); + + RESTORE_ERRNO; +} + /*===========================================================================*/ #elif defined(USE_POSIX_2008_LOCALE) @@ -2678,27 +3017,49 @@ S_update_PL_curlocales_i(pTHX_ const char * new_locale, const line_t caller_line) { - /* Update PL_curlocales[], which is parallel to the other ones indexed by - * our mapping of libc category number to our internal equivalents. */ - PERL_ARGS_ASSERT_UPDATE_PL_CURLOCALES_I; assert(index <= LC_ALL_INDEX_); + /* There are two implementations that use PL_curlocales[], an array + * parallel to the other ones indexed by our mapping of libc category + * number to our internal equivalents. + * + * This function updates the 'index'th element to be 'new_locale'. It + * knows about the requirements of each implementation. In the POSIX 2008 + * case, everything is already calculated, so never does an element have to + * be checked for needing to stay in the "C" locale. In the thread-safe + * emulation case, no checking has yet been done, so this routine needs to + * do it */ +# ifndef LC_ALL + PERL_UNUSED_ARG(caller_line); +# else + if (index == LC_ALL_INDEX_) { /* For LC_ALL, we change all individual categories to correspond, * including the LC_ALL element */ for (unsigned int i = 0; i <= LC_ALL_INDEX_; i++) { Safefree(PL_curlocales[i]); - PL_curlocales[i] = NULL; } + /* In the POSIX 2008 case, everything is already calculated, so never + * does an element have to be checked for needing to stay in the "C" + * locale. In the thread-safe emulation case, no checking has yet been + * done, so this routine needs to do it */ + +# if ! defined(HAS_IGNORED_LOCALE_CATEGORIES_) || defined(USE_POSIX_2008) + + const parse_LC_ALL_STRING_action action = no_override; + +# else + + const parse_LC_ALL_STRING_action action = override_if_ignored; + +# endif + switch (parse_LC_ALL_string(new_locale, (const char **) &PL_curlocales, - check_that_overridden, /* things should - have already - been overridden - */ + action, true, /* Always fill array */ true, /* Panic if fails, as to get here it earlier had to have succeeded @@ -2718,7 +3079,11 @@ S_update_PL_curlocales_i(pTHX_ PL_curlocales[LC_ALL_INDEX_] = savepv(new_locale); } } - else { /* Not LC_ALL */ + else + +# endif + + { /* Not LC_ALL */ /* Update the single category's record */ Safefree(PL_curlocales[index]); @@ -3421,7 +3786,12 @@ S_new_numeric(pTHX_ const char *newnum, bool force) /* If not forcing this procedure, and there isn't actually a change from * our records, do nothing. (Our records can be wrong when sync'ing to the * locale set up by an external library, hence the 'force' parameter) */ - if (! force && strEQ(PL_numeric_name, newnum)) { + if ( (! force && strEQ(PL_numeric_name, newnum)) + +# if defined(USE_LOCALE) && ! defined(USE_THREAD_SAFE_LOCALE) + || ! PL_perl_controls_locale +# endif + ) { return; } @@ -3647,6 +4017,8 @@ S_new_ctype(pTHX_ const char *newctype, bool force) * Turkic. Make sure these two are the only anomalies. (We don't * require towupper and towlower because they aren't in C89.) */ + LC_CTYPE_LOCK; + # if defined(HAS_TOWUPPER) && defined (HAS_TOWLOWER) if (towupper('i') == 0x130 && towlower('I') == 0x131) @@ -3662,6 +4034,8 @@ S_new_ctype(pTHX_ const char *newctype, bool force) check_for_problems = TRUE; maybe_utf8_turkic = TRUE; } + + LC_CTYPE_UNLOCK; } else { /* Not a canned locale we know the values for. Compute them */ @@ -3766,7 +4140,9 @@ S_new_ctype(pTHX_ const char *newctype, bool force) * locale requires more than one byte, there are going to be BIG problems. * */ + LC_CTYPE_LOCK; const int mb_cur_max = MB_CUR_MAX; + LC_CTYPE_UNLOCK; if (mb_cur_max > 1 && ! PL_in_utf8_CTYPE_locale @@ -5281,6 +5657,7 @@ S_populate_hash_from_localeconv(pTHX_ HV * hv, /* Some platforms require LC_CTYPE to be congruent with the category we are * looking for */ const char * orig_CTYPE_locale = toggle_locale_c(LC_CTYPE, locale); + LC_CTYPE_LOCK; # endif # ifdef USE_LOCALE_NUMERIC @@ -5330,6 +5707,7 @@ S_populate_hash_from_localeconv(pTHX_ HV * hv, /* Finally ready to do the actual localeconv(). Lock to prevent other * accesses until we have made a copy of its returned static buffer */ + LC_MONETARY_LOCK; gwLOCALE_LOCK; # if defined(TS_W32_BROKEN_LOCALECONV) && defined(USE_THREAD_SAFE_LOCALE) @@ -5470,6 +5848,7 @@ S_populate_hash_from_localeconv(pTHX_ HV * hv, gwLOCALE_UNLOCK; /* Finished with the critical section of a globally-accessible buffer */ + LC_MONETARY_UNLOCK; # if defined(USE_LOCALE_MONETARY) && defined(WIN32) @@ -5487,6 +5866,7 @@ S_populate_hash_from_localeconv(pTHX_ HV * hv, # ifdef USE_LOCALE_CTYPE restore_toggled_locale_c(LC_CTYPE, orig_CTYPE_locale); + LC_CTYPE_UNLOCK; # endif @@ -5881,6 +6261,33 @@ S_my_langinfo_i(pTHX_ /*--------------------------------------------------------------------------*/ # if defined(HAS_NL_LANGINFO) /* nl_langinfo() is available. */ + + /* The only difference between the normal and emulation is the type of + * locks. We have to always lock because this nl_langinfo() isn't thread + * safe */ +# ifndef EMULATE_THREAD_SAFE_LOCALES +# define NL_LANGINFO_LOCK(cat_index) gwLOCALE_LOCK +# define NL_LANGINFO_UNLOCK(cat_index) gwLOCALE_UNLOCK +# elif defined(USE_LOCALE_CTYPE) +# define NL_LANGINFO_LOCK(cat_index) \ + STMT_START { \ + LC_CATEGORY_LOCK_c_(LC_CTYPE); \ + if (cat_index != LC_CTYPE_INDEX_) { \ + LC_CATEGORY_LOCK_i_(cat_index); \ + } \ + } STMT_END + +# define NL_LANGINFO_UNLOCK(cat_index) \ + STMT_START { \ + if (cat_index != LC_CTYPE_INDEX_) { \ + LC_CATEGORY_UNLOCK_i_(cat_index); \ + } \ + LC_CATEGORY_UNLOCK_c_(LC_CTYPE); \ + } STMT_END +# else +# define NL_LANGINFO_LOCK(cat_index) LC_CATEGORY_LOCK_i_(cat_index) +# define NL_LANGINFO_UNLOCK(cat_index) LC_CATEGORY_UNLOCK_i_(cat_index) +# endif # ifdef USE_LOCALE_CTYPE /* This function sorts out if things actually have to be switched or not, @@ -5891,9 +6298,9 @@ S_my_langinfo_i(pTHX_ const char * orig_switched_locale = toggle_locale_i(cat_index, locale); - gwLOCALE_LOCK; + NL_LANGINFO_LOCK(cat_index); retval = save_to_buffer(nl_langinfo(item), retbufp, retbuf_sizep); - gwLOCALE_UNLOCK; + NL_LANGINFO_UNLOCK(cat_index); if (utf8ness) { *utf8ness = get_locale_string_utf8ness_i(retval, @@ -5955,14 +6362,19 @@ S_my_langinfo_i(pTHX_ Newx(floatbuf, initial_size, char); /* 1.5 is exactly representable on binary computers */ + LC_NUMERIC_LOCK(0); Size_t needed_size = snprintf(floatbuf, initial_size, "%.1f", 1.5); + LC_NUMERIC_UNLOCK; /* If our guess wasn't big enough, increase and try again, based on * the real number that snprintf() is supposed to return */ if (UNLIKELY(needed_size >= initial_size)) { needed_size++; /* insurance */ Renew(floatbuf, needed_size, char); - Size_t new_needed = snprintf(floatbuf, needed_size, "%.1f", 1.5); + LC_NUMERIC_LOCK(0); + Size_t new_needed = snprintf(floatbuf, needed_size, "%.1f", + 1.5); + LC_NUMERIC_UNLOCK; assert(new_needed <= needed_size); needed_size = new_needed; } @@ -6284,6 +6696,8 @@ S_my_langinfo_i(pTHX_ * is documented and has been stable for many releases */ UINT ___lc_codepage_func(void); + LC_CTYPE_LOCK; + # ifndef WIN32_USE_FAKE_OLD_MINGW_LOCALES retval = save_to_buffer(Perl_form(aTHX_ "%d", ___lc_codepage_func()), @@ -6294,6 +6708,8 @@ S_my_langinfo_i(pTHX_ retbufp, retbuf_sizep); # endif + LC_CTYPE_UNLOCK; + DEBUG_Lv(PerlIO_printf(Perl_debug_log, "locale='%s' cp=%s\n", locale, retval)); break; @@ -6990,7 +7406,8 @@ S_give_perl_locale_control(pTHX_ # endif # if ! defined(USE_THREAD_SAFE_LOCALE) \ - && ! defined(USE_POSIX_2008_LOCALE) + && ! defined(USE_POSIX_2008_LOCALE) \ + && ! defined(EMULATE_THREAD_SAFE_LOCALES) # if defined(LC_ALL) PERL_UNUSED_ARG(lc_all_string); # else @@ -7011,6 +7428,13 @@ S_give_perl_locale_control(pTHX_ } # endif +# endif +# if defined(USE_LOCALE) && ! defined(USE_THREAD_SAFE_LOCALE) + + /* This routine converts Perl to controlling the locale, and we need to + * tell this before calling new_LC_ALL() */ + PL_perl_controls_locale = true; + # endif /* Finally, update our remaining records. 'true' => force recalculation. @@ -7240,6 +7664,11 @@ Perl_init_i18nl10n(pTHX_ int printwarn) PL_cur_LC_ALL = savepv("C"); # endif +# ifdef EMULATE_THREAD_SAFE_LOCALES + for (unsigned int i = 0; i < LC_ALL_INDEX_; i++) { + PL_restore_locale[i] = NULL; + } +# endif # if ! defined(PERL_LC_ALL_USES_NAME_VALUE_PAIRS) && defined(LC_ALL) LOCALE_LOCK; @@ -8275,12 +8704,16 @@ Perl_mem_collxfrm_(pTHX_ const char *input_string, /* Then the transformation of the input. We loop until successful, or we * give up */ for (;;) { + LC_CTYPE_LOCK; + LC_COLLATE_LOCK; errno = 0; *xlen = my_strxfrm(xbuf + COLLXFRM_HDR_LEN, s, xAlloc - COLLXFRM_HDR_LEN); + LC_COLLATE_UNLOCK; + LC_CTYPE_UNLOCK; /* If the transformed string occupies less space than we told strxfrm() * was available, it means it transformed the whole string. */ @@ -8702,13 +9135,20 @@ Perl__is_in_locale_category(pTHX_ const bool compiling, const int category) * qualifies), these yield the correct one */ #if defined(USE_LOCALE_CTYPE) # define WHICH_LC_INDEX LC_CTYPE_INDEX_ +# define WHICH_LOCK LC_CTYPE_LOCK +# define WHICH_UNLOCK LC_CTYPE_UNLOCK #elif defined(USE_LOCALE_MESSAGES) # define WHICH_LC_INDEX LC_MESSAGES_INDEX_ +# define WHICH_LOCK LC_MESSAGES_LOCK +# define WHICH_UNLOCK LC_MESSAGES_UNLOCK #endif /*===========================================================================*/ /* First set of implementations, when have strerror_l() */ +#define MY_STRERROR_LOCK LC_MESSAGES_LOCK +#define MY_STRERROR_UNLOCK LC_MESSAGES_UNLOCK + #if defined(USE_POSIX_2008_LOCALE) && defined(HAS_STRERROR_L) # if ! defined(USE_LOCALE_CTYPE) && ! defined(USE_LOCALE_MESSAGES) @@ -8807,7 +9247,12 @@ Perl_my_strerror(pTHX_ const int errnum, utf8ness_t * utf8ness) DEBUG_STRERROR_ENTER(errnum, 0); + gwLOCALE_LOCK; + const char *errstr = savepv(Strerror(errnum)); + + gwLOCALE_UNLOCK; + *utf8ness = UTF8NESS_IMMATERIAL; DEBUG_STRERROR_RETURN(errstr, utf8ness); @@ -8832,7 +9277,12 @@ Perl_my_strerror(pTHX_ const int errnum, utf8ness_t * utf8ness) const char *errstr; if (IN_LC(categories[WHICH_LC_INDEX])) { + WHICH_LOCK; + gwLOCALE_LOCK; errstr = savepv(Strerror(errnum)); + gwLOCALE_UNLOCK; + WHICH_UNLOCK; + *utf8ness = get_locale_string_utf8ness_i(errstr, LOCALE_UTF8NESS_UNKNOWN, NULL, WHICH_LC_INDEX); @@ -8843,7 +9293,11 @@ Perl_my_strerror(pTHX_ const int errnum, utf8ness_t * utf8ness) const char * orig_locale = toggle_locale_i(WHICH_LC_INDEX, "C"); + WHICH_LOCK; + gwLOCALE_LOCK; errstr = savepv(Strerror(errnum)); + gwLOCALE_UNLOCK; + WHICH_UNLOCK; restore_toggled_locale_i(WHICH_LC_INDEX, orig_locale); @@ -8878,11 +9332,18 @@ Perl_my_strerror(pTHX_ const int errnum, utf8ness_t * utf8ness) LOCALE_LOCK; + LC_MESSAGES_LOCK; + LC_CTYPE_LOCK; + gwLOCALE_LOCK; + const char* orig_CTYPE_locale = toggle_locale_c(LC_CTYPE, desired_locale); const char* orig_MESSAGES_locale = toggle_locale_c(LC_MESSAGES, desired_locale); const char *errstr = savepv(Strerror(errnum)); + gwLOCALE_UNLOCK; + LC_CTYPE_UNLOCK; + LC_MESSAGES_UNLOCK; restore_toggled_locale_c(LC_MESSAGES, orig_MESSAGES_locale); restore_toggled_locale_c(LC_CTYPE, orig_CTYPE_locale); @@ -8923,10 +9384,21 @@ To return to Perl control, and restart the gotcha prevention services, call C>. Behavior is undefined for any pure Perl code that executes while the switch is in effect. -The global locale and the per-thread locales are independent. As long as just -one thread converts to the global locale, everything works smoothly. But if -more than one does, they can easily interfere with each other, and races are -likely. On Windows systems prior to Visual Studio 15 (at which point Microsoft +On perls without per thread-locales, there is only the global locale; so +calling this function effectively just disables the gotcha prevention services. + +On perls with per-thread locales, they are independent from the global locale. +As long as just one thread converts to the global locale, everything works +smoothly. But if more than one does, they can easily interfere with each +other, and races are likely. + +On perls that emulate per-thread locales, there is, behind the scenes, actually +just the global locale. Unlike the native per-thread locale platforms, any +thread that calls this function is likely to have races with the remaining +threads when calling locale-dependent libc functions, unless appropriate +mutexes have been inserted. + +On Windows systems prior to Visual Studio 15 (at which point Microsoft fixed a bug), races can occur (even if only one thread has been converted to the global locale), but only if you use the following operations: @@ -8977,6 +9449,9 @@ handle all cases of single- vs multi-thread, POSIX 2008-supported or not. freelocale(old_locale); \ } \ } STMT_END +#elif defined(EMULATE_THREAD_SAFE_LOCALES) +# define CHANGE_SYSTEM_LOCALE_TO_GLOBAL \ + PL_perl_controls_locale = false #else # define CHANGE_SYSTEM_LOCALE_TO_GLOBAL #endif @@ -9006,7 +9481,7 @@ Perl_switch_to_global_locale(pTHX) # else - const bool perl_controls = false; + const bool perl_controls = PL_perl_controls_locale; # endif @@ -9015,7 +9490,12 @@ Perl_switch_to_global_locale(pTHX) return; } -# ifdef LC_ALL +# if ! defined(USE_POSIX_2008_LOCALE) && ! defined(USE_THREAD_SAFE_LOCALE) + + PL_perl_controls_locale = false; + +# else +# ifdef LC_ALL const char * thread_locale = calculate_LC_ALL_string(NULL, EXTERNAL_FORMAT_FOR_SET, @@ -9024,7 +9504,7 @@ Perl_switch_to_global_locale(pTHX) CHANGE_SYSTEM_LOCALE_TO_GLOBAL; posix_setlocale(LC_ALL, thread_locale); -# else /* Must be USE_POSIX_2008_LOCALE) */ +# else const char * cur_thread_locales[LC_ALL_INDEX_]; @@ -9042,6 +9522,7 @@ Perl_switch_to_global_locale(pTHX) } POSIX_SETLOCALE_UNLOCK; +# endif # endif # ifdef USE_LOCALE_NUMERIC @@ -9062,9 +9543,9 @@ Perl_switch_to_global_locale(pTHX) This function copies the state of the program global locale into the calling thread, and converts that thread to using per-thread locales, if it wasn't -already, and the platform supports them. The LC_NUMERIC locale is toggled into -the standard state (using the C locale's conventions), if not within the -lexical scope of S>. +already, and the platform supports them or perl is Configured to emulate them. +The LC_NUMERIC locale is toggled into the standard state (using the C locale's +conventions), if not within the lexical scope of S>. Perl will now consider itself to have control of the locale. @@ -9085,8 +9566,11 @@ multi-threaded systems that don't have multi-thread safe locale operations. Using the libc L> function should be avoided. Nevertheless, certain non-Perl libraries called from XS, do call it, and their behavior may not be able to be changed. This function, along with -C>, can be used to get seamless behavior in these -circumstances, as long as only one thread is involved. +C>, can be used to get seamless behavior on systems +with per-thread locale handling, as long as only one thread is involved. +To get seamless behavior on platforms where perl emulates per-thread locale +handling, mutexes would have to be added to wrap that thread's locale-dependent +functions. If the library has an option to turn off its locale manipulation, doing that is preferable to using this mechanism. C is such a library. @@ -9107,9 +9591,18 @@ Perl_sync_locale(pTHX) #else - bool was_in_global = TRUE; + /* First, switch to the global locale, and note if we were already there */ + + bool was_in_global; -# ifdef USE_THREAD_SAFE_LOCALE +# if ! defined(USE_THREAD_SAFE_LOCALE) + + /* When not using thread-safe locales, as far as the system is concerned, + * there only is the global locale. */ + + was_in_global = PL_perl_controls_locale; + +# else /* Below is thread-safe */ # if defined(WIN32) int config_return = _configthreadlocale(_DISABLE_PER_THREAD_LOCALE); @@ -9118,7 +9611,7 @@ Perl_sync_locale(pTHX) } was_in_global = (config_return == _DISABLE_PER_THREAD_LOCALE); -# elif defined(USE_POSIX_2008_LOCALE) +# elif defined(USE_POSIX_2008_LOCALE) /* Thread-safe POSIX 2008 */ was_in_global = (LC_GLOBAL_LOCALE == uselocale(LC_GLOBAL_LOCALE)); @@ -9232,7 +9725,10 @@ Perl_switch_locale_context(pTHX) * * There are two implementations where this is an issue. For the other * implementations, it doesn't matter because libc is using global values - * that all threads know about. + * that all threads know about. This is true even for the thread-safe + * emulation, as everything to libc is still a global, and we use + * PL_curlocales (for example) to know what the correct locale(s) should + * be, and this variable is under control of aTHX. * * The two implementations are where libc keeps thread-specific information * on its own. These are @@ -9270,10 +9766,20 @@ Perl_switch_locale_context(pTHX) # elif defined(WIN32) - if (! bool_setlocale_c(LC_ALL, PL_cur_LC_ALL)) { - locale_panic_(Perl_form(aTHX_ "Can't setlocale(%s)", PL_cur_LC_ALL)); + if (! PL_perl_controls_locale) { + return; } + if (_configthreadlocale(_ENABLE_PER_THREAD_LOCALE) == -1) { + locale_panic_("_configthreadlocale returned an error"); + } + + const char * lc_all_copy = savepv(PL_cur_LC_ALL); + if (! bool_setlocale_c(LC_ALL, lc_all_copy)) { + locale_panic_(Perl_form(aTHX_ "Can't setlocale(%s)", lc_all_copy)); + } + Safefree(lc_all_copy); + # endif } @@ -9284,8 +9790,12 @@ void Perl_thread_locale_init(pTHX) { -#ifdef USE_THREAD_SAFE_LOCALE -# ifdef USE_POSIX_2008_LOCALE +#if defined(USE_LOCALE) +# ifndef USE_THREAD_SAFE_LOCALE + + PL_perl_controls_locale = TRUE; + +# elif defined(USE_POSIX_2008_LOCALE) /* Called from a thread on startup. * @@ -9312,12 +9822,18 @@ Perl_thread_locale_init(pTHX) PL_cur_locale_obj = PL_C_locale_obj; # endif -# elif defined(WIN32) +# else +# ifdef WIN32 /* On Windows, make sure new thread has per-thread locales enabled */ if (_configthreadlocale(_ENABLE_PER_THREAD_LOCALE) == -1) { locale_panic_("_configthreadlocale returned an error"); } + + PL_perl_controls_locale = true; + +# endif + void_setlocale_c(LC_ALL, "C"); # endif diff --git a/makedef.pl b/makedef.pl index 1bdc4ebe0815..2d2c8c613c04 100644 --- a/makedef.pl +++ b/makedef.pl @@ -68,6 +68,7 @@ BEGIN } use constant PLATFORM => $ARGS{PLATFORM}; +use constant LIBC => uc $Config{libc} =~ s/^-l//r; # This makes us able to use, e.g., $define{WIN32}, so you don't have to # remember what things came from %ARGS. @@ -175,8 +176,8 @@ BEGIN if ($define{USE_LOCALE_THREADS} && ! $define{NO_THREAD_SAFE_LOCALE}) { if ( $define{USE_POSIX_2008_LOCALE} - || ($define{WIN32} && ( $cctype !~ /\D/ - && $cctype >= 80))) + || ($define{WIN32} && ( ($cctype eq "GCC" && LIBC eq "UCRT") + || ($cctype !~ /\D/ && $cctype >= 80)))) { $define{USE_THREAD_SAFE_LOCALE} = 1; } @@ -482,6 +483,24 @@ sub readvar { ); } +unless ($define{USE_LOCALE} && ( $define{WIN32} + || ! $define{USE_THREAD_SAFE_LOCALE})) +{ + ++$skip{$_} foreach qw( + PL_perl_controls_locale + ); +} + +unless ($define{EMULATE_THREAD_SAFE_LOCALES}) +{ + ++$skip{$_} foreach qw( + PL_restore_locale + PL_restore_locale_depth + Perl_category_lock_i + Perl_category_unlock_i + ); +} + unless ($define{USE_PERL_SWITCH_LOCALE_CONTEXT}) { ++$skip{$_} foreach qw( diff --git a/mg.c b/mg.c index 4b6d4ab62266..82c6299dd26e 100644 --- a/mg.c +++ b/mg.c @@ -1096,12 +1096,11 @@ Perl_magic_get(pTHX_ SV *sv, MAGIC *mg) else if (strEQ(remaining, "AFE_LOCALES")) { #if ! defined(USE_ITHREADS) || defined(USE_THREAD_SAFE_LOCALE) - sv_setuv(sv, (UV) 1); - +#elif defined(EMULATE_THREAD_SAFE_LOCALES) + sv_setuv(sv, (UV) 2); #else sv_setuv(sv, (UV) 0); - #endif } diff --git a/perl.c b/perl.c index 6d1752203d14..298350d1c5bb 100644 --- a/perl.c +++ b/perl.c @@ -1126,6 +1126,12 @@ perl_destruct(pTHXx) PL_curlocales[i] = NULL; } #endif +#if defined(EMULATE_THREAD_SAFE_LOCALES) + for (i = 0; i < (int) C_ARRAY_LENGTH(PL_restore_locale); i++) { + Safefree(PL_restore_locale[i]); + PL_restore_locale[i] = NULL; + } +#endif #if defined(USE_POSIX_2008_LOCALE) && defined(USE_THREADS) { /* This also makes sure we aren't using a locale object that gets freed @@ -2076,7 +2082,8 @@ S_Internals_V(pTHX_ CV *cv) # ifdef USE_SITECUSTOMIZE " USE_SITECUSTOMIZE" # endif -# ifdef USE_THREAD_SAFE_LOCALE +# if defined(USE_THREAD_SAFE_LOCALE) \ + || defined(EMULATE_THREAD_SAFE_LOCALES) " USE_THREAD_SAFE_LOCALE" # endif ""; /* keep this on a line by itself, WITH the empty string */ diff --git a/perl.h b/perl.h index 92e14cf5e9d6..d1aac447387b 100644 --- a/perl.h +++ b/perl.h @@ -1281,8 +1281,11 @@ typedef enum { # endif /* POSIX 2008 has no means of finding out the current locale without a - * querylocale; so must keep track of it ourselves */ -# if (defined(USE_POSIX_2008_LOCALE) && ! defined(USE_QUERYLOCALE)) + * querylocale; so must keep track of it ourselves. And for thread-safe + * emulation, we keep track because the system doesn't have per-thread + * information */ +# if (defined(USE_POSIX_2008_LOCALE) && ! defined(USE_QUERYLOCALE)) \ + || defined(EMULATE_THREAD_SAFE_LOCALES) # define USE_PL_CURLOCALES # endif @@ -7303,19 +7306,73 @@ the plain locale pragma without a parameter (S>) is in effect. #define LOCALE_READ_LOCK LOCALE_LOCK #define LOCALE_READ_UNLOCK LOCALE_UNLOCK +#ifdef EMULATE_THREAD_SAFE_LOCALES -#ifndef LC_NUMERIC_LOCK -# define LC_NUMERIC_LOCK(cond) NOOP -# define LC_NUMERIC_UNLOCK NOOP -#endif +/* The macros for the individual categories are defined in terms of +* these two sets. */ +# define LC_CATEGORY_LOCK_i_(i) category_lock_i(i, __FILE__, __LINE__) +# define LC_CATEGORY_UNLOCK_i_(i) category_unlock_i(i, __FILE__, __LINE__) +# define LC_CATEGORY_LOCK_c_(cat) LC_CATEGORY_LOCK_i_(cat##_INDEX_) +# define LC_CATEGORY_UNLOCK_c_(cat) LC_CATEGORY_UNLOCK_i_(cat##_INDEX_) + +# ifdef USE_LOCALE_COLLATE +# define LC_COLLATE_LOCK LC_CATEGORY_LOCK_c_(LC_COLLATE) +# define LC_COLLATE_UNLOCK LC_CATEGORY_UNLOCK_c_(LC_COLLATE) +# endif +# ifdef USE_LOCALE_CTYPE +# define LC_CTYPE_LOCK LC_CATEGORY_LOCK_c_(LC_CTYPE) +# define LC_CTYPE_UNLOCK LC_CATEGORY_UNLOCK_c_(LC_CTYPE) +# endif +# ifdef USE_LOCALE_MESSAGES +# define LC_MESSAGES_LOCK LC_CATEGORY_LOCK_c_(LC_MESSAGES) +# define LC_MESSAGES_UNLOCK LC_CATEGORY_UNLOCK_c_(LC_MESSAGES) +# endif +# ifdef USE_LOCALE_MONETARY +# define LC_MONETARY_LOCK LC_CATEGORY_LOCK_c_(LC_MONETARY) +# define LC_MONETARY_UNLOCK LC_CATEGORY_UNLOCK_c_(LC_MONETARY) +# endif +# ifdef USE_LOCALE_NUMERIC + + /* This is the one category we may already have defined. It needs to be + * overwritten. We ignore the parameter in this case, since in this + * thread-safe emulation, all the threads are jumbled together */ +# undef LC_NUMERIC_LOCK +# define LC_NUMERIC_LOCK(cond_to_panic_if_already_locked) \ + LC_CATEGORY_LOCK_c_(LC_NUMERIC) +# undef LC_NUMERIC_UNLOCK +# define LC_NUMERIC_UNLOCK LC_CATEGORY_UNLOCK_c_(LC_NUMERIC) +# endif +# ifdef USE_LOCALE_TIME +# define LC_TIME_LOCK LC_CATEGORY_LOCK_c_(LC_TIME) +# define LC_TIME_UNLOCK LC_CATEGORY_UNLOCK_c_(LC_TIME) +# endif +#endif + +/* Below are lock definitions for individual functions that Perl uses. All + * such need to be in terms of the locale category(ies) that affect them, plus + * gwLOCALE_LOCK if they read/write global space. It is best to create a + * definition for each function to hide those details, and allow it to be more + * easily maintained. */ +#ifdef LC_CTYPE_LOCK +# define MBLEN_LOCK_ LC_CTYPE_LOCK +# define MBLEN_UNLOCK_ LC_CTYPE_UNLOCK +# define MBRLEN_LOCK_ LC_CTYPE_LOCK +# define MBRLEN_UNLOCK_ LC_CTYPE_UNLOCK +# define MBTOWC_LOCK_ LC_CTYPE_LOCK +# define MBTOWC_UNLOCK_ LC_CTYPE_UNLOCK +# define MBRTOWC_LOCK_ LC_CTYPE_LOCK +# define MBRTOWC_UNLOCK_ LC_CTYPE_UNLOCK +# define WCTOMB_LOCK_ LC_CTYPE_LOCK +# define WCTOMB_UNLOCK_ LC_CTYPE_UNLOCK +# define WCRTOMB_LOCK_ LC_CTYPE_LOCK +# define WCRTOMB_UNLOCK_ LC_CTYPE_UNLOCK +#else /* These non-reentrant versions use global space */ # define MBLEN_LOCK_ gwLOCALE_LOCK # define MBLEN_UNLOCK_ gwLOCALE_UNLOCK - # define MBTOWC_LOCK_ gwLOCALE_LOCK # define MBTOWC_UNLOCK_ gwLOCALE_UNLOCK - # define WCTOMB_LOCK_ gwLOCALE_LOCK # define WCTOMB_UNLOCK_ gwLOCALE_UNLOCK @@ -7329,13 +7386,46 @@ the plain locale pragma without a parameter (S>) is in effect. # define WCRTOMB_LOCK_ NOOP # define WCRTOMB_UNLOCK_ NOOP +# define LC_CTYPE_LOCK LOCALE_LOCK +# define LC_CTYPE_UNLOCK LOCALE_UNLOCK +#endif + +#if ! defined(LC_COLLATE_LOCK) # define LC_COLLATE_LOCK LOCALE_LOCK # define LC_COLLATE_UNLOCK LOCALE_UNLOCK +#endif + +#if ! defined(LC_MESSAGES_LOCK) +# define LC_MESSAGES_LOCK LOCALE_LOCK +# define LC_MESSAGES_UNLOCK LOCALE_UNLOCK +#endif + +#if ! defined(LC_MONETARY_LOCK) +# define LC_MONETARY_LOCK LOCALE_LOCK +# define LC_MONETARY_UNLOCK LOCALE_UNLOCK +#endif +#ifdef LC_TIME_LOCK +# define STRFTIME_LOCK /* Needs one exclusive lock */ \ + STMT_START { LC_CTYPE_LOCK; LC_TIME_LOCK; ENV_READ_LOCK; \ + } STMT_END +# define STRFTIME_UNLOCK \ + STMT_START { ENV_READ_UNLOCK; LC_TIME_UNLOCK; LC_CTYPE_UNLOCK; \ + } STMT_END +#else # define STRFTIME_LOCK ENV_LOCK # define STRFTIME_UNLOCK ENV_UNLOCK +# define LC_TIME_LOCK LOCALE_LOCK +# define LC_TIME_UNLOCK LOCALE_UNLOCK +#endif + #ifdef USE_LOCALE_NUMERIC +# ifndef LC_NUMERIC_LOCK +# define LC_NUMERIC_LOCK(cond_to_panic_if_already_locked) \ + LOCALE_LOCK_(cond_to_panic_if_already_locked) +# define LC_NUMERIC_UNLOCK LOCALE_UNLOCK_ +# endif /* These macros are for toggling between the underlying locale (UNDERLYING or * LOCAL) and the C locale (STANDARD). (Actually we don't have to use the C @@ -7659,6 +7749,11 @@ cannot have changed since the precalculation. #endif /* !USE_LOCALE_NUMERIC */ +#ifndef LC_NUMERIC_LOCK +# define LC_NUMERIC_LOCK(cond) NOOP +# define LC_NUMERIC_UNLOCK NOOP +#endif + #ifdef USE_LOCALE_THREADS # define ENV_LOCK PERL_WRITE_LOCK(&PL_env_mutex) # define ENV_UNLOCK PERL_WRITE_UNLOCK(&PL_env_mutex) diff --git a/pod/perlvar.pod b/pod/perlvar.pod index a2b0abfbdcdb..f6ef208dfca8 100644 --- a/pod/perlvar.pod +++ b/pod/perlvar.pod @@ -2547,10 +2547,11 @@ This variable was added in Perl v5.8.0. =item ${^SAFE_LOCALES} X<${^SAFE_LOCALES}> -Reflects if safe locale operations are available to this perl (when the -value is 1) or not (the value is 0). This variable is always 1 if the -perl has been compiled without threads. It is also 1 if this perl is -using thread-safe locale operations. Note that an individual thread may +Reflects if safe locale operations are available to this perl or not. +This variable is always 1 if the perl has been compiled without threads. +It is also 1 if this perl is using thread-safe locale operations. And +it is 2 if this perl is emulating thread-safe locale operations. Note +that an individual thread may choose to use the global locale (generally unsafe) by calling L. This variable currently is still set to 1 in such threads. diff --git a/proto.h b/proto.h index 27d4e7143d1d..fc147ffd23df 100644 --- a/proto.h +++ b/proto.h @@ -5407,6 +5407,24 @@ Perl_variant_byte_number(PERL_UINTMAX_T word) # endif #endif +#if defined(EMULATE_THREAD_SAFE_LOCALES) +PERL_CALLCONV void +Perl_category_lock_i(pTHX_ const locale_category_index index, const char *file, const line_t caller_line); +# define PERL_ARGS_ASSERT_CATEGORY_LOCK_I \ + assert(file) + +PERL_CALLCONV void +Perl_category_unlock_i(pTHX_ const locale_category_index index, const char *file, const line_t caller_line); +# define PERL_ARGS_ASSERT_CATEGORY_UNLOCK_I \ + assert(file) + +# if !defined(PERL_NO_INLINE_FUNCTIONS) +PERL_STATIC_INLINE int +Perl_posix_LC_foo_(pTHX_ const int c, const U8 classnum); +# define PERL_ARGS_ASSERT_POSIX_LC_FOO_ + +# endif +#endif /* defined(EMULATE_THREAD_SAFE_LOCALES) */ #if defined(F_FREESP) && !defined(HAS_CHSIZE) && !defined(HAS_TRUNCATE) PERL_CALLCONV I32 Perl_my_chsize(pTHX_ int fd, Off_t length) @@ -7063,6 +7081,14 @@ S_my_setlocale_debug_string_i(pTHX_ const locale_category_index cat_index, const __attribute__warn_unused_result__; # define PERL_ARGS_ASSERT_MY_SETLOCALE_DEBUG_STRING_I +# endif +# if defined(EMULATE_THREAD_SAFE_LOCALES) || \ + ( defined(USE_POSIX_2008_LOCALE) && !defined(USE_QUERYLOCALE) ) +STATIC void +S_update_PL_curlocales_i(pTHX_ const locale_category_index index, const char *new_locale, const line_t caller_line); +# define PERL_ARGS_ASSERT_UPDATE_PL_CURLOCALES_I \ + assert(new_locale) + # endif # if defined(HAS_NL_LANGINFO) || defined(HAS_NL_LANGINFO_L) STATIC const char * @@ -7150,16 +7176,9 @@ STATIC locale_t S_use_curlocale_scratch(pTHX); # define PERL_ARGS_ASSERT_USE_CURLOCALE_SCRATCH -# if !defined(USE_QUERYLOCALE) -STATIC void -S_update_PL_curlocales_i(pTHX_ const locale_category_index index, const char *new_locale, const line_t caller_line); -# define PERL_ARGS_ASSERT_UPDATE_PL_CURLOCALES_I \ - assert(new_locale) - -# endif -# elif defined(USE_LOCALE_THREADS) && \ - !defined(USE_THREAD_SAFE_LOCALE) && \ - !defined(USE_THREAD_SAFE_LOCALE_EMULATION) /* && +# elif !defined(EMULATE_THREAD_SAFE_LOCALES) && \ + defined(USE_LOCALE_THREADS) && \ + !defined(USE_THREAD_SAFE_LOCALE) /* && !defined(USE_POSIX_2008_LOCALE) */ STATIC bool S_less_dicey_bool_setlocale_r(pTHX_ const int cat, const char *locale); @@ -7170,10 +7189,10 @@ STATIC const char * S_less_dicey_setlocale_r(pTHX_ const int category, const char *locale); # define PERL_ARGS_ASSERT_LESS_DICEY_SETLOCALE_R -# endif /* defined(USE_LOCALE_THREADS) && +# endif /* !defined(EMULATE_THREAD_SAFE_LOCALES) && + defined(USE_LOCALE_THREADS) && !defined(USE_POSIX_2008_LOCALE) && - !defined(USE_THREAD_SAFE_LOCALE) && - !defined(USE_THREAD_SAFE_LOCALE_EMULATION) */ + !defined(USE_THREAD_SAFE_LOCALE) */ # if defined(WIN32) || defined(WIN32_USE_FAKE_OLD_MINGW_LOCALES) STATIC wchar_t * S_Win_byte_string_to_wstring(const UINT code_page, const char *byte_string); diff --git a/sv.c b/sv.c index 91b64d528cef..e1556f4dbbf2 100644 --- a/sv.c +++ b/sv.c @@ -16045,6 +16045,10 @@ perl_clone_using(PerlInterpreter *proto_perl, UV flags, PL_subname = sv_dup_inc(proto_perl->Isubname, param); +/* The new locale starts in the global C locale. */ +#if defined(USE_LOCALE) && (defined(WIN32) || ! defined(USE_THREAD_SAFE_LOCALE)) + PL_perl_controls_locale = false; +#endif #ifdef USE_PL_CURLOCALES for (i = 0; i < (int) C_ARRAY_LENGTH(PL_curlocales); i++) { PL_curlocales[i] = SAVEPV("C"); @@ -16053,6 +16057,12 @@ perl_clone_using(PerlInterpreter *proto_perl, UV flags, #ifdef USE_PL_CUR_LC_ALL PL_cur_LC_ALL = SAVEPV("C"); #endif +# ifdef EMULATE_THREAD_SAFE_LOCALES + for (i = 0; i < (int) C_ARRAY_LENGTH(PL_restore_locale); i++) { + PL_restore_locale[i] = NULL; + PL_restore_locale_depth[i] = 0; + } +#endif #ifdef USE_LOCALE_CTYPE Copy(PL_fold, PL_fold_locale, 256, U8);