diff --git a/embed.fnc b/embed.fnc index def504177a7a..e36912cc8f67 100644 --- a/embed.fnc +++ b/embed.fnc @@ -3779,6 +3779,17 @@ p |void |dump_sv_child |NN SV *sv CRTip |unsigned int|variant_byte_number \ |PERL_UINTMAX_T word #endif +#if defined(EMULATE_THREAD_SAFE_LOCALES) +Cp |void |category_lock_i|const locale_category_index index \ + |NN const char *file \ + |const line_t caller_line +Cp |void |category_unlock_i \ + |const locale_category_index index \ + |NN const char *file \ + |const line_t caller_line +Cip |int |posix_LC_foo_ |const int c \ + |const U8 classnum +#endif #if defined(F_FREESP) && !defined(HAS_CHSIZE) && !defined(HAS_TRUNCATE) ARdp |I32 |my_chsize |int fd \ |Off_t length @@ -4457,6 +4468,13 @@ RS |char * |my_setlocale_debug_string_i \ |NULLOK const char *retval \ |const line_t line # endif +# if defined(EMULATE_THREAD_SAFE_LOCALES) || \ + ( defined(USE_POSIX_2008_LOCALE) && !defined(USE_QUERYLOCALE) ) +S |void |update_PL_curlocales_i \ + |const locale_category_index index \ + |NN const char *new_locale \ + |const line_t caller_line +# endif # if defined(HAS_NL_LANGINFO) || defined(HAS_NL_LANGINFO_L) S |const char *|my_langinfo_i \ |const nl_item item \ @@ -4517,14 +4535,8 @@ S |const char *|querylocale_2008_i \ |const locale_category_index index \ |const line_t line S |locale_t|use_curlocale_scratch -# if !defined(USE_QUERYLOCALE) -S |void |update_PL_curlocales_i \ - |const locale_category_index index \ - |NN const char *new_locale \ - |const line_t caller_line -# endif -# elif defined(USE_LOCALE_THREADS) && !defined(USE_THREAD_SAFE_LOCALE) && \ - !defined(USE_THREAD_SAFE_LOCALE_EMULATION) +# elif !defined(EMULATE_THREAD_SAFE_LOCALES) && \ + defined(USE_LOCALE_THREADS) && !defined(USE_THREAD_SAFE_LOCALE) S |bool |less_dicey_bool_setlocale_r \ |const int cat \ |NN const char *locale diff --git a/embed.h b/embed.h index 8f5e05b257ef..b3fa97a54745 100644 --- a/embed.h +++ b/embed.h @@ -810,6 +810,11 @@ # if !defined(EBCDIC) # define variant_byte_number Perl_variant_byte_number # endif +# if defined(EMULATE_THREAD_SAFE_LOCALES) +# define category_lock_i(a,b,c) Perl_category_lock_i(aTHX_ a,b,c) +# define category_unlock_i(a,b,c) Perl_category_unlock_i(aTHX_ a,b,c) +# define posix_LC_foo_(a,b) Perl_posix_LC_foo_(aTHX_ a,b) +# endif # if defined(F_FREESP) && !defined(HAS_CHSIZE) && !defined(HAS_TRUNCATE) # define my_chsize(a,b) Perl_my_chsize(aTHX_ a,b) # endif @@ -1313,6 +1318,10 @@ # if defined(DEBUGGING) # define my_setlocale_debug_string_i(a,b,c,d) S_my_setlocale_debug_string_i(aTHX_ a,b,c,d) # endif +# if defined(EMULATE_THREAD_SAFE_LOCALES) || \ + ( defined(USE_POSIX_2008_LOCALE) && !defined(USE_QUERYLOCALE) ) +# define update_PL_curlocales_i(a,b,c) S_update_PL_curlocales_i(aTHX_ a,b,c) +# endif # if defined(HAS_NL_LANGINFO) || defined(HAS_NL_LANGINFO_L) # define my_langinfo_i(a,b,c,d,e,f) S_my_langinfo_i(aTHX_ a,b,c,d,e,f) # else @@ -1343,12 +1352,8 @@ # define bool_setlocale_2008_i(a,b,c) S_bool_setlocale_2008_i(aTHX_ a,b,c) # define querylocale_2008_i(a,b) S_querylocale_2008_i(aTHX_ a,b) # define use_curlocale_scratch() S_use_curlocale_scratch(aTHX) -# if !defined(USE_QUERYLOCALE) -# define update_PL_curlocales_i(a,b,c) S_update_PL_curlocales_i(aTHX_ a,b,c) -# endif -# elif defined(USE_LOCALE_THREADS) && \ - !defined(USE_THREAD_SAFE_LOCALE) && \ - !defined(USE_THREAD_SAFE_LOCALE_EMULATION) +# elif !defined(EMULATE_THREAD_SAFE_LOCALES) && \ + defined(USE_LOCALE_THREADS) && !defined(USE_THREAD_SAFE_LOCALE) # define less_dicey_bool_setlocale_r(a,b) S_less_dicey_bool_setlocale_r(aTHX_ a,b) # define less_dicey_setlocale_r(a,b) S_less_dicey_setlocale_r(aTHX_ a,b) # endif diff --git a/embedvar.h b/embedvar.h index 3b8bff0a6bb1..849a5ccfbafd 100644 --- a/embedvar.h +++ b/embedvar.h @@ -224,6 +224,7 @@ # define PL_parser (vTHX->Iparser) # define PL_patchlevel (vTHX->Ipatchlevel) # define PL_peepp (vTHX->Ipeepp) +# define PL_perl_controls_locale (vTHX->Iperl_controls_locale) # define PL_perl_destruct_level (vTHX->Iperl_destruct_level) # define PL_perldb (vTHX->Iperldb) # define PL_perlio (vTHX->Iperlio) @@ -251,6 +252,8 @@ # define PL_replgv (vTHX->Ireplgv) # define PL_restartjmpenv (vTHX->Irestartjmpenv) # define PL_restartop (vTHX->Irestartop) +# define PL_restore_locale (vTHX->Irestore_locale) +# define PL_restore_locale_depth (vTHX->Irestore_locale_depth) # define PL_rpeepp (vTHX->Irpeepp) # define PL_rs (vTHX->Irs) # define PL_runops (vTHX->Irunops) diff --git a/handy.h b/handy.h index f2837a3bf45a..e4e461dbacf4 100644 --- a/handy.h +++ b/handy.h @@ -1555,9 +1555,16 @@ or casts # define HIGHEST_REGCOMP_DOT_H_SYNC_ CC_VERTSPACE_ -/* The members of the third group below do not need to be coordinated with data - * structures in regcomp.[ch] and regexec.c. */ -# define CC_IDFIRST_ 16 +/* These three follow immediately after the final function that has a version + * defined by C, like isascii(), so they overlap with anything else. They are + * used in the 'PL_libc_char_fcns' data structure, along with the ones above + * them */ +# define CC_IDFIRST_ 16 +# define CC_TOLOWER_ (CC_IDFIRST_ + 1) +# define CC_TOUPPER_ (CC_TOLOWER_ + 1) + +/* The members of the fourth group below do not need to be coordinated with + * data structures in regcomp.[ch] and regexec.c. */ # define CC_CHARNAME_CONT_ 17 # define CC_NONLATIN1_FOLD_ 18 # define CC_NONLATIN1_SIMPLE_FOLD_ 19 @@ -2025,7 +2032,7 @@ END_EXTERN_C # define is_posix_XDIGIT(c) isxdigit((U8) (c)) #endif -/* Below is the next level up, which currently expands to nothing more +/* Below is the next level up, which on most platforms expands to nothing more * than the previous layer. These are the macros to use if you really need * something whose input domain is a byte, and the locale isn't UTF-8; that is, * where you normally would have to use things like bare isalnum(). @@ -2037,7 +2044,13 @@ END_EXTERN_C * (Note, proper general operation of the bare libc functions requires you to * cast to U8. These do that for you automatically.) */ +/* In this one circumstance, the macro is implemented with a lock; otherwise it + * expands to just the layer below */ +#ifdef EMULATE_THREAD_SAFE_LOCALES +# define WRAP_U8_LC_(c, classnum, posix) posix_LC_foo_((c), (classnum)) +#else # define WRAP_U8_LC_(c, classnum, posix) posix(c) +#endif #define isU8_ALPHANUMERIC_LC(c) \ WRAP_U8_LC_((c), CC_ALPHANUMERIC_, is_posix_ALPHANUMERIC) diff --git a/inline.h b/inline.h index 59d16645b235..619020190b0b 100644 --- a/inline.h +++ b/inline.h @@ -318,6 +318,49 @@ S_PerlEnv_putenv(pTHX_ char * str) #endif +/* ------------------------------- handy.h ------------------------------- */ + +#ifdef EMULATE_THREAD_SAFE_LOCALES + +PERL_STATIC_INLINE int +Perl_posix_LC_foo_(pTHX_ const int c, const U8 classnum) { + int result; + + LC_CTYPE_LOCK; + + /* All calls to this (so far) are with a 'classnum' known at compile time, + * so the compiler should constant fold this down to a single assignment */ + switch (classnum) { + case CC_ALPHANUMERIC_:result = (bool) is_posix_ALPHANUMERIC(c); break; + case CC_ALPHA_: result = (bool) is_posix_ALPHA(c); break; + case CC_ASCII_: result = (bool) is_posix_ASCII(c); break; + case CC_BLANK_: result = (bool) is_posix_BLANK(c); break; + case CC_CASED_: result = (bool) is_posix_CASED(c); break; + case CC_CNTRL_: result = (bool) is_posix_CNTRL(c); break; + case CC_DIGIT_: result = (bool) is_posix_DIGIT(c); break; + case CC_GRAPH_: result = (bool) is_posix_GRAPH(c); break; + case CC_LOWER_: result = (bool) is_posix_LOWER(c); break; + case CC_PRINT_: result = (bool) is_posix_PRINT(c); break; + case CC_PUNCT_: result = (bool) is_posix_PUNCT(c); break; + case CC_SPACE_: result = (bool) is_posix_SPACE(c); break; + case CC_UPPER_: result = (bool) is_posix_UPPER(c); break; + case CC_WORDCHAR_: result = (bool) is_posix_WORDCHAR(c); break; + case CC_XDIGIT_: result = (bool) is_posix_XDIGIT(c); break; + case CC_IDFIRST_: result = (bool) is_posix_IDFIRST(c); break; + case CC_TOLOWER_: result = to_posix_LOWER(c); break; + case CC_TOUPPER_: result = to_posix_UPPER(c); break; + + default: + LC_CTYPE_UNLOCK; + locale_panic_(Perl_form(aTHX_ "Unknown charclass %d", classnum)); + } + + LC_CTYPE_UNLOCK; + return result; +} + +#endif + /* ------------------------------- mg.h ------------------------------- */ #if defined(PERL_CORE) || defined(PERL_EXT) diff --git a/intrpvar.h b/intrpvar.h index 4d5786aad958..0e6f5118643d 100644 --- a/intrpvar.h +++ b/intrpvar.h @@ -757,7 +757,14 @@ PERLVARI(I, cur_locale_obj, locale_t, LC_GLOBAL_LOCALE) * is almost always toggled into the C locale, and the locale it nominally is * is stored as PL_numeric_name. */ PERLVARA(I, curlocales, LOCALE_CATEGORIES_COUNT_ + 1, const char *) +#endif +#ifdef EMULATE_THREAD_SAFE_LOCALES +PERLVARA(I, restore_locale, LOCALE_CATEGORIES_COUNT_, const char *) +PERLVARA(I, restore_locale_depth, LOCALE_CATEGORIES_COUNT_, Size_t) +#endif +#if defined(USE_LOCALE) && (defined(WIN32) || ! defined(USE_THREAD_SAFE_LOCALE)) +PERLVARI(I, perl_controls_locale, bool, true) #endif #ifdef USE_PL_CUR_LC_ALL PERLVARI(I, cur_LC_ALL, const char *, NULL) diff --git a/locale.c b/locale.c index ad3b3c1b033c..21a0cc930e09 100644 --- a/locale.c +++ b/locale.c @@ -42,13 +42,6 @@ * platform than it actually is. This allows you to make changes and catch * some errors without having access to those other platforms. * - * This code now has multi-thread-safe locale handling on systems that support - * that. This is completely transparent to most XS code. On earlier systems, - * it would be possible to emulate thread-safe locales, but this likely would - * involve a lot of locale switching, and would require XS code changes. - * Macros could be written so that the code wouldn't have to know which type of - * system is being used. - * * Table-driven code is used for simplicity and clarity, as many operations * differ only in which category is being worked on. However the system * categories need not be small contiguous integers, so do not lend themselves @@ -69,6 +62,16 @@ * bool_setlocale_2008_i() function is used to hide the different API from the * outside. This makes it completely transparent to most XS code. * + * On other threaded-systems, the code here, in conjunction with other code in + * the system, emulates thread-safe locales by using mutexes to lock other + * threads out, and change the global locale to the desired per-thread value + * just before operations that care about it. All such operations must declare + * their need before executing, or it won't work. All of the Perl core does + * this, which makes pure Perl code locale thread-safe. XS code can be + * extended to work by using the macros for the purpose in perl.h. The need + * for mutexes means that in these platforms, much of the code in this file + * must be done while in critical sections. + * * A huge complicating factor is that the LC_NUMERIC category is normally held * in the C locale, except during those relatively rare times when it needs to * be in the underlying locale. There is a bunch of code to accomplish this, @@ -79,7 +82,7 @@ * opportunities for avoiding work. We don't have to necessarily create a safe * copy to return if no return is desired. * - * There are 3.5 major implementations here; which one chosen depends on what + * There are 4.5 major implementations here; which one chosen depends on what * the platform has available, and Configuration options. * * 1) Raw posix_setlocale(). This implementation is basically the libc @@ -102,7 +105,9 @@ * * 2) An implementation that adds a minimal layer above implementation 1), * making that implementation uninterruptible and returning a - * per-thread/per-category value. + * per-thread/per-category value. Currently, this is for threaded perls + * on platforms where layers 3a and 3b are not available, and where layer 4 + * has not been selected. * * 3a and 3b) An implementation of POSIX 2008 thread-safe locale handling, * hiding from the programmer the completely different API for this. @@ -119,7 +124,32 @@ * are buggy, in one way or another. There are workarounds encoded here, * where feasible, for platforms where the bugs are amenable to that * (glibc, for example). But other platforms instead don't use this - * implementation. + * implementation, but the next one below. + * + * 4) A thread-safe emulation implementation that, in conjunction with changes + * to C code, makes locale handling thread-safe. Those changes are simply + * to wrap locale-dependent system calls with macros that delimit a critical + * section in which they change the global locale to the one the thread + * expects. The perl core has made those changes, so pure perl programs + * become thread-safe. Well-behaved XS code also keeps things thread-safe, + * either by not using locale-dependent system calls, or by changing to use + * the wrapper macros. This layer is not chosen if the platform has native + * thread-safe locale handling. Also, currently perl must have been + * Configured with "-Accflags=-DEMULATE_THREAD_SAFE_LOCALES". + * + * This implementation is based on the observation that the underlying + * locale matters only to relatively few libc calls, and only during their + * execution. It can be anything at all at any other time. What the proper + * locale should be for each category is kept in the array PL_curlocales[]. + * Each locale-dependent operation must be wrapped in mutex lock/unlock + * operations. The lock additionally compares what libc knows the locale to + * be, and what it should be for this thread at this time, and changes the + * actual locale to the proper value if necessary. That's all that is + * needed. However additionally, the unlock restores the locale to what it + * was at the time of the lock. This improves the chances that a thread not + * under perl's control (such as Gtk) will still work. (If mutex calls are + * added to lock out that thread from running when the other threads are + * using locale-dependent functions, then things should completely work.) * * z/OS (os390) is an outlier. Locales really don't work under threads when * either the radix character isn't a dot, or attempts are made to change @@ -227,10 +257,11 @@ * crippled locale implementation. * * -Accflags=-DNO_THREAD_SAFE_LOCALE - * Even if thread-safe operations are available on this platform and - * would otherwise be used (because this is a perl with multiplicity), - * perl is compiled to not use them. This could be useful on - * platforms where the libc is buggy. + * Don't use the thread-safe operations on this platform (should they + * be available) nor try to emulate them (if they are not available) + * even on a perl with multiplicity. This could be useful on + * platforms where the libc is buggy, or the emulation runs into + * problems. * * -Accflags=-DNO_POSIX_2008_LOCALE * Even if the libc locale operations specified by the Posix 2008 @@ -271,6 +302,13 @@ * these have no effect. Otherwise they cause perl to be compiled to * always keep the named category(ies) in the C locale. * + * -Accflags=-DEMULATE_THREAD_SAFE_LOCALES + * This has no effect on unthreaded perls, nor when perl thinks that + * the platform has thread-safe locale handling. But otherwise, it + * enables the code to emulate thread-safe locale handling. + * Effectively it chooses implementation 4) instead of implementation + * 2) from the list above. + * * -Accflags=-DHAS_BROKEN_SETLOCALE_QUERY_LC_ALL * This would be set in a hints file to tell perl that doing a libc * setlocale(LC_ALL, NULL) @@ -1896,7 +1934,8 @@ S_setlocale_i(pTHX_ const int category, const char * locale) /*===========================================================================*/ #elif defined(USE_LOCALE_THREADS) \ - && ! defined(USE_THREAD_SAFE_LOCALE) + && ! defined(USE_THREAD_SAFE_LOCALE) \ + && ! defined(EMULATE_THREAD_SAFE_LOCALES) /* Here, there are threads, and there is no support for thread-safe * operation. This is a dangerous situation, which perl is documented as @@ -1980,6 +2019,306 @@ S_less_dicey_bool_setlocale_r(pTHX_ const int cat, const char * locale) * some shortcuts */ # define setlocale_i(i, locale) less_dicey_setlocale_r(categories[i], locale) +/*===========================================================================*/ +#elif defined(EMULATE_THREAD_SAFE_LOCALES) + +/* Here, use our emulation of thread safe locales. PL_curlocales[] keeps what + * the name of the locale should be for each category in the current thread. + * And so, S_bool_setlocale_emulate_safe_i() wraps each call to the system's + * setlocale() with saving the return into PL_curlocales. + * + * The locale is changed to the one specified by PL_curlocales[] just before + * any libc call affected by it, and restored just afterwards. */ + +# define querylocale_i(i) S_querylocale_emulate_safe_i(aTHX_ i, __LINE__) +# define querylocale_c(cat) querylocale_i(cat##_INDEX_) +# define querylocale_r(cat) querylocale_i(get_category_index(cat)) + +STATIC const char * +S_querylocale_emulate_safe_i(pTHX_ const unsigned int cat_index, + const line_t caller_line) +{ + assert(cat_index <= LOCALE_CATEGORIES_COUNT_ + 1); + + DEBUG_Lv(PerlIO_printf(Perl_debug_log, + " querylocale_emulate_safe_i(%u: %s);" + " called from %" LINE_Tf "\n", + cat_index, category_names[cat_index], caller_line)); + +# ifdef LC_ALL + + /* It can be somewhat expensive to calculate LC_ALL from its constituent + * categories, and the value might change many times before it is actually + * used. Therefore, it is only done as needed. This is such a place */ + if ( cat_index == LC_ALL_INDEX_ + && PL_curlocales[LC_ALL_INDEX_] == NULL) + { + /* Call just for its side effect */ + (void) calculate_LC_ALL_string(PL_curlocales, INTERNAL_FORMAT, + WANT_TEMP_PV, + caller_line); + } + +# endif + + return mortalized_pv_copy(PL_curlocales[cat_index]); +} + +/*---------------------------------------------------------------------------*/ + +# define bool_setlocale_r(cat, locale) \ + S_bool_setlocale_emulate_safe_r(aTHX_ cat, locale, __LINE__) +# define bool_setlocale_i(i, locale) \ + bool_setlocale_r(categories[i], locale) +# define bool_setlocale_c(cat, locale) bool_setlocale_r(cat, locale) + +STATIC bool +S_bool_setlocale_emulate_safe_r(pTHX_ + const int category, + const char * wanted_locale, + const line_t caller_line) +{ + /* Set the locale to 'wanted_locale' for the category given by our internal + * index number, and save the result for later use. */ + + assert(wanted_locale); + + STDIZED_SETLOCALE_LOCK; + const char * new_locale = savepv(stdized_setlocale(category, + wanted_locale)); + STDIZED_SETLOCALE_UNLOCK; + + if (! new_locale) { + SET_EINVAL; + return false; + } + + update_PL_curlocales_i(get_category_index(category), + new_locale, caller_line); + Safefree(new_locale); + return true; +} +/*---------------------------------------------------------------------------*/ + +# define void_setlocale_r_with_caller(cat, locale, file, line) \ + STMT_START { \ + if (! bool_setlocale_r(cat, locale)) \ + setlocale_failure_panic_via_i(get_category_index(cat), \ + NULL, locale, __LINE__, 0, \ + file, line); \ + } STMT_END + +# define void_setlocale_c_with_caller(cat, locale, file, line) \ + void_setlocale_r_with_caller(cat, locale, file, line) + +# define void_setlocale_i_with_caller(i, locale, file, line) \ + void_setlocale_r_with_caller(categories[i], locale, file, line) + +# define void_setlocale_r(cat, locale) \ + void_setlocale_r_with_caller(cat, locale, __FILE__, __LINE__) +# define void_setlocale_c(cat, locale) void_setlocale_r(cat, locale) +# define void_setlocale_i(i, locale) void_setlocale_r(categories[i], locale) + +/*---------------------------------------------------------------------------*/ +/* utility functions for emulating thread-safe locales. + * + * When emulating thread-safe locales, our per-thread data structures get set + * up as normal, but the actual locale is global to all threads. All functions + * that depend on the locale need to be protected by critical sections + * surrounded by these two functions that lock, and then unlock after the + * operation is completed. The first function does a lock and then changes the + * locale to the desired one for this thread, based on the per-thread data + * structures. The restore function restores to what the locale on original + * entry was, and unlocks. This is effectively a just-in-time locale setting + * scheme. + * + * In order to accommodate the need for more than one category being used by a + * function, this implements a stack. The lock is called for each needed + * category. Since the locks are general semaphores, only the first call + * results in an actual lock. Each call thus changes the locale for its + * category to the desired one, pushing onto the stack what it should be + * restored to afterwards. The paired unlock calls unwind the stack until the + * final one causes the mutex to be released. Most libc call require one or + * two categories. + * + * One could argue that there is no reason to restore afterwards, that the next + * just-in-time call will set the locale to the correct one. But doing the + * restore allows this scheme to work like truly thread-safe implementations + * when one thread is in the global locale. By restoring, we leave code not + * under this scheme to have the global thread for itself. There is a big + * caveat here, though. That thread must run in a critical section. This + * isn't the case with the other thread-safe implementations. */ + +void +Perl_category_lock_i(pTHX_ const locale_category_index cat_index, + const char * file, + const line_t caller_line) +{ + dSAVE_ERRNO; + +# ifndef DEBUGGING + PERL_UNUSED_ARG(file); +# endif + + DEBUG_Lv(PerlIO_printf(Perl_debug_log, + "Entering category_lock_i %s;" + " called from %s: %d\n", + category_names[cat_index], file, caller_line)); + + PERL_ARGS_ASSERT_CATEGORY_LOCK_I; + assert(cat_index < LC_ALL_INDEX_); + + LOCALE_LOCK; + + if (LIKELY(PL_perl_controls_locale)) { + + /* What locale we're supposed to be in */ + +# ifdef USE_LOCALE_NUMERIC + + /* For all categories except LC_NUMERIC, PL_curlocales[] contains the + * correct value to set to. LC_NUMERIC is more complicated. If it is + * known that we should be in the C locale, use C. If we are in the + * underlying locale, use that. Otherwise use the saved value */ + const char * wanted = (cat_index != LC_NUMERIC_INDEX_) + ? PL_curlocales[cat_index] + : ((PL_numeric_standard) + ? "C" + : ((NOT_IN_NUMERIC_UNDERLYING_) + ? PL_curlocales[cat_index] + : PL_numeric_name)); +# else + const char * wanted = PL_curlocales[cat_index]; +# endif + assert(wanted); + + DEBUG_Lv(PerlIO_printf(Perl_debug_log, + "%s: wanted =%s\n", + category_names[cat_index], wanted)); + + /* Get the category desired, and what its current locale is */ + const int cat = categories[cat_index]; + const char * currently = stdized_setlocale(cat, NULL); + + /* If we aren't in the desired locale, change to it, saving a copy of + * the one we actually are in before the change */ + if (strNE(currently, wanted)) { + DEBUG_Lv(PerlIO_printf(Perl_debug_log, + "%s:%d: Calling setlocale(%d, %s)\n", file, caller_line, + cat, wanted)); + if (stdized_setlocale(cat, wanted) == NULL) { + setlocale_failure_panic_i(cat_index, currently, wanted, + __LINE__, caller_line); + NOT_REACHED; /* NOTREACHED */ + } + } + else { + DEBUG_Lv(PerlIO_printf(Perl_debug_log, + "%s: %d: Category %d already was %s\n", + file, caller_line, cat, wanted)); + } + + /* Here, we have toggled to the desired locale, so 'currently' eq + * 'wanted' + * + * This may be a recursive call. Everything remains locked during the + * recursion. We restore to the original locale after the recursion + * gets unwound. The intermediate values aren't needed. */ + if (PL_restore_locale_depth[cat_index] == 0) { + + /* Only need to change what's there if no current value or differs + * from the new one */ + if ( PL_restore_locale[cat_index] == NULL + || strNE(wanted, PL_restore_locale[cat_index])) + { + Safefree(PL_restore_locale[cat_index]); + PL_restore_locale[cat_index] = savepv(wanted); + } + } + + /* Indicate our new recursion depth */ + PL_restore_locale_depth[cat_index]++; + + DEBUG_Lv(PerlIO_printf(Perl_debug_log, + "%s:%d: PL_restore is now %s," + " recursion depth=%zu\n", + file, caller_line, PL_restore_locale[cat_index], + PL_restore_locale_depth[cat_index])); + } + + DEBUG_Lv(PerlIO_printf(Perl_debug_log, + "Leaving category_lock_i: %s\n", + category_names[cat_index])); + + RESTORE_ERRNO; +} + +void +Perl_category_unlock_i(pTHX_ const locale_category_index cat_index, + const char * file, + const line_t caller_line) +{ + PERL_ARGS_ASSERT_CATEGORY_UNLOCK_I; + assert(cat_index < LC_ALL_INDEX_); + dSAVE_ERRNO; + + /* Undoes a matching category_lock(). Note that must be locked on input. + * Will unlock when recursion entirely gets unwound */ + + DEBUG_Lv(PerlIO_printf(Perl_debug_log, + "Entering category_unlock_i %s;" + " called from %s: %d\n", + category_names[cat_index], file, caller_line)); + + if (LIKELY(PL_perl_controls_locale)) { + const int cat = categories[cat_index]; + + /* Un-recursing */ + PL_restore_locale_depth[cat_index]--; + + /* Only restore when the depth gets back to 0 */ + if (PL_restore_locale_depth[cat_index] == 0) { + + /* What we currently are */ + const char * currently = stdized_setlocale(cat, NULL); + + /* And what we need to be changed to */ + const char * wanted = PL_restore_locale[cat_index]; + + /* If we need to change, do it */ + if (strNE(currently, wanted)) { + DEBUG_Lv(PerlIO_printf(Perl_debug_log, + "%s:%d: Calling setlocale(%d, %s)\n", + file, caller_line, cat, wanted)); + if (stdized_setlocale(cat, wanted) == NULL) { + setlocale_failure_panic_via_i(cat_index, + currently, + wanted, + __LINE__, 0, + file, caller_line); + } + } + else { + DEBUG_Lv(PerlIO_printf(Perl_debug_log, + "%s: %d: Category %d already was %s\n", + file, caller_line, cat, wanted)); + } + + Safefree(wanted); + PL_restore_locale[cat_index] = NULL; + } + } + + /* Doesn't actually unlock until recursion fully unwound */ + LOCALE_UNLOCK; + + DEBUG_Lv(PerlIO_printf(Perl_debug_log, + "Leaving category_unlock_i: %s\n", + category_names[cat_index])); + + RESTORE_ERRNO; +} + /*===========================================================================*/ #elif defined(USE_POSIX_2008_LOCALE) @@ -2678,27 +3017,49 @@ S_update_PL_curlocales_i(pTHX_ const char * new_locale, const line_t caller_line) { - /* Update PL_curlocales[], which is parallel to the other ones indexed by - * our mapping of libc category number to our internal equivalents. */ - PERL_ARGS_ASSERT_UPDATE_PL_CURLOCALES_I; assert(index <= LC_ALL_INDEX_); + /* There are two implementations that use PL_curlocales[], an array + * parallel to the other ones indexed by our mapping of libc category + * number to our internal equivalents. + * + * This function updates the 'index'th element to be 'new_locale'. It + * knows about the requirements of each implementation. In the POSIX 2008 + * case, everything is already calculated, so never does an element have to + * be checked for needing to stay in the "C" locale. In the thread-safe + * emulation case, no checking has yet been done, so this routine needs to + * do it */ +# ifndef LC_ALL + PERL_UNUSED_ARG(caller_line); +# else + if (index == LC_ALL_INDEX_) { /* For LC_ALL, we change all individual categories to correspond, * including the LC_ALL element */ for (unsigned int i = 0; i <= LC_ALL_INDEX_; i++) { Safefree(PL_curlocales[i]); - PL_curlocales[i] = NULL; } + /* In the POSIX 2008 case, everything is already calculated, so never + * does an element have to be checked for needing to stay in the "C" + * locale. In the thread-safe emulation case, no checking has yet been + * done, so this routine needs to do it */ + +# if ! defined(HAS_IGNORED_LOCALE_CATEGORIES_) || defined(USE_POSIX_2008) + + const parse_LC_ALL_STRING_action action = no_override; + +# else + + const parse_LC_ALL_STRING_action action = override_if_ignored; + +# endif + switch (parse_LC_ALL_string(new_locale, (const char **) &PL_curlocales, - check_that_overridden, /* things should - have already - been overridden - */ + action, true, /* Always fill array */ true, /* Panic if fails, as to get here it earlier had to have succeeded @@ -2718,7 +3079,11 @@ S_update_PL_curlocales_i(pTHX_ PL_curlocales[LC_ALL_INDEX_] = savepv(new_locale); } } - else { /* Not LC_ALL */ + else + +# endif + + { /* Not LC_ALL */ /* Update the single category's record */ Safefree(PL_curlocales[index]); @@ -3421,7 +3786,12 @@ S_new_numeric(pTHX_ const char *newnum, bool force) /* If not forcing this procedure, and there isn't actually a change from * our records, do nothing. (Our records can be wrong when sync'ing to the * locale set up by an external library, hence the 'force' parameter) */ - if (! force && strEQ(PL_numeric_name, newnum)) { + if ( (! force && strEQ(PL_numeric_name, newnum)) + +# if defined(USE_LOCALE) && ! defined(USE_THREAD_SAFE_LOCALE) + || ! PL_perl_controls_locale +# endif + ) { return; } @@ -3647,6 +4017,8 @@ S_new_ctype(pTHX_ const char *newctype, bool force) * Turkic. Make sure these two are the only anomalies. (We don't * require towupper and towlower because they aren't in C89.) */ + LC_CTYPE_LOCK; + # if defined(HAS_TOWUPPER) && defined (HAS_TOWLOWER) if (towupper('i') == 0x130 && towlower('I') == 0x131) @@ -3662,6 +4034,8 @@ S_new_ctype(pTHX_ const char *newctype, bool force) check_for_problems = TRUE; maybe_utf8_turkic = TRUE; } + + LC_CTYPE_UNLOCK; } else { /* Not a canned locale we know the values for. Compute them */ @@ -3766,7 +4140,9 @@ S_new_ctype(pTHX_ const char *newctype, bool force) * locale requires more than one byte, there are going to be BIG problems. * */ + LC_CTYPE_LOCK; const int mb_cur_max = MB_CUR_MAX; + LC_CTYPE_UNLOCK; if (mb_cur_max > 1 && ! PL_in_utf8_CTYPE_locale @@ -5281,6 +5657,7 @@ S_populate_hash_from_localeconv(pTHX_ HV * hv, /* Some platforms require LC_CTYPE to be congruent with the category we are * looking for */ const char * orig_CTYPE_locale = toggle_locale_c(LC_CTYPE, locale); + LC_CTYPE_LOCK; # endif # ifdef USE_LOCALE_NUMERIC @@ -5330,6 +5707,7 @@ S_populate_hash_from_localeconv(pTHX_ HV * hv, /* Finally ready to do the actual localeconv(). Lock to prevent other * accesses until we have made a copy of its returned static buffer */ + LC_MONETARY_LOCK; gwLOCALE_LOCK; # if defined(TS_W32_BROKEN_LOCALECONV) && defined(USE_THREAD_SAFE_LOCALE) @@ -5470,6 +5848,7 @@ S_populate_hash_from_localeconv(pTHX_ HV * hv, gwLOCALE_UNLOCK; /* Finished with the critical section of a globally-accessible buffer */ + LC_MONETARY_UNLOCK; # if defined(USE_LOCALE_MONETARY) && defined(WIN32) @@ -5487,6 +5866,7 @@ S_populate_hash_from_localeconv(pTHX_ HV * hv, # ifdef USE_LOCALE_CTYPE restore_toggled_locale_c(LC_CTYPE, orig_CTYPE_locale); + LC_CTYPE_UNLOCK; # endif @@ -5881,6 +6261,33 @@ S_my_langinfo_i(pTHX_ /*--------------------------------------------------------------------------*/ # if defined(HAS_NL_LANGINFO) /* nl_langinfo() is available. */ + + /* The only difference between the normal and emulation is the type of + * locks. We have to always lock because this nl_langinfo() isn't thread + * safe */ +# ifndef EMULATE_THREAD_SAFE_LOCALES +# define NL_LANGINFO_LOCK(cat_index) gwLOCALE_LOCK +# define NL_LANGINFO_UNLOCK(cat_index) gwLOCALE_UNLOCK +# elif defined(USE_LOCALE_CTYPE) +# define NL_LANGINFO_LOCK(cat_index) \ + STMT_START { \ + LC_CATEGORY_LOCK_c_(LC_CTYPE); \ + if (cat_index != LC_CTYPE_INDEX_) { \ + LC_CATEGORY_LOCK_i_(cat_index); \ + } \ + } STMT_END + +# define NL_LANGINFO_UNLOCK(cat_index) \ + STMT_START { \ + if (cat_index != LC_CTYPE_INDEX_) { \ + LC_CATEGORY_UNLOCK_i_(cat_index); \ + } \ + LC_CATEGORY_UNLOCK_c_(LC_CTYPE); \ + } STMT_END +# else +# define NL_LANGINFO_LOCK(cat_index) LC_CATEGORY_LOCK_i_(cat_index) +# define NL_LANGINFO_UNLOCK(cat_index) LC_CATEGORY_UNLOCK_i_(cat_index) +# endif # ifdef USE_LOCALE_CTYPE /* This function sorts out if things actually have to be switched or not, @@ -5891,9 +6298,9 @@ S_my_langinfo_i(pTHX_ const char * orig_switched_locale = toggle_locale_i(cat_index, locale); - gwLOCALE_LOCK; + NL_LANGINFO_LOCK(cat_index); retval = save_to_buffer(nl_langinfo(item), retbufp, retbuf_sizep); - gwLOCALE_UNLOCK; + NL_LANGINFO_UNLOCK(cat_index); if (utf8ness) { *utf8ness = get_locale_string_utf8ness_i(retval, @@ -5955,14 +6362,19 @@ S_my_langinfo_i(pTHX_ Newx(floatbuf, initial_size, char); /* 1.5 is exactly representable on binary computers */ + LC_NUMERIC_LOCK(0); Size_t needed_size = snprintf(floatbuf, initial_size, "%.1f", 1.5); + LC_NUMERIC_UNLOCK; /* If our guess wasn't big enough, increase and try again, based on * the real number that snprintf() is supposed to return */ if (UNLIKELY(needed_size >= initial_size)) { needed_size++; /* insurance */ Renew(floatbuf, needed_size, char); - Size_t new_needed = snprintf(floatbuf, needed_size, "%.1f", 1.5); + LC_NUMERIC_LOCK(0); + Size_t new_needed = snprintf(floatbuf, needed_size, "%.1f", + 1.5); + LC_NUMERIC_UNLOCK; assert(new_needed <= needed_size); needed_size = new_needed; } @@ -6284,6 +6696,8 @@ S_my_langinfo_i(pTHX_ * is documented and has been stable for many releases */ UINT ___lc_codepage_func(void); + LC_CTYPE_LOCK; + # ifndef WIN32_USE_FAKE_OLD_MINGW_LOCALES retval = save_to_buffer(Perl_form(aTHX_ "%d", ___lc_codepage_func()), @@ -6294,6 +6708,8 @@ S_my_langinfo_i(pTHX_ retbufp, retbuf_sizep); # endif + LC_CTYPE_UNLOCK; + DEBUG_Lv(PerlIO_printf(Perl_debug_log, "locale='%s' cp=%s\n", locale, retval)); break; @@ -6990,7 +7406,8 @@ S_give_perl_locale_control(pTHX_ # endif # if ! defined(USE_THREAD_SAFE_LOCALE) \ - && ! defined(USE_POSIX_2008_LOCALE) + && ! defined(USE_POSIX_2008_LOCALE) \ + && ! defined(EMULATE_THREAD_SAFE_LOCALES) # if defined(LC_ALL) PERL_UNUSED_ARG(lc_all_string); # else @@ -7011,6 +7428,13 @@ S_give_perl_locale_control(pTHX_ } # endif +# endif +# if defined(USE_LOCALE) && ! defined(USE_THREAD_SAFE_LOCALE) + + /* This routine converts Perl to controlling the locale, and we need to + * tell this before calling new_LC_ALL() */ + PL_perl_controls_locale = true; + # endif /* Finally, update our remaining records. 'true' => force recalculation. @@ -7240,6 +7664,11 @@ Perl_init_i18nl10n(pTHX_ int printwarn) PL_cur_LC_ALL = savepv("C"); # endif +# ifdef EMULATE_THREAD_SAFE_LOCALES + for (unsigned int i = 0; i < LC_ALL_INDEX_; i++) { + PL_restore_locale[i] = NULL; + } +# endif # if ! defined(PERL_LC_ALL_USES_NAME_VALUE_PAIRS) && defined(LC_ALL) LOCALE_LOCK; @@ -8275,12 +8704,16 @@ Perl_mem_collxfrm_(pTHX_ const char *input_string, /* Then the transformation of the input. We loop until successful, or we * give up */ for (;;) { + LC_CTYPE_LOCK; + LC_COLLATE_LOCK; errno = 0; *xlen = my_strxfrm(xbuf + COLLXFRM_HDR_LEN, s, xAlloc - COLLXFRM_HDR_LEN); + LC_COLLATE_UNLOCK; + LC_CTYPE_UNLOCK; /* If the transformed string occupies less space than we told strxfrm() * was available, it means it transformed the whole string. */ @@ -8702,13 +9135,20 @@ Perl__is_in_locale_category(pTHX_ const bool compiling, const int category) * qualifies), these yield the correct one */ #if defined(USE_LOCALE_CTYPE) # define WHICH_LC_INDEX LC_CTYPE_INDEX_ +# define WHICH_LOCK LC_CTYPE_LOCK +# define WHICH_UNLOCK LC_CTYPE_UNLOCK #elif defined(USE_LOCALE_MESSAGES) # define WHICH_LC_INDEX LC_MESSAGES_INDEX_ +# define WHICH_LOCK LC_MESSAGES_LOCK +# define WHICH_UNLOCK LC_MESSAGES_UNLOCK #endif /*===========================================================================*/ /* First set of implementations, when have strerror_l() */ +#define MY_STRERROR_LOCK LC_MESSAGES_LOCK +#define MY_STRERROR_UNLOCK LC_MESSAGES_UNLOCK + #if defined(USE_POSIX_2008_LOCALE) && defined(HAS_STRERROR_L) # if ! defined(USE_LOCALE_CTYPE) && ! defined(USE_LOCALE_MESSAGES) @@ -8807,7 +9247,12 @@ Perl_my_strerror(pTHX_ const int errnum, utf8ness_t * utf8ness) DEBUG_STRERROR_ENTER(errnum, 0); + gwLOCALE_LOCK; + const char *errstr = savepv(Strerror(errnum)); + + gwLOCALE_UNLOCK; + *utf8ness = UTF8NESS_IMMATERIAL; DEBUG_STRERROR_RETURN(errstr, utf8ness); @@ -8832,7 +9277,12 @@ Perl_my_strerror(pTHX_ const int errnum, utf8ness_t * utf8ness) const char *errstr; if (IN_LC(categories[WHICH_LC_INDEX])) { + WHICH_LOCK; + gwLOCALE_LOCK; errstr = savepv(Strerror(errnum)); + gwLOCALE_UNLOCK; + WHICH_UNLOCK; + *utf8ness = get_locale_string_utf8ness_i(errstr, LOCALE_UTF8NESS_UNKNOWN, NULL, WHICH_LC_INDEX); @@ -8843,7 +9293,11 @@ Perl_my_strerror(pTHX_ const int errnum, utf8ness_t * utf8ness) const char * orig_locale = toggle_locale_i(WHICH_LC_INDEX, "C"); + WHICH_LOCK; + gwLOCALE_LOCK; errstr = savepv(Strerror(errnum)); + gwLOCALE_UNLOCK; + WHICH_UNLOCK; restore_toggled_locale_i(WHICH_LC_INDEX, orig_locale); @@ -8878,11 +9332,18 @@ Perl_my_strerror(pTHX_ const int errnum, utf8ness_t * utf8ness) LOCALE_LOCK; + LC_MESSAGES_LOCK; + LC_CTYPE_LOCK; + gwLOCALE_LOCK; + const char* orig_CTYPE_locale = toggle_locale_c(LC_CTYPE, desired_locale); const char* orig_MESSAGES_locale = toggle_locale_c(LC_MESSAGES, desired_locale); const char *errstr = savepv(Strerror(errnum)); + gwLOCALE_UNLOCK; + LC_CTYPE_UNLOCK; + LC_MESSAGES_UNLOCK; restore_toggled_locale_c(LC_MESSAGES, orig_MESSAGES_locale); restore_toggled_locale_c(LC_CTYPE, orig_CTYPE_locale); @@ -8923,10 +9384,21 @@ To return to Perl control, and restart the gotcha prevention services, call C>. Behavior is undefined for any pure Perl code that executes while the switch is in effect. -The global locale and the per-thread locales are independent. As long as just -one thread converts to the global locale, everything works smoothly. But if -more than one does, they can easily interfere with each other, and races are -likely. On Windows systems prior to Visual Studio 15 (at which point Microsoft +On perls without per thread-locales, there is only the global locale; so +calling this function effectively just disables the gotcha prevention services. + +On perls with per-thread locales, they are independent from the global locale. +As long as just one thread converts to the global locale, everything works +smoothly. But if more than one does, they can easily interfere with each +other, and races are likely. + +On perls that emulate per-thread locales, there is, behind the scenes, actually +just the global locale. Unlike the native per-thread locale platforms, any +thread that calls this function is likely to have races with the remaining +threads when calling locale-dependent libc functions, unless appropriate +mutexes have been inserted. + +On Windows systems prior to Visual Studio 15 (at which point Microsoft fixed a bug), races can occur (even if only one thread has been converted to the global locale), but only if you use the following operations: @@ -8977,6 +9449,9 @@ handle all cases of single- vs multi-thread, POSIX 2008-supported or not. freelocale(old_locale); \ } \ } STMT_END +#elif defined(EMULATE_THREAD_SAFE_LOCALES) +# define CHANGE_SYSTEM_LOCALE_TO_GLOBAL \ + PL_perl_controls_locale = false #else # define CHANGE_SYSTEM_LOCALE_TO_GLOBAL #endif @@ -9006,7 +9481,7 @@ Perl_switch_to_global_locale(pTHX) # else - const bool perl_controls = false; + const bool perl_controls = PL_perl_controls_locale; # endif @@ -9015,7 +9490,12 @@ Perl_switch_to_global_locale(pTHX) return; } -# ifdef LC_ALL +# if ! defined(USE_POSIX_2008_LOCALE) && ! defined(USE_THREAD_SAFE_LOCALE) + + PL_perl_controls_locale = false; + +# else +# ifdef LC_ALL const char * thread_locale = calculate_LC_ALL_string(NULL, EXTERNAL_FORMAT_FOR_SET, @@ -9024,7 +9504,7 @@ Perl_switch_to_global_locale(pTHX) CHANGE_SYSTEM_LOCALE_TO_GLOBAL; posix_setlocale(LC_ALL, thread_locale); -# else /* Must be USE_POSIX_2008_LOCALE) */ +# else const char * cur_thread_locales[LC_ALL_INDEX_]; @@ -9042,6 +9522,7 @@ Perl_switch_to_global_locale(pTHX) } POSIX_SETLOCALE_UNLOCK; +# endif # endif # ifdef USE_LOCALE_NUMERIC @@ -9062,9 +9543,9 @@ Perl_switch_to_global_locale(pTHX) This function copies the state of the program global locale into the calling thread, and converts that thread to using per-thread locales, if it wasn't -already, and the platform supports them. The LC_NUMERIC locale is toggled into -the standard state (using the C locale's conventions), if not within the -lexical scope of S>. +already, and the platform supports them or perl is Configured to emulate them. +The LC_NUMERIC locale is toggled into the standard state (using the C locale's +conventions), if not within the lexical scope of S>. Perl will now consider itself to have control of the locale. @@ -9085,8 +9566,11 @@ multi-threaded systems that don't have multi-thread safe locale operations. Using the libc L> function should be avoided. Nevertheless, certain non-Perl libraries called from XS, do call it, and their behavior may not be able to be changed. This function, along with -C>, can be used to get seamless behavior in these -circumstances, as long as only one thread is involved. +C>, can be used to get seamless behavior on systems +with per-thread locale handling, as long as only one thread is involved. +To get seamless behavior on platforms where perl emulates per-thread locale +handling, mutexes would have to be added to wrap that thread's locale-dependent +functions. If the library has an option to turn off its locale manipulation, doing that is preferable to using this mechanism. C is such a library. @@ -9107,9 +9591,18 @@ Perl_sync_locale(pTHX) #else - bool was_in_global = TRUE; + /* First, switch to the global locale, and note if we were already there */ + + bool was_in_global; -# ifdef USE_THREAD_SAFE_LOCALE +# if ! defined(USE_THREAD_SAFE_LOCALE) + + /* When not using thread-safe locales, as far as the system is concerned, + * there only is the global locale. */ + + was_in_global = PL_perl_controls_locale; + +# else /* Below is thread-safe */ # if defined(WIN32) int config_return = _configthreadlocale(_DISABLE_PER_THREAD_LOCALE); @@ -9118,7 +9611,7 @@ Perl_sync_locale(pTHX) } was_in_global = (config_return == _DISABLE_PER_THREAD_LOCALE); -# elif defined(USE_POSIX_2008_LOCALE) +# elif defined(USE_POSIX_2008_LOCALE) /* Thread-safe POSIX 2008 */ was_in_global = (LC_GLOBAL_LOCALE == uselocale(LC_GLOBAL_LOCALE)); @@ -9232,7 +9725,10 @@ Perl_switch_locale_context(pTHX) * * There are two implementations where this is an issue. For the other * implementations, it doesn't matter because libc is using global values - * that all threads know about. + * that all threads know about. This is true even for the thread-safe + * emulation, as everything to libc is still a global, and we use + * PL_curlocales (for example) to know what the correct locale(s) should + * be, and this variable is under control of aTHX. * * The two implementations are where libc keeps thread-specific information * on its own. These are @@ -9270,10 +9766,20 @@ Perl_switch_locale_context(pTHX) # elif defined(WIN32) - if (! bool_setlocale_c(LC_ALL, PL_cur_LC_ALL)) { - locale_panic_(Perl_form(aTHX_ "Can't setlocale(%s)", PL_cur_LC_ALL)); + if (! PL_perl_controls_locale) { + return; } + if (_configthreadlocale(_ENABLE_PER_THREAD_LOCALE) == -1) { + locale_panic_("_configthreadlocale returned an error"); + } + + const char * lc_all_copy = savepv(PL_cur_LC_ALL); + if (! bool_setlocale_c(LC_ALL, lc_all_copy)) { + locale_panic_(Perl_form(aTHX_ "Can't setlocale(%s)", lc_all_copy)); + } + Safefree(lc_all_copy); + # endif } @@ -9284,8 +9790,12 @@ void Perl_thread_locale_init(pTHX) { -#ifdef USE_THREAD_SAFE_LOCALE -# ifdef USE_POSIX_2008_LOCALE +#if defined(USE_LOCALE) +# ifndef USE_THREAD_SAFE_LOCALE + + PL_perl_controls_locale = TRUE; + +# elif defined(USE_POSIX_2008_LOCALE) /* Called from a thread on startup. * @@ -9312,12 +9822,18 @@ Perl_thread_locale_init(pTHX) PL_cur_locale_obj = PL_C_locale_obj; # endif -# elif defined(WIN32) +# else +# ifdef WIN32 /* On Windows, make sure new thread has per-thread locales enabled */ if (_configthreadlocale(_ENABLE_PER_THREAD_LOCALE) == -1) { locale_panic_("_configthreadlocale returned an error"); } + + PL_perl_controls_locale = true; + +# endif + void_setlocale_c(LC_ALL, "C"); # endif diff --git a/makedef.pl b/makedef.pl index 1bdc4ebe0815..2d2c8c613c04 100644 --- a/makedef.pl +++ b/makedef.pl @@ -68,6 +68,7 @@ BEGIN } use constant PLATFORM => $ARGS{PLATFORM}; +use constant LIBC => uc $Config{libc} =~ s/^-l//r; # This makes us able to use, e.g., $define{WIN32}, so you don't have to # remember what things came from %ARGS. @@ -175,8 +176,8 @@ BEGIN if ($define{USE_LOCALE_THREADS} && ! $define{NO_THREAD_SAFE_LOCALE}) { if ( $define{USE_POSIX_2008_LOCALE} - || ($define{WIN32} && ( $cctype !~ /\D/ - && $cctype >= 80))) + || ($define{WIN32} && ( ($cctype eq "GCC" && LIBC eq "UCRT") + || ($cctype !~ /\D/ && $cctype >= 80)))) { $define{USE_THREAD_SAFE_LOCALE} = 1; } @@ -482,6 +483,24 @@ sub readvar { ); } +unless ($define{USE_LOCALE} && ( $define{WIN32} + || ! $define{USE_THREAD_SAFE_LOCALE})) +{ + ++$skip{$_} foreach qw( + PL_perl_controls_locale + ); +} + +unless ($define{EMULATE_THREAD_SAFE_LOCALES}) +{ + ++$skip{$_} foreach qw( + PL_restore_locale + PL_restore_locale_depth + Perl_category_lock_i + Perl_category_unlock_i + ); +} + unless ($define{USE_PERL_SWITCH_LOCALE_CONTEXT}) { ++$skip{$_} foreach qw( diff --git a/mg.c b/mg.c index 4b6d4ab62266..82c6299dd26e 100644 --- a/mg.c +++ b/mg.c @@ -1096,12 +1096,11 @@ Perl_magic_get(pTHX_ SV *sv, MAGIC *mg) else if (strEQ(remaining, "AFE_LOCALES")) { #if ! defined(USE_ITHREADS) || defined(USE_THREAD_SAFE_LOCALE) - sv_setuv(sv, (UV) 1); - +#elif defined(EMULATE_THREAD_SAFE_LOCALES) + sv_setuv(sv, (UV) 2); #else sv_setuv(sv, (UV) 0); - #endif } diff --git a/perl.c b/perl.c index 6d1752203d14..298350d1c5bb 100644 --- a/perl.c +++ b/perl.c @@ -1126,6 +1126,12 @@ perl_destruct(pTHXx) PL_curlocales[i] = NULL; } #endif +#if defined(EMULATE_THREAD_SAFE_LOCALES) + for (i = 0; i < (int) C_ARRAY_LENGTH(PL_restore_locale); i++) { + Safefree(PL_restore_locale[i]); + PL_restore_locale[i] = NULL; + } +#endif #if defined(USE_POSIX_2008_LOCALE) && defined(USE_THREADS) { /* This also makes sure we aren't using a locale object that gets freed @@ -2076,7 +2082,8 @@ S_Internals_V(pTHX_ CV *cv) # ifdef USE_SITECUSTOMIZE " USE_SITECUSTOMIZE" # endif -# ifdef USE_THREAD_SAFE_LOCALE +# if defined(USE_THREAD_SAFE_LOCALE) \ + || defined(EMULATE_THREAD_SAFE_LOCALES) " USE_THREAD_SAFE_LOCALE" # endif ""; /* keep this on a line by itself, WITH the empty string */ diff --git a/perl.h b/perl.h index 92e14cf5e9d6..d1aac447387b 100644 --- a/perl.h +++ b/perl.h @@ -1281,8 +1281,11 @@ typedef enum { # endif /* POSIX 2008 has no means of finding out the current locale without a - * querylocale; so must keep track of it ourselves */ -# if (defined(USE_POSIX_2008_LOCALE) && ! defined(USE_QUERYLOCALE)) + * querylocale; so must keep track of it ourselves. And for thread-safe + * emulation, we keep track because the system doesn't have per-thread + * information */ +# if (defined(USE_POSIX_2008_LOCALE) && ! defined(USE_QUERYLOCALE)) \ + || defined(EMULATE_THREAD_SAFE_LOCALES) # define USE_PL_CURLOCALES # endif @@ -7303,19 +7306,73 @@ the plain locale pragma without a parameter (S>) is in effect. #define LOCALE_READ_LOCK LOCALE_LOCK #define LOCALE_READ_UNLOCK LOCALE_UNLOCK +#ifdef EMULATE_THREAD_SAFE_LOCALES -#ifndef LC_NUMERIC_LOCK -# define LC_NUMERIC_LOCK(cond) NOOP -# define LC_NUMERIC_UNLOCK NOOP -#endif +/* The macros for the individual categories are defined in terms of +* these two sets. */ +# define LC_CATEGORY_LOCK_i_(i) category_lock_i(i, __FILE__, __LINE__) +# define LC_CATEGORY_UNLOCK_i_(i) category_unlock_i(i, __FILE__, __LINE__) +# define LC_CATEGORY_LOCK_c_(cat) LC_CATEGORY_LOCK_i_(cat##_INDEX_) +# define LC_CATEGORY_UNLOCK_c_(cat) LC_CATEGORY_UNLOCK_i_(cat##_INDEX_) + +# ifdef USE_LOCALE_COLLATE +# define LC_COLLATE_LOCK LC_CATEGORY_LOCK_c_(LC_COLLATE) +# define LC_COLLATE_UNLOCK LC_CATEGORY_UNLOCK_c_(LC_COLLATE) +# endif +# ifdef USE_LOCALE_CTYPE +# define LC_CTYPE_LOCK LC_CATEGORY_LOCK_c_(LC_CTYPE) +# define LC_CTYPE_UNLOCK LC_CATEGORY_UNLOCK_c_(LC_CTYPE) +# endif +# ifdef USE_LOCALE_MESSAGES +# define LC_MESSAGES_LOCK LC_CATEGORY_LOCK_c_(LC_MESSAGES) +# define LC_MESSAGES_UNLOCK LC_CATEGORY_UNLOCK_c_(LC_MESSAGES) +# endif +# ifdef USE_LOCALE_MONETARY +# define LC_MONETARY_LOCK LC_CATEGORY_LOCK_c_(LC_MONETARY) +# define LC_MONETARY_UNLOCK LC_CATEGORY_UNLOCK_c_(LC_MONETARY) +# endif +# ifdef USE_LOCALE_NUMERIC + + /* This is the one category we may already have defined. It needs to be + * overwritten. We ignore the parameter in this case, since in this + * thread-safe emulation, all the threads are jumbled together */ +# undef LC_NUMERIC_LOCK +# define LC_NUMERIC_LOCK(cond_to_panic_if_already_locked) \ + LC_CATEGORY_LOCK_c_(LC_NUMERIC) +# undef LC_NUMERIC_UNLOCK +# define LC_NUMERIC_UNLOCK LC_CATEGORY_UNLOCK_c_(LC_NUMERIC) +# endif +# ifdef USE_LOCALE_TIME +# define LC_TIME_LOCK LC_CATEGORY_LOCK_c_(LC_TIME) +# define LC_TIME_UNLOCK LC_CATEGORY_UNLOCK_c_(LC_TIME) +# endif +#endif + +/* Below are lock definitions for individual functions that Perl uses. All + * such need to be in terms of the locale category(ies) that affect them, plus + * gwLOCALE_LOCK if they read/write global space. It is best to create a + * definition for each function to hide those details, and allow it to be more + * easily maintained. */ +#ifdef LC_CTYPE_LOCK +# define MBLEN_LOCK_ LC_CTYPE_LOCK +# define MBLEN_UNLOCK_ LC_CTYPE_UNLOCK +# define MBRLEN_LOCK_ LC_CTYPE_LOCK +# define MBRLEN_UNLOCK_ LC_CTYPE_UNLOCK +# define MBTOWC_LOCK_ LC_CTYPE_LOCK +# define MBTOWC_UNLOCK_ LC_CTYPE_UNLOCK +# define MBRTOWC_LOCK_ LC_CTYPE_LOCK +# define MBRTOWC_UNLOCK_ LC_CTYPE_UNLOCK +# define WCTOMB_LOCK_ LC_CTYPE_LOCK +# define WCTOMB_UNLOCK_ LC_CTYPE_UNLOCK +# define WCRTOMB_LOCK_ LC_CTYPE_LOCK +# define WCRTOMB_UNLOCK_ LC_CTYPE_UNLOCK +#else /* These non-reentrant versions use global space */ # define MBLEN_LOCK_ gwLOCALE_LOCK # define MBLEN_UNLOCK_ gwLOCALE_UNLOCK - # define MBTOWC_LOCK_ gwLOCALE_LOCK # define MBTOWC_UNLOCK_ gwLOCALE_UNLOCK - # define WCTOMB_LOCK_ gwLOCALE_LOCK # define WCTOMB_UNLOCK_ gwLOCALE_UNLOCK @@ -7329,13 +7386,46 @@ the plain locale pragma without a parameter (S>) is in effect. # define WCRTOMB_LOCK_ NOOP # define WCRTOMB_UNLOCK_ NOOP +# define LC_CTYPE_LOCK LOCALE_LOCK +# define LC_CTYPE_UNLOCK LOCALE_UNLOCK +#endif + +#if ! defined(LC_COLLATE_LOCK) # define LC_COLLATE_LOCK LOCALE_LOCK # define LC_COLLATE_UNLOCK LOCALE_UNLOCK +#endif + +#if ! defined(LC_MESSAGES_LOCK) +# define LC_MESSAGES_LOCK LOCALE_LOCK +# define LC_MESSAGES_UNLOCK LOCALE_UNLOCK +#endif + +#if ! defined(LC_MONETARY_LOCK) +# define LC_MONETARY_LOCK LOCALE_LOCK +# define LC_MONETARY_UNLOCK LOCALE_UNLOCK +#endif +#ifdef LC_TIME_LOCK +# define STRFTIME_LOCK /* Needs one exclusive lock */ \ + STMT_START { LC_CTYPE_LOCK; LC_TIME_LOCK; ENV_READ_LOCK; \ + } STMT_END +# define STRFTIME_UNLOCK \ + STMT_START { ENV_READ_UNLOCK; LC_TIME_UNLOCK; LC_CTYPE_UNLOCK; \ + } STMT_END +#else # define STRFTIME_LOCK ENV_LOCK # define STRFTIME_UNLOCK ENV_UNLOCK +# define LC_TIME_LOCK LOCALE_LOCK +# define LC_TIME_UNLOCK LOCALE_UNLOCK +#endif + #ifdef USE_LOCALE_NUMERIC +# ifndef LC_NUMERIC_LOCK +# define LC_NUMERIC_LOCK(cond_to_panic_if_already_locked) \ + LOCALE_LOCK_(cond_to_panic_if_already_locked) +# define LC_NUMERIC_UNLOCK LOCALE_UNLOCK_ +# endif /* These macros are for toggling between the underlying locale (UNDERLYING or * LOCAL) and the C locale (STANDARD). (Actually we don't have to use the C @@ -7659,6 +7749,11 @@ cannot have changed since the precalculation. #endif /* !USE_LOCALE_NUMERIC */ +#ifndef LC_NUMERIC_LOCK +# define LC_NUMERIC_LOCK(cond) NOOP +# define LC_NUMERIC_UNLOCK NOOP +#endif + #ifdef USE_LOCALE_THREADS # define ENV_LOCK PERL_WRITE_LOCK(&PL_env_mutex) # define ENV_UNLOCK PERL_WRITE_UNLOCK(&PL_env_mutex) diff --git a/pod/perlvar.pod b/pod/perlvar.pod index a2b0abfbdcdb..f6ef208dfca8 100644 --- a/pod/perlvar.pod +++ b/pod/perlvar.pod @@ -2547,10 +2547,11 @@ This variable was added in Perl v5.8.0. =item ${^SAFE_LOCALES} X<${^SAFE_LOCALES}> -Reflects if safe locale operations are available to this perl (when the -value is 1) or not (the value is 0). This variable is always 1 if the -perl has been compiled without threads. It is also 1 if this perl is -using thread-safe locale operations. Note that an individual thread may +Reflects if safe locale operations are available to this perl or not. +This variable is always 1 if the perl has been compiled without threads. +It is also 1 if this perl is using thread-safe locale operations. And +it is 2 if this perl is emulating thread-safe locale operations. Note +that an individual thread may choose to use the global locale (generally unsafe) by calling L. This variable currently is still set to 1 in such threads. diff --git a/proto.h b/proto.h index 27d4e7143d1d..fc147ffd23df 100644 --- a/proto.h +++ b/proto.h @@ -5407,6 +5407,24 @@ Perl_variant_byte_number(PERL_UINTMAX_T word) # endif #endif +#if defined(EMULATE_THREAD_SAFE_LOCALES) +PERL_CALLCONV void +Perl_category_lock_i(pTHX_ const locale_category_index index, const char *file, const line_t caller_line); +# define PERL_ARGS_ASSERT_CATEGORY_LOCK_I \ + assert(file) + +PERL_CALLCONV void +Perl_category_unlock_i(pTHX_ const locale_category_index index, const char *file, const line_t caller_line); +# define PERL_ARGS_ASSERT_CATEGORY_UNLOCK_I \ + assert(file) + +# if !defined(PERL_NO_INLINE_FUNCTIONS) +PERL_STATIC_INLINE int +Perl_posix_LC_foo_(pTHX_ const int c, const U8 classnum); +# define PERL_ARGS_ASSERT_POSIX_LC_FOO_ + +# endif +#endif /* defined(EMULATE_THREAD_SAFE_LOCALES) */ #if defined(F_FREESP) && !defined(HAS_CHSIZE) && !defined(HAS_TRUNCATE) PERL_CALLCONV I32 Perl_my_chsize(pTHX_ int fd, Off_t length) @@ -7063,6 +7081,14 @@ S_my_setlocale_debug_string_i(pTHX_ const locale_category_index cat_index, const __attribute__warn_unused_result__; # define PERL_ARGS_ASSERT_MY_SETLOCALE_DEBUG_STRING_I +# endif +# if defined(EMULATE_THREAD_SAFE_LOCALES) || \ + ( defined(USE_POSIX_2008_LOCALE) && !defined(USE_QUERYLOCALE) ) +STATIC void +S_update_PL_curlocales_i(pTHX_ const locale_category_index index, const char *new_locale, const line_t caller_line); +# define PERL_ARGS_ASSERT_UPDATE_PL_CURLOCALES_I \ + assert(new_locale) + # endif # if defined(HAS_NL_LANGINFO) || defined(HAS_NL_LANGINFO_L) STATIC const char * @@ -7150,16 +7176,9 @@ STATIC locale_t S_use_curlocale_scratch(pTHX); # define PERL_ARGS_ASSERT_USE_CURLOCALE_SCRATCH -# if !defined(USE_QUERYLOCALE) -STATIC void -S_update_PL_curlocales_i(pTHX_ const locale_category_index index, const char *new_locale, const line_t caller_line); -# define PERL_ARGS_ASSERT_UPDATE_PL_CURLOCALES_I \ - assert(new_locale) - -# endif -# elif defined(USE_LOCALE_THREADS) && \ - !defined(USE_THREAD_SAFE_LOCALE) && \ - !defined(USE_THREAD_SAFE_LOCALE_EMULATION) /* && +# elif !defined(EMULATE_THREAD_SAFE_LOCALES) && \ + defined(USE_LOCALE_THREADS) && \ + !defined(USE_THREAD_SAFE_LOCALE) /* && !defined(USE_POSIX_2008_LOCALE) */ STATIC bool S_less_dicey_bool_setlocale_r(pTHX_ const int cat, const char *locale); @@ -7170,10 +7189,10 @@ STATIC const char * S_less_dicey_setlocale_r(pTHX_ const int category, const char *locale); # define PERL_ARGS_ASSERT_LESS_DICEY_SETLOCALE_R -# endif /* defined(USE_LOCALE_THREADS) && +# endif /* !defined(EMULATE_THREAD_SAFE_LOCALES) && + defined(USE_LOCALE_THREADS) && !defined(USE_POSIX_2008_LOCALE) && - !defined(USE_THREAD_SAFE_LOCALE) && - !defined(USE_THREAD_SAFE_LOCALE_EMULATION) */ + !defined(USE_THREAD_SAFE_LOCALE) */ # if defined(WIN32) || defined(WIN32_USE_FAKE_OLD_MINGW_LOCALES) STATIC wchar_t * S_Win_byte_string_to_wstring(const UINT code_page, const char *byte_string); diff --git a/sv.c b/sv.c index 91b64d528cef..e1556f4dbbf2 100644 --- a/sv.c +++ b/sv.c @@ -16045,6 +16045,10 @@ perl_clone_using(PerlInterpreter *proto_perl, UV flags, PL_subname = sv_dup_inc(proto_perl->Isubname, param); +/* The new locale starts in the global C locale. */ +#if defined(USE_LOCALE) && (defined(WIN32) || ! defined(USE_THREAD_SAFE_LOCALE)) + PL_perl_controls_locale = false; +#endif #ifdef USE_PL_CURLOCALES for (i = 0; i < (int) C_ARRAY_LENGTH(PL_curlocales); i++) { PL_curlocales[i] = SAVEPV("C"); @@ -16053,6 +16057,12 @@ perl_clone_using(PerlInterpreter *proto_perl, UV flags, #ifdef USE_PL_CUR_LC_ALL PL_cur_LC_ALL = SAVEPV("C"); #endif +# ifdef EMULATE_THREAD_SAFE_LOCALES + for (i = 0; i < (int) C_ARRAY_LENGTH(PL_restore_locale); i++) { + PL_restore_locale[i] = NULL; + PL_restore_locale_depth[i] = 0; + } +#endif #ifdef USE_LOCALE_CTYPE Copy(PL_fold, PL_fold_locale, 256, U8);