diff --git a/embed.fnc b/embed.fnc index 4c5a5d576b5b..9db0d9999e34 100644 --- a/embed.fnc +++ b/embed.fnc @@ -1593,6 +1593,7 @@ ApdO |HV* |get_hv |NN const char *name|I32 flags ApdO |CV* |get_cv |NN const char* name|I32 flags Apd |CV* |get_cvn_flags |NN const char* name|STRLEN len|I32 flags ATdo |const char*|Perl_setlocale|const int category|NULLOK const char* locale +ATdo |HV * |Perl_localeconv #if defined(HAS_NL_LANGINFO) && defined(PERL_LANGINFO_H) ATdo |const char*|Perl_langinfo|const nl_item item #else @@ -3224,6 +3225,13 @@ S |unsigned|get_locale_string_utf8ness_i \ |const unsigned cat_index \ |NULLOK const char * string \ |const int known_utf8 +# if (defined(HAS_LOCALECONV) || defined(HAS_LOCALECONV_L)) \ + && (defined(USE_LOCALE_MONETARY) || defined(USE_LOCALE_NUMERIC)) +S |HV * |my_localeconv +S |HV * |populate_localeconv|NN const struct lconv *lcbuf \ + |const int numeric_locale_is_utf8 \ + |const int monetary_locale_is_utf8 +# endif # if defined(HAS_NL_LANGINFO) || defined(HAS_NL_LANGINFO_L) S |const char*|my_langinfo_i|const nl_item item \ |const unsigned int cat_index \ diff --git a/embed.h b/embed.h index 403b1351bfb5..ddd89a4a7ebb 100644 --- a/embed.h +++ b/embed.h @@ -1572,6 +1572,14 @@ # if !defined(WIN32) #define do_exec3(a,b,c) Perl_do_exec3(aTHX_ a,b,c) # endif +# if (defined(HAS_LOCALECONV) || defined(HAS_LOCALECONV_L)) && (defined(USE_LOCALE_MONETARY) || defined(USE_LOCALE_NUMERIC)) +# if defined(PERL_IN_LOCALE_C) +# if defined(USE_LOCALE) +#define my_localeconv() S_my_localeconv(aTHX) +#define populate_localeconv(a,b,c) S_populate_localeconv(aTHX_ a,b,c) +# endif +# endif +# endif # if 0 /* Not currently used, but may be needed in the future */ # if defined(PERL_IN_UTF8_C) #define warn_on_first_deprecated_use(a,b,c,d,e) S_warn_on_first_deprecated_use(aTHX_ a,b,c,d,e) diff --git a/ext/POSIX/POSIX.xs b/ext/POSIX/POSIX.xs index a048324d56f0..3afec8dec2f5 100644 --- a/ext/POSIX/POSIX.xs +++ b/ext/POSIX/POSIX.xs @@ -1572,80 +1572,6 @@ END_EXTERN_C #if ! defined(HAS_LOCALECONV) && ! defined(HAS_LOCALECONV_L) # define localeconv() not_here("localeconv") -#else -struct lconv_offset { - const char *name; - size_t offset; -}; - -/* Create e.g., - {"thousands_sep", STRUCT_OFFSET(struct lconv, thousands_sep)}, - */ -# define LCONV_ENTRY(name) \ - {STRINGIFY(name), STRUCT_OFFSET(struct lconv, name)} - -static const struct lconv_offset lconv_strings[] = { - -# ifdef USE_LOCALE_NUMERIC - LCONV_ENTRY(decimal_point), - LCONV_ENTRY(thousands_sep), -# ifndef NO_LOCALECONV_GROUPING - LCONV_ENTRY(grouping), -# endif -# endif -# ifdef USE_LOCALE_MONETARY - LCONV_ENTRY(int_curr_symbol), - LCONV_ENTRY(currency_symbol), - LCONV_ENTRY(mon_decimal_point), -# ifndef NO_LOCALECONV_MON_THOUSANDS_SEP - LCONV_ENTRY(mon_thousands_sep), -# endif -# ifndef NO_LOCALECONV_MON_GROUPING - LCONV_ENTRY(mon_grouping), -# endif - LCONV_ENTRY(positive_sign), - LCONV_ENTRY(negative_sign), -# endif - {NULL, 0} -}; - -# ifdef USE_LOCALE_NUMERIC - - /* The Linux man pages say these are the field names for the structure - * components that are LC_NUMERIC; the rest being LC_MONETARY */ -# define isLC_NUMERIC_STRING(name) ( strEQ(name, "decimal_point") \ - || strEQ(name, "thousands_sep") \ - \ - /* There should be no harm done \ - * checking for this, even if \ - * NO_LOCALECONV_GROUPING */ \ - || strEQ(name, "grouping")) -# else -# define isLC_NUMERIC_STRING(name) (0) -# endif - -static const struct lconv_offset lconv_integers[] = { -# ifdef USE_LOCALE_MONETARY - LCONV_ENTRY(int_frac_digits), - LCONV_ENTRY(frac_digits), - LCONV_ENTRY(p_cs_precedes), - LCONV_ENTRY(p_sep_by_space), - LCONV_ENTRY(n_cs_precedes), - LCONV_ENTRY(n_sep_by_space), - LCONV_ENTRY(p_sign_posn), - LCONV_ENTRY(n_sign_posn), -# ifdef HAS_LC_MONETARY_2008 - LCONV_ENTRY(int_p_cs_precedes), - LCONV_ENTRY(int_p_sep_by_space), - LCONV_ENTRY(int_n_cs_precedes), - LCONV_ENTRY(int_n_sep_by_space), - LCONV_ENTRY(int_p_sign_posn), - LCONV_ENTRY(int_n_sign_posn), -# endif -# endif - {NULL, 0} -}; - #endif /* HAS_LOCALECONV */ #ifdef HAS_LONG_DOUBLE @@ -2143,136 +2069,7 @@ localeconv() #ifndef HAS_LOCALECONV localeconv(); /* A stub to call not_here(). */ #else - struct lconv *lcbuf; -# if defined(USE_ITHREADS) \ - && defined(USE_POSIX_2008_LOCALE) \ - && defined(HAS_LOCALECONV_L) /* Prefer this thread-safe version */ - bool do_free = FALSE; - locale_t cur = NULL; -# elif defined(TS_W32_BROKEN_LOCALECONV) - const char * save_global; - const char * save_thread; -# endif - DECLARATION_FOR_LC_NUMERIC_MANIPULATION; - - /* localeconv() deals with both LC_NUMERIC and LC_MONETARY, but - * LC_MONETARY is already in the correct locale */ -# ifdef USE_LOCALE_MONETARY - - const bool is_monetary_utf8 = _is_cur_LC_category_utf8(LC_MONETARY); -# endif -# ifdef USE_LOCALE_NUMERIC - - bool is_numeric_utf8; - - STORE_LC_NUMERIC_FORCE_TO_UNDERLYING(); - - is_numeric_utf8 = _is_cur_LC_category_utf8(LC_NUMERIC); -# endif - - RETVAL = newHV(); - sv_2mortal((SV*)RETVAL); -# if defined(USE_ITHREADS) \ - && defined(USE_POSIX_2008_LOCALE) \ - && defined(HAS_LOCALECONV_L) - - cur = uselocale((locale_t) 0); - if (cur == LC_GLOBAL_LOCALE) { - cur = duplocale(LC_GLOBAL_LOCALE); - do_free = TRUE; - } - - lcbuf = localeconv_l(cur); -# else - LOCALECONV_LOCK; /* Prevent interference with other threads using - localeconv() */ -# ifdef TS_W32_BROKEN_LOCALECONV - /* This is a workaround for a Windows bug prior to VS 15, in which - * localeconv only looks at the global locale. We toggle to the global - * locale; populate the return; then toggle back. We have to use - * LC_ALL instead of the individual ones because of another bug in - * Windows */ - - save_thread = savepv(Perl_setlocale(LC_NUMERIC, NULL)); - - _configthreadlocale(_DISABLE_PER_THREAD_LOCALE); - - save_global = savepv(Perl_setlocale(LC_ALL, NULL)); - - Perl_setlocale(LC_ALL, save_thread); -# endif - lcbuf = localeconv(); -# endif - if (lcbuf) { - const struct lconv_offset *strings = lconv_strings; - const struct lconv_offset *integers = lconv_integers; - const char *ptr = (const char *) lcbuf; - - while (strings->name) { - /* This string may be controlled by either LC_NUMERIC, or - * LC_MONETARY */ - const bool is_utf8_locale = -# if defined(USE_LOCALE_NUMERIC) && defined(USE_LOCALE_MONETARY) - (isLC_NUMERIC_STRING(strings->name)) - ? is_numeric_utf8 - : is_monetary_utf8; -# elif defined(USE_LOCALE_NUMERIC) - is_numeric_utf8; -# elif defined(USE_LOCALE_MONETARY) - is_monetary_utf8; -# else - FALSE; -# endif - - const char *value = *((const char **)(ptr + strings->offset)); - - if (value && *value) { - const STRLEN value_len = strlen(value); - - /* We mark it as UTF-8 if a utf8 locale and is valid and - * variant under UTF-8 */ - const bool is_utf8 = is_utf8_locale - && is_utf8_non_invariant_string( - (U8*) value, - value_len); - (void) hv_store(RETVAL, - strings->name, - strlen(strings->name), - newSVpvn_utf8(value, value_len, is_utf8), - 0); - } - strings++; - } - - while (integers->name) { - const char value = *((const char *)(ptr + integers->offset)); - - if (value != CHAR_MAX) - (void) hv_store(RETVAL, integers->name, - strlen(integers->name), newSViv(value), 0); - integers++; - } - } -# if defined(USE_ITHREADS) \ - && defined(USE_POSIX_2008_LOCALE) \ - && defined(HAS_LOCALECONV_L) - if (do_free) { - freelocale(cur); - } -# else -# ifdef TS_W32_BROKEN_LOCALECONV - Perl_setlocale(LC_ALL, save_global); - - _configthreadlocale(_ENABLE_PER_THREAD_LOCALE); - - Perl_setlocale(LC_ALL, save_thread); - - Safefree(save_global); - Safefree(save_thread); -# endif - LOCALECONV_UNLOCK; -# endif - RESTORE_LC_NUMERIC(); + RETVAL = Perl_localeconv(); #endif /* HAS_LOCALECONV */ OUTPUT: RETVAL diff --git a/locale.c b/locale.c index 3032a10d8768..22d7026c9232 100644 --- a/locale.c +++ b/locale.c @@ -2900,6 +2900,376 @@ Perl_mbtowc_(pTHX_ const wchar_t * pwc, const char * s, const Size_t len) } +/* +=for apidoc Perl_localeconv + +This is a thread-safe version of the libc L. It is the same as +L (returning a hash of the C +fields), but directly callable from XS code. + +=cut +*/ + +HV * +Perl_localeconv() +{ + dTHX; + +#if ! defined(HAS_SOME_LOCALECONV) \ + || (! defined(USE_LOCALE_MONETARY) && ! defined(USE_LOCALE_NUMERIC)) + + return newHV(); + +#else + + return my_localeconv(); + +#endif + +} + +#if defined(HAS_SOME_LOCALECONV) \ + && (defined(USE_LOCALE_MONETARY) || defined(USE_LOCALE_NUMERIC)) + +HV * +S_my_localeconv(pTHX) +{ + HV * retval; + int numeric_locale_is_utf8 = UTF8NESS_UNKNOWN; + int monetary_locale_is_utf8 = UTF8NESS_UNKNOWN; + HV * (*copy_localeconv)(pTHX_ const struct lconv *, int, int); + + /* A thread-safe locale_conv(). The locking mechanisms vary greatly + * depending on platform capabilities. They all share this common set up + * code for the function, and then conditional compilations choose one of + * several terminations. + * + * The current use case is: + * Called from POSIX::locale_conv(). This returns lconv() copied to + * a hash, based on the current underlying locale. + * + * There is a helper function to accomplish this task. The + * function pointer just below is set to it, and it is called + * from each of the various implementations, in the middle of whatever + * necessary locking/locale swapping have been done. + * + * The reason for a function pointer is that a future commit will add a + * second use case, with a different function to implement it */ + + { + copy_localeconv = S_populate_localeconv; + +# ifdef USE_LOCALE_NUMERIC + + /* Get the UTF8ness of the locales now to avoid repeating this for each + * string returned by localeconv() */ + numeric_locale_is_utf8 = is_locale_utf8(PL_numeric_name); + +# endif +# ifdef USE_LOCALE_MONETARY + + monetary_locale_is_utf8 = is_locale_utf8(querylocale_c(LC_MONETARY)); + +# endif + + } + + PERL_ARGS_ASSERT_MY_LOCALECONV; +/*--------------------------------------------------------------------------*/ +/* Here, we are done with the common beginning of all the implementations of + * my_localeconv(). Below are the various terminations of the function (except + * the closing '}'. They are separated out because the preprocessor directives + * were making the simple logic hard to follow. Each implementation ends with + * the same few lines. khw decided to keep those separate because he thought + * it was clearer to the reader. + * + * The first distinct termination (of the above common code) are the + * implementations when we have locale_conv_l() and can use it. These are the + * simplest cases, without any locking needed. */ +# if defined(USE_POSIX_2008_LOCALE) && defined(HAS_LOCALECONV_L) + + /* And there are two sub-cases: First (by far the most common) is where we + * are compiled to pay attention to LC_NUMERIC */ +# ifdef USE_LOCALE_NUMERIC + + { + const locale_t cur = use_curlocale_scratch(); + locale_t with_numeric = duplocale(cur); + + /* Just create a new locale object with what we've got, but using the + * underlying LC_NUMERIC locale */ + with_numeric = newlocale(LC_NUMERIC_MASK, PL_numeric_name, with_numeric); + + retval = copy_localeconv(aTHX_ localeconv_l(with_numeric), + numeric_locale_is_utf8, + monetary_locale_is_utf8); + + freelocale(with_numeric); + + return retval; + } +/*--------------------------------------------------------------------------*/ +# else /* Below not paying attention to LC_NUMERIC */ + + { + const locale_t cur = use_curlocale_scratch((); + + retval = copy_localeconv(aTHX_ localeconv_l(cur), + numeric_locale_is_utf8, + monetary_locale_is_utf8); + return retval; + } +# endif /* Above, using lconv_l(); below plain lconv() */ +/*--------------------------------------------------------------------------*/ +# elif ! defined(TS_W32_BROKEN_LOCALECONV) /* Next is regular lconv() */ + + { + DECLARATION_FOR_LC_NUMERIC_MANIPULATION; + + /* There are so many locks because localeconv() deals with two + * categories, and returns in a single global static buffer. Some + * locks might be no-ops on this platform, but not others. We need to + * lock if any one isn't a no-op. */ + STORE_LC_NUMERIC_FORCE_TO_UNDERLYING(); + + LOCALECONV_LOCK; + retval = copy_localeconv(aTHX_ localeconv(), numeric_locale_is_utf8, + monetary_locale_is_utf8); + LOCALECONV_UNLOCK; + RESTORE_LC_NUMERIC(); + + return retval; + } +/*--------------------------------------------------------------------------*/ +# else /* defined(TS_W32_BROKEN_LOCALECONV) */ + + /* Last is a workaround for the broken localeconv() on Windows with + * thread-safe locales prior to VS 15. It looks at the global locale + * instead of the thread one. As a work-around, we toggle to the global + * locale; populate the return; then toggle back. We have to use LC_ALL + * instead of the individual categories because of another bug in Windows. + * + * This introduces a potential race with any other thread that has also + * converted to use the global locale, and doesn't protect its locale calls + * with mutexes. khw can't think of any reason for a thread to do so on + * Windows, as the locale API is the same regardless of thread-safety, except + * if the code is ported from working on another platform where there might + * be some reason to do this. But this is typically due to some + * alien-to-Perl library that thinks it owns locale setting. Such a + * library usn't likely to exist on Windows, so such an application is + * unlikely to be run on Windows + */ + { + const char * save_global; + const char * save_thread; + bool restore_per_thread = FALSE; + DECLARATION_FOR_LC_NUMERIC_MANIPULATION; + + /* Get to the proper per-thread locale state. (The NUMERIC operations + * are no-ops if not paying attention to LC_NUMERIC) */ + STORE_LC_NUMERIC_FORCE_TO_UNDERLYING(); + + /* Save the per-thread locale state */ + save_thread = savepv(querylocale_c(LC_ALL)); + + /* Change to the global locale, and note if we already were there */ + if (_configthreadlocale(_DISABLE_PER_THREAD_LOCALE) + != _DISABLE_PER_THREAD_LOCALE) + { + restore_per_thread = TRUE; + } + + /* Save the state of the global locale; then convert to our desired + * state. */ + save_global = savepv(querylocale_c(LC_ALL)); + void_setlocale_c(LC_ALL, save_thread); + + /* Safely stash the desired data */ + LOCALECONV_LOCK; + retval = copy_localeconv(aTHX_ localeconv(), numeric_locale_is_utf8, + monetary_locale_is_utf8); + LOCALECONV_UNLOCK; + + /* Restore the global locale's prior state */ + void_setlocale_c(LC_ALL, save_global); + + /* And back to per-thread locales */ + if (restore_per_thread) { + _configthreadlocale(_ENABLE_PER_THREAD_LOCALE); + } + + /* Restore the per-thread locale state */ + void_setlocale_c(LC_ALL, save_thread); + + RESTORE_LC_NUMERIC(); + + Safefree(save_global); + Safefree(save_thread); + + return retval; + } + +# endif +/*--------------------------------------------------------------------------*/ +} + +STATIC HV * +S_populate_localeconv(pTHX_ const struct lconv *lcbuf, + const int numeric_locale_is_utf8, + const int monetary_locale_is_utf8) +{ + /* This returns a mortalized hash containing all the elements returned by + * localeconv(). It is used by Perl_localeconv() and POSIX::localeconv() + */ + + struct lconv_offset { + const char *name; + size_t offset; + }; + + /* Create e.g., + {"thousands_sep", STRUCT_OFFSET(struct lconv, thousands_sep)}, + */ +# define LCONV_ENTRY(name) \ + {STRINGIFY(name), STRUCT_OFFSET(struct lconv, name)} + + /* Set up structures containing the documented fields. One structure for + * LC_NUMERIC-controlled strings; one for LC_MONETARY ones, and a final one + * of just numerics. */ +# ifdef USE_LOCALE_NUMERIC + + static const struct lconv_offset lconv_numeric_strings[] = { + LCONV_ENTRY(decimal_point), + LCONV_ENTRY(thousands_sep), +# ifndef NO_LOCALECONV_GROUPING + LCONV_ENTRY(grouping), +# endif + {NULL, 0} + }; + +# endif +# ifdef USE_LOCALE_MONETARY + + static const struct lconv_offset lconv_monetary_strings[] = { + LCONV_ENTRY(int_curr_symbol), + LCONV_ENTRY(currency_symbol), + LCONV_ENTRY(mon_decimal_point), +# ifndef NO_LOCALECONV_MON_THOUSANDS_SEP + LCONV_ENTRY(mon_thousands_sep), +# endif +# ifndef NO_LOCALECONV_MON_GROUPING + LCONV_ENTRY(mon_grouping), +# endif + LCONV_ENTRY(positive_sign), + LCONV_ENTRY(negative_sign), + {NULL, 0} + }; + +# endif + + static const struct lconv_offset lconv_integers[] = { +# ifdef USE_LOCALE_MONETARY + LCONV_ENTRY(int_frac_digits), + LCONV_ENTRY(frac_digits), + LCONV_ENTRY(p_cs_precedes), + LCONV_ENTRY(p_sep_by_space), + LCONV_ENTRY(n_cs_precedes), + LCONV_ENTRY(n_sep_by_space), + LCONV_ENTRY(p_sign_posn), + LCONV_ENTRY(n_sign_posn), +# ifdef HAS_LC_MONETARY_2008 + LCONV_ENTRY(int_p_cs_precedes), + LCONV_ENTRY(int_p_sep_by_space), + LCONV_ENTRY(int_n_cs_precedes), + LCONV_ENTRY(int_n_sep_by_space), + LCONV_ENTRY(int_p_sign_posn), + LCONV_ENTRY(int_n_sign_posn), +# endif +# endif + {NULL, 0} + }; + + static const unsigned category_indices[] = { +# ifdef USE_LOCALE_NUMERIC + LC_NUMERIC_INDEX_, +# endif +# ifdef USE_LOCALE_MONETARY + LC_MONETARY_INDEX_, +# endif + (unsigned) -1 /* Just so the previous element can always end with a + comma => subtract 1 below for the max loop index */ + }; + + const char *ptr = (const char *) lcbuf; + const struct lconv_offset *integers = lconv_integers; + unsigned i; + + HV * retval = newHV(); + sv_2mortal((SV*)retval); + + PERL_ARGS_ASSERT_POPULATE_LOCALECONV; + + /* For each enabled category ... */ + for (i = 0; i < C_ARRAY_LENGTH(category_indices) - 1; i++) { + const unsigned cat_index = category_indices[i]; + int locale_is_utf8 = 0; + const char *locale; + + /* ( = NULL silences a compiler warning; would segfault if it could + * actually happen.) */ + const struct lconv_offset *strings = NULL; + +# ifdef USE_LOCALE_NUMERIC + if (cat_index == LC_NUMERIC_INDEX_) { + locale_is_utf8 = numeric_locale_is_utf8; + strings = lconv_numeric_strings; + } +# endif +# ifdef USE_LOCALE_MONETARY + if (cat_index == LC_MONETARY_INDEX_) { + locale_is_utf8 = monetary_locale_is_utf8; + strings = lconv_monetary_strings; + } +# endif + + assert(locale_is_utf8 != UTF8NESS_UNKNOWN); + + /* Iterate over the strings structure for this category */ + locale = querylocale_i(cat_index); + while (strings->name) { + const char *value = *((const char **)(ptr + strings->offset)); + if (value && *value) { + bool is_utf8 = /* Only make UTF-8 if required to */ + (2 == (get_locale_string_utf8ness_i(locale, + cat_index, + value, + locale_is_utf8))); + (void) hv_store(retval, + strings->name, + strlen(strings->name), + newSVpvn_utf8(value, strlen(value), is_utf8), + 0); + } + + strings++; + } + } + + while (integers->name) { + const char value = *((const char *)(ptr + integers->offset)); + + if (value != CHAR_MAX) + (void) hv_store(retval, integers->name, + strlen(integers->name), newSViv(value), 0); + integers++; + } + + return retval; +} + +#endif /* Has some form of localeconv() and paying attn to a category it + traffics in */ + /* =for apidoc Perl_langinfo diff --git a/proto.h b/proto.h index 12f0d63bf7d7..0ef4958a4951 100644 --- a/proto.h +++ b/proto.h @@ -70,6 +70,8 @@ PERL_CALLCONV int Perl_PerlLIO_open_cloexec(pTHX_ const char *file, int flag) /* PERL_CALLCONV const XOP * Perl_custom_op_xop(pTHX_ const OP *o); */ #define PERL_ARGS_ASSERT_PERL_CUSTOM_OP_XOP +PERL_CALLCONV HV * Perl_localeconv(void); +#define PERL_ARGS_ASSERT_PERL_LOCALECONV PERL_CALLCONV const char* Perl_setlocale(const int category, const char* locale); #define PERL_ARGS_ASSERT_PERL_SETLOCALE #ifndef PERL_NO_INLINE_FUNCTIONS @@ -4547,6 +4549,17 @@ PERL_CALLCONV bool Perl_do_exec3(pTHX_ const char *incmd, int fd, int do_report) #define PERL_ARGS_ASSERT_DO_EXEC3 \ assert(incmd) #endif +#if (defined(HAS_LOCALECONV) || defined(HAS_LOCALECONV_L)) && (defined(USE_LOCALE_MONETARY) || defined(USE_LOCALE_NUMERIC)) +# if defined(PERL_IN_LOCALE_C) +# if defined(USE_LOCALE) +STATIC HV * S_my_localeconv(pTHX); +#define PERL_ARGS_ASSERT_MY_LOCALECONV +STATIC HV * S_populate_localeconv(pTHX_ const struct lconv *lcbuf, const int numeric_locale_is_utf8, const int monetary_locale_is_utf8); +#define PERL_ARGS_ASSERT_POPULATE_LOCALECONV \ + assert(lcbuf) +# endif +# endif +#endif #if 0 /* Not currently used, but may be needed in the future */ # if defined(PERL_IN_UTF8_C) STATIC void S_warn_on_first_deprecated_use(pTHX_ const char * const name, const char * const alternative, const bool use_locale, const char * const file, const unsigned line);