Skip to content

Commit

Permalink
locale.c: Make static fcn reentrant
Browse files Browse the repository at this point in the history
This makes my_langinfo() reentrant by adding parameters specifying where
to store the result.

This prepares for future commits, and fixes some minor bugs for XS
writers, in that the claim was that the buffer in calling
Perl_langinfo() was safe from getting zapped until the next call to it
in the same thread.  It turns out there were cases where, because of
internal calls, the buffer did get zapped.
  • Loading branch information
khwilliamson committed Apr 29, 2021
1 parent 9f4b32a commit 2ebfd38
Show file tree
Hide file tree
Showing 4 changed files with 87 additions and 48 deletions.
12 changes: 9 additions & 3 deletions embed.fnc
Expand Up @@ -3219,10 +3219,16 @@ ST |const char*|category_name |const int category
ST |unsigned int|get_category_index|const int category|NULLOK const char * locale
S |const char*|switch_category_locale_to_template|const int switch_category|const int template_category|NULLOK const char * template_locale
S |void |restore_switched_locale|const int category|NULLOK const char * const original_locale
# ifdef HAS_NL_LANGINFO
ST |const char*|my_langinfo|const nl_item item|bool toggle
# if defined(HAS_NL_LANGINFO) || defined(HAS_NL_LANGINFO_L)
S |const char*|my_langinfo|const nl_item item \
|bool toggle \
|NN const char ** retbufp \
|NULLOK Size_t * retbuf_sizep
# else
ST |const char*|my_langinfo|const int item|bool toggle
S |const char*|my_langinfo|const int item \
|bool toggle \
|NN const char ** retbufp \
|NULLOK Size_t * retbuf_sizep
# endif
STR |const char *|save_to_buffer|NULLOK const char * string \
|NULLOK const char **buf \
Expand Down
8 changes: 4 additions & 4 deletions embed.h
Expand Up @@ -1502,10 +1502,10 @@
# endif
# endif
# endif
# if !(defined(HAS_NL_LANGINFO))
# if !(defined(HAS_NL_LANGINFO) || defined(HAS_NL_LANGINFO_L))
# if defined(PERL_IN_LOCALE_C)
# if defined(USE_LOCALE)
#define my_langinfo S_my_langinfo
#define my_langinfo(a,b,c,d) S_my_langinfo(aTHX_ a,b,c,d)
# endif
# endif
# endif
Expand Down Expand Up @@ -1612,10 +1612,10 @@
#define do_semop(a,b) Perl_do_semop(aTHX_ a,b)
#define do_shmio(a,b,c) Perl_do_shmio(aTHX_ a,b,c)
# endif
# if defined(HAS_NL_LANGINFO)
# if defined(HAS_NL_LANGINFO) || defined(HAS_NL_LANGINFO_L)
# if defined(PERL_IN_LOCALE_C)
# if defined(USE_LOCALE)
#define my_langinfo S_my_langinfo
#define my_langinfo(a,b,c,d) S_my_langinfo(aTHX_ a,b,c,d)
# endif
# endif
# endif
Expand Down
93 changes: 62 additions & 31 deletions locale.c
Expand Up @@ -1510,12 +1510,14 @@ S_set_numeric_radix(pTHX_ const bool use_locale)
# if defined(USE_LOCALE_NUMERIC) && ( defined(HAS_SOME_LOCALECONV) \
|| defined(HAS_SOME_LANGINFO))

const char * scratch_buffer = NULL;
const char * radix = (use_locale)
? my_langinfo(RADIXCHAR, FALSE)
? my_langinfo(RADIXCHAR, FALSE, &scratch_buffer, NULL)
/* FALSE => already in dest locale */
: C_decimal_point;

sv_setpv(PL_numeric_radix_sv, radix);
Safefree(scratch_buffer);

/* If this is valid UTF-8 that isn't totally ASCII, and we are in
* a UTF-8 locale, then mark the radix as being in UTF-8 */
Expand Down Expand Up @@ -1590,6 +1592,8 @@ S_new_numeric(pTHX_ const char *newnum)
*/

char *save_newnum;
const char * scratch_buffer = NULL;
Size_t buf_size = 0;

if (! newnum) {
Safefree(PL_numeric_name);
Expand All @@ -1611,9 +1615,14 @@ S_new_numeric(pTHX_ const char *newnum)
* THOUSEP can currently (but rarely) cause a race, so avoid doing that,
* and just always change the locale if not C nor POSIX on those systems */
if (! PL_numeric_standard) {
PL_numeric_standard = ( strEQ(C_decimal_point, my_langinfo(RADIXCHAR,
FALSE /* Don't toggle locale */ ))
&& strEQ(C_thousands_sep, my_langinfo(THOUSEP, FALSE)));
PL_numeric_standard = ( strEQ(C_decimal_point,
my_langinfo(RADIXCHAR,
FALSE, /* Don't toggle locale */
&scratch_buffer, &buf_size))
&& strEQ(C_thousands_sep,
my_langinfo(THOUSEP, FALSE,
&scratch_buffer, &buf_size)));
Safefree(scratch_buffer);
}

# endif
Expand Down Expand Up @@ -1991,9 +2000,14 @@ S_new_ctype(pTHX_ const char *newctype)

# ifdef HAS_SOME_LANGINFO

{
const char * scratch_buffer = NULL;
Perl_sv_catpvf(aTHX_ PL_warn_locale, "; codeset=%s",
/* parameter FALSE is a don't care here */
my_langinfo(CODESET, FALSE));
my_langinfo(CODESET, FALSE,
&scratch_buffer, NULL));
Safefree(scratch_buffer);
}

# endif

Expand Down Expand Up @@ -2874,6 +2888,8 @@ typedef int nl_item; /* Substitute 'int' for emulated nl_langinfo() */
const char *
Perl_langinfo(const nl_item item)
{
dTHX;

/* If we are not paying attention to the category that controls an item,
* instead return a default value. Also return the default value if there
* is no way for us to figure out the correct value. If we have some form
Expand Down Expand Up @@ -3009,21 +3025,33 @@ Perl_langinfo(const nl_item item)

#else

return my_langinfo(item, TRUE);
return my_langinfo(item, TRUE, &PL_langinfo_buf, &PL_langinfo_bufsize);

#endif

}

#ifdef USE_LOCALE

/* There are several implementations of my_langinfo, depending on the
* Configuration. They all share the same beginning of the function */
STATIC const char *
S_my_langinfo(const nl_item item, bool toggle)
{
S_my_langinfo(pTHX_

dTHX;
const nl_item item, /* The item to look up */
bool toggle,

/* Where to store the result, and where the size of that buffer
* is stored, updated on exit. retbuf_sizep may be NULL for an
* empty-on-entry, single use buffer whose size we don't need to
* keep track of */
const char ** retbufp,
Size_t * retbuf_sizep)
{
const char * retval;

PERL_ARGS_ASSERT_MY_LANGINFO;

# ifdef USE_LOCALE_NUMERIC

/* We only need to toggle into the underlying LC_NUMERIC locale for these
Expand Down Expand Up @@ -3061,14 +3089,15 @@ S_my_langinfo(const nl_item item, bool toggle)
/* Copy to a per-thread buffer, which is also one that won't be
* destroyed by a subsequent setlocale(), such as the
* RESTORE_LC_NUMERIC may do just below. */
retval = save_to_buffer(nl_langinfo(item),
&PL_langinfo_buf, &PL_langinfo_bufsize);
retval = save_to_buffer(nl_langinfo(item), retbufp, retbuf_sizep);
NL_LANGINFO_UNLOCK;

if (toggle) {
RESTORE_LC_NUMERIC();
}
}

return retval;
/*--------------------------------------------------------------------------*/
# else /* Use nl_langinfo_l(), avoiding both a mutex and changing the
locale. */
Expand All @@ -3091,13 +3120,12 @@ S_my_langinfo(const nl_item item, bool toggle)

/* We have to save it to a buffer, because the freelocale() just below
* can invalidate the internal one */
retval = save_to_buffer(nl_langinfo_l(item, cur),
&PL_langinfo_buf, &PL_langinfo_bufsize);
retval = save_to_buffer(nl_langinfo_l(item, cur), retbufp, retbuf_sizep);
}

# endif

return retval;

# endif
/*--------------------------------------------------------------------------*/
# else /* Below, emulate nl_langinfo as best we can */

Expand Down Expand Up @@ -3175,7 +3203,7 @@ S_my_langinfo(const nl_item item, bool toggle)
break;

retval = save_to_buffer(Perl_form(aTHX_ "%c%s", precedes, currency),
&PL_langinfo_buf, &PL_langinfo_bufsize);
retbufp, retbuf_sizep);
}

# ifdef TS_W32_BROKEN_LOCALECONV
Expand Down Expand Up @@ -3236,19 +3264,19 @@ S_my_langinfo(const nl_item item, bool toggle)

/* Everything in between is the radix string */
if (floatbuf >= e) {
retval = save_to_buffer("?", PL_langinfo_buf, PL_langinfo_bufsize);
retval = save_to_buffer("?", retbufp, retbuf_sizep);
}
else {
*floatbuf = '\0';
retval = save_to_buffer(item_start, PL_langinfo_buf, PL_langinfo_bufsize);
retval = save_to_buffer(item_start, retbufp, retbuf_sizep);
}

if (toggle) {
RESTORE_LC_NUMERIC();
}
}

retval = PL_langinfo_buf;
retval = *retbufp;
break;

# else
Expand Down Expand Up @@ -3283,10 +3311,10 @@ S_my_langinfo(const nl_item item, bool toggle)
* issues. */

needed_size = GetNumberFormatEx(PL_numeric_name, 0, "1234.5",
NULL, PL_langinfo_buf, PL_langinfo_bufsize);
NULL, retbufp, *retbuf_sizep);
DEBUG_L(PerlIO_printf(Perl_debug_log,
"%s: %d: return from GetNumber, count=%d, val=%s\n",
__FILE__, __LINE__, needed_size, PL_langinfo_buf));
__FILE__, __LINE__, needed_size, retbufp));

# endif
# endif
Expand All @@ -3304,7 +3332,7 @@ S_my_langinfo(const nl_item item, bool toggle)
}
}

retval = save_to_buffer(temp, &PL_langinfo_buf, &PL_langinfo_bufsize);
retval = save_to_buffer(temp, retbufp, retbuf_sizep);

# ifdef TS_W32_BROKEN_LOCALECONV

Expand Down Expand Up @@ -3437,17 +3465,16 @@ S_my_langinfo(const nl_item item, bool toggle)
* time, it all works */
temp = my_strftime(format, 30, 30, hour, mday, mon,
2011, 0, 0, 0);
retval = save_to_buffer(temp, &PL_langinfo_buf,
&PL_langinfo_bufsize);
retval = save_to_buffer(temp, retbufp, retbuf_sizep);
Safefree(temp);

/* If the item is 'ALT_DIGITS', 'PL_langinfo_buf' contains the
/* If the item is 'ALT_DIGITS', '*retbuf' contains the
* alternate format for wday 0. If the value is the same as the
* normal 0, there isn't an alternate, so clear the buffer.
*
* (wday was chosen because its range is all a single digit.
* Things like tm_sec have two digits as the minimum: '00'.) */
if (item == ALT_DIGITS && strEQ(PL_langinfo_buf, "0")) {
if (item == ALT_DIGITS && strEQ(*retbufp, "0")) {
return "";
}

Expand All @@ -3474,7 +3501,7 @@ S_my_langinfo(const nl_item item, bool toggle)
/* If to return the format, not the value, overwrite the buffer
* with it. But some strftime()s will keep the original format if
* illegal, so change those to "" */
if (strEQ(PL_langinfo_buf, format)) {
if (strEQ(*retbufp, format)) {
return "";
}

Expand Down Expand Up @@ -3514,8 +3541,7 @@ S_my_langinfo(const nl_item item, bool toggle)
/* Use everything past the dot */
retval++;

retval = save_to_buffer(retval, &PL_langinfo_buf,
&PL_langinfo_bufsize);
retval = save_to_buffer(retval, retbufp, retbuf_sizep);
}

break;
Expand Down Expand Up @@ -5060,7 +5086,8 @@ Perl__is_cur_LC_category_utf8(pTHX_ int category)
defective locale definition. XXX We should probably check for
these in the Latin1 range and warn (but on glibc, requires
iswalnum() etc. due to their not handling 80-FF correctly */
const char *codeset = my_langinfo(CODESET, FALSE);
const char * scratch_buffer = NULL;
const char *codeset = my_langinfo(CODESET, FALSE, &scratch_buffer, NULL);
/* FALSE => already in dest locale */

DEBUG_Lv(PerlIO_printf(Perl_debug_log,
Expand All @@ -5080,6 +5107,7 @@ Perl__is_cur_LC_category_utf8(pTHX_ int category)
"\tnllanginfo returned CODESET '%s'; ?UTF8 locale=%d\n",
codeset, is_utf8));
restore_switched_locale(LC_CTYPE, original_ctype_locale);
Safefree(scratch_buffer);
goto finish_and_return;
}
}
Expand Down Expand Up @@ -5129,8 +5157,10 @@ Perl__is_cur_LC_category_utf8(pTHX_ int category)
category,
save_input_locale);
bool only_ascii = FALSE;
const char * scratch_buffer = NULL;
const U8 * currency_string
= (const U8 *) my_langinfo(CRNCYSTR, FALSE);
= (const U8 *) my_langinfo(CRNCYSTR, FALSE,
&scratch_buffer, NULL);
/* 2nd param not relevant for this item */
const U8 * first_variant;

Expand All @@ -5151,6 +5181,7 @@ Perl__is_cur_LC_category_utf8(pTHX_ int category)
else {
is_utf8 = is_strict_utf8_string(first_variant, 0);
}
Safefree(scratch_buffer);

restore_switched_locale(LC_MONETARY, original_monetary_locale);

Expand Down
22 changes: 12 additions & 10 deletions proto.h
Expand Up @@ -4217,11 +4217,12 @@ STATIC int S_sv_2iuv_non_preserve(pTHX_ SV *const sv);
PERL_CALLCONV const char* Perl_langinfo(const int item);
#define PERL_ARGS_ASSERT_PERL_LANGINFO
#endif
#if !(defined(HAS_NL_LANGINFO))
#if !(defined(HAS_NL_LANGINFO) || defined(HAS_NL_LANGINFO_L))
# if defined(PERL_IN_LOCALE_C)
# if defined(USE_LOCALE)
STATIC const char* S_my_langinfo(const int item, bool toggle);
#define PERL_ARGS_ASSERT_MY_LANGINFO
STATIC const char* S_my_langinfo(pTHX_ const int item, bool toggle, const char ** retbufp, Size_t * retbuf_sizep);
#define PERL_ARGS_ASSERT_MY_LANGINFO \
assert(retbufp)
# endif
# endif
#endif
Expand Down Expand Up @@ -4696,18 +4697,19 @@ PERL_CALLCONV I32 Perl_do_shmio(pTHX_ I32 optype, SV** mark, SV** sp);
#define PERL_ARGS_ASSERT_DO_SHMIO \
assert(mark); assert(sp)
#endif
#if defined(HAS_NL_LANGINFO)
#if defined(HAS_NL_LANGINFO) && defined(PERL_LANGINFO_H)
PERL_CALLCONV const char* Perl_langinfo(const nl_item item);
#define PERL_ARGS_ASSERT_PERL_LANGINFO
#endif
#if defined(HAS_NL_LANGINFO) || defined(HAS_NL_LANGINFO_L)
# if defined(PERL_IN_LOCALE_C)
# if defined(USE_LOCALE)
STATIC const char* S_my_langinfo(const nl_item item, bool toggle);
#define PERL_ARGS_ASSERT_MY_LANGINFO
STATIC const char* S_my_langinfo(pTHX_ const nl_item item, bool toggle, const char ** retbufp, Size_t * retbuf_sizep);
#define PERL_ARGS_ASSERT_MY_LANGINFO \
assert(retbufp)
# endif
# endif
#endif
#if defined(HAS_NL_LANGINFO) && defined(PERL_LANGINFO_H)
PERL_CALLCONV const char* Perl_langinfo(const nl_item item);
#define PERL_ARGS_ASSERT_PERL_LANGINFO
#endif
#if defined(HAS_PIPE)
PERL_CALLCONV int Perl_PerlProc_pipe_cloexec(pTHX_ int *pipefd)
__attribute__warn_unused_result__;
Expand Down

0 comments on commit 2ebfd38

Please sign in to comment.