Skip to content

Commit

Permalink
locale.c: querylocale() doesn't work on LC_ALL
Browse files Browse the repository at this point in the history
I had misread the man pages.  This bug has been in the field for several
releases now, but most likely hasn't shown up because it's almost always
the case that the locale categories will be set to the same locale.  And
so most implementations of querylocale() would return the correct
result.

This commit works by splitting the calculation of the value of LC_ALL
from S_emulate_setlocale_i() into a separate function, and extending it
to work on querylocale() systems.  This has the added benefit of
removing tangential code from the main line, making
S_emulate_setlocale_i easier to read.

calculate_LC_ALL() is the new function, and is now called from two
places.  As part of this commit, constness is added to PL_curlocales[]

Part of this change is to keep our records of LC_ALL on non-querylocale
systems always up-to-date, which is better practice

And part of this change is temporary, marked as such, to be removed a
few commits later.
  • Loading branch information
khwilliamson committed May 5, 2021
1 parent 81fb296 commit bf87298
Show file tree
Hide file tree
Showing 5 changed files with 186 additions and 98 deletions.
5 changes: 5 additions & 0 deletions embed.fnc
Expand Up @@ -3230,6 +3230,11 @@ iTR |const char *|save_to_buffer|NULLOK const char * string \
|const Size_t offset
# if defined(USE_LOCALE)
S |char* |stdize_locale |NN char* locs
# ifdef USE_QUERYLOCALE
S |const char *|calculate_LC_ALL|const locale_t cur_obj
# else
S |const char *|calculate_LC_ALL|NN const char ** individ_locales
# endif
S |void |new_collate |NULLOK const char* newcoll
S |void |new_ctype |NN const char* newctype
Sr |void |setlocale_failure_panic_i|const unsigned int cat_index \
Expand Down
10 changes: 10 additions & 0 deletions embed.h
Expand Up @@ -1504,6 +1504,13 @@
# if !(defined(PERL_USE_3ARG_SIGHANDLER))
#define sighandler Perl_sighandler
# endif
# if !(defined(USE_QUERYLOCALE))
# if defined(PERL_IN_LOCALE_C)
# if defined(USE_LOCALE)
#define calculate_LC_ALL(a) S_calculate_LC_ALL(aTHX_ a)
# endif
# endif
# endif
# if !(defined(_MSC_VER))
#define magic_regdatum_set(a,b) Perl_magic_regdatum_set(aTHX_ a,b)
# endif
Expand Down Expand Up @@ -1706,6 +1713,9 @@
#define emulate_setlocale_i(a,b) S_emulate_setlocale_i(aTHX_ a,b)
#define my_querylocale_i(a) S_my_querylocale_i(aTHX_ a)
# endif
# if defined(USE_QUERYLOCALE)
#define calculate_LC_ALL(a) S_calculate_LC_ALL(aTHX_ a)
# endif
# if defined(WIN32)
#define win32_setlocale(a,b) S_win32_setlocale(aTHX_ a,b)
# endif
Expand Down
2 changes: 1 addition & 1 deletion intrpvar.h
Expand Up @@ -724,7 +724,7 @@ PERLVAR(I, padix_floor, PADOFFSET) /* how low may inner block reset padix */

/* This is the most number of categories we've encountered so far on any
* platform */
PERLVARA(I, curlocales, 12, char *)
PERLVARA(I, curlocales, 12, const char *)

#endif
#ifdef USE_LOCALE_COLLATE
Expand Down
254 changes: 157 additions & 97 deletions locale.c
Expand Up @@ -501,12 +501,20 @@ S_category_name(const int category)
# ifndef USE_QUERYLOCALE
# define USE_PL_CURLOCALES
# else
# define isSINGLE_BIT_SET(mask) isPOWER_OF_2(mask)

/* This code used to think querylocale() was valid on LC_ALL. Make sure
* all instances of that have been removed */
# define QUERYLOCALE_ASSERT(index) \
__ASSERT_(isSINGLE_BIT_SET(category_masks[index]))
# if ! defined(HAS_QUERYLOCALE) && defined(_NL_LOCALE_NAME)
# define querylocale_l(index, locale_obj) \
nl_langinfo_l(_NL_LOCALE_NAME(categories[index]), locale_obj)
(QUERYLOCALE_ASSERT(index) \
nl_langinfo_l(_NL_LOCALE_NAME(categories[index]), locale_obj))
# else
# define querylocale_l(index, locale_obj) \
querylocale(category_masks[index], locale_obj)
(QUERYLOCALE_ASSERT(index) \
querylocale(category_masks[index], locale_obj))
# endif
# endif
# if ! defined(__GLIBC__) || ! defined(USE_LOCALE_MESSAGES)
Expand Down Expand Up @@ -599,6 +607,7 @@ S_my_querylocale_i(pTHX_ const unsigned int index)

int category;
const locale_t cur_obj = uselocale((locale_t) 0);
const char * retval;

PERL_ARGS_ASSERT_MY_QUERYLOCALE_I;
assert(index <= NOMINAL_LC_ALL_INDEX);
Expand All @@ -608,97 +617,31 @@ S_my_querylocale_i(pTHX_ const unsigned int index)
DEBUG_Lv(PerlIO_printf(Perl_debug_log, "%s:%d: my_querylocale_i %p\n",
__FILE__, __LINE__, cur_obj));
if (cur_obj == LC_GLOBAL_LOCALE) {
return porcelain_setlocale(category, NULL);
retval = porcelain_setlocale(category, NULL);
}
else {

# ifdef USE_QUERYLOCALE

return (char *) querylocale_l(index, cur_obj);
/* We don't currently keep records when there is querylocale(), so have
* to get it anew each time */
retval = (index == LC_ALL_INDEX_)
? calculate_LC_ALL(cur_obj)
: querylocale_l(index, cur_obj);

# else

/* Without querylocale(), we have to use our record-keeping we've done. */
if (category != LC_ALL) {

DEBUG_Lv(PerlIO_printf(Perl_debug_log,
"%s:%d: my_querylocale_i returning %s\n",
__FILE__, __LINE__, PL_curlocales[index]));

return PL_curlocales[index];
}
else { /* For LC_ALL */
unsigned int i;
Size_t names_len = 0;
char * all_string;
bool are_all_categories_the_same_locale = TRUE;

/* If we have a valid LC_ALL value, just return it */
if (PL_curlocales[LC_ALL_INDEX_]) {

DEBUG_Lv(PerlIO_printf(Perl_debug_log,
"%s:%d: my_querylocale_i returning %s\n",
__FILE__, __LINE__, PL_curlocales[LC_ALL_INDEX_]));

return PL_curlocales[LC_ALL_INDEX_];
}

/* Otherwise, we need to construct a string of name=value pairs.
* We use the glibc syntax, like
* LC_NUMERIC=C;LC_TIME=en_US.UTF-8;...
* First calculate the needed size. Along the way, check if all
* the locale names are the same */
for (i = 0; i < LC_ALL_INDEX_; i++) {

DEBUG_Lv(PerlIO_printf(Perl_debug_log,
"%s:%d: my_querylocale_i i=%d, name=%s, locale=%s\n",
__FILE__, __LINE__, i, category_names[i],
PL_curlocales[i]));

names_len += strlen(category_names[i])
+ 1 /* '=' */
+ strlen(PL_curlocales[i])
+ 1; /* ';' */
/* But we do have up-to-date values when we keep our own records */
retval = PL_curlocales[index];

if (i > 0 && strNE(PL_curlocales[i], PL_curlocales[i-1])) {
are_all_categories_the_same_locale = FALSE;
}
}
# endif

/* If they are the same, we don't actually have to construct the
* string; we just make the entry in LC_ALL_INDEX_ valid, and be that
* single name */
if (are_all_categories_the_same_locale) {
PL_curlocales[LC_ALL_INDEX_] = savepv(PL_curlocales[0]);
return PL_curlocales[LC_ALL_INDEX_];
}

names_len++; /* Trailing '\0' */
SAVEFREEPV(Newx(all_string, names_len, char));
*all_string = '\0';

/* Then fill in the string */
for (i = 0; i < LC_ALL_INDEX_; i++) {

DEBUG_Lv(PerlIO_printf(Perl_debug_log,
"%s:%d: my_querylocale_i i=%d, name=%s, locale=%s\n",
__FILE__, __LINE__, i, category_names[i],
PL_curlocales[i]));

my_strlcat(all_string, category_names[i], names_len);
my_strlcat(all_string, "=", names_len);
my_strlcat(all_string, PL_curlocales[i], names_len);
my_strlcat(all_string, ";", names_len);
}

DEBUG_L(PerlIO_printf(Perl_debug_log,
"%s:%d: my_querylocale_i returning %s\n",
__FILE__, __LINE__, all_string));

return all_string;
}

# endif

"%s:%d: my_querylocale_i(%s) returning '%s'\n",
__FILE__, __LINE__, category_names[index], retval));
return retval;
}

STATIC const char *
Expand Down Expand Up @@ -1074,7 +1017,7 @@ S_emulate_setlocale_i(pTHX_ const unsigned int index, const char * locale)
# ifdef USE_QUERYLOCALE

if (strEQ(locale, "")) {
locale = querylocale_l(index, new_obj);
locale = querylocale_i(index);
}

# else
Expand All @@ -1098,20 +1041,19 @@ S_emulate_setlocale_i(pTHX_ const unsigned int index, const char * locale)
}
else {

/* For a single category, if it's not the same as the one in LC_ALL, we
* nullify LC_ALL */
/* Otherwise, update the single category, plus LC_ALL */

Safefree(PL_curlocales[index]);
PL_curlocales[index] = savepv(locale);

if (PL_curlocales[LC_ALL_INDEX_] && strNE(PL_curlocales[LC_ALL_INDEX_],
locale))
if ( PL_curlocales[LC_ALL_INDEX_] == NULL
|| strNE(PL_curlocales[LC_ALL_INDEX_], locale))
{
Safefree(PL_curlocales[LC_ALL_INDEX_]);
PL_curlocales[LC_ALL_INDEX_] = NULL;
PL_curlocales[LC_ALL_INDEX_] =
savepv(calculate_LC_ALL(PL_curlocales));
}

/* Then update the category's record */
Safefree(PL_curlocales[index]);
PL_curlocales[index] = savepv(locale);

FIX_GLIBC_LC_MESSAGES_BUG(index);
}

Expand Down Expand Up @@ -1167,6 +1109,130 @@ S_stdize_locale(pTHX_ char *locs)
return locs;
}

STATIC
const char *

# ifdef USE_QUERYLOCALE
S_calculate_LC_ALL(pTHX_ const locale_t cur_obj)
# else
S_calculate_LC_ALL(pTHX_ const char ** individ_locales)
# endif

{
/* For POSIX 2008, we have to figure out LC_ALL ourselves when needed.
* querylocale(), on systems that have it, doesn't tend to work for LC_ALL.
* So we have to construct the answer ourselves based on the passed in
* data, which is either a locale_t object, for systems with querylocale(),
* or an array we keep updated to the proper values, otherwise.
*
* This returns a mortalized string containing the locale name(s) of
* LC_ALL.
*
* If all individual categories are the same locale, we can just set LC_ALL
* to that locale. But if not, we have to create an aggregation of all the
* categories on the system. Platforms differ as to the syntax they use
* for these non-uniform locales for LC_ALL. Some use a '/' or other
* delimiter of the locales with a predetermined order of categories; a
* Configure probe would be needed to tell us how to decipher those. glibc
* uses a series of name=value pairs, like
* LC_NUMERIC=C;LC_TIME=en_US.UTF-8;...
* The syntax we use for our aggregation doesn't much matter, as we take
* care not to use the native setlocale() function on whatever style is
* chosen. But, it would be possible for someone to call Perl_setlocale()
* using a native style we don't understand. So far no one has complained.
*
* For systems that have categories we don't know about, the algorithm
* below won't know about those missing categories, leading to potential
* bugs for code that looks at them. If there is an environment variable
* that sets that category, we won't know to look for it, and so our use of
* LANG or "C" improperly overrides it. On the other hand, if we don't do
* what is done here, and there is no environment variable, the category's
* locale should be set to LANG or "C". So there is no good solution. khw
* thinks the best is to make sure we have a complete list of possible
* categories, adding new ones as they show up on obscure platforms.
*/

unsigned int i;
Size_t names_len = 0;
bool are_all_categories_the_same_locale = TRUE;
char * aggregate_locale;
char * previous_start = NULL;
char * this_start;
Size_t entry_len = 0;

PERL_ARGS_ASSERT_CALCULATE_LC_ALL;

/* First calculate the needed size for the string listing the categories
* and their locales. */
for (i = 0; i < LC_ALL_INDEX_; i++) {

# ifdef USE_QUERYLOCALE
const char * entry = querylocale_l(i, cur_obj);
# else
const char * entry = individ_locales[i];
# endif

if (entry == NULL) continue; /* XXX Temporary */
names_len += strlen(category_names[i])
+ 1 /* '=' */
+ strlen(entry)
+ 1; /* ';' */
}

names_len++; /* Trailing '\0' */

/* Allocate enough space for the aggregated string */
SAVEFREEPV(Newxz(aggregate_locale, names_len, char));

/* Then fill it in */
for (i = 0; i < LC_ALL_INDEX_; i++) {
Size_t new_len;

# ifdef USE_QUERYLOCALE
const char * entry = querylocale_l(i, cur_obj);
# else
const char * entry = individ_locales[i];
# endif

if (entry == NULL) continue; /* XXX Temporary */
new_len = my_strlcat(aggregate_locale, category_names[i], names_len);
assert(new_len <= names_len);
new_len = my_strlcat(aggregate_locale, "=", names_len);
assert(new_len <= names_len);

this_start = aggregate_locale + strlen(aggregate_locale);
entry_len = strlen(entry);

new_len = my_strlcat(aggregate_locale, entry, names_len);
assert(new_len <= names_len);
new_len = my_strlcat(aggregate_locale, ";", names_len);
assert(new_len <= names_len);
PERL_UNUSED_VAR(new_len); /* Only used in DEBUGGING */

if ( i > 0
&& are_all_categories_the_same_locale
&& memNE(previous_start, this_start, entry_len + 1))
{
are_all_categories_the_same_locale = FALSE;
}
else {
previous_start = this_start;
}
}

/* If they are all the same, just return any one of them */
if (are_all_categories_the_same_locale) {
aggregate_locale = this_start;
aggregate_locale[entry_len] = '\0';
}

DEBUG_Lv(PerlIO_printf(Perl_debug_log,
"%s:%d: calculate_LC_ALL returning '%s'\n",
__FILE__, __LINE__, aggregate_locale));

return aggregate_locale;
}

STATIC void
S_setlocale_failure_panic_i(pTHX_
const unsigned int cat_index,
Expand Down Expand Up @@ -3372,12 +3438,6 @@ Perl_init_i18nl10n(pTHX_ int printwarn)

PL_numeric_radix_sv = newSVpvs(".");

# endif
# ifdef USE_PL_CURLOCALES

/* Initialize our records. If we have POSIX 2008, we have LC_ALL */
void_setlocale_c(LC_ALL, porcelain_setlocale(LC_ALL, NULL));

# endif
# ifdef LOCALE_ENVIRON_REQUIRED

Expand Down
13 changes: 13 additions & 0 deletions proto.h
Expand Up @@ -4232,6 +4232,15 @@ PERL_CALLCONV Signal_t Perl_csighandler(int sig);
PERL_CALLCONV Signal_t Perl_sighandler(int sig);
#define PERL_ARGS_ASSERT_SIGHANDLER
#endif
#if !(defined(USE_QUERYLOCALE))
# if defined(PERL_IN_LOCALE_C)
# if defined(USE_LOCALE)
STATIC const char * S_calculate_LC_ALL(pTHX_ const char ** individ_locales);
#define PERL_ARGS_ASSERT_CALCULATE_LC_ALL \
assert(individ_locales)
# endif
# endif
#endif
#if !(defined(_MSC_VER))
PERL_CALLCONV_NO_RET int Perl_magic_regdatum_set(pTHX_ SV* sv, MAGIC* mg)
__attribute__noreturn__;
Expand Down Expand Up @@ -5152,6 +5161,10 @@ STATIC const char* S_emulate_setlocale_i(pTHX_ const unsigned int index, const c
STATIC const char* S_my_querylocale_i(pTHX_ const unsigned int index);
#define PERL_ARGS_ASSERT_MY_QUERYLOCALE_I
# endif
# if defined(USE_QUERYLOCALE)
STATIC const char * S_calculate_LC_ALL(pTHX_ const locale_t cur_obj);
#define PERL_ARGS_ASSERT_CALCULATE_LC_ALL
# endif
# if defined(WIN32)
STATIC char* S_win32_setlocale(pTHX_ int category, const char* locale);
#define PERL_ARGS_ASSERT_WIN32_SETLOCALE
Expand Down

0 comments on commit bf87298

Please sign in to comment.