Skip to content

Commit

Permalink
locale.c: Generalize stdize_locale()
Browse files Browse the repository at this point in the history
This function is rewritten to handle LC_ALL, and to make it easier to
add new checks.

There is also a change, which I think is an improvement, in that everything
starting with a \n is trimmed, instead of just a trailing \n being

A couple of calls to stdize_locale() are removed, as they are redundant,
because they are called only as a result of Perl_setlocale() being
called, and that ends up calling stdize_locale always, early on.

The call to savepv() is also moved in a couple cases to after the result
is known to not be NULL

I originally had such a new check in mind, but it turned out that doing
it here didn't solve the problem, so this commit has been amended
(before ever being pushed) to not include that.
chomped.
  • Loading branch information
khwilliamson committed May 6, 2021
1 parent 836f3fe commit eabb379
Show file tree
Hide file tree
Showing 8 changed files with 165 additions and 45 deletions.
7 changes: 6 additions & 1 deletion embed.fnc
Expand Up @@ -3229,7 +3229,12 @@ iTR |const char *|save_to_buffer|NULLOK const char * string \
|NN Size_t *buf_size \
|const Size_t offset
# if defined(USE_LOCALE)
S |char* |stdize_locale |NN char* locs
:# ifndef HAS_POSIX_2008_LOCALE
S |const char*|stdize_locale|const int category \
|NULLOK const char* input_locale \
|NULLOK const char **buf \
|NULLOK Size_t *buf_size
:# endif
# ifdef USE_QUERYLOCALE
S |const char *|calculate_LC_ALL|const locale_t cur_obj
# else
Expand Down
2 changes: 1 addition & 1 deletion embed.h
Expand Up @@ -1707,7 +1707,7 @@
#define restore_switched_locale(a,b) S_restore_switched_locale(aTHX_ a,b)
#define set_numeric_radix(a) S_set_numeric_radix(aTHX_ a)
#define setlocale_failure_panic_i(a,b,c,d,e) S_setlocale_failure_panic_i(aTHX_ a,b,c,d,e)
#define stdize_locale(a) S_stdize_locale(aTHX_ a)
#define stdize_locale(a,b,c,d) S_stdize_locale(aTHX_ a,b,c,d)
#define switch_category_locale_to_template(a,b,c) S_switch_category_locale_to_template(aTHX_ a,b,c)
# if defined(USE_POSIX_2008_LOCALE)
#define emulate_setlocale_i(a,b) S_emulate_setlocale_i(aTHX_ a,b)
Expand Down
2 changes: 2 additions & 0 deletions embedvar.h
Expand Up @@ -316,6 +316,8 @@
#define PL_statusvalue_vms (vTHX->Istatusvalue_vms)
#define PL_stderrgv (vTHX->Istderrgv)
#define PL_stdingv (vTHX->Istdingv)
#define PL_stdize_locale_buf (vTHX->Istdize_locale_buf)
#define PL_stdize_locale_bufsize (vTHX->Istdize_locale_bufsize)
#define PL_strtab (vTHX->Istrtab)
#define PL_strxfrm_NUL_replacement (vTHX->Istrxfrm_NUL_replacement)
#define PL_strxfrm_is_behaved (vTHX->Istrxfrm_is_behaved)
Expand Down
2 changes: 2 additions & 0 deletions intrpvar.h
Expand Up @@ -745,6 +745,8 @@ PERLVARI(I, langinfo_buf, const char *, NULL)
PERLVARI(I, langinfo_bufsize, Size_t, 0)
PERLVARI(I, setlocale_buf, const char *, NULL)
PERLVARI(I, setlocale_bufsize, Size_t, 0)
PERLVARI(I, stdize_locale_buf, const char *, NULL)
PERLVARI(I, stdize_locale_bufsize, Size_t, 0)

#ifdef PERL_SAWAMPERSAND
PERLVAR(I, sawampersand, U8) /* must save all match strings */
Expand Down
184 changes: 144 additions & 40 deletions locale.c
Expand Up @@ -434,9 +434,20 @@ S_category_name(const int category)
((const char *) setlocale(cat, locale))
#endif

/* The next layer up is to catch vagaries and bugs in the libc setlocale return
* value */
#ifdef stdize_locale
# define stdized_setlocale(cat, locale) \
stdize_locale(cat, porcelain_setlocale(cat, locale), \
&PL_stdize_locale_buf, &PL_stdize_locale_bufsize)
#else
# define stdized_setlocale(cat, locale) porcelain_setlocale(cat, locale)
#endif

/* The next many lines form a layer above the close-to-the-metal 'porcelain'
* macros. They are used to present a uniform API to the rest of the code in
* this file in spite of the disparate underlying implementations. */
* and 'stdized' macros. They are used to present a uniform API to the rest of
* the code in this file in spite of the disparate underlying implementations.
* */

#ifndef USE_POSIX_2008_LOCALE

Expand All @@ -445,7 +456,7 @@ S_category_name(const int category)
* layer just calls the base-level functions. See the introductory comments in
* this file for the meaning of the suffixes '_c', '_r', '_i'. */

# define setlocale_c(cat, locale) porcelain_setlocale(cat, locale)
# define setlocale_c(cat, locale) stdized_setlocale(cat, locale)
# define setlocale_i(i, locale) setlocale_c(categories[i], locale)
# define setlocale_r(cat, locale) setlocale_c(cat, locale)

Expand All @@ -461,11 +472,13 @@ S_category_name(const int category)
# define void_setlocale_r(cat, locale) \
void_setlocale_i(get_category_index(cat, locale), locale)

# define bool_setlocale_c(cat, locale) cBOOL(setlocale_c(cat, locale))
# define bool_setlocale_i(i, locale) cBOOL(setlocale_i(i, locale))
# define bool_setlocale_r(cat, locale) cBOOL(setlocale_r(cat, locale))
# define bool_setlocale_c(cat, locale) \
cBOOL(porcelain_setlocale(cat, locale))
# define bool_setlocale_i(i, locale) \
bool_setlocale_c(categories[i], locale)
# define bool_setlocale_r(cat, locale) bool_setlocale_c(cat, locale)

# define querylocale_c(cat) porcelain_setlocale(cat, NULL)
# define querylocale_c(cat) setlocale_c(cat, NULL)
# define querylocale_r(cat) querylocale_c(cat)
# define querylocale_i(i) querylocale_c(categories[i])

Expand Down Expand Up @@ -1067,48 +1080,138 @@ S_emulate_setlocale_i(pTHX_ const unsigned int index, const char * locale)

#ifdef USE_LOCALE

STATIC char *
S_stdize_locale(pTHX_ char *locs)
# ifndef HAS_POSIX_2008_LOCALE

/* So far, the locale strings returned by modern 2008-compliant systems have
* been fine */

STATIC const char *
S_stdize_locale(pTHX_ const int category,
const char *input_locale,
const char **buf,
Size_t *buf_size)
{
/* Standardize the locale name from a string returned by 'setlocale',
* possibly modifying that string.
/* The return value of setlocale() is opaque, but is required to be usable
* as input to a future setlocale() to create the same state.
* Unfortunately not all systems are compliant. But most often they are of
* a very restricted set of forms that this file has been coded to expect.
*
* The typical return value of setlocale() is either
* (1) "xx_YY" if the first argument of setlocale() is not LC_ALL
* (2) "xa_YY xb_YY ..." if the first argument of setlocale() is LC_ALL
* (the space-separated values represent the various sublocales,
* in some unspecified order). This is not handled by this function.
* There are some outliers, though, that this function tries to tame:
*
* In some platforms it has a form like "LC_SOMETHING=Lang_Country.866\n",
* which is harmful for further use of the string in setlocale(). This
* function removes the trailing new line and everything up through the '='
* */
* 1) A new-line. This function chomps any \n characters
* 2) foo=bar. 'bar' is what is generally meant, and the foo= part is
* stripped. This form is legal for LC_ALL. When found in
* that category group, the function calls itself
* recursively on each possible component category to make
* sure the individual categories are ok.
*
* If no changes to the input was made, it is returned; otherwise the
* changed version is stored into memory at *buf, with *buf_size set to its
* new value, and *buf is returned.
*/

const char * const s = strchr(locs, '=');
bool okay = TRUE;
const char * first_bad;
const char * retval;

PERL_ARGS_ASSERT_STDIZE_LOCALE;

if (s) {
const char * const t = strchr(s, '.');
okay = FALSE;
if (t) {
const char * const u = strchr(t, '\n');
if (u && (u[1] == 0)) {
const STRLEN len = u - s;
Move(s + 1, locs, len, char);
locs[len] = 0;
okay = TRUE;
if (input_locale == NULL) {
return NULL;
}

first_bad = strpbrk(input_locale, "=\n");

/* Most likely, there isn't a problem with the input */
if (LIKELY(! first_bad)) {
return input_locale;
}

# ifdef LC_ALL

/* But if there is, and the category is LC_ALL, we have to look at each
* component category */
if (category == LC_ALL) {
const char * individ_locales[LC_ALL_INDEX_];
bool made_changes = FALSE;
unsigned int i;

for (i = 0; i < NOMINAL_LC_ALL_INDEX; i++) {
Size_t this_size = 0;
individ_locales[i] = stdize_locale(categories[i],
porcelain_setlocale(categories[i],
NULL),
&individ_locales[i],
&this_size);

/* If the size didn't change, it means this category did not have
* to be adjusted, and individ_locales[i] points to the buffer
* returned by porcelain_setlocale(); we have to copy that before
* it's called again in the next iteration */
if (this_size == 0) {
individ_locales[i] = savepv(individ_locales[i]);
}
else {
made_changes = TRUE;
}
}

if (!okay)
Perl_croak(aTHX_ "Can't fix broken locale name \"%s\"", locs);
/* If all the individual categories were ok as-is, this was a false
* alarm. We must have seen an '=' which was a legal occurrence in
* this combination locale */
if (! made_changes) {
retval = input_locale; /* The input can be returned unchanged */
}
else {
retval = save_to_buffer(querylocale_c(LC_ALL), buf, buf_size, 0);
}

for (i = 0; i < NOMINAL_LC_ALL_INDEX; i++) {
Safefree(individ_locales[i]);
}

return retval;
}

# endif

/* Here, there was a problem in an individual category. This means that at
* least one adjustment will be necessary. Create a modifiable copy */
retval = save_to_buffer(input_locale, buf, buf_size, 0);

if (*first_bad != '=') {

return locs;
/* Translate the found position into terms of the copy */
first_bad = retval + (first_bad - input_locale);
}
else { /* An '=' */

/* It is unlikely that the return is so screwed-up that it contains
* multiple equals signs, but handle that case by stripping all of
* them. */
const char * final_equals = strrchr(retval, '=');

/* The length passed here causes the move to include the terminating
* NUL */
Move(final_equals + 1, retval, strlen(final_equals), char);

/* See if there are additional problems; if not, we're good to return.
* */
first_bad = strpbrk(retval, "\n");

if (! first_bad) {
return retval;
}
}

/* Here, the problem must be a \n. Get rid of it and what follows.
* (Originally, only a trailing \n was stripped. Unsure what to do if not
* trailing) */
*((char *) first_bad) = '\0';
return retval;
}

# endif

STATIC
const char *

Expand Down Expand Up @@ -1364,7 +1467,7 @@ S_new_numeric(pTHX_ const char *newnum)
return;
}

save_newnum = stdize_locale(savepv(newnum));
save_newnum = savepv(newnum);
PL_numeric_underlying = TRUE;
PL_numeric_standard = isNAME_C_OR_POSIX(save_newnum);

Expand Down Expand Up @@ -1874,7 +1977,7 @@ S_new_collate(pTHX_ const char *newcoll)
if (! PL_collation_name || strNE(PL_collation_name, newcoll)) {
++PL_collation_ix;
Safefree(PL_collation_name);
PL_collation_name = stdize_locale(savepv(newcoll));
PL_collation_name = savepv(newcoll);
PL_collation_standard = isNAME_C_OR_POSIX(newcoll);
if (PL_collation_standard) {
goto is_standard_collation;
Expand Down Expand Up @@ -4437,13 +4540,13 @@ S_switch_category_locale_to_template(pTHX_ const int switch_category,

/* Find the original locale of the category we may need to change, so that
* it can be restored to later */
restore_to_locale =
stdize_locale(savepv(querylocale_r(switch_category)));
restore_to_locale = querylocale_r(switch_category);
if (! restore_to_locale) {
Perl_croak(aTHX_
"panic: %s: %d: Could not find current %s locale, errno=%d\n",
__FILE__, __LINE__, category_name(switch_category), errno);
}
restore_to_locale = savepv(restore_to_locale);

/* If the locale of the template category wasn't passed in, find it now */
if (template_locale == NULL) {
Expand Down Expand Up @@ -4553,12 +4656,13 @@ Perl__is_cur_LC_category_utf8(pTHX_ int category)
# endif

/* Get the desired category's locale */
save_input_locale = stdize_locale(savepv(querylocale_r(category)));
save_input_locale = querylocale_r(category);
if (! save_input_locale) {
Perl_croak(aTHX_
"panic: %s: %d: Could not find current %s locale, errno=%d\n",
__FILE__, __LINE__, category_name(category), errno);
}
save_input_locale = savepv(save_input_locale);

DEBUG_L(PerlIO_printf(Perl_debug_log,
"Current locale for %s is %s\n",
Expand Down
5 changes: 5 additions & 0 deletions perl.c
Expand Up @@ -1159,6 +1159,11 @@ perl_destruct(pTHXx)
PL_langinfo_buf = NULL;
}

if (PL_stdize_locale_buf) {
Safefree(PL_stdize_locale_buf);
PL_stdize_locale_buf = NULL;
}

#ifdef USE_LOCALE_CTYPE
SvREFCNT_dec(PL_warn_locale);
PL_warn_locale = NULL;
Expand Down
5 changes: 2 additions & 3 deletions proto.h
Expand Up @@ -5150,9 +5150,8 @@ PERL_STATIC_NO_RET void S_setlocale_failure_panic_i(pTHX_ const unsigned int cat
#define PERL_ARGS_ASSERT_SETLOCALE_FAILURE_PANIC_I \
assert(failed)

STATIC char* S_stdize_locale(pTHX_ char* locs);
#define PERL_ARGS_ASSERT_STDIZE_LOCALE \
assert(locs)
STATIC const char* S_stdize_locale(pTHX_ const int category, const char* input_locale, const char **buf, Size_t *buf_size);
#define PERL_ARGS_ASSERT_STDIZE_LOCALE
STATIC const char* S_switch_category_locale_to_template(pTHX_ const int switch_category, const int template_category, const char * template_locale);
#define PERL_ARGS_ASSERT_SWITCH_CATEGORY_LOCALE_TO_TEMPLATE
# if defined(USE_POSIX_2008_LOCALE)
Expand Down
3 changes: 3 additions & 0 deletions sv.c
Expand Up @@ -15684,6 +15684,9 @@ perl_clone_using(PerlInterpreter *proto_perl, UV flags,
PL_setlocale_buf = NULL;
PL_setlocale_bufsize = 0;

PL_stdize_locale_buf = NULL;
PL_stdize_locale_bufsize = 0;

/* Unicode inversion lists */

PL_AboveLatin1 = sv_dup_inc(proto_perl->IAboveLatin1, param);
Expand Down

0 comments on commit eabb379

Please sign in to comment.