Skip to content

Commit

Permalink
locale.c: Split aggregate LC_ALL from emulate_setlocale
Browse files Browse the repository at this point in the history
This splits into a separate function the code necessary in some
Configurations to calculate LC_ALL from a potentially disparate
aggregate of categories having different locales.

This is being done just for readability, as this extensive code in the
middle of something else distracts from the main point.

A goto is hence replaced by a recursive call.
  • Loading branch information
khwilliamson committed May 5, 2021
1 parent 6b0fdae commit 9f57e1d
Show file tree
Hide file tree
Showing 4 changed files with 138 additions and 105 deletions.
2 changes: 2 additions & 0 deletions embed.fnc
Expand Up @@ -3254,6 +3254,8 @@ S |void |new_LC_ALL |NULLOK const char* unused
S |const char*|emulate_setlocale_i|const unsigned int index \
|NULLOK const char* locale
S |const char*|my_querylocale_i|const unsigned int index
S |const char *|setlocale_from_aggregate_LC_ALL \
|NN const char * locale
# endif
# ifdef WIN32
S |char* |win32_setlocale|int category|NULLOK const char* locale
Expand Down
1 change: 1 addition & 0 deletions embed.h
Expand Up @@ -1712,6 +1712,7 @@
# if defined(USE_POSIX_2008_LOCALE)
#define emulate_setlocale_i(a,b) S_emulate_setlocale_i(aTHX_ a,b)
#define my_querylocale_i(a) S_my_querylocale_i(aTHX_ a)
#define setlocale_from_aggregate_LC_ALL(a) S_setlocale_from_aggregate_LC_ALL(aTHX_ a)
# endif
# if defined(USE_QUERYLOCALE)
#define calculate_LC_ALL(a) S_calculate_LC_ALL(aTHX_ a)
Expand Down
237 changes: 132 additions & 105 deletions locale.c
Expand Up @@ -492,6 +492,9 @@ S_category_name(const int category)
* are equivalents, like LC_NUMERIC_MASK, which we use instead, converting to
* by using get_category_index() followed by table lookup. */

# define emulate_setlocale_c(cat, locale) \
emulate_setlocale_i(cat##_INDEX_, locale)

# define setlocale_i(i, locale) emulate_setlocale_i(i, locale)
# define setlocale_c(cat, locale) setlocale_i(cat##_INDEX_, locale)
# define setlocale_r(cat, locale) \
Expand Down Expand Up @@ -657,6 +660,129 @@ S_my_querylocale_i(pTHX_ const unsigned int index)
return retval;
}

# ifndef USE_QUERYLOCALE

STATIC const char *
S_setlocale_from_aggregate_LC_ALL(pTHX_ const char * locale)
{
/* This function parses the value of the LC_ALL locale, assuming glibc
* syntax, and sets each individual category on the system to the proper
* value.
*
* This is likely to only ever be called from one place, so exists to make
* the calling function easier to read by moving this ancillary code out of
* the main line.
*
* The locale for each category is independent of the other categories.
* Often, they are all the same, but certainly not always. Perl, in fact,
* usually keeps LC_NUMERIC in the C locale, regardless of the underlying
* locale. LC_ALL has to be able to represent the case of when there are
* varying locales. Platforms have differing ways of representing this.
* Because of this, the code in this file goes to lengths to avoid the
* issue, generally looping over the component categories instead of
* referring to them in the aggregate, wherever possible. However, there
* are cases where we have to parse our own constructed aggregates, which use
* the glibc syntax. */

unsigned int i;
const char * s = locale;
const char * e = locale + strlen(locale);
const char * p = s;
const char * category_end;
const char * name_start;
const char * name_end;
const char * locale_on_entry = savepv(querylocale_c(LC_ALL));
const char * retval;

PERL_ARGS_ASSERT_SETLOCALE_FROM_AGGREGATE_LC_ALL;

/* If the string that gives what to set doesn't include all categories,
* the omitted ones get set to "C". To get this behavior, first set
* all the individual categories to "C", and override the furnished
* ones below */
if (! emulate_setlocale_c(LC_ALL, "C")) {
setlocale_failure_panic_c(LC_ALL, locale_on_entry,
"C", __LINE__, 0);
NOT_REACHED; /* NOTREACHED */
}

while (s < e) {

/* Parse through the category */
while (isWORDCHAR(*p)) {
p++;
}
category_end = p;

if (*p++ != '=') {
Perl_croak(aTHX_
"panic: %s: %d: Unexpected character in locale name '%02X",
__FILE__, __LINE__, *(p-1));
}

/* Parse through the locale name */
name_start = p;
while (p < e && *p != ';') {
if (! isGRAPH(*p)) {
Perl_croak(aTHX_
"panic: %s: %d: Unexpected character in locale name '%02X",
__FILE__, __LINE__, *(p-1));
}
p++;
}
name_end = p;

/* Space past the semi-colon */
if (p < e) {
p++;
}

/* Find the index of the category name in our lists */
for (i = 0; i < LC_ALL_INDEX_; i++) {
char * individ_locale;

/* Keep going if this isn't the index. The strnNE() avoids a
* Perl_form(), but would fail if ever a category name could be
* a substring of another one, like if there were a
* "LC_TIME_DATE" */
if strnNE(s, category_names[i], category_end - s) {
continue;
}

individ_locale = Perl_form(aTHX_ "%.*s",
(int) (name_end - name_start), name_start);
if (! emulate_setlocale_i(i, individ_locale)) {
if (! emulate_setlocale_c(LC_ALL, locale_on_entry)) {
Safefree(locale_on_entry);
setlocale_failure_panic_i(i, individ_locale,
locale, __LINE__, 0);
NOT_REACHED; /* NOTREACHED */
}
Safefree(locale_on_entry);
return NULL;
}

/* Found and handled the desired category */
break;
}

s = p;
}

/* Here we have set all the individual categories by recursive calls;
* update the LC_ALL entry as well. We can't just use the input 'locale'
* as the value may omit categories whose locale is 'C'. khw thinks it's
* better to store a complete LC_ALL. So calculate it. */
retval = savepv(calculate_LC_ALL(PL_curlocales));
Safefree(PL_curlocales[LC_ALL_INDEX_]);
PL_curlocales[LC_ALL_INDEX_] = retval;

Safefree(locale_on_entry);
return retval;
}

# endif /* No querylocale() */

STATIC const char *
S_emulate_setlocale_i(pTHX_ const unsigned int index, const char * locale)
{
Expand All @@ -676,20 +802,18 @@ S_emulate_setlocale_i(pTHX_ const unsigned int index, const char * locale)
* index to find the category mask that the POSIX 2008 functions use. */

int mask;
int category;
locale_t old_obj;
locale_t new_obj;

PERL_ARGS_ASSERT_EMULATE_SETLOCALE_I;
assert(index <= NOMINAL_LC_ALL_INDEX);

mask = category_masks[index];
category = categories[index];

DEBUG_Lv(PerlIO_printf(Perl_debug_log,
"%s:%d: emulate_setlocale_i input=%d (%s), mask=0x%x, \"%s\", %d\n",
__FILE__, __LINE__, category, category_name(category), mask,
locale, index));
"%s:%d: emulate_setlocale_i input=%d (%s), mask=0x%x, \"%s\", cat=%d\n",
__FILE__, __LINE__, index, category_names[index], mask,
locale, categories[index]));

/* If just querying what the existing locale is ... */
if (locale == NULL) {
Expand Down Expand Up @@ -736,7 +860,7 @@ S_emulate_setlocale_i(pTHX_ const unsigned int index, const char * locale)
default_name = "C";
}

if (category != LC_ALL) {
if (index != LC_ALL_INDEX_) {
const char * const name = PerlEnv_getenv(category_names[index]);

/* Here we are setting a single category. Assume will have the
Expand Down Expand Up @@ -805,105 +929,8 @@ S_emulate_setlocale_i(pTHX_ const unsigned int index, const char * locale)
}
} /* End of this being setlocale(LC_foo, "") */
else if (strchr(locale, ';')) {

/* LC_ALL may actually incude a conglomeration of various categories.
* Without querylocale, this code uses the glibc (as of this writing)
* syntax for representing that, but that is not a stable API, and
* other platforms do it differently, so we have to handle all cases
* ourselves */

unsigned int i;
const char * s = locale;
const char * e = locale + strlen(locale);
const char * p = s;
const char * category_end;
const char * name_start;
const char * name_end;

/* If the string that gives what to set doesn't include all categories,
* the omitted ones get set to "C". To get this behavior, first set
* all the individual categories to "C", and override the furnished
* ones below */
for (i = 0; i < LC_ALL_INDEX_; i++) {
if (! emulate_setlocale_i(i, "C")) {
return NULL;
}
}

while (s < e) {

/* Parse through the category */
while (isWORDCHAR(*p)) {
p++;
}
category_end = p;

if (*p++ != '=') {
Perl_croak(aTHX_
"panic: %s: %d: Unexpected character in locale name '%02X",
__FILE__, __LINE__, *(p-1));
}

/* Parse through the locale name */
name_start = p;
while (p < e && *p != ';') {
if (! isGRAPH(*p)) {
Perl_croak(aTHX_
"panic: %s: %d: Unexpected character in locale name '%02X",
__FILE__, __LINE__, *(p-1));
}
p++;
return setlocale_from_aggregate_LC_ALL(locale);
}
name_end = p;

/* Space past the semi-colon */
if (p < e) {
p++;
}

/* Find the index of the category name in our lists */
for (i = 0; i < LC_ALL_INDEX_; i++) {
char * individ_locale;

/* Keep going if this isn't the index. The strnNE() avoids a
* Perl_form(), but would fail if ever a category name could be
* a substring of another one, like if there were a
* "LC_TIME_DATE" */
if strnNE(s, category_names[i], category_end - s) {
continue;
}

/* If this index is for the single category we're changing, we
* have found the locale to set it to. */
if (category == categories[i]) {
locale = Perl_form(aTHX_ "%.*s",
(int) (name_end - name_start),
name_start);
goto ready_to_set;
}

assert(category == LC_ALL);
individ_locale = Perl_form(aTHX_ "%.*s",
(int) (name_end - name_start), name_start);
if (! emulate_setlocale_i(i, individ_locale))
{
return NULL;
}
}

s = p;
}

/* Here we have set all the individual categories by recursive calls.
* These collectively should have fixed up LC_ALL, so can just query
* what that now is */
assert(category == LC_ALL);

return querylocale_c(LC_ALL);
} /* End of this being setlocale(LC_ALL,
"LC_CTYPE=foo;LC_NUMERIC=bar;...") */

ready_to_set: ;

/* Here at the end of having to deal with the absence of querylocale().
* Some cases have already been fully handled by recursive calls to this
Expand Down Expand Up @@ -1039,7 +1066,7 @@ S_emulate_setlocale_i(pTHX_ const unsigned int index, const char * locale)

/* Without querylocale(), we have to update our records */

if (category == LC_ALL) {
if (index == LC_ALL_INDEX_) {
unsigned int i;

/* For LC_ALL, we change all individual categories to correspond */
Expand Down
3 changes: 3 additions & 0 deletions proto.h
Expand Up @@ -5159,6 +5159,9 @@ STATIC const char* S_emulate_setlocale_i(pTHX_ const unsigned int index, const c
#define PERL_ARGS_ASSERT_EMULATE_SETLOCALE_I
STATIC const char* S_my_querylocale_i(pTHX_ const unsigned int index);
#define PERL_ARGS_ASSERT_MY_QUERYLOCALE_I
STATIC const char * S_setlocale_from_aggregate_LC_ALL(pTHX_ const char * locale);
#define PERL_ARGS_ASSERT_SETLOCALE_FROM_AGGREGATE_LC_ALL \
assert(locale)
# endif
# if defined(USE_QUERYLOCALE)
STATIC const char * S_calculate_LC_ALL(pTHX_ const locale_t cur_obj);
Expand Down

0 comments on commit 9f57e1d

Please sign in to comment.