Skip to content

Commit

Permalink
S_parse_LC_ALL_string: Squash if categories are same locale
Browse files Browse the repository at this point in the history
It turns out that some platforms return something like
    C/C/C/C/C/C
instead of just the equivalent 'C' when querying LC_ALL.  This can lead
to extra work for us, and unnecessary noise when displayed to the user.

This commit changes parse_LC_ALL_string() to look for this case, and to
coalesce the result to just the single value, which is returned in
the 0th element of the input array, leaving the other elements unused.

It makes this behavior overridable by an input flag, which will be used
in a future commit.
  • Loading branch information
khwilliamson committed May 6, 2023
1 parent 108d513 commit cc5a581
Show file tree
Hide file tree
Showing 5 changed files with 75 additions and 32 deletions.
1 change: 1 addition & 0 deletions embed.fnc
Expand Up @@ -4393,6 +4393,7 @@ S |void |give_perl_locale_control \
S |parse_LC_ALL_string_return|parse_LC_ALL_string \
|NN const char *string \
|NN const char **output \
|bool always_use_full_array \
|const bool panic_on_error \
|const line_t caller_line
# else
Expand Down
9 changes: 0 additions & 9 deletions embed.h
Expand Up @@ -1268,15 +1268,6 @@
# define my_localeconv(a) S_my_localeconv(aTHX_ a)
# define populate_hash_from_localeconv(a,b,c,d,e) S_populate_hash_from_localeconv(aTHX_ a,b,c,d,e)
# endif
<<<<<<< HEAD
# if defined(LC_ALL) && \
( defined(USE_FAKE_LC_ALL_POSITIONAL_NOTATION) || \
defined(USE_POSIX_2008_LOCALE) || \
( defined(USE_LOCALE) && defined(USE_STDIZE_LOCALE) ) )
# define parse_LC_ALL_string(a,b,c) S_parse_LC_ALL_string(aTHX_ a,b,c)
# endif
=======
>>>>>>> 6d4a269fb2 (locale.c: Extract code into a function)
# if defined(USE_LOCALE)
# define calculate_LC_ALL_string(a,b,c) S_calculate_LC_ALL_string(aTHX_ a,b,c)
# define get_category_index_helper(a,b,c) S_get_category_index_helper(aTHX_ a,b,c)
Expand Down
77 changes: 70 additions & 7 deletions locale.c
Expand Up @@ -234,6 +234,7 @@ S_positional_name_value_xlation(const char * locale, bool direction)
/* This parses either notation */
switch (parse_LC_ALL_string(locale,
(const char **) &individ_locales,
false, /* Return only [0] if suffices */
false, /* Don't panic on error */
__LINE__))
{
Expand All @@ -242,7 +243,9 @@ S_positional_name_value_xlation(const char * locale, bool direction)

case no_array:
return locale;

case only_element_0:
SAVEFREEPV(individ_locales[0]);
return individ_locales[0];
case full_array:
{
calc_LC_ALL_format format = (direction)
Expand Down Expand Up @@ -1088,6 +1091,7 @@ Perl_locale_panic(const char * msg,
STATIC parse_LC_ALL_string_return
S_parse_LC_ALL_string(pTHX_ const char * string,
const char ** output,
bool use_full_array,
const bool panic_on_error,
const line_t caller_line)
{
Expand All @@ -1108,14 +1112,22 @@ S_parse_LC_ALL_string(pTHX_ const char * string,
* those platforms, this function also parses that form. It examines the
* input to see which form is being parsed.
*
* Often, all categories will have the same locale. In that case, the
* input 'string' likely is a single value, and no splitting is needed.
* In such cases, this function doesn't store anything into 'output', and
* returns 'no_array'.
* Often, all categories will have the same locale. This is special cased
* if 'use_full_array' is false on input:
* 1) If the input 'string' is a single value, this function doesn't
* store anything into 'output', and returns 'no_array'
* 2) Some platforms will return multiple occurrences of the same
* value rather than coalescing them down to a single one. HP-UX
* is such a one. This function will do that collapsing for you,
* returning 'only_element_0' and saving the single value in
* output[0], which the caller will need to arrange to be freed.
* The rest of output[] is undefined, and does not need to be
* freed.
*
* Otherwise, output[] will be filled with the individual locale names for
* all categories on the system, and the caller needs to arrange for each
* to be freed.
* to be freed. This means that either at least one category differed from
* the others, or 'use_full_array' was true on input.
*
* The input 'string' may not be valid. This function looks mainly for
* syntactic errors, and if found, returns 'invalid'. 'output' will not be
Expand Down Expand Up @@ -1162,10 +1174,20 @@ S_parse_LC_ALL_string(pTHX_ const char * string,
# endif

if (single_component) {
if (! use_full_array) {
return no_array;
}

for (unsigned int i = 0; i < LC_ALL_INDEX_; i++) {
output[i] = savepv(string);
}

return full_array;
}

/* Here the input is multiple components. Parse through them.
/* Here the input is multiple components. Parse through them. (It is
* possible that these components are all the same, so we check, and if so,
* return just the 0th component, unless 'use_full_array' is true.)
*
* This enum notes the possible errors findable in parsing */
enum {
Expand All @@ -1182,6 +1204,7 @@ S_parse_LC_ALL_string(pTHX_ const char * string,
const char * s = string;
const char * e = s + strlen(string);
const char * category_end = NULL;
const char * saved_first = NULL;

/* Parse the input locale string */
while (s < e) {
Expand Down Expand Up @@ -1256,6 +1279,17 @@ S_parse_LC_ALL_string(pTHX_ const char * string,
* that category. */
output[index] = savepvn(s, next_sep - s);

if (! use_full_array) {
if (! saved_first) {
saved_first = output[index];
}
else {
if (strNE(saved_first, output[index])) {
use_full_array = true;
}
}
}

/* Next time start from the new position */
s = next_sep + separator_len;
}
Expand Down Expand Up @@ -1285,7 +1319,20 @@ S_parse_LC_ALL_string(pTHX_ const char * string,
}
}

/* In the loop above, we changed 'use_full_array' to true iff not all
* categories have the same locale. Hence, if it is still 'false', all of
* them are the same. */
if (use_full_array) {
return full_array;
}

/* Free the dangling ones */
for (unsigned int i = 1; i < LC_ALL_INDEX_; i++) {
Safefree(output[i]);
output[i] = NULL;
}

return only_element_0;

failure:

Expand Down Expand Up @@ -1422,6 +1469,7 @@ S_stdize_locale(pTHX_ const int category,
else {
switch (parse_LC_ALL_string(input_locale,
(const char **) & individ_locales,
false, /* Return only [0] if suffices */
false, /* Don't panic on error */
caller_line))
{
Expand All @@ -1439,6 +1487,10 @@ S_stdize_locale(pTHX_ const int category,
* didn't fill in any of 'individ_locales'. Set the 0th element to
* that locale. */
individ_locales[0] = (char *) input_locale;
/* FALLTHROUGH */

case only_element_0: /* Element 0 is the only element we need to look
at */
upper = 0;
break;
}
Expand Down Expand Up @@ -1902,6 +1954,7 @@ S_bool_setlocale_2008_i(pTHX_
if (index == LC_ALL_INDEX_) {
switch (parse_LC_ALL_string(new_locale,
(const char **) &new_locales,
false, /* Return only [0] if suffices */
false, /* Don't panic on error */
caller_line))
{
Expand All @@ -1913,6 +1966,12 @@ S_bool_setlocale_2008_i(pTHX_
need_loop = false;
break;

case only_element_0:
SAVEFREEPV(new_locales[0]);
new_locale = new_locales[0];
need_loop = false;
break;

case full_array:
need_loop = true;
break;
Expand Down Expand Up @@ -2567,6 +2626,7 @@ S_find_locale_from_environment(pTHX_ const unsigned int index)
* component of it. Split the result into its individual components */
switch (parse_LC_ALL_string(lc_all,
(const char **) &locale_names,
false, /* Return only [0] if suffices */
false, /* Don't panic on error */
__LINE__))
{
Expand All @@ -2576,6 +2636,9 @@ S_find_locale_from_environment(pTHX_ const unsigned int index)
case no_array:
return lc_all;

case only_element_0:
SAVEFREEPV(locale_names[0]);
return locale_names[0];

case full_array:
/* We need to mortalize the desired component, and free the rest */
Expand Down
1 change: 1 addition & 0 deletions perl.h
Expand Up @@ -1385,6 +1385,7 @@ typedef enum {
/* Result codes from parse_LC_ALL_string(), describing how much of the
 * caller's 'output' array was filled in. */
typedef enum {
    invalid        = 0, /* The input string was found to be malformed */
    no_array       = 1, /* Input was a single value; nothing was stored into
                           'output' */
    only_element_0 = 2, /* Every component was the same locale; only
                           output[0] is set, and the caller must arrange
                           for it to be freed */
    full_array     = 3  /* output[] holds a locale name for every category;
                           the caller must arrange for each to be freed */
} parse_LC_ALL_string_return;

Expand Down
19 changes: 3 additions & 16 deletions proto.h

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

0 comments on commit cc5a581

Please sign in to comment.