diff --git a/embed.fnc b/embed.fnc index d2f9c94bf7c0..cdd58e84d18a 100644 --- a/embed.fnc +++ b/embed.fnc @@ -3242,13 +3242,15 @@ S |const char*|my_langinfo_i|const nl_item item \ |const unsigned int cat_index \ |NULLOK const char * locale \ |NN const char ** retbufp \ - |NULLOK Size_t * retbuf_sizep -# else + |NULLOK Size_t * retbuf_sizep \ + |NULLOK int * utf8ness +# else S |const char*|my_langinfo_i|const int item \ |const unsigned int cat_index \ |NULLOK const char * locale \ |NN const char ** retbufp \ - |NULLOK Size_t * retbuf_sizep + |NULLOK Size_t * retbuf_sizep \ + |NULLOK int * utf8ness # if (defined(HAS_LOCALECONV) || defined(HAS_LOCALECONV_L)) \ && (defined(USE_LOCALE_MONETARY) || defined(USE_LOCALE_NUMERIC)) S |HV * |get_nl_item_from_localeconv \ @@ -3256,7 +3258,7 @@ S |HV * |get_nl_item_from_localeconv \ |const int item \ |const int unused # endif -# endif +# endif STR |const char *|save_to_buffer|NULLOK const char * string \ |NULLOK const char **buf \ |NULLOK Size_t *buf_size diff --git a/embed.h b/embed.h index 7ca56c15c4aa..47eb72cdde4b 100644 --- a/embed.h +++ b/embed.h @@ -1505,7 +1505,7 @@ # endif # if defined(PERL_IN_LOCALE_C) # if defined(USE_LOCALE) -#define my_langinfo_i(a,b,c,d,e) S_my_langinfo_i(aTHX_ a,b,c,d,e) +#define my_langinfo_i(a,b,c,d,e,f) S_my_langinfo_i(aTHX_ a,b,c,d,e,f) # endif # endif # endif @@ -1623,7 +1623,7 @@ # if defined(HAS_NL_LANGINFO) || defined(HAS_NL_LANGINFO_L) # if defined(PERL_IN_LOCALE_C) # if defined(USE_LOCALE) -#define my_langinfo_i(a,b,c,d,e) S_my_langinfo_i(aTHX_ a,b,c,d,e) +#define my_langinfo_i(a,b,c,d,e,f) S_my_langinfo_i(aTHX_ a,b,c,d,e,f) # endif # endif # endif diff --git a/locale.c b/locale.c index 8f27f69dab46..c137788c6aca 100644 --- a/locale.c +++ b/locale.c @@ -157,8 +157,9 @@ static const char C_thousands_sep[] = ""; #define USE_UNDERLYING_LOCALE NULL #define USE_UNDERLYING_NUMERIC ((char *) 1) -#define my_langinfo_c(item, category, locale, retbufp, retbuf_sizep) \ - my_langinfo_i(item, category##_INDEX_, locale, retbufp, retbuf_sizep) +#define my_langinfo_c(item, category, locale, retbufp, retbuf_sizep, utf8ness) \ + my_langinfo_i(item, category##_INDEX_, locale, retbufp, \ + retbuf_sizep, utf8ness) #ifdef USE_LOCALE @@ -1557,6 +1558,7 @@ S_set_numeric_radix(pTHX_ const bool use_locale) # if defined(USE_LOCALE_NUMERIC) && ( defined(HAS_SOME_LOCALECONV) \ || defined(HAS_SOME_LANGINFO)) + int utf8ness = 1; const char * radix; const char * scratch_buffer = NULL; @@ -1566,18 +1568,13 @@ S_set_numeric_radix(pTHX_ const bool use_locale) else { radix = my_langinfo_c(RADIXCHAR, LC_NUMERIC, USE_UNDERLYING_NUMERIC, - &scratch_buffer, NULL); + &scratch_buffer, NULL, &utf8ness); } sv_setpv(PL_numeric_radix_sv, radix); Safefree(scratch_buffer); - /* If this is valid UTF-8 that isn't totally ASCII, and we are in - * a UTF-8 locale, then mark the radix as being in UTF-8 */ - if (is_utf8_non_invariant_string((U8 *) SvPVX(PL_numeric_radix_sv), - SvCUR(PL_numeric_radix_sv)) - && _is_cur_LC_category_utf8(LC_NUMERIC)) - { + if (utf8ness > 1) { SvUTF8_on(PL_numeric_radix_sv); } @@ -1662,12 +1659,12 @@ S_new_numeric(pTHX_ const char *newnum) my_langinfo_c(RADIXCHAR, LC_NUMERIC, USE_UNDERLYING_LOCALE, &scratch_buffer, - &buf_size)) + &buf_size, NULL)) && strEQ(C_thousands_sep, my_langinfo_c(THOUSEP, LC_NUMERIC, USE_UNDERLYING_LOCALE, &scratch_buffer, - &buf_size)); + &buf_size, NULL)); Safefree(scratch_buffer); } @@ -2044,7 +2041,7 @@ S_new_ctype(pTHX_ const char *newctype) Perl_sv_catpvf(aTHX_ PL_warn_locale, "; codeset=%s", my_langinfo_c(CODESET, LC_CTYPE, USE_UNDERLYING_LOCALE, - &scratch_buffer, NULL)); + &scratch_buffer, NULL, NULL)); Safefree(scratch_buffer); } @@ -3687,11 +3684,11 @@ Perl_langinfo(const nl_item item) /* Use either the underlying numeric, or the other underlying categories */ if (cat_index == LC_NUMERIC_INDEX_) { return my_langinfo_c(item, LC_NUMERIC, USE_UNDERLYING_NUMERIC, - &PL_langinfo_buf, &PL_langinfo_bufsize); + &PL_langinfo_buf, &PL_langinfo_bufsize, NULL); } else { return my_langinfo_i(item, cat_index, USE_UNDERLYING_LOCALE, - &PL_langinfo_buf, &PL_langinfo_bufsize); + &PL_langinfo_buf, &PL_langinfo_bufsize, NULL); } #endif @@ -3719,7 +3716,11 @@ S_my_langinfo_i(pTHX_ * empty-on-entry, single use buffer whose size we don't need * to keep track of */ const char ** retbufp, - Size_t * retbuf_sizep) + Size_t * retbuf_sizep, + + /* If not NULL, the location to store the UTF8-ness of 'item's + * value, as documented */ + int * utf8ness) { const char * retval; @@ -3750,10 +3751,12 @@ S_my_langinfo_i(pTHX_ if (locale == USE_UNDERLYING_LOCALE) { cur = use_curlocale_scratch(); + locale = NULL; } else if (locale == USE_UNDERLYING_NUMERIC) { assert(cat_index == LC_NUMERIC_INDEX_); cur = PL_underlying_numeric_obj; + locale = PL_numeric_name; } else { cur = newlocale(category_masks[cat_index], locale, (locale_t) 0); @@ -3762,6 +3765,11 @@ S_my_langinfo_i(pTHX_ retval = save_to_buffer(nl_langinfo_l(item, cur), retbufp, retbuf_sizep); + if (utf8ness) { + *utf8ness = get_locale_string_utf8ness_i(locale, cat_index, + retval, UTF8NESS_UNKNOWN); + } + if (need_free) { freelocale(cur); } @@ -3787,6 +3795,9 @@ S_my_langinfo_i(pTHX_ else if (locale != USE_UNDERLYING_LOCALE) { orig_switched_locale = toggle_locale_i(cat_index, locale); } + else { + locale = NULL; + } NL_LANGINFO_LOCK; retval = save_to_buffer(nl_langinfo(item), retbufp, retbuf_sizep); @@ -3799,6 +3810,11 @@ S_my_langinfo_i(pTHX_ restore_toggled_locale_i(cat_index, orig_switched_locale); } + if (utf8ness) { + *utf8ness = get_locale_string_utf8ness_i(locale, cat_index, + retval, UTF8NESS_UNKNOWN); + } + return retval; } /*--------------------------------------------------------------------------*/ @@ -3808,6 +3824,12 @@ S_my_langinfo_i(pTHX_ * nl_langinfo(). There are various possibilities depending on the * Configuration */ + /* Almost all the items will have ASCII return values. Set that here, and + * override if necessary */ + if (utf8ness) { + *utf8ness = 1; + } + /* If the desired locale to get the information about isn't the current * one, switch to it, and call ourselves recursively */ if (locale == USE_UNDERLYING_LOCALE) { @@ -3822,7 +3844,7 @@ S_my_langinfo_i(pTHX_ const char * orig_switched_locale = toggle_locale_i(cat_index, want_locale); retval = my_langinfo_i(item, cat_index, USE_UNDERLYING_LOCALE, - retbufp, retbuf_sizep); + retbufp, retbuf_sizep, utf8ness); restore_toggled_locale_i(cat_index, orig_switched_locale); return retval; } @@ -3884,6 +3906,11 @@ S_my_langinfo_i(pTHX_ *floatbuf = '\0'; retval = save_to_buffer(item_start, retbufp, retbuf_sizep); + if (utf8ness) { + *utf8ness = get_locale_string_utf8ness_i(locale, cat_index, retval, + UTF8NESS_UNKNOWN); + } + return retval; } @@ -3914,6 +3941,12 @@ S_my_langinfo_i(pTHX_ retval = save_to_buffer(SvPV_nolen(string), retbufp, retbuf_sizep); + if (utf8ness) { + *utf8ness = get_locale_string_utf8ness_i(locale, cat_index, + retval, + UTF8NESS_UNKNOWN); + } + SvREFCNT_dec_NN(string); return retval; } @@ -4030,8 +4063,8 @@ S_my_langinfo_i(pTHX_ /* The year was deliberately chosen so that January 1 is on the * first day of the week. Since we're only getting one thing at a * time, it all works */ - temp = my_strftime(format, 30, 30, hour, mday, mon, - 2011, 0, 0, 0); + temp = my_strftime8(format, 30, 30, hour, mday, mon, + 2011, 0, 0, 0, utf8ness); retval = save_to_buffer(temp, retbufp, retbuf_sizep); Safefree(temp); @@ -4062,7 +4095,12 @@ S_my_langinfo_i(pTHX_ * evidence that it should work differently, this returns the alt-0 * string for ALT_DIGITS. */ if (! return_format) { - return retval; + return retval; /* *utf8ness was set by my_strftime8() */ + } + + /* A format is always in ASCII */ + if (utf8ness) { + *utf8ness = 1; } /* If to return the format, not the value, overwrite the buffer @@ -5785,7 +5823,7 @@ Perl__is_cur_LC_category_utf8(pTHX_ int category) const char * scratch_buffer = NULL; const char *codeset = my_langinfo_c(CODESET, LC_CTYPE, USE_UNDERLYING_LOCALE, - &scratch_buffer, NULL); + &scratch_buffer, NULL, NULL); DEBUG_Lv(PerlIO_printf(Perl_debug_log, "\tnllanginfo returned CODESET '%s'\n", codeset)); @@ -5858,7 +5896,7 @@ Perl__is_cur_LC_category_utf8(pTHX_ int category) const U8 * currency_string = (const U8 *) my_langinfo_c(CRNCYSTR, LC_MONETARY, save_input_locale, - &scratch_buffer, NULL); + &scratch_buffer, NULL, NULL); /* 2nd param not relevant for this item */ const U8 * first_variant; @@ -6262,7 +6300,7 @@ S_is_locale_utf8(pTHX_ const char * locale) const char * scratch_buffer = NULL; const char * codeset = my_langinfo_c(CODESET, LC_CTYPE, locale, - &scratch_buffer, NULL); + &scratch_buffer, NULL, NULL); bool retval = is_codeset_name_UTF8(codeset); PERL_ARGS_ASSERT_IS_LOCALE_UTF8; diff --git a/proto.h b/proto.h index dfda844aa388..db975662f2b6 100644 --- a/proto.h +++ b/proto.h @@ -4236,7 +4236,7 @@ STATIC HV * S_get_nl_item_from_localeconv(pTHX_ const struct lconv *lcbuf, const # endif # if defined(PERL_IN_LOCALE_C) # if defined(USE_LOCALE) -STATIC const char* S_my_langinfo_i(pTHX_ const int item, const unsigned int cat_index, const char * locale, const char ** retbufp, Size_t * retbuf_sizep); +STATIC const char* S_my_langinfo_i(pTHX_ const int item, const unsigned int cat_index, const char * locale, const char ** retbufp, Size_t * retbuf_sizep, int * utf8ness); #define PERL_ARGS_ASSERT_MY_LANGINFO_I \ assert(retbufp) # endif @@ -4731,7 +4731,7 @@ PERL_CALLCONV const char* Perl_langinfo(const nl_item item); #if defined(HAS_NL_LANGINFO) || defined(HAS_NL_LANGINFO_L) # if defined(PERL_IN_LOCALE_C) # if defined(USE_LOCALE) -STATIC const char* S_my_langinfo_i(pTHX_ const nl_item item, const unsigned int cat_index, const char * locale, const char ** retbufp, Size_t * retbuf_sizep); +STATIC const char* S_my_langinfo_i(pTHX_ const nl_item item, const unsigned int cat_index, const char * locale, const char ** retbufp, Size_t * retbuf_sizep, int * utf8ness); #define PERL_ARGS_ASSERT_MY_LANGINFO_I \ assert(retbufp) # endif