From 4c816e681a7a13f31069b11929b02dcf37c1f9f8 Mon Sep 17 00:00:00 2001 From: Karl Williamson Date: Sat, 29 Apr 2023 11:35:27 -0600 Subject: [PATCH] add is_cur_locale_utf8 --- embed.fnc | 2 ++ embed.h | 1 + locale.c | 81 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ proto.h | 4 +++ 4 files changed, 88 insertions(+) diff --git a/embed.fnc b/embed.fnc index fb2d0b19ba6e..62a4957693df 100644 --- a/embed.fnc +++ b/embed.fnc @@ -1607,6 +1607,8 @@ ATdip |bool |is_c9strict_utf8_string_loclen \ |STRLEN len \ |NULLOK const U8 **ep \ |NULLOK STRLEN *el +Ap |bool |is_cur_locale_utf8 \ + |const int category APTdp |bool |isinfnan |NV nv dp |bool |isinfnansv |NN SV *sv diff --git a/embed.h b/embed.h index b3fa97a54745..881d58939bec 100644 --- a/embed.h +++ b/embed.h @@ -314,6 +314,7 @@ # define isUTF8_CHAR Perl_isUTF8_CHAR # define isUTF8_CHAR_flags Perl_isUTF8_CHAR_flags # define is_c9strict_utf8_string_loclen Perl_is_c9strict_utf8_string_loclen +# define is_cur_locale_utf8(a) Perl_is_cur_locale_utf8(aTHX_ a) # define is_lvalue_sub() Perl_is_lvalue_sub(aTHX) # define is_safe_syscall(a,b,c,d) Perl_is_safe_syscall(aTHX_ a,b,c,d) # define is_strict_utf8_string_loclen Perl_is_strict_utf8_string_loclen diff --git a/locale.c b/locale.c index e3e5724a89a0..9c6fbd5abeeb 100644 --- a/locale.c +++ b/locale.c @@ -5014,6 +5014,87 @@ S_is_locale_utf8(pTHX_ const char * locale) } #endif + +bool +Perl_is_cur_locale_utf8(pTHX_ const int category) +{ + /* Returns a bool as to whether or not the locale for the input category is + * a UTF-8 one. The input may be LC_ALL, as long as all categories have + * the same locale; otherwise the answer is undefined, and this function + * returns false, while setting errno to EINVAL. */ + +#ifndef USE_LOCALE + + PERL_UNUSED_ARG(category); + return false; + +#else + + const locale_category_index index = get_category_index(category); + + if (index < LC_ALL_INDEX_) { + return is_locale_utf8(query_nominal_locale_i(index)); + } + +# ifndef LC_ALL + + SET_EINVAL; + return false; + +# else + + if (index > LC_ALL_INDEX_) { + SET_EINVAL; + return false; + } + + /* Parse the current LC_ALL settings to determine if all categories have + * the same locale or not */ + + const char * individ_locales[LC_ALL_INDEX_] = { NULL }; + const char * lc_all = calculate_LC_ALL_string(NULL, + EXTERNAL_FORMAT_FOR_QUERY, + WANT_TEMP_PV, + __LINE__); + bool is_utf8 = false; /* Initialized because some compilers aren't smart + enough to realize it always is set by the + switch() below */ + switch (parse_LC_ALL_string(lc_all, + (const char **) &individ_locales, + no_override, + false, /* Return only [0] if suffices */ + false, /* Don't panic on error */ + __LINE__)) + { + case no_array: /* LC_ALL is a single locale */ + is_utf8 = is_locale_utf8(lc_all); + break; + + case only_element_0: /* element[0] is a single locale valid for all + categories */ + is_utf8 = is_locale_utf8(individ_locales[0]); + Safefree(individ_locales[0]); + break; + + case full_array: + for_all_individual_category_indexes(j) { + Safefree(individ_locales[j]); + } + /* FALLTHROUGH */ + + case invalid: + SET_EINVAL; + is_utf8 = false; + break; + } + + return is_utf8; + +# endif +#endif + +} + #ifdef USE_LOCALE STATIC void diff --git a/proto.h b/proto.h index fc147ffd23df..fb6e074dec0f 100644 --- a/proto.h +++ b/proto.h @@ -1812,6 +1812,10 @@ is_c9strict_utf8_string(const U8 *s, STRLEN len) /* PERL_CALLCONV bool is_c9strict_utf8_string_loc(const U8 *s, STRLEN len, const U8 **ep); */ +PERL_CALLCONV bool +Perl_is_cur_locale_utf8(pTHX_ const int category); +#define PERL_ARGS_ASSERT_IS_CUR_LOCALE_UTF8 + /* PERL_CALLCONV bool is_invariant_string(const U8 * const s, STRLEN len) __attribute__warn_unused_result__