From c5f3cd3bc79f927c5ce5ac93b5e2893dc6506b6c Mon Sep 17 00:00:00 2001 From: Karl Williamson Date: Thu, 18 Feb 2021 16:08:19 -0700 Subject: [PATCH] XXXdelta Add my_strftime8() This is like plain my_strftime(), but additionally returns an indication of the UTF-8ness of the returned string --- embed.fnc | 7 +++++- embed.h | 1 + ext/POSIX/POSIX.xs | 16 +++---------- locale.c | 57 ++++++++++++++++++++++++++++++++++++++++++++++ proto.h | 5 ++++ 5 files changed, 72 insertions(+), 14 deletions(-) diff --git a/embed.fnc b/embed.fnc index 2aa5795228ff..ca7ab81a7b90 100644 --- a/embed.fnc +++ b/embed.fnc @@ -1417,7 +1417,12 @@ Ap |PerlIO*|my_popen_list |NN const char* mode|int n|NN SV ** args Apd |void |my_setenv |NULLOK const char* nam|NULLOK const char* val m |I32 |my_stat pX |I32 |my_stat_flags |NULLOK const U32 flags -Adfp |char * |my_strftime |NN const char *fmt|int sec|int min|int hour|int mday|int mon|int year|int wday|int yday|int isdst +Adfp |char * |my_strftime |NN const char *fmt|int sec|int min|int hour \ + |int mday|int mon|int year|int wday|int yday \ + |int isdst +Adfp |char * |my_strftime8 |NN const char *fmt|int sec|int min|int hour \ + |int mday|int mon|int year|int wday|int yday \ + |int isdst|NULLOK int * utf8ness : Used in pp_ctl.c p |void |my_unexec CbDTPR |UV |NATIVE_TO_NEED |const UV enc|const UV ch diff --git a/embed.h b/embed.h index 9a35299d5fe8..898b2f46091b 100644 --- a/embed.h +++ b/embed.h @@ -333,6 +333,7 @@ #define my_setenv(a,b) Perl_my_setenv(aTHX_ a,b) #define my_socketpair Perl_my_socketpair #define my_strftime(a,b,c,d,e,f,g,h,i,j) Perl_my_strftime(aTHX_ a,b,c,d,e,f,g,h,i,j) +#define my_strftime8(a,b,c,d,e,f,g,h,i,j,k) Perl_my_strftime8(aTHX_ a,b,c,d,e,f,g,h,i,j,k) #define my_strtod Perl_my_strtod #define newANONATTRSUB(a,b,c,d) Perl_newANONATTRSUB(aTHX_ a,b,c,d) #define newANONHASH(a) Perl_newANONHASH(aTHX_ a) diff --git a/ext/POSIX/POSIX.xs b/ext/POSIX/POSIX.xs index 3afec8dec2f5..b1172b5ca409 100644 --- a/ext/POSIX/POSIX.xs +++ b/ext/POSIX/POSIX.xs @@ -3549,29 +3549,19 @@ strftime(fmt, sec, min, hour, mday, mon, year, wday = -1, yday = -1, isdst = -1) { char *buf; SV *sv; + int is_utf8; /* allowing user-supplied (rather than literal) formats * is normally frowned upon as a potential security risk; * but this is part of the API so we have to allow it */ GCC_DIAG_IGNORE_STMT(-Wformat-nonliteral); - buf = my_strftime(SvPV_nolen(fmt), sec, min, hour, mday, mon, year, wday, yday, isdst); + buf = my_strftime8(SvPV_nolen(fmt), sec, min, hour, mday, mon, year, wday, yday, isdst, &is_utf8); GCC_DIAG_RESTORE_STMT; sv = sv_newmortal(); if (buf) { STRLEN len = strlen(buf); sv_usepvn_flags(sv, buf, len, SV_HAS_TRAILING_NUL); - if ( SvUTF8(fmt) - || ( is_utf8_non_invariant_string((U8*) buf, len) -#ifdef USE_LOCALE_TIME - && _is_cur_LC_category_utf8(LC_TIME) -#else /* If can't check directly, at least can see if script is consistent, - under UTF-8, which gives us an extra measure of confidence. */ - - && isSCRIPT_RUN((const U8 *) buf, - (const U8 *) buf + len, - TRUE) /* Means assume UTF-8 */ -#endif - )) { + if (SvUTF8(fmt) || is_utf8 == 2) { SvUTF8_on(sv); } } diff --git a/locale.c b/locale.c index c4b21aa2b273..aa1f205558d6 100644 --- a/locale.c +++ b/locale.c @@ -4158,6 +4158,63 @@ S_my_langinfo_i(pTHX_ #endif /* USE_LOCALE */ +/* +=for apidoc my_strftime8 + +This is L with an extra parameter, whose value on return +indicates how to interpret the returned string: + +=over + +=item Z<>2 + +The returned string should be interpreted as being encoded in UTF-8. + +=item Z<>1 + +It is the caller's choice as to how to interpret the returned string. It has +the same representation when encoded in UTF-8 as when not. + +=item Z<>0 + +Do not interpret the returned string as UTF-8. + +=back + +=cut +*/ + +char * +Perl_my_strftime8(pTHX_ const char *fmt, int sec, int min, int hour, int mday, + int mon, int year, int wday, int yday, int isdst, + int * utf8ness) +{ + char * retval = my_strftime(fmt, sec, min, hour, mday, mon, year, wday, + yday, isdst); + + PERL_ARGS_ASSERT_MY_STRFTIME8; + + if (utf8ness) { + +#ifdef USE_LOCALE_TIME + *utf8ness = get_locale_string_utf8ness_i(NULL, LC_TIME_INDEX_, + retval, UTF8NESS_UNKNOWN); +#else + *utf8ness = 1; +#endif + + } + + DEBUG_Lv(PerlIO_printf(Perl_debug_log, "%s: %d: fmt=%s, retval=%s", + __FILE__, __LINE__, fmt, retval); + if (utf8ness) PerlIO_printf(Perl_debug_log, "; utf8ness=%d", + *utf8ness); + PerlIO_printf(Perl_debug_log, "\n"); + ); + + return retval; +} + /* * Initialize locale awareness. */ diff --git a/proto.h b/proto.h index e6cb0619ee96..3be7664851da 100644 --- a/proto.h +++ b/proto.h @@ -2202,6 +2202,11 @@ PERL_CALLCONV char * Perl_my_strftime(pTHX_ const char *fmt, int sec, int min, i #define PERL_ARGS_ASSERT_MY_STRFTIME \ assert(fmt) +PERL_CALLCONV char * Perl_my_strftime8(pTHX_ const char *fmt, int sec, int min, int hour, int mday, int mon, int year, int wday, int yday, int isdst, int * utf8ness) + __attribute__format__(__strftime__,pTHX_1,0); +#define PERL_ARGS_ASSERT_MY_STRFTIME8 \ + assert(fmt) + PERL_CALLCONV NV Perl_my_strtod(const char * const s, char ** e) __attribute__warn_unused_result__; #define PERL_ARGS_ASSERT_MY_STRTOD \