From f50e77e71794c59bc2b6a4ff2f31d9fbbc6b896f Mon Sep 17 00:00:00 2001 From: Karl Williamson Date: Fri, 12 Mar 2021 10:30:53 -0700 Subject: [PATCH] XXXdelta Fix POSIX::strxfrm() This function takes an SV containing a PV. The encoding of that PV is based on the locale of the LC_CTYPE locale. It really doesn't make sense to collate based off of the sequencing of a different locale, which prior to this commit it would do if the LC_COLLATION locale were different. --- embed.fnc | 1 + ext/POSIX/POSIX.xs | 23 +++++---------------- ext/POSIX/lib/POSIX.pod | 11 ++++++++--- locale.c | 44 +++++++++++++++++++++++++++++++++++++++++ proto.h | 3 +++ 5 files changed, 61 insertions(+), 21 deletions(-) diff --git a/embed.fnc b/embed.fnc index 33ee1f3dd1af..e578619d68aa 100644 --- a/embed.fnc +++ b/embed.fnc @@ -1348,6 +1348,7 @@ Ap |I32 * |markstack_grow p |int |magic_setcollxfrm|NN SV* sv|NN MAGIC* mg p |int |magic_freecollxfrm|NN SV* sv|NN MAGIC* mg pbD |char* |mem_collxfrm |NN const char* input_string|STRLEN len|NN STRLEN* xlen +EXop |SV * |strxfrm |NN SV * src : Defined in locale.c, used only in sv.c # if defined(PERL_IN_LOCALE_C) || defined(PERL_IN_SV_C) || defined(PERL_IN_MATHOMS_C) Ep |char* |mem_collxfrm_ |NN const char* input_string \ diff --git a/ext/POSIX/POSIX.xs b/ext/POSIX/POSIX.xs index b1172b5ca409..60b4d1cb14df 100644 --- a/ext/POSIX/POSIX.xs +++ b/ext/POSIX/POSIX.xs @@ -3392,24 +3392,11 @@ void strxfrm(src) SV * src CODE: - { - STRLEN srclen; - STRLEN dstlen; - STRLEN buflen; - char *p = SvPV(src,srclen); - srclen++; - buflen = srclen * 4 + 1; - ST(0) = sv_2mortal(newSV(buflen)); - dstlen = strxfrm(SvPVX(ST(0)), p, (size_t)buflen); - if (dstlen >= buflen) { - dstlen++; - SvGROW(ST(0), dstlen); - strxfrm(SvPVX(ST(0)), p, (size_t)dstlen); - dstlen--; - } - SvCUR_set(ST(0), dstlen); - SvPOK_only(ST(0)); - } +#ifdef USE_LOCALE + ST(0) = Perl_strxfrm(aTHX_ src); +#else + ST(0) = src; +#endif SysRet mkfifo(filename, mode) diff --git a/ext/POSIX/lib/POSIX.pod b/ext/POSIX/lib/POSIX.pod index 4f85f35c8c7a..8871491c12a8 100644 --- a/ext/POSIX/lib/POSIX.pod +++ b/ext/POSIX/lib/POSIX.pod @@ -1971,9 +1971,14 @@ Used with C or C as an alternative to C>. Not really needed since Perl can do this transparently, see L. -Beware that in a UTF-8 locale, anything you pass to this function must -be in UTF-8; and when not in a UTF-8 locale, anything passed must not be -UTF-8 encoded. +Unlike the libc C, this allows NUL characters in the input +C<$src>. + +It doesn't make sense for a string to be encoded in one locale (say, +ISO-8859-6, Arabic) and to collate it based on another (like ISO-8859-7, +Greek). Perl assumes that the current C locale correctly +represents the encoding of C<$src>, and ignores the value of +C. =item C diff --git a/locale.c b/locale.c index b3899b3dd5de..2d24e12028c8 100644 --- a/locale.c +++ b/locale.c @@ -5674,6 +5674,50 @@ S_print_collxfrm_input_and_return(pTHX_ } # endif /* DEBUGGING */ + +SV * +Perl_strxfrm(pTHX_ SV * src) +{ + /* For use by POSIX::strxfrm(). The PV in an SV is controlled by LC_CTYPE, + * not LC_COLLATE. If the locales for the two categories differ, LC_CTYPE + * should win out. + * + * If we can't calculate a collation, 'src' is instead returned, so that + * future comparisons will be by code point order */ + + SV * dst = src; + STRLEN dstlen; + STRLEN srclen; + const char *p = SvPV_const(src,srclen); + const U32 utf8_flag = SvUTF8(src); + +# ifdef USE_LOCALE_CTYPE + + const char * orig_ctype = toggle_locale_c(LC_CTYPE, + querylocale_c(LC_COLLATE)); +# endif + + char *d = mem_collxfrm_(p, srclen, &dstlen, cBOOL(utf8_flag)); + + PERL_ARGS_ASSERT_STRXFRM; + assert(utf8_flag == 0 || utf8_flag == SVf_UTF8); + + if (d != NULL) { + assert(dstlen > 0); + dst =newSVpvn_flags(d + COLLXFRM_HDR_LEN, + dstlen, SVs_TEMP|utf8_flag); + Safefree(d); + } + +# ifdef USE_LOCALE_CTYPE + + restore_toggled_locale_c(LC_CTYPE, orig_ctype); + +# endif + + return dst; +} + #endif /* USE_LOCALE_COLLATE */ #ifdef DEBUGGING diff --git a/proto.h b/proto.h index 1e910d1c3a1a..14039ed8f9fe 100644 --- a/proto.h +++ b/proto.h @@ -6916,6 +6916,9 @@ PERL_CALLCONV char* Perl_mem_collxfrm(pTHX_ const char* input_string, STRLEN len assert(input_string); assert(xlen) #endif +PERL_CALLCONV SV * Perl_strxfrm(pTHX_ SV * src); +#define PERL_ARGS_ASSERT_STRXFRM \ + assert(src) #ifndef NO_MATHOMS PERL_CALLCONV char* Perl_sv_collxfrm(pTHX_ SV *const sv, STRLEN *const nxp); #define PERL_ARGS_ASSERT_SV_COLLXFRM \