Skip to content

Commit

Permalink
XXXdelta Fix POSIX::strxfrm()
Browse files Browse the repository at this point in the history
This function takes an SV containing a PV.  The encoding of that PV is
based on the locale of the LC_CTYPE locale.  It really doesn't make
sense to collate based off of the sequencing of a different locale, which
prior to this commit it would do if the LC_COLLATION locale were
different.
  • Loading branch information
khwilliamson committed May 6, 2021
1 parent 1b5e3fb commit c92669d
Show file tree
Hide file tree
Showing 5 changed files with 61 additions and 21 deletions.
1 change: 1 addition & 0 deletions embed.fnc
Expand Up @@ -1348,6 +1348,7 @@ Ap |I32 * |markstack_grow
p |int |magic_setcollxfrm|NN SV* sv|NN MAGIC* mg
p |int |magic_freecollxfrm|NN SV* sv|NN MAGIC* mg
pbD |char* |mem_collxfrm |NN const char* input_string|STRLEN len|NN STRLEN* xlen
EXop |SV * |strxfrm |NN SV * src
: Defined in locale.c, used only in sv.c
# if defined(PERL_IN_LOCALE_C) || defined(PERL_IN_SV_C) || defined(PERL_IN_MATHOMS_C)
Ep |char* |mem_collxfrm_ |NN const char* input_string \
Expand Down
23 changes: 5 additions & 18 deletions ext/POSIX/POSIX.xs
Expand Up @@ -3392,24 +3392,11 @@ void
strxfrm(src)
SV * src
CODE:
{
STRLEN srclen;
STRLEN dstlen;
STRLEN buflen;
char *p = SvPV(src,srclen);
srclen++;
buflen = srclen * 4 + 1;
ST(0) = sv_2mortal(newSV(buflen));
dstlen = strxfrm(SvPVX(ST(0)), p, (size_t)buflen);
if (dstlen >= buflen) {
dstlen++;
SvGROW(ST(0), dstlen);
strxfrm(SvPVX(ST(0)), p, (size_t)dstlen);
dstlen--;
}
SvCUR_set(ST(0), dstlen);
SvPOK_only(ST(0));
}
#ifdef USE_LOCALE
ST(0) = Perl_strxfrm(aTHX_ src);
#else
ST(0) = src;
#endif

SysRet
mkfifo(filename, mode)
Expand Down
11 changes: 8 additions & 3 deletions ext/POSIX/lib/POSIX.pod
Expand Up @@ -1971,9 +1971,14 @@ Used with C<eq> or C<cmp> as an alternative to C<L</strcoll>>.
Not really needed since Perl can do this transparently, see
L<perllocale>.

Beware that in a UTF-8 locale, anything you pass to this function must
be in UTF-8; and when not in a UTF-8 locale, anything passed must not be
UTF-8 encoded.
Unlike the libc C<strxfrm>, this allows NUL characters in the input
C<$src>.

It doesn't make sense for a string to be encoded in one locale (say,
ISO-8859-6, Arabic) and to collate it based on another (like ISO-8859-7,
Greek). Perl assumes that the current C<LC_CTYPE> locale correctly
represents the encoding of C<$src>, and ignores the value of
C<LC_COLLATE>.

=item C<sysconf>

Expand Down
44 changes: 44 additions & 0 deletions locale.c
Expand Up @@ -5674,6 +5674,50 @@ S_print_collxfrm_input_and_return(pTHX_
}

# endif /* DEBUGGING */

SV *
Perl_strxfrm(pTHX_ SV * src)
{
/* For use by POSIX::strxfrm(). The PV in an SV is controlled by LC_CTYPE,
* not LC_COLLATE. If the locales for the two categories differ, LC_CTYPE
* should win out.
*
* If we can't calculate a collation, 'src' is instead returned, so that
* future comparisons will be by code point order */

SV * dst = src;
STRLEN dstlen;
STRLEN srclen;
const char *p = SvPV_const(src,srclen);
const U32 utf8_flag = SvUTF8(src);

# ifdef USE_LOCALE_CTYPE

const char * orig_ctype = toggle_locale_c(LC_CTYPE,
querylocale_c(LC_COLLATE));
# endif

char *d = mem_collxfrm_(p, srclen, &dstlen, cBOOL(utf8_flag));

PERL_ARGS_ASSERT_STRXFRM;
assert(utf8_flag == 0 || utf8_flag == SVf_UTF8);

if (d != NULL) {
assert(dstlen > 0);
dst =newSVpvn_flags(d + COLLXFRM_HDR_LEN,
dstlen, SVs_TEMP|utf8_flag);
Safefree(d);
}

# ifdef USE_LOCALE_CTYPE

restore_toggled_locale_c(LC_CTYPE, orig_ctype);

# endif

return dst;
}

#endif /* USE_LOCALE_COLLATE */

#ifdef DEBUGGING
Expand Down
3 changes: 3 additions & 0 deletions proto.h
Expand Up @@ -6916,6 +6916,9 @@ PERL_CALLCONV char* Perl_mem_collxfrm(pTHX_ const char* input_string, STRLEN len
assert(input_string); assert(xlen)
#endif

PERL_CALLCONV SV * Perl_strxfrm(pTHX_ SV * src);
#define PERL_ARGS_ASSERT_STRXFRM \
assert(src)
#ifndef NO_MATHOMS
PERL_CALLCONV char* Perl_sv_collxfrm(pTHX_ SV *const sv, STRLEN *const nxp);
#define PERL_ARGS_ASSERT_SV_COLLXFRM \
Expand Down

0 comments on commit c92669d

Please sign in to comment.