Skip to content

Commit

Permalink
Move utf8ness calc for $! into locale.c from mg.c
Browse files Browse the repository at this point in the history
locale.c has the infrastructure to handle this, so remove repeated
logic.

The removed code tried to discern better based on using script runs, but
this actually doesn't help, so is removed.
  • Loading branch information
khwilliamson committed May 9, 2021
1 parent 68a9f4c commit f2e9cc2
Show file tree
Hide file tree
Showing 6 changed files with 97 additions and 53 deletions.
5 changes: 4 additions & 1 deletion embed.fnc
Expand Up @@ -1606,10 +1606,13 @@ ATdo |const char*|Perl_langinfo8|const nl_item item|NULLOK int * utf8ness
ATdo |const char*|Perl_langinfo|const int item
ATdo |const char*|Perl_langinfo8|const int item|NULLOK int * utf8ness
#endif
#ifdef WIN32
p |bool |get_win32_message_utf8ness|NULLOK const char * string
#endif
pEX |int |mbtowc_|NULLOK const wchar_t * pwc|NULLOK const char * s|const Size_t len
CpO |int |init_i18nl10n |int printwarn
CbpOD |int |init_i18nl14n |int printwarn
p |char* |my_strerror |const int errnum
p |char* |my_strerror |const int errnum|NN int * utf8ness
XpT |void |_warn_problematic_locale
Xp |void |set_numeric_underlying
Xp |void |set_numeric_standard
Expand Down
5 changes: 4 additions & 1 deletion embed.h
Expand Up @@ -1411,7 +1411,7 @@
#define my_clearenv() Perl_my_clearenv(aTHX)
#define my_lstat_flags(a) Perl_my_lstat_flags(aTHX_ a)
#define my_stat_flags(a) Perl_my_stat_flags(aTHX_ a)
#define my_strerror(a) Perl_my_strerror(aTHX_ a)
#define my_strerror(a,b) Perl_my_strerror(aTHX_ a,b)
#define my_unexec() Perl_my_unexec(aTHX)
#define newATTRSUB_x(a,b,c,d,e,f) Perl_newATTRSUB_x(aTHX_ a,b,c,d,e,f)
#define newSTUB(a,b) Perl_newSTUB(aTHX_ a,b)
Expand Down Expand Up @@ -2085,6 +2085,9 @@
#define quadmath_format_needed Perl_quadmath_format_needed
#define quadmath_format_valid Perl_quadmath_format_valid
# endif
# if defined(WIN32)
#define get_win32_message_utf8ness(a) Perl_get_win32_message_utf8ness(aTHX_ a)
# endif
# if defined(_MSC_VER)
#define magic_regdatum_set(a,b) Perl_magic_regdatum_set(aTHX_ a,b)
# endif
Expand Down
77 changes: 62 additions & 15 deletions locale.c
Expand Up @@ -2828,12 +2828,30 @@ S_get_locale_string_utf8ness_i(pTHX_ const char * locale,

return 2;

#endif
# endif

}

# ifdef WIN32

bool
Perl_get_win32_message_utf8ness(pTHX_ const char * string)
{
/* This function knows the internal workings of
* get_locale_string_utf8ness_i() so that:
*
* NULL => locale irrelevant
* 0 => category irrelevant
* TRUE => assume the locale is UTF-8, so returns based on the legality of
* the input string, ignoring the locale and category completely */

return get_locale_string_utf8ness_i(NULL, 0, string, TRUE);
}

# endif
#endif /* USE_LOCALE */


int
Perl_mbtowc_(pTHX_ const wchar_t * pwc, const char * s, const Size_t len)
{
Expand Down Expand Up @@ -6533,10 +6551,11 @@ Perl__is_in_locale_category(pTHX_ const bool compiling, const int category)
" Within locale scope=%d\n", \
__FILE__, __LINE__, errnum, in_locale))

#define DEBUG_STRERROR_RETURN(errstr) \
#define DEBUG_STRERROR_RETURN(errstr, utf8ness) \
DEBUG_Lv(PerlIO_printf(Perl_debug_log, \
"%s:%d Strerror returned; saving a copy: '", __FILE__, __LINE__); \
print_bytes_for_locale(errstr, errstr + strlen(errstr), 0));
print_bytes_for_locale(errstr, errstr + strlen(errstr), 0); \
PerlIO_printf(Perl_debug_log, "'; utf8ness=%d\n", *utf8ness));

/* On platforms that have precisely one of these categories (Windows
* qualifies), these yield the correct one */
Expand All @@ -6553,6 +6572,11 @@ Perl__is_in_locale_category(pTHX_ const bool compiling, const int category)
* returns. Otherwise the text is derived from the locale, LC_MESSAGES if we
* have that; LC_CTYPE if not.
*
* It returns in *utf8ness the result's UTF-8ness:
* 0 = definitely not
* 1 = immaterial: representation is the same in UTF-8 as not
* 2 = defintely yes
*
* The function just calls strerror(), but temporarily switches locales, if
* needed. Many platforms require LC_CTYPE and LC_MESSAGES to be in the same
* CODESET in order for the return from strerror() to not contain '?' symbols,
Expand Down Expand Up @@ -6581,12 +6605,15 @@ Perl__is_in_locale_category(pTHX_ const bool compiling, const int category)

/* Here, neither category is defined: use the C locale */
char *
Perl_my_strerror(pTHX_ const int errnum)
Perl_my_strerror(pTHX_ const int errnum, int * utf8ness)
{
char *errstr = savepv(strerror_l(errnum, PL_C_locale_obj));

PERL_ARGS_ASSERT_MY_STRERROR;

DEBUG_STRERROR_ENTER(errnum, 0);
DEBUG_STRERROR_RETURN(errstr);
*utf8ness = 1;
DEBUG_STRERROR_RETURN(errstr, utf8ness);

SAVEFREEPV(errstr);
return errstr;
Expand All @@ -6601,7 +6628,7 @@ Perl_my_strerror(pTHX_ const int errnum)
* locale; otherwise use the current locale object */

char *
Perl_my_strerror(pTHX_ const int errnum)
Perl_my_strerror(pTHX_ const int errnum, int * utf8ness)
{
char *errstr;

Expand All @@ -6615,7 +6642,10 @@ Perl_my_strerror(pTHX_ const int errnum)
DEBUG_STRERROR_ENTER(errnum, IN_LC(categories[WHICH_LC_INDEX]));

errstr = savepv(strerror_l(errnum, which_obj));
DEBUG_STRERROR_RETURN(errstr);

*utf8ness = get_locale_string_utf8ness_i(NULL, WHICH_LC_INDEX, errstr,
UTF8NESS_UNKNOWN);
DEBUG_STRERROR_RETURN(errstr, utf8ness);

SAVEFREEPV(errstr);
return errstr;
Expand All @@ -6627,25 +6657,30 @@ Perl_my_strerror(pTHX_ const int errnum)
* either C or the LC_MESSAGES locale */

char *
Perl_my_strerror(pTHX_ const int errnum)
Perl_my_strerror(pTHX_ const int errnum, int * utf8ness)
{
char *errstr;

PERL_ARGS_ASSERT_MY_STRERROR;

DEBUG_STRERROR_ENTER(errnum, IN_LC(LC_MESSAGES));

if (! IN_LC(LC_MESSAGES)) { /* Use C if not within locale scope */
errstr = savepv(strerror_l(errnum, PL_C_locale_obj));
*utf8ness = 1;
}
else { /* Otherwise, use the LC_MESSAGES locale, making sure LC_CTYPE
matches */
locale_t cur = duplocale(use_curlocale_scratch());

cur = newlocale(LC_CTYPE_MASK, querylocale_c(LC_MESSAGES), cur);
errstr = savepv(strerror_l(errnum, cur));
*utf8ness = get_locale_string_utf8ness_i(NULL, LC_MESSAGES_INDEX_,
errstr, UTF8NESS_UNKNOWN);
freelocale(cur);
}

DEBUG_STRERROR_RETURN(errstr);
DEBUG_STRERROR_RETURN(errstr, utf8ness);

SAVEFREEPV(errstr);
return errstr;
Expand All @@ -6659,15 +6694,19 @@ Perl_my_strerror(pTHX_ const int errnum)
* strerror */

char *
Perl_my_strerror(pTHX_ const int errnum)
Perl_my_strerror(pTHX_ const int errnum, int * utf8ness)
{
char *errstr;

PERL_ARGS_ASSERT_MY_STRERROR;

DEBUG_STRERROR_ENTER(errnum, 0);

errstr = savepv(Strerror(errnum));

DEBUG_STRERROR_RETURN(errstr);
*utf8ness = 1;

DEBUG_STRERROR_RETURN(errstr, utf8ness);

SAVEFREEPV(errstr);
return errstr;
Expand All @@ -6681,7 +6720,7 @@ Perl_my_strerror(pTHX_ const int errnum)
* locale; otherwise use the current locale */

char *
Perl_my_strerror(pTHX_ const int errnum)
Perl_my_strerror(pTHX_ const int errnum, int * utf8ness)
{
char *errstr;

Expand All @@ -6691,16 +6730,20 @@ Perl_my_strerror(pTHX_ const int errnum)

if (IN_LC(categories[WHICH_LC_INDEX])) {
errstr = savepv(Strerror(errnum));
*utf8ness = get_locale_string_utf8ness_i(NULL, WHICH_LC_INDEX, errstr,
UTF8NESS_UNKNOWN);
}
else {
const char * orig_locale = toggle_locale_i(WHICH_LC_INDEX, "C");

errstr = savepv(Strerror(errnum));

restore_toggled_locale_i(WHICH_LC_INDEX, orig_locale);

*utf8ness = 1;
}

DEBUG_STRERROR_RETURN(errstr);
DEBUG_STRERROR_RETURN(errstr, utf8ness);

SAVEFREEPV(errstr);
return errstr;
Expand All @@ -6713,7 +6756,7 @@ Perl_my_strerror(pTHX_ const int errnum)
* either C or the LC_MESSAGES locale */

char *
Perl_my_strerror(pTHX_ const int errnum)
Perl_my_strerror(pTHX_ const int errnum, int * utf8ness)
{
char *errstr;
const char * desired_locale = savepv((IN_LC(LC_MESSAGES))
Expand All @@ -6723,6 +6766,8 @@ Perl_my_strerror(pTHX_ const int errnum)
const char * orig_MESSAGES_locale;
/* XXX Can fail on z/OS */

PERL_ARGS_ASSERT_MY_STRERROR;

DEBUG_STRERROR_ENTER(errnum, IN_LC(LC_MESSAGES));

orig_CTYPE_locale = toggle_locale_c(LC_CTYPE, desired_locale);
Expand All @@ -6733,7 +6778,9 @@ Perl_my_strerror(pTHX_ const int errnum)
restore_toggled_locale_c(LC_MESSAGES, orig_MESSAGES_locale);
restore_toggled_locale_c(LC_CTYPE, orig_CTYPE_locale);

DEBUG_STRERROR_RETURN(errstr);
*utf8ness = get_locale_string_utf8ness_i(NULL, LC_MESSAGES_INDEX_, errstr,
UTF8NESS_UNKNOWN);
DEBUG_STRERROR_RETURN(errstr, utf8ness);

Safefree(desired_locale);
SAVEFREEPV(errstr);
Expand Down
2 changes: 1 addition & 1 deletion makedef.pl
Expand Up @@ -271,7 +271,7 @@ sub readvar {

if ($ARGS{PLATFORM} ne 'win32') {
++$skip{$_} foreach qw(
Perl_my_setlocale
Perl_get_win32_message_utf8ness
);
}

Expand Down
54 changes: 21 additions & 33 deletions mg.c
Expand Up @@ -808,37 +808,6 @@ S_fixup_errno_string(pTHX_ SV* sv)
if(strEQ(SvPVX(sv), "")) {
sv_catpv(sv, UNKNOWN_ERRNO_MSG);
}
else {

/* In some locales the error string may come back as UTF-8, in which
* case we should turn on that flag. This didn't use to happen, and to
* avoid as many possible backward compatibility issues as possible, we
* don't turn on the flag unless we have to. So the flag stays off for
* an entirely invariant string. We assume that if the string looks
* like UTF-8 in a single script, it really is UTF-8: "text in any
* other encoding that uses bytes with the high bit set is extremely
* unlikely to pass a UTF-8 validity test"
* (http://en.wikipedia.org/wiki/Charset_detection). There is a
* potential that we will get it wrong however, especially on short
* error message text, so do an additional check. */
if ( ! IN_BYTES /* respect 'use bytes' */
&& is_utf8_non_invariant_string((U8*) SvPVX_const(sv), SvCUR(sv))

#ifdef USE_LOCALE_MESSAGES

&& _is_cur_LC_category_utf8(LC_MESSAGES)

#else /* If can't check directly, at least can see if script is consistent,
under UTF-8, which gives us an extra measure of confidence. */

&& isSCRIPT_RUN((const U8 *) SvPVX_const(sv), (U8 *) SvEND(sv),
TRUE) /* Means assume UTF-8 */
#endif

) {
SvUTF8_on(sv);
}
}
}

/*
Expand Down Expand Up @@ -876,11 +845,16 @@ SV *
Perl_sv_string_from_errnum(pTHX_ int errnum, SV *tgtsv)
{
char const *errstr;
int utf8ness;

if(!tgtsv)
tgtsv = sv_newmortal();
errstr = my_strerror(errnum);
errstr = my_strerror(errnum, &utf8ness);
if(errstr) {
sv_setpv(tgtsv, errstr);
if (utf8ness > 1) {
SvUTF8_on(tgtsv);
}
fixup_errno_string(tgtsv);
} else {
SvPVCLEAR(tgtsv);
Expand Down Expand Up @@ -955,9 +929,11 @@ Perl_magic_get(pTHX_ SV *sv, MAGIC *mg)
SvPVCLEAR(sv);
}
#elif defined(OS2)
{
int utf8ness;
if (!(_emx_env & 0x200)) { /* Under DOS */
sv_setnv(sv, (NV)errno);
sv_setpv(sv, errno ? my_strerror(errno) : "");
sv_setpv(sv, errno ? my_strerror(errnum, &utf8ness) : "");
} else {
if (errno != errno_isOS2) {
const int tmp = _syserrno();
Expand All @@ -968,15 +944,27 @@ Perl_magic_get(pTHX_ SV *sv, MAGIC *mg)
sv_setpv(sv, os2error(Perl_rc));
}
if (SvOK(sv) && strNE(SvPVX(sv), "")) {
if (utf8ness > 1) {
SvUTF8_on(sv);
}
fixup_errno_string(sv);
}
}
# elif defined(WIN32)
{
const DWORD dwErr = GetLastError();
sv_setnv(sv, (NV)dwErr);
if (dwErr) {
PerlProc_GetOSError(sv, dwErr);
fixup_errno_string(sv);

# ifdef USE_LOCALE
if ( IN_LOCALE
&& get_win32_message_utf8ness(SvPV_nomg_const_nolen(sv)))
{
SvUTF8_on(sv);
}
# endif
}
else
SvPVCLEAR(sv);
Expand Down
7 changes: 5 additions & 2 deletions proto.h
Expand Up @@ -2195,8 +2195,9 @@ PERL_CALLCONV int Perl_my_socketpair(int family, int type, int protocol, int fd[
#define PERL_ARGS_ASSERT_MY_STAT
PERL_CALLCONV I32 Perl_my_stat_flags(pTHX_ const U32 flags);
#define PERL_ARGS_ASSERT_MY_STAT_FLAGS
PERL_CALLCONV char* Perl_my_strerror(pTHX_ const int errnum);
#define PERL_ARGS_ASSERT_MY_STRERROR
PERL_CALLCONV char* Perl_my_strerror(pTHX_ const int errnum, int * utf8ness);
#define PERL_ARGS_ASSERT_MY_STRERROR \
assert(utf8ness)
PERL_CALLCONV char * Perl_my_strftime(pTHX_ const char *fmt, int sec, int min, int hour, int mday, int mon, int year, int wday, int yday, int isdst)
__attribute__format__(__strftime__,pTHX_1,0);
#define PERL_ARGS_ASSERT_MY_STRFTIME \
Expand Down Expand Up @@ -6997,6 +6998,8 @@ PERL_CALLCONV bool Perl_quadmath_format_valid(const char* format);
assert(format)
#endif
#if defined(WIN32)
PERL_CALLCONV bool Perl_get_win32_message_utf8ness(pTHX_ const char * string);
#define PERL_ARGS_ASSERT_GET_WIN32_MESSAGE_UTF8NESS
PERL_CALLCONV_NO_RET void win32_croak_not_implemented(const char * fname)
__attribute__noreturn__;
#define PERL_ARGS_ASSERT_WIN32_CROAK_NOT_IMPLEMENTED \
Expand Down

0 comments on commit f2e9cc2

Please sign in to comment.