diff --git a/embed.fnc b/embed.fnc index c8dfa4244ad4..072e2c8abebd 100644 --- a/embed.fnc +++ b/embed.fnc @@ -1598,6 +1598,7 @@ ATdo |const char*|Perl_langinfo|const nl_item item #else ATdo |const char*|Perl_langinfo|const int item #endif +pEX |int |mbtowc_|NULLOK const wchar_t * pwc|NULLOK const char * s|const Size_t len CpO |int |init_i18nl10n |int printwarn CbpOD |int |init_i18nl14n |int printwarn p |char* |my_strerror |const int errnum diff --git a/embed.h b/embed.h index d8f6bc110ab6..ddff189366b3 100644 --- a/embed.h +++ b/embed.h @@ -952,6 +952,7 @@ #define get_prop_values Perl_get_prop_values #define grok_atoUV Perl_grok_atoUV #define load_charnames(a,b,c,d) Perl_load_charnames(aTHX_ a,b,c,d) +#define mbtowc_(a,b,c) Perl_mbtowc_(aTHX_ a,b,c) #define mg_find_mglob(a) Perl_mg_find_mglob(aTHX_ a) #define multiconcat_stringify(a) Perl_multiconcat_stringify(aTHX_ a) #define multideref_stringify(a,b) Perl_multideref_stringify(aTHX_ a,b) diff --git a/ext/POSIX/POSIX.xs b/ext/POSIX/POSIX.xs index 2e742313d971..a048324d56f0 100644 --- a/ext/POSIX/POSIX.xs +++ b/ext/POSIX/POSIX.xs @@ -3397,30 +3397,18 @@ mblen(s, n = ~0) OUTPUT: RETVAL -#if defined(HAS_MBRTOWC) && (defined(USE_ITHREADS) || ! defined(HAS_MBTOWC)) -# define USE_MBRTOWC -#else -# undef USE_MBRTOWC -#endif - int mbtowc(pwc, s, n = ~0) SV * pwc SV * s size_t n CODE: + RETVAL = -1; +#if defined(HAS_MBTOWC) || defined(HAS_MBRTOWC) errno = 0; SvGETMAGIC(s); if (! SvOK(s)) { /* Initialize state */ -#ifdef USE_MBRTOWC - /* Initialize the shift state to all zeros in PL_mbrtowc_ps. */ - memzero(&PL_mbrtowc_ps, sizeof(PL_mbrtowc_ps)); - RETVAL = 0; -#else - MBTOWC_LOCK; - RETVAL = mbtowc(NULL, NULL, 0); - MBTOWC_UNLOCK; -#endif + mbtowc_(NULL, NULL, 0); } else { /* Not resetting state */ wchar_t wc; @@ -3433,15 +3421,7 @@ mbtowc(pwc, s, n = ~0) size_t len; char * string = SvPV(byte_s, len); if (n < len) len = n; -#ifdef USE_MBRTOWC - RETVAL = (SSize_t) mbrtowc(&wc, string, len, &PL_mbrtowc_ps); -#else - /* Locking prevents races, but locales can be switched out - * without locking, so this isn't a cure all */ - MBTOWC_LOCK; - RETVAL = mbtowc(&wc, string, len); - MBTOWC_UNLOCK; -#endif + RETVAL = mbtowc_(&wc, string, len); if (RETVAL >= 0) { sv_setiv_mg(pwc, wc); } @@ -3450,6 +3430,7 @@ mbtowc(pwc, s, n = ~0) } } } +#endif OUTPUT: RETVAL diff --git a/locale.c b/locale.c index 01ed246f7140..a62eda62ffdd 100644 --- a/locale.c +++ b/locale.c @@ -2622,6 +2622,69 @@ S_save_to_buffer(const char * string, const char **buf, Size_t *buf_size, return *buf; } +int +Perl_mbtowc_(pTHX_ const wchar_t * pwc, const char * s, const Size_t len) +{ + +#if ! defined(HAS_MBRTOWC) && ! defined(HAS_MBTOWC) + + PERL_UNUSED_ARG(pwc); + PERL_UNUSED_ARG(s); + PERL_UNUSED_ARG(len); + return -1; + +#else /* Below we have some form of mbtowc() */ +# if defined(HAS_MBRTOWC) \ + && (defined(USE_LOCALE_THREADS) || ! defined(HAS_MBTOWC)) +# define USE_MBRTOWC +# else +# undef USE_MBRTOWC +# endif + + int retval = -1; + + if (s == NULL) { /* Initialize the shift state to all zeros in + PL_mbrtowc_ps. */ + +# if defined(USE_MBRTOWC) + + memzero(&PL_mbrtowc_ps, sizeof(PL_mbrtowc_ps)); + return 0; + +# else + + MBTOWC_LOCK; + SETERRNO(0, 0); + retval = mbtowc(NULL, NULL, 0); + MBTOWC_UNLOCK; + return retval; + +# endif + + } + +# if defined(USE_MBRTOWC) + + SETERRNO(0, 0); + retval = (SSize_t) mbrtowc((wchar_t *) pwc, s, len, &PL_mbrtowc_ps); + +# else + + /* Locking prevents races, but locales can be switched out without locking, + * so this isn't a cure all */ + MBTOWC_LOCK; + SETERRNO(0, 0); + retval = mbtowc((wchar_t *) pwc, s, len); + MBTOWC_UNLOCK; + +# endif + + return retval; + +#endif + +} + /* =for apidoc Perl_langinfo @@ -4943,47 +5006,13 @@ Perl__is_cur_LC_category_utf8(pTHX_ int category) * late adder to C89, so very likely to have it. However, testing has * shown that, like nl_langinfo() above, there are locales that are not * strictly UTF-8 that this will return that they are */ - { wchar_t wc; int len; - dSAVEDERRNO; - -# if defined(HAS_MBRTOWC) && defined(USE_LOCALE_THREADS) - - mbstate_t ps; -# endif - - /* mbrtowc() and mbtowc() convert a byte string to a wide - * character. Feed a byte string to one of them and check that the - * result is the expected Unicode code point */ - -# if defined(HAS_MBRTOWC) && defined(USE_LOCALE_THREADS) - /* Prefer this function if available, as it's reentrant */ - - memzero(&ps, sizeof(ps));; - PERL_UNUSED_RESULT(mbrtowc(&wc, NULL, 0, &ps)); /* Reset any shift - state */ - SETERRNO(0, 0); - len = mbrtowc(&wc, STR_WITH_LEN(REPLACEMENT_CHARACTER_UTF8), &ps); - SAVE_ERRNO; - -# else - - MBTOWC_LOCK; - PERL_UNUSED_RESULT(mbtowc(&wc, NULL, 0));/* Reset any shift state */ - SETERRNO(0, 0); - len = mbtowc(&wc, STR_WITH_LEN(REPLACEMENT_CHARACTER_UTF8)); - SAVE_ERRNO; - MBTOWC_UNLOCK; - -# endif - - RESTORE_ERRNO; - DEBUG_Lv(PerlIO_printf(Perl_debug_log, - "\treturn from mbtowc; len=%d; code_point=%x; errno=%d\n", - len, (unsigned int) wc, GET_ERRNO)); + PERL_UNUSED_RESULT(mbtowc_(NULL, NULL, 0)); + len = mbtowc_(&wc, REPLACEMENT_CHARACTER_UTF8, + STRLENs(REPLACEMENT_CHARACTER_UTF8)); is_utf8 = cBOOL( len == STRLENs(REPLACEMENT_CHARACTER_UTF8) && wc == (wchar_t) UNICODE_REPLACEMENT); diff --git a/proto.h b/proto.h index 763e0dbee713..59cf88befcde 100644 --- a/proto.h +++ b/proto.h @@ -2022,6 +2022,8 @@ PERL_CALLCONV Malloc_t Perl_malloc(MEM_SIZE nbytes) PERL_CALLCONV I32 * Perl_markstack_grow(pTHX); #define PERL_ARGS_ASSERT_MARKSTACK_GROW +PERL_CALLCONV int Perl_mbtowc_(pTHX_ const wchar_t * pwc, const char * s, const Size_t len); +#define PERL_ARGS_ASSERT_MBTOWC_ PERL_CALLCONV SV* Perl_mess(pTHX_ const char* pat, ...) __attribute__format__(__printf__,pTHX_1,pTHX_2); #define PERL_ARGS_ASSERT_MESS \