Skip to content

Commit

Permalink
Move code from POSIX.xs to locale.c
Browse files Browse the repository at this point in the history
This avoids duplicated logic.
  • Loading branch information
khwilliamson committed May 9, 2021
1 parent 047ab9f commit 4c305ff
Show file tree
Hide file tree
Showing 5 changed files with 75 additions and 61 deletions.
1 change: 1 addition & 0 deletions embed.fnc
Expand Up @@ -1598,6 +1598,7 @@ ATdo |const char*|Perl_langinfo|const nl_item item
#else
ATdo |const char*|Perl_langinfo|const int item
#endif
pEX |int |mbtowc_|NULLOK const wchar_t * pwc|NULLOK const char * s|const Size_t len
CpO |int |init_i18nl10n |int printwarn
CbpOD |int |init_i18nl14n |int printwarn
p |char* |my_strerror |const int errnum
Expand Down
1 change: 1 addition & 0 deletions embed.h
Expand Up @@ -952,6 +952,7 @@
#define get_prop_values Perl_get_prop_values
#define grok_atoUV Perl_grok_atoUV
#define load_charnames(a,b,c,d) Perl_load_charnames(aTHX_ a,b,c,d)
#define mbtowc_(a,b,c) Perl_mbtowc_(aTHX_ a,b,c)
#define mg_find_mglob(a) Perl_mg_find_mglob(aTHX_ a)
#define multiconcat_stringify(a) Perl_multiconcat_stringify(aTHX_ a)
#define multideref_stringify(a,b) Perl_multideref_stringify(aTHX_ a,b)
Expand Down
29 changes: 5 additions & 24 deletions ext/POSIX/POSIX.xs
Expand Up @@ -3397,30 +3397,18 @@ mblen(s, n = ~0)
OUTPUT:
RETVAL

#if defined(HAS_MBRTOWC) && (defined(USE_ITHREADS) || ! defined(HAS_MBTOWC))
# define USE_MBRTOWC
#else
# undef USE_MBRTOWC
#endif

int
mbtowc(pwc, s, n = ~0)
SV * pwc
SV * s
size_t n
CODE:
RETVAL = -1;
#if defined(HAS_MBTOWC) || defined(HAS_MBRTOWC)
errno = 0;
SvGETMAGIC(s);
if (! SvOK(s)) { /* Initialize state */
#ifdef USE_MBRTOWC
/* Initialize the shift state to all zeros in PL_mbrtowc_ps. */
memzero(&PL_mbrtowc_ps, sizeof(PL_mbrtowc_ps));
RETVAL = 0;
#else
MBTOWC_LOCK;
RETVAL = mbtowc(NULL, NULL, 0);
MBTOWC_UNLOCK;
#endif
mbtowc_(NULL, NULL, 0);
}
else { /* Not resetting state */
wchar_t wc;
Expand All @@ -3433,15 +3421,7 @@ mbtowc(pwc, s, n = ~0)
size_t len;
char * string = SvPV(byte_s, len);
if (n < len) len = n;
#ifdef USE_MBRTOWC
RETVAL = (SSize_t) mbrtowc(&wc, string, len, &PL_mbrtowc_ps);
#else
/* Locking prevents races, but locales can be switched out
* without locking, so this isn't a cure all */
MBTOWC_LOCK;
RETVAL = mbtowc(&wc, string, len);
MBTOWC_UNLOCK;
#endif
RETVAL = mbtowc_(&wc, string, len);
if (RETVAL >= 0) {
sv_setiv_mg(pwc, wc);
}
Expand All @@ -3450,6 +3430,7 @@ mbtowc(pwc, s, n = ~0)
}
}
}
#endif
OUTPUT:
RETVAL

Expand Down
103 changes: 66 additions & 37 deletions locale.c
Expand Up @@ -2622,6 +2622,69 @@ S_save_to_buffer(const char * string, const char **buf, Size_t *buf_size,
return *buf;
}

int
Perl_mbtowc_(pTHX_ const wchar_t * pwc, const char * s, const Size_t len)
{

#if ! defined(HAS_MBRTOWC) && ! defined(HAS_MBTOWC)

PERL_UNUSED_ARG(pwc);
PERL_UNUSED_ARG(s);
PERL_UNUSED_ARG(len);
return -1;

#else /* Below we have some form of mbtowc() */
# if defined(HAS_MBRTOWC) \
&& (defined(USE_LOCALE_THREADS) || ! defined(HAS_MBTOWC))
# define USE_MBRTOWC
# else
# undef USE_MBRTOWC
# endif

int retval = -1;

if (s == NULL) { /* Initialize the shift state to all zeros in
PL_mbrtowc_ps. */

# if defined(USE_MBRTOWC)

memzero(&PL_mbrtowc_ps, sizeof(PL_mbrtowc_ps));
return 0;

# else

MBTOWC_LOCK;
SETERRNO(0, 0);
retval = mbtowc(NULL, NULL, 0);
MBTOWC_UNLOCK;
return retval;

# endif

}

# if defined(USE_MBRTOWC)

SETERRNO(0, 0);
retval = (SSize_t) mbrtowc((wchar_t *) pwc, s, len, &PL_mbrtowc_ps);

# else

/* Locking prevents races, but locales can be switched out without locking,
* so this isn't a cure all */
MBTOWC_LOCK;
SETERRNO(0, 0);
retval = mbtowc((wchar_t *) pwc, s, len);
MBTOWC_UNLOCK;

# endif

return retval;

#endif

}

/*
=for apidoc Perl_langinfo
Expand Down Expand Up @@ -4943,47 +5006,13 @@ Perl__is_cur_LC_category_utf8(pTHX_ int category)
* late adder to C89, so very likely to have it. However, testing has
* shown that, like nl_langinfo() above, there are locales that are not
* strictly UTF-8 that this will return that they are */

{
wchar_t wc;
int len;
dSAVEDERRNO;

# if defined(HAS_MBRTOWC) && defined(USE_LOCALE_THREADS)

mbstate_t ps;

# endif

/* mbrtowc() and mbtowc() convert a byte string to a wide
* character. Feed a byte string to one of them and check that the
* result is the expected Unicode code point */

# if defined(HAS_MBRTOWC) && defined(USE_LOCALE_THREADS)
/* Prefer this function if available, as it's reentrant */

memzero(&ps, sizeof(ps));;
PERL_UNUSED_RESULT(mbrtowc(&wc, NULL, 0, &ps)); /* Reset any shift
state */
SETERRNO(0, 0);
len = mbrtowc(&wc, STR_WITH_LEN(REPLACEMENT_CHARACTER_UTF8), &ps);
SAVE_ERRNO;

# else

MBTOWC_LOCK;
PERL_UNUSED_RESULT(mbtowc(&wc, NULL, 0));/* Reset any shift state */
SETERRNO(0, 0);
len = mbtowc(&wc, STR_WITH_LEN(REPLACEMENT_CHARACTER_UTF8));
SAVE_ERRNO;
MBTOWC_UNLOCK;

# endif

RESTORE_ERRNO;
DEBUG_Lv(PerlIO_printf(Perl_debug_log,
"\treturn from mbtowc; len=%d; code_point=%x; errno=%d\n",
len, (unsigned int) wc, GET_ERRNO));
PERL_UNUSED_RESULT(mbtowc_(NULL, NULL, 0));
len = mbtowc_(&wc, REPLACEMENT_CHARACTER_UTF8,
STRLENs(REPLACEMENT_CHARACTER_UTF8));

is_utf8 = cBOOL( len == STRLENs(REPLACEMENT_CHARACTER_UTF8)
&& wc == (wchar_t) UNICODE_REPLACEMENT);
Expand Down
2 changes: 2 additions & 0 deletions proto.h
Expand Up @@ -2022,6 +2022,8 @@ PERL_CALLCONV Malloc_t Perl_malloc(MEM_SIZE nbytes)

PERL_CALLCONV I32 * Perl_markstack_grow(pTHX);
#define PERL_ARGS_ASSERT_MARKSTACK_GROW
PERL_CALLCONV int Perl_mbtowc_(pTHX_ const wchar_t * pwc, const char * s, const Size_t len);
#define PERL_ARGS_ASSERT_MBTOWC_
PERL_CALLCONV SV* Perl_mess(pTHX_ const char* pat, ...)
__attribute__format__(__printf__,pTHX_1,pTHX_2);
#define PERL_ARGS_ASSERT_MESS \
Expand Down

0 comments on commit 4c305ff

Please sign in to comment.