Skip to content

Commit

Permalink
57
Browse files Browse the repository at this point in the history
  • Loading branch information
khwilliamson committed May 9, 2021
1 parent 41a402a commit 679fc1c
Show file tree
Hide file tree
Showing 18 changed files with 1,515 additions and 877 deletions.
24 changes: 14 additions & 10 deletions embed.fnc
Expand Up @@ -3265,18 +3265,19 @@ S |const char*|my_langinfo_i|const int item \
S |HV * |get_nl_item_from_localeconv \
|NN const struct lconv *lcbuf \
|const int item \
|const int unused
|const int locale_is_utf8
# endif
# endif
ST |const char *|save_to_buffer|NULLOK const char * string \
|NULLOK const char **buf \
|NULLOK Size_t *buf_size
:# ifndef HAS_POSIX_2008_LOCALE
# ifndef HAS_POSIX_2008_LOCALE
S |const char*|stdize_locale|const int category \
|NULLOK const char* input_locale \
|NULLOK const char **buf \
|NULLOK Size_t *buf_size
:# endif
|NULLOK Size_t *buf_size \
|line_t caller_line
# endif
# ifdef USE_QUERYLOCALE
S |const char *|calculate_LC_ALL|const locale_t cur_obj
# else
Expand All @@ -3291,10 +3292,12 @@ Sr |void |setlocale_failure_panic_i|const unsigned int cat_index \
|const line_t caller_1_line
S |void |new_numeric |NN const char* newnum
S |void |new_LC_ALL |NULLOK const char* unused
S |const char *|toggle_locale_i|const unsigned switch_cat_index \
|NN const char * new_locale
S |void |restore_toggled_locale_i|const unsigned cat_index \
|NULLOK const char * original_locale
So |const char *|toggle_locale_i|const unsigned switch_cat_index \
|NN const char * new_locale \
|const line_t caller_line
So |void |restore_toggled_locale_i|const unsigned cat_index \
|NULLOK const char * original_locale \
|const line_t caller_line
S |bool |is_locale_utf8 |NN const char * locale
ST |bool |is_codeset_name_UTF8|NN const char * name
# ifdef USE_POSIX_2008_LOCALE
Expand Down Expand Up @@ -3329,8 +3332,9 @@ S |void |less_dicey_void_setlocale_i \
# endif
# ifdef WIN32
S |char* |win32_setlocale|int category|NULLOK const char* locale
pTC |wchar_t *|Win_utf8_string_to_wstring|NULLOK const char * utf8_string
pTC |char * |Win_wstring_to_utf8_string|NULLOK const wchar_t * wstring
ST |wchar_t *|Win_byte_string_to_wstring|UINT code_page \
|NULLOK const char * byte_string
ST |char * |Win_wstring_to_utf8_string|NULLOK const wchar_t * wstring
# endif
# ifdef DEBUGGING
S |void |print_collxfrm_input_and_return \
Expand Down
20 changes: 9 additions & 11 deletions embed.h
Expand Up @@ -864,14 +864,6 @@
#define warn_nocontext Perl_warn_nocontext
#define warner_nocontext Perl_warner_nocontext
#endif
#if defined(PERL_IN_LOCALE_C)
# if defined(USE_LOCALE)
# if defined(WIN32)
#define Win_utf8_string_to_wstring Perl_Win_utf8_string_to_wstring
#define Win_wstring_to_utf8_string Perl_Win_wstring_to_utf8_string
# endif
# endif
#endif
#if defined(PERL_USE_3ARG_SIGHANDLER)
#define csighandler Perl_csighandler
#endif
Expand Down Expand Up @@ -1555,6 +1547,13 @@
#define dooneliner(a,b) S_dooneliner(aTHX_ a,b)
# endif
# endif
# if !defined(HAS_POSIX_2008_LOCALE)
# if defined(PERL_IN_LOCALE_C)
# if defined(USE_LOCALE)
#define stdize_locale(a,b,c,d,e) S_stdize_locale(aTHX_ a,b,c,d,e)
# endif
# endif
# endif
# if !defined(HAS_RENAME)
#define same_dirent(a,b) Perl_same_dirent(aTHX_ a,b)
# endif
Expand Down Expand Up @@ -1749,11 +1748,8 @@
#define new_collate(a) S_new_collate(aTHX_ a)
#define new_ctype(a) S_new_ctype(aTHX_ a)
#define new_numeric(a) S_new_numeric(aTHX_ a)
#define restore_toggled_locale_i(a,b) S_restore_toggled_locale_i(aTHX_ a,b)
#define save_to_buffer S_save_to_buffer
#define setlocale_failure_panic_i(a,b,c,d,e) S_setlocale_failure_panic_i(aTHX_ a,b,c,d,e)
#define stdize_locale(a,b,c,d) S_stdize_locale(aTHX_ a,b,c,d)
#define toggle_locale_i(a,b) S_toggle_locale_i(aTHX_ a,b)
# if defined(USE_POSIX_2008_LOCALE)
#define emulate_setlocale_i(a,b,c,d) S_emulate_setlocale_i(aTHX_ a,b,c,d)
#define find_locale_from_environment(a) S_find_locale_from_environment(aTHX_ a)
Expand All @@ -1766,6 +1762,8 @@
#define calculate_LC_ALL(a) S_calculate_LC_ALL(aTHX_ a)
# endif
# if defined(WIN32)
#define Win_byte_string_to_wstring S_Win_byte_string_to_wstring
#define Win_wstring_to_utf8_string S_Win_wstring_to_utf8_string
#define win32_setlocale(a,b) S_win32_setlocale(aTHX_ a,b)
# endif
# endif
Expand Down
2 changes: 2 additions & 0 deletions embedvar.h
Expand Up @@ -378,6 +378,8 @@
#define PL_watchaddr (vTHX->Iwatchaddr)
#define PL_watchok (vTHX->Iwatchok)
#define PL_wcrtomb_ps (vTHX->Iwcrtomb_ps)
#define PL_win32setlocale_buf (vTHX->Iwin32setlocale_buf)
#define PL_win32setlocale_bufsize (vTHX->Iwin32setlocale_bufsize)
#define PL_xsubfilename (vTHX->Ixsubfilename)

#endif /* MULTIPLICITY */
Expand Down
94 changes: 81 additions & 13 deletions ext/I18N-Langinfo/t/Langinfo.t
Expand Up @@ -13,6 +13,9 @@ my @times = qw( MON_1 MON_2 MON_3 MON_4 MON_5 MON_6 MON_7
my @constants = qw(ABDAY_1 DAY_1 ABMON_1 RADIXCHAR AM_STR THOUSEP D_T_FMT
D_FMT T_FMT);
push @constants, @times;
my $debug = 0; #($^O =~ /cygwin | mswin /xi);
local $^D = 0x04000000|0x00100000 if $debug;
my $eval_debug = 'local $^D = 0x04000000|0x00100000;' if $debug;

my %want =
(
Expand All @@ -30,9 +33,68 @@ if ( $Config{osname} !~ / netbsd /ix
$want{MON_1} = "January";
}

sub disp_str ($) {
my $string = shift;

# Displays the string unambiguously. ASCII printables are always output
# as-is, though perhaps separated by blanks from other characters. If
# entirely printable ASCII, just returns the string. Otherwise if valid
# UTF-8 it uses the character names for non-printable-ASCII. Otherwise it
# outputs hex for each non-ASCII-printable byte.

return $string if $string =~ / ^ [[:print:]]* $/xa;

my $result = "";
my $prev_was_punct = 1; # Beginning is considered punct
if (utf8::valid($string) && utf8::is_utf8($string)) {
use charnames ();
foreach my $char (split "", $string) {

# Keep punctuation adjacent to other characters; otherwise
# separate them with a blank
if ($char =~ /[[:punct:]]/a) {
$result .= $char;
$prev_was_punct = 1;
}
elsif ($char =~ /[[:print:]]/a) {
$result .= " " unless $prev_was_punct;
$result .= $char;
$prev_was_punct = 0;
}
else {
$result .= " " unless $prev_was_punct;
my $name = charnames::viacode(ord $char);
$result .= (defined $name) ? $name : ':unknown:';
$prev_was_punct = 0;
}
}
}
else {
use bytes;
foreach my $char (split "", $string) {
if ($char =~ /[[:punct:]]/a) {
$result .= $char;
$prev_was_punct = 1;
}
elsif ($char =~ /[[:print:]]/a) {
$result .= " " unless $prev_was_punct;
$result .= $char;
$prev_was_punct = 0;
}
else {
$result .= " " unless $prev_was_punct;
$result .= sprintf("%02X", ord $char);
$prev_was_punct = 0;
}
}
}

return $result;
}

my @want = sort keys %want;

plan tests => 1 + 3 * @constants + keys(@want) + 1 + 2;
#plan tests => 1 + 3 * @constants + keys(@want) + 1 + 2;

use_ok('I18N::Langinfo', 'langinfo', @constants, 'CRNCYSTR');

Expand Down Expand Up @@ -86,12 +148,18 @@ SKIP: {
my $found_time = 0;
my $found_monetary = 0;
my @locales = find_locales( [ 'LC_TIME', 'LC_CTYPE', 'LC_MONETARY' ]);
my @utf8_locales = find_utf8_ctype_locales(\@locales);

foreach my $utf8_locale (@utf8_locales) {

while (defined (my $utf8_locale = find_utf8_ctype_locale(\@locales))) {
if (! $found_time) {
setlocale(&LC_TIME, $utf8_locale);
print STDERR __FILE__, ": ", __LINE__, ": calling setlocale(LC_TIME, $utf8_locale)\n" if $debug;
my $set_ret = setlocale(&LC_TIME, $utf8_locale);
print STDERR __FILE__, ": ", __LINE__, ": setlocale returned '$set_ret'\n" if $debug;
foreach my $time_item (@times) {
my $eval_string = "langinfo(&$time_item)";
print STDERR __FILE__, ": ", __LINE__, ": ", setlocale(&LC_TIME), "\n" if $debug;
my $eval_string = "$eval_debug langinfo(&$time_item)";
print STDERR __FILE__, ": ", __LINE__, ": ", setlocale(&LC_TIME), "\n" if $debug;
my $time_name = eval $eval_string;
if ($@) {
fail("'$eval_string' failed: $@");
Expand All @@ -107,16 +175,18 @@ SKIP: {
}

if ($time_name =~ /\P{ASCII}/) {
ok(utf8::is_utf8($time_name), "The name for '$time_item' in $utf8_locale is a UTF8 string");
$found_time = 1;
ok(utf8::is_utf8($time_name), "The name for '$time_item' in $utf8_locale is a UTF8 string. Got:\n" . disp_str($time_name));
#$found_time = 1;
last;
}
}
}

if (! $found_monetary) {
print STDERR __FILE__, ": ", __LINE__, ": calling setlocale(LC_MONETARY, $utf8_locale)\n" if $debug;
setlocale(&LC_MONETARY, $utf8_locale);
my $eval_string = "langinfo(&CRNCYSTR)";
print STDERR __FILE__, ": ", __LINE__, ": setlocale returned\n" if $debug;
my $eval_string = "$eval_debug langinfo(&CRNCYSTR)";
my $symbol = eval $eval_string;
if ($@) {
fail("'$eval_string' failed: $@");
Expand All @@ -127,16 +197,12 @@ SKIP: {
last SKIP;
}
if ($symbol =~ /\P{ASCII}/) {
ok(utf8::is_utf8($symbol), "The name for 'CRNCYSTR' in $utf8_locale is a UTF8 string");
$found_monetary = 1;
ok(utf8::is_utf8($symbol), "The name for 'CRNCYSTR' in $utf8_locale is a UTF8 string. Got:\n" . disp_str($symbol));
#$found_monetary = 1;
}
}

last if $found_monetary && $found_time;

# Remove this locale from the list, and loop to find another utf8
# locale
@locales = grep { $_ ne $utf8_locale } @locales;
}

if ($found_time + $found_monetary < 2) {
Expand All @@ -149,3 +215,5 @@ SKIP: {
skip("Couldn't find a locale with a non-ascii $message", 2 - $found_time - $found_monetary);
}
}

done_testing();
4 changes: 2 additions & 2 deletions ext/POSIX/POSIX.xs
Expand Up @@ -3181,7 +3181,7 @@ mblen(s, n = ~0)
}
else {
size_t len;
char * string = SvPV(byte_s, len);
char * string = SvPVbyte(byte_s, len);
if (n < len) len = n;
#ifdef USE_MBRLEN
MBRLEN_LOCK_;
Expand Down Expand Up @@ -3223,7 +3223,7 @@ mbtowc(pwc, s, n = ~0)
}
else {
size_t len;
char * string = SvPV(byte_s, len);
char * string = SvPVbyte(byte_s, len);
if (n < len) len = n;
RETVAL = mbtowc_(&wc, string, len);
if (RETVAL >= 0) {
Expand Down
9 changes: 7 additions & 2 deletions intrpvar.h
Expand Up @@ -724,9 +724,9 @@ PERLVAR(I, padix_floor, PADOFFSET) /* how low may inner block reset padix */
PERLVARA(I, curlocales, 12, const char *)

#endif
#ifndef USE_THREAD_SAFE_LOCALE_EMULATION

PERLVARI(I, perl_controls_locale, bool, 1)
#endif

#ifdef USE_LOCALE_COLLATE

/* The emory needed to store the collxfrm transformation of a string with
Expand Down Expand Up @@ -756,6 +756,11 @@ PERLVARI(I, setlocale_bufsize, Size_t, 0)
PERLVARI(I, stdize_locale_buf, const char *, NULL)
PERLVARI(I, stdize_locale_bufsize, Size_t, 0)

#ifdef WIN32
PERLVARI(I, win32setlocale_buf, const char *, NULL)
PERLVARI(I, win32setlocale_bufsize, Size_t, 0)
#endif

#ifdef PERL_SAWAMPERSAND
PERLVAR(I, sawampersand, U8) /* must save all match strings */
#endif
Expand Down
4 changes: 2 additions & 2 deletions lib/locale.t
Expand Up @@ -59,8 +59,8 @@ use I18N::Langinfo qw(langinfo CODESET CRNCYSTR RADIXCHAR);
# =1 adds debugging output; =2 increases the verbosity somewhat
our $debug = $ENV{PERL_DEBUG_FULL_TEST} // 0;
if ($^O =~ /cygwin | darwin /xi) {
$debug = 1 ;
$^D = 0x04000000|0x00100000;# if $^O eq 'cygwin';
$debug = 0; # 1 ;
$^D = 0x04000000|0x00100000 if $debug;# if $^O eq 'cygwin';
}

# Certain tests have been shown to be problematical for a few locales. Don't
Expand Down

0 comments on commit 679fc1c

Please sign in to comment.