Upgraded Encode from 3.10_01 to 3.12

Perl · Aug 9, 2021 · 7167e19 · 7167e19
1 parent c275db8
commit 7167e19
Show file tree

Hide file tree

Showing 9 changed files with 589 additions and 32 deletions.
diff --git a/MANIFEST b/MANIFEST
@@ -636,10 +636,13 @@ cpan/Encode/t/truncated_utf8.t
 cpan/Encode/t/undef.t
 cpan/Encode/t/unibench.pl		benchmark script
 cpan/Encode/t/Unicode.t			test script
+cpan/Encode/t/Unicode_trailing_nul.t
 cpan/Encode/t/use-Encode-Alias.t
 cpan/Encode/t/utf8ref.t			test script
 cpan/Encode/t/utf8strict.t		test script
 cpan/Encode/t/utf8warnings.t
+cpan/Encode/t/whatwg-aliases.json
+cpan/Encode/t/whatwg-aliases.t
 cpan/Encode/t/xml.t
 cpan/Encode/TW/Makefile.PL		Encode extension
 cpan/Encode/TW/TW.pm			Encode extension

diff --git a/cpan/Encode/Encode.pm b/cpan/Encode/Encode.pm
@@ -1,14 +1,13 @@
 #
-# $Id: Encode.pm,v 3.10 2021/05/18 07:42:45 dankogai Exp dankogai $
+# $Id: Encode.pm,v 3.12 2021/08/09 14:17:04 dankogai Exp dankogai $
 #
 package Encode;
 use strict;
 use warnings;
 use constant DEBUG => !!$ENV{PERL_ENCODE_DEBUG};
 our $VERSION;
 BEGIN {
-    $VERSION = "3.10_01";
-    $VERSION = eval $VERSION;
+    $VERSION = sprintf "%d.%02d", q$Revision: 3.12 $ =~ /(\d+)/g;
     require XSLoader;
     XSLoader::load( __PACKAGE__, $VERSION );
 }

diff --git a/cpan/Encode/Unicode/Unicode.pm b/cpan/Encode/Unicode/Unicode.pm
@@ -3,7 +3,7 @@ package Encode::Unicode;
 use strict;
 use warnings;
 
-our $VERSION = do { my @r = ( q$Revision: 2.18 $ =~ /\d+/g ); sprintf "%d." . "%02d" x $#r, @r };
+our $VERSION = do { my @r = ( q$Revision: 2.19 $ =~ /\d+/g ); sprintf "%d." . "%02d" x $#r, @r };
 
 use XSLoader;
 XSLoader::load( __PACKAGE__, $VERSION );
@@ -259,7 +259,7 @@ Consider that "division by zero" of Encode :)
 =head1 SEE ALSO
 
 L<Encode>, L<Encode::Unicode::UTF7>, L<http://www.unicode.org/glossary/>,
-L<http://www.unicode.org/unicode/faq/utf_bom.html>,
+L<http://www.unicode.org/faq/utf_bom.html>,
 
 RFC 2781 L<http://www.ietf.org/rfc/rfc2781.txt>,
 

diff --git a/cpan/Encode/Unicode/Unicode.xs b/cpan/Encode/Unicode/Unicode.xs
@@ -1,5 +1,5 @@
 /*
- $Id: Unicode.xs,v 2.19 2019/01/21 03:09:59 dankogai Exp $
+ $Id: Unicode.xs,v 2.20 2021/07/23 02:26:54 dankogai Exp $
  */
 
 #define IN_UNICODE_XS
@@ -361,6 +361,10 @@ CODE:
     }
 
     if (!temp_result) shrink_buffer(result);
+
+    /* Make sure we have a trailing NUL: */
+    *SvEND(result) = '\0';
+
     if (SvTAINTED(str)) SvTAINTED_on(result); /* propagate taintedness */
     XSRETURN(1);
 }

diff --git a/cpan/Encode/lib/Encode/GSM0338.pm b/cpan/Encode/lib/Encode/GSM0338.pm
@@ -1,5 +1,5 @@
 #
-# $Id: GSM0338.pm,v 2.9 2020/12/02 01:28:17 dankogai Exp dankogai $
+# $Id: GSM0338.pm,v 2.10 2021/05/24 10:56:53 dankogai Exp $
 #
 package Encode::GSM0338;
 
@@ -8,7 +8,7 @@ use warnings;
 use Carp;
 
 use vars qw($VERSION);
-$VERSION = do { my @r = ( q$Revision: 2.9 $ =~ /\d+/g ); sprintf "%d." . "%02d" x $#r, @r };
+$VERSION = do { my @r = ( q$Revision: 2.10 $ =~ /\d+/g ); sprintf "%d." . "%02d" x $#r, @r };
 
 use Encode qw(:fallbacks);
 
@@ -159,12 +159,12 @@ our %UNI2GSM = (
     "\x{20AC}" => "\x1B\x65",    # EURO SIGN
 );
 our %GSM2UNI = reverse %UNI2GSM;
-our $ESC    = "\x1b";
+our $ESC     = "\x1b";
 
 sub decode ($$;$) {
     my ( $obj, $bytes, $chk ) = @_;
     return undef unless defined $bytes;
-    my $str = substr($bytes, 0, 0); # to propagate taintedness;
+    my $str = substr( $bytes, 0, 0 );    # to propagate taintedness;
     while ( length $bytes ) {
         my $seq = '';
         my $c;
@@ -173,53 +173,57 @@ sub decode ($$;$) {
             $seq .= $c;
         } while ( length $bytes and $c eq $ESC );
         my $u =
-            exists $GSM2UNI{$seq}
-            ? $GSM2UNI{$seq}
-            : ($chk && ref $chk eq 'CODE')
-            ? $chk->( unpack 'C*', $seq )
-            : "\x{FFFD}";
+            exists $GSM2UNI{$seq}          ? $GSM2UNI{$seq}
+          : ( $chk && ref $chk eq 'CODE' ) ? $chk->( unpack 'C*', $seq )
+          :                                  "\x{FFFD}";
         if ( not exists $GSM2UNI{$seq} and $chk and not ref $chk ) {
-            if ( substr($seq, 0, 1) eq $ESC and ($chk & Encode::STOP_AT_PARTIAL) ) {
+            if ( substr( $seq, 0, 1 ) eq $ESC
+                and ( $chk & Encode::STOP_AT_PARTIAL ) )
+            {
                 $bytes .= $seq;
                 last;
             }
-            croak join( '', map { sprintf "\\x%02X", $_ } unpack 'C*', $seq ) . ' does not map to Unicode' if $chk & Encode::DIE_ON_ERR;
-            carp join( '', map { sprintf "\\x%02X", $_ } unpack 'C*', $seq ) . ' does not map to Unicode' if $chk & Encode::WARN_ON_ERR;
-            if ($chk & Encode::RETURN_ON_ERR) {
+            croak join( '', map { sprintf "\\x%02X", $_ } unpack 'C*', $seq )
+              . ' does not map to Unicode'
+              if $chk & Encode::DIE_ON_ERR;
+            carp join( '', map { sprintf "\\x%02X", $_ } unpack 'C*', $seq )
+              . ' does not map to Unicode'
+              if $chk & Encode::WARN_ON_ERR;
+            if ( $chk & Encode::RETURN_ON_ERR ) {
                 $bytes .= $seq;
                 last;
             }
         }
         $str .= $u;
     }
-    $_[1] = $bytes if not ref $chk and $chk and !($chk & Encode::LEAVE_SRC);
+    $_[1] = $bytes if not ref $chk and $chk and !( $chk & Encode::LEAVE_SRC );
     return $str;
 }
 
 sub encode($$;$) {
     my ( $obj, $str, $chk ) = @_;
     return undef unless defined $str;
-    my $bytes = substr($str, 0, 0); # to propagate taintedness
+    my $bytes = substr( $str, 0, 0 );    # to propagate taintedness
     while ( length $str ) {
         my $u = substr( $str, 0, 1, '' );
         my $c;
         my $seq =
-            exists $UNI2GSM{$u}
-            ? $UNI2GSM{$u}
-            : ($chk && ref $chk eq 'CODE')
-            ? $chk->( ord($u) )
-            : $UNI2GSM{'?'};
+            exists $UNI2GSM{$u}            ? $UNI2GSM{$u}
+          : ( $chk && ref $chk eq 'CODE' ) ? $chk->( ord($u) )
+          :                                  $UNI2GSM{'?'};
         if ( not exists $UNI2GSM{$u} and $chk and not ref $chk ) {
-            croak sprintf( "\\x{%04x} does not map to %s", ord($u), $obj->name ) if $chk & Encode::DIE_ON_ERR;
-            carp sprintf( "\\x{%04x} does not map to %s", ord($u), $obj->name ) if $chk & Encode::WARN_ON_ERR;
-            if ($chk & Encode::RETURN_ON_ERR) {
+            croak sprintf( "\\x{%04x} does not map to %s", ord($u), $obj->name )
+              if $chk & Encode::DIE_ON_ERR;
+            carp sprintf( "\\x{%04x} does not map to %s", ord($u), $obj->name )
+              if $chk & Encode::WARN_ON_ERR;
+            if ( $chk & Encode::RETURN_ON_ERR ) {
                 $str .= $u;
                 last;
             }
         }
         $bytes .= $seq;
     }
-    $_[1] = $str if not ref $chk and $chk and !($chk & Encode::LEAVE_SRC);
+    $_[1] = $str if not ref $chk and $chk and !( $chk & Encode::LEAVE_SRC );
     return $bytes;
 }
 

diff --git a/cpan/Encode/t/Unicode.t b/cpan/Encode/t/Unicode.t
@@ -1,5 +1,5 @@
 #
-# $Id: Unicode.t,v 2.3 2012/08/05 23:08:49 dankogai Exp $
+# $Id: Unicode.t,v 2.4 2021/07/23 02:26:54 dankogai Exp $
 #
 # This script is written entirely in ASCII, even though quoted literals
 # do include non-BMP unicode characters -- Are you happy, jhi?
@@ -25,7 +25,7 @@ use Encode qw(encode decode find_encoding);
 
 #
 # see
-# http://www.unicode.org/unicode/reports/tr19/
+# http://www.unicode.org/reports/tr19/
 #
 
 my $dankogai   = "\x{5c0f}\x{98fc}\x{3000}\x{5f3e}";

diff --git a/cpan/Encode/t/Unicode_trailing_nul.t b/cpan/Encode/t/Unicode_trailing_nul.t
@@ -0,0 +1,26 @@
+use strict;
+use Test::More;
+
+use Encode;
+use File::Temp;
+use File::Spec;
+
+# This test relies on https://github.com/Perl/perl5/issues/10623;
+# if that bug is ever fixed then this test may never fail again.
+
+my $foo = Encode::decode("UTF-16LE", "/\0v\0a\0r\0/\0f\0f\0f\0f\0f\0f\0/\0u\0s\0e\0r\0s\0/\0s\0u\0p\0e\0r\0m\0a\0n\0");
+
+my ($fh, $path) = File::Temp::tempfile( CLEANUP => 1 );
+
+diag "temp file: $path";
+
+# Perl gives the internal PV to exec .. which is buggy/wrong but
+# useful here:
+system( $^X, '-e', "open my \$fh, '>>', '$path' or die \$!; print {\$fh} \$ARGV[0]", $foo );
+die if $?;
+
+my $output = do { local $/; <$fh> };
+
+is( $output, "/var/ffffff/users/superman", 'UTF-16 decodes with trailing NUL' );
+
+done_testing();