Skip to content

Commit

Permalink
Upgraded Encode from 3.10_01 to 3.12
Browse files Browse the repository at this point in the history
  • Loading branch information
rjbs committed Aug 9, 2021
1 parent c275db8 commit 7167e19
Show file tree
Hide file tree
Showing 9 changed files with 589 additions and 32 deletions.
3 changes: 3 additions & 0 deletions MANIFEST
Expand Up @@ -636,10 +636,13 @@ cpan/Encode/t/truncated_utf8.t
cpan/Encode/t/undef.t
cpan/Encode/t/unibench.pl benchmark script
cpan/Encode/t/Unicode.t test script
cpan/Encode/t/Unicode_trailing_nul.t
cpan/Encode/t/use-Encode-Alias.t
cpan/Encode/t/utf8ref.t test script
cpan/Encode/t/utf8strict.t test script
cpan/Encode/t/utf8warnings.t
cpan/Encode/t/whatwg-aliases.json
cpan/Encode/t/whatwg-aliases.t
cpan/Encode/t/xml.t
cpan/Encode/TW/Makefile.PL Encode extension
cpan/Encode/TW/TW.pm Encode extension
Expand Down
5 changes: 2 additions & 3 deletions cpan/Encode/Encode.pm
@@ -1,14 +1,13 @@
#
# $Id: Encode.pm,v 3.10 2021/05/18 07:42:45 dankogai Exp dankogai $
# $Id: Encode.pm,v 3.12 2021/08/09 14:17:04 dankogai Exp dankogai $
#
package Encode;
use strict;
use warnings;
use constant DEBUG => !!$ENV{PERL_ENCODE_DEBUG};
our $VERSION;
BEGIN {
$VERSION = "3.10_01";
$VERSION = eval $VERSION;
$VERSION = sprintf "%d.%02d", q$Revision: 3.12 $ =~ /(\d+)/g;
require XSLoader;
XSLoader::load( __PACKAGE__, $VERSION );
}
Expand Down
4 changes: 2 additions & 2 deletions cpan/Encode/Unicode/Unicode.pm
Expand Up @@ -3,7 +3,7 @@ package Encode::Unicode;
use strict;
use warnings;

our $VERSION = do { my @r = ( q$Revision: 2.18 $ =~ /\d+/g ); sprintf "%d." . "%02d" x $#r, @r };
our $VERSION = do { my @r = ( q$Revision: 2.19 $ =~ /\d+/g ); sprintf "%d." . "%02d" x $#r, @r };

use XSLoader;
XSLoader::load( __PACKAGE__, $VERSION );
Expand Down Expand Up @@ -259,7 +259,7 @@ Consider that "division by zero" of Encode :)
=head1 SEE ALSO
L<Encode>, L<Encode::Unicode::UTF7>, L<http://www.unicode.org/glossary/>,
L<http://www.unicode.org/unicode/faq/utf_bom.html>,
L<http://www.unicode.org/faq/utf_bom.html>,
RFC 2781 L<http://www.ietf.org/rfc/rfc2781.txt>,
Expand Down
6 changes: 5 additions & 1 deletion cpan/Encode/Unicode/Unicode.xs
@@ -1,5 +1,5 @@
/*
$Id: Unicode.xs,v 2.19 2019/01/21 03:09:59 dankogai Exp $
$Id: Unicode.xs,v 2.20 2021/07/23 02:26:54 dankogai Exp $
*/

#define IN_UNICODE_XS
Expand Down Expand Up @@ -361,6 +361,10 @@ CODE:
}

if (!temp_result) shrink_buffer(result);

/* Make sure we have a trailing NUL: */
*SvEND(result) = '\0';

if (SvTAINTED(str)) SvTAINTED_on(result); /* propagate taintedness */
XSRETURN(1);
}
Expand Down
52 changes: 28 additions & 24 deletions cpan/Encode/lib/Encode/GSM0338.pm
@@ -1,5 +1,5 @@
#
# $Id: GSM0338.pm,v 2.9 2020/12/02 01:28:17 dankogai Exp dankogai $
# $Id: GSM0338.pm,v 2.10 2021/05/24 10:56:53 dankogai Exp $
#
package Encode::GSM0338;

Expand All @@ -8,7 +8,7 @@ use warnings;
use Carp;

use vars qw($VERSION);
$VERSION = do { my @r = ( q$Revision: 2.9 $ =~ /\d+/g ); sprintf "%d." . "%02d" x $#r, @r };
$VERSION = do { my @r = ( q$Revision: 2.10 $ =~ /\d+/g ); sprintf "%d." . "%02d" x $#r, @r };

use Encode qw(:fallbacks);

Expand Down Expand Up @@ -159,12 +159,12 @@ our %UNI2GSM = (
"\x{20AC}" => "\x1B\x65", # EURO SIGN
);
our %GSM2UNI = reverse %UNI2GSM;
our $ESC = "\x1b";
our $ESC = "\x1b";

sub decode ($$;$) {
my ( $obj, $bytes, $chk ) = @_;
return undef unless defined $bytes;
my $str = substr($bytes, 0, 0); # to propagate taintedness;
my $str = substr( $bytes, 0, 0 ); # to propagate taintedness;
while ( length $bytes ) {
my $seq = '';
my $c;
Expand All @@ -173,53 +173,57 @@ sub decode ($$;$) {
$seq .= $c;
} while ( length $bytes and $c eq $ESC );
my $u =
exists $GSM2UNI{$seq}
? $GSM2UNI{$seq}
: ($chk && ref $chk eq 'CODE')
? $chk->( unpack 'C*', $seq )
: "\x{FFFD}";
exists $GSM2UNI{$seq} ? $GSM2UNI{$seq}
: ( $chk && ref $chk eq 'CODE' ) ? $chk->( unpack 'C*', $seq )
: "\x{FFFD}";
if ( not exists $GSM2UNI{$seq} and $chk and not ref $chk ) {
if ( substr($seq, 0, 1) eq $ESC and ($chk & Encode::STOP_AT_PARTIAL) ) {
if ( substr( $seq, 0, 1 ) eq $ESC
and ( $chk & Encode::STOP_AT_PARTIAL ) )
{
$bytes .= $seq;
last;
}
croak join( '', map { sprintf "\\x%02X", $_ } unpack 'C*', $seq ) . ' does not map to Unicode' if $chk & Encode::DIE_ON_ERR;
carp join( '', map { sprintf "\\x%02X", $_ } unpack 'C*', $seq ) . ' does not map to Unicode' if $chk & Encode::WARN_ON_ERR;
if ($chk & Encode::RETURN_ON_ERR) {
croak join( '', map { sprintf "\\x%02X", $_ } unpack 'C*', $seq )
. ' does not map to Unicode'
if $chk & Encode::DIE_ON_ERR;
carp join( '', map { sprintf "\\x%02X", $_ } unpack 'C*', $seq )
. ' does not map to Unicode'
if $chk & Encode::WARN_ON_ERR;
if ( $chk & Encode::RETURN_ON_ERR ) {
$bytes .= $seq;
last;
}
}
$str .= $u;
}
$_[1] = $bytes if not ref $chk and $chk and !($chk & Encode::LEAVE_SRC);
$_[1] = $bytes if not ref $chk and $chk and !( $chk & Encode::LEAVE_SRC );
return $str;
}

sub encode($$;$) {
my ( $obj, $str, $chk ) = @_;
return undef unless defined $str;
my $bytes = substr($str, 0, 0); # to propagate taintedness
my $bytes = substr( $str, 0, 0 ); # to propagate taintedness
while ( length $str ) {
my $u = substr( $str, 0, 1, '' );
my $c;
my $seq =
exists $UNI2GSM{$u}
? $UNI2GSM{$u}
: ($chk && ref $chk eq 'CODE')
? $chk->( ord($u) )
: $UNI2GSM{'?'};
exists $UNI2GSM{$u} ? $UNI2GSM{$u}
: ( $chk && ref $chk eq 'CODE' ) ? $chk->( ord($u) )
: $UNI2GSM{'?'};
if ( not exists $UNI2GSM{$u} and $chk and not ref $chk ) {
croak sprintf( "\\x{%04x} does not map to %s", ord($u), $obj->name ) if $chk & Encode::DIE_ON_ERR;
carp sprintf( "\\x{%04x} does not map to %s", ord($u), $obj->name ) if $chk & Encode::WARN_ON_ERR;
if ($chk & Encode::RETURN_ON_ERR) {
croak sprintf( "\\x{%04x} does not map to %s", ord($u), $obj->name )
if $chk & Encode::DIE_ON_ERR;
carp sprintf( "\\x{%04x} does not map to %s", ord($u), $obj->name )
if $chk & Encode::WARN_ON_ERR;
if ( $chk & Encode::RETURN_ON_ERR ) {
$str .= $u;
last;
}
}
$bytes .= $seq;
}
$_[1] = $str if not ref $chk and $chk and !($chk & Encode::LEAVE_SRC);
$_[1] = $str if not ref $chk and $chk and !( $chk & Encode::LEAVE_SRC );
return $bytes;
}

Expand Down
4 changes: 2 additions & 2 deletions cpan/Encode/t/Unicode.t
@@ -1,5 +1,5 @@
#
# $Id: Unicode.t,v 2.3 2012/08/05 23:08:49 dankogai Exp $
# $Id: Unicode.t,v 2.4 2021/07/23 02:26:54 dankogai Exp $
#
# This script is written entirely in ASCII, even though quoted literals
# do include non-BMP unicode characters -- Are you happy, jhi?
Expand All @@ -25,7 +25,7 @@ use Encode qw(encode decode find_encoding);

#
# see
# http://www.unicode.org/unicode/reports/tr19/
# http://www.unicode.org/reports/tr19/
#

my $dankogai = "\x{5c0f}\x{98fc}\x{3000}\x{5f3e}";
Expand Down
26 changes: 26 additions & 0 deletions cpan/Encode/t/Unicode_trailing_nul.t
@@ -0,0 +1,26 @@
use strict;
use Test::More;

use Encode;
use File::Temp;
use File::Spec;

# This test relies on https://github.com/Perl/perl5/issues/10623;
# if that bug is ever fixed then this test may never fail again.

my $foo = Encode::decode("UTF-16LE", "/\0v\0a\0r\0/\0f\0f\0f\0f\0f\0f\0/\0u\0s\0e\0r\0s\0/\0s\0u\0p\0e\0r\0m\0a\0n\0");

my ($fh, $path) = File::Temp::tempfile( CLEANUP => 1 );

diag "temp file: $path";

# Perl gives the internal PV to exec .. which is buggy/wrong but
# useful here:
system( $^X, '-e', "open my \$fh, '>>', '$path' or die \$!; print {\$fh} \$ARGV[0]", $foo );
die if $?;

my $output = do { local $/; <$fh> };

is( $output, "/var/ffffff/users/superman", 'UTF-16 decodes with trailing NUL' );

done_testing();

0 comments on commit 7167e19

Please sign in to comment.