Skip to content

Commit

Permalink
Move the tests for split /\s/ and Unicode from split.t to split_unico…
Browse files Browse the repository at this point in the history
…de.t

Skip split_unicode.t under minitest, as it uses charnames, which uses
File::Spec, which may not be available.

[Experimentation reveals that, by default, git blame won't attribute lines past
this commit unless --find-copies-harder is used. The alternative approach -
copying t/op/split.t to t/op/split_unicode.t in one commit, then pruning both -
fares no better: by default git blame can't attribute through that *either*.
Again, --find-copies-harder works. Hence the partition is done as this single
commit, since it is simpler and all other things are equal.]
  • Loading branch information
nwc10 committed Mar 6, 2011
1 parent cedc31d commit ab08a36
Show file tree
Hide file tree
Showing 3 changed files with 66 additions and 57 deletions.
1 change: 1 addition & 0 deletions MANIFEST
Expand Up @@ -4950,6 +4950,7 @@ t/op/smartmatch.t See if the ~~ operator works
t/op/sort.t See if sort works
t/op/splice.t See if splice works
t/op/split.t See if split works
t/op/split_unicode.t Test split /\s/ and Unicode
t/op/sprintf2.t See if sprintf works
t/op/sprintf.t See if sprintf works
t/op/srand.t See if srand works
Expand Down
58 changes: 1 addition & 57 deletions t/op/split.t
Expand Up @@ -6,7 +6,7 @@ BEGIN {
require './test.pl';
}

plan tests => 252;
plan tests => 102;

$FS = ':';

Expand Down Expand Up @@ -388,62 +388,6 @@ is($cnt, scalar(@ary));
$x = \$a[2];
is (ref $x, 'SCALAR', '#28938 - garbage after extend');
}
# For each code point in @spaces, check that split treats it as a field
# separator, both with the /\s+/ pattern and with the special ' ' pattern,
# and that split in scalar context reports the same number of fields as
# split in list context.
{
# check the special casing of split /\s/ and unicode
use charnames qw(:full);
# below test data is extracted from
# PropList-5.0.0.txt
# Date: 2006-06-07, 23:22:52 GMT [MD]
#
# Unicode Character Database
# Copyright (c) 1991-2006 Unicode, Inc.
# For terms of use, see http://www.unicode.org/terms_of_use.html
# For documentation, see UCD.html
my @spaces=(
ord("\t"), # Cc <control-0009>
ord("\n"), # Cc <control-000A>
# not PerlSpace # Cc <control-000B>
ord("\f"), # Cc <control-000C>
ord("\r"), # Cc <control-000D>
ord(" "), # Zs SPACE
ord("\N{NEL}"), # Cc <control-0085>
ord("\N{NO-BREAK SPACE}"),
# Zs NO-BREAK SPACE
0x1680, # Zs OGHAM SPACE MARK
0x180E, # Zs MONGOLIAN VOWEL SEPARATOR
0x2000..0x200A, # Zs [11] EN QUAD..HAIR SPACE
0x2028, # Zl LINE SEPARATOR
0x2029, # Zp PARAGRAPH SEPARATOR
0x202F, # Zs NARROW NO-BREAK SPACE
0x205F, # Zs MEDIUM MATHEMATICAL SPACE
0x3000 # Zs IDEOGRAPHIC SPACE
);
#diag "Have @{[0+@spaces]} to test\n";
# Six assertions are made per code point.
foreach my $cp (@spaces) {
my $msg = sprintf "Space: U+%04x", $cp;
my $space = chr($cp);
# U+FFFD is appended and immediately chopped off again; presumably this is
# to make the string hold wide characters so that the low code points
# (e.g. U+0085, U+00A0) are matched with Unicode rules -- TODO confirm.
my $str="A:$space:B\x{FFFD}";
chop $str;

# One separator between two fields: expect exactly "A:" and ":B".
my @res=split(/\s+/,$str);
# Same split in scalar context yields the field count.
my $cnt=split(/\s+/,$str);
ok(@res == 2 && join('-',@res) eq "A:-:B", "$msg - /\\s+/");
is($cnt, scalar(@res), "$msg - /\\s+/ (count)");

# Second string: a leading run of two separators plus an inner run of two.
my $s2 = "$space$space:A:$space$space:B\x{FFFD}";
chop $s2;

# With the ' ' pattern no leading empty field is expected: two fields.
my @r2 = split(' ',$s2);
my $c2 = split(' ',$s2);
ok(@r2 == 2 && join('-', @r2) eq ":A:-:B", "$msg - ' '");
is($c2, scalar(@r2), "$msg - ' ' (count)");

# With /\s+/ the leading separator run yields an empty first field: three fields.
my @r3 = split(/\s+/, $s2);
my $c3 = split(/\s+/, $s2);
ok(@r3 == 3 && join('-', @r3) eq "-:A:-:B", "$msg - /\\s+/ No.2");
is($c3, scalar(@r3), "$msg - /\\s+/ No.2 (count)");
}
}

{
my $src = "ABC \0 FOO \0 XYZ";
Expand Down
64 changes: 64 additions & 0 deletions t/op/split_unicode.t
@@ -0,0 +1,64 @@
#!./perl

# Compile-time setup: load the test helpers, skip the whole file on miniperl,
# and otherwise declare the number of tests.
BEGIN {
# Presumably provides skip_all_if_miniperl() and plan() used just below,
# as well as the ok()/is() calls later in this file.
require './test.pl';
# The rest of the file does "use charnames", which (per the message below)
# relies on File::Spec; this BEGIN block runs before that line is compiled.
skip_all_if_miniperl("no dynamic loading on miniperl, no File::Spec (used by charnames)");
# 25 code points in @spaces, 6 assertions for each.
plan(tests => 150);
}

{
    # Verify that split recognises each whitespace code point below as a
    # separator, for both the /\s+/ pattern and the special ' ' pattern,
    # and that scalar-context split agrees with the list-context field count.
    use charnames qw(:full);

    # below test data is extracted from
    # PropList-5.0.0.txt
    # Date: 2006-06-07, 23:22:52 GMT [MD]
    #
    # Unicode Character Database
    # Copyright (c) 1991-2006 Unicode, Inc.
    # For terms of use, see http://www.unicode.org/terms_of_use.html
    # For documentation, see UCD.html
    #
    # NOTE(review): U+180E is listed as Zs in the Unicode 5.0 data cited
    # above; later Unicode versions are believed to classify it differently
    # -- confirm against the Unicode version of the perl under test.
    my @whitespace_code_points = (
        (
            map { ord($_) } (
                "\t",                   # Cc       <control-0009>
                "\n",                   # Cc       <control-000A>
                                        # (<control-000B> is not PerlSpace)
                "\f",                   # Cc       <control-000C>
                "\r",                   # Cc       <control-000D>
                " ",                    # Zs       SPACE
                "\N{NEL}",              # Cc       <control-0085>
                "\N{NO-BREAK SPACE}",   # Zs       NO-BREAK SPACE
            )
        ),
        0x1680,                         # Zs       OGHAM SPACE MARK
        0x180E,                         # Zs       MONGOLIAN VOWEL SEPARATOR
        0x2000 .. 0x200A,               # Zs  [11] EN QUAD..HAIR SPACE
        0x2028,                         # Zl       LINE SEPARATOR
        0x2029,                         # Zp       PARAGRAPH SEPARATOR
        0x202F,                         # Zs       NARROW NO-BREAK SPACE
        0x205F,                         # Zs       MEDIUM MATHEMATICAL SPACE
        0x3000,                         # Zs       IDEOGRAPHIC SPACE
    );
    #diag "Have @{[0+@spaces]} to test\n";

    for my $code_point (@whitespace_code_points) {
        my $label = sprintf("Space: U+%04x", $code_point);
        my $ws    = chr $code_point;

        # A trailing U+FFFD is added and then chopped off again; presumably
        # this makes the string wide-character so Unicode matching rules
        # apply even to the low code points -- TODO confirm.
        my $single = "A:${ws}:B\x{FFFD}";
        chop $single;

        # One separator between two fields.
        my @fields      = split /\s+/, $single;
        my $field_count = split /\s+/, $single;    # scalar context: count
        ok(
            2 == @fields && "A:-:B" eq join('-', @fields),
            "$label - /\\s+/"
        );
        is($field_count, scalar @fields, "$label - /\\s+/ (count)");

        # Leading run of two separators plus an inner run of two.
        my $doubled = "${ws}${ws}:A:${ws}${ws}:B\x{FFFD}";
        chop $doubled;

        # The ' ' pattern: no leading empty field is expected.
        my @awk_fields = split ' ', $doubled;
        my $awk_count  = split ' ', $doubled;
        ok(
            2 == @awk_fields && ":A:-:B" eq join('-', @awk_fields),
            "$label - ' '"
        );
        is($awk_count, scalar @awk_fields, "$label - ' ' (count)");

        # The /\s+/ pattern: the leading run yields an empty first field.
        my @re_fields = split /\s+/, $doubled;
        my $re_count  = split /\s+/, $doubled;
        ok(
            3 == @re_fields && "-:A:-:B" eq join('-', @re_fields),
            "$label - /\\s+/ No.2"
        );
        is($re_count, scalar @re_fields, "$label - /\\s+/ No.2 (count)");
    }
}

0 comments on commit ab08a36

Please sign in to comment.