Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Move the tests for split /\s/ and Unicode from split.t to split_unicode.t
Skip split_unicode.t under minitest, as it uses charnames, which uses File::Spec, which may not be available. [Experimentation reveals that git blame by default won't attribute lines past this commit, unless --find-copies-harder is used. The alternative approach, copy t/op/split.t as t/op/split_unicode.t as 1 commit, then prune both, fares no better - by default git blame can't attribute through that *either*. Again, --find-copies-harder works. Hence, do the partition as this one commit, as it is simpler, and all other things are equal.]
- Loading branch information
Showing 3 changed files with 66 additions and 57 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,64 @@ | ||
#!./perl | ||
|
||
# Compile-time setup: load the shared test helpers, bail out under miniperl, and declare the plan. | ||
BEGIN { | ||
require './test.pl'; | ||
# Presumably skips the whole file when run under miniperl (helper comes from test.pl; the message gives the reason) -- TODO confirm. | ||
skip_all_if_miniperl("no dynamic loading on miniperl, no File::Spec (used by charnames)"); | ||
# 150 = 25 whitespace code points in @spaces below x 6 assertions per code point. | ||
plan(tests => 150); | ||
} | ||
|
||
# Checks that split treats every listed whitespace code point as a separator, for both /\s+/ and the special ' ' pattern. | ||
{ | ||
# check the special casing of split /\s/ and unicode | ||
use charnames qw(:full); | ||
# below test data is extracted from | ||
# PropList-5.0.0.txt | ||
# Date: 2006-06-07, 23:22:52 GMT [MD] | ||
# | ||
# Unicode Character Database | ||
# Copyright (c) 1991-2006 Unicode, Inc. | ||
# For terms of use, see http://www.unicode.org/terms_of_use.html | ||
# For documentation, see UCD.html | ||
# Code points (as integers) expected to act as whitespace; 25 entries once the 0x2000..0x200A range is expanded. | ||
my @spaces=( | ||
ord("\t"), # Cc <control-0009> | ||
ord("\n"), # Cc <control-000A> | ||
# not PerlSpace # Cc <control-000B> | ||
ord("\f"), # Cc <control-000C> | ||
ord("\r"), # Cc <control-000D> | ||
ord(" "), # Zs SPACE | ||
ord("\N{NEL}"), # Cc <control-0085> | ||
ord("\N{NO-BREAK SPACE}"), | ||
# Zs NO-BREAK SPACE | ||
0x1680, # Zs OGHAM SPACE MARK | ||
0x180E, # Zs MONGOLIAN VOWEL SEPARATOR | ||
0x2000..0x200A, # Zs [11] EN QUAD..HAIR SPACE | ||
0x2028, # Zl LINE SEPARATOR | ||
0x2029, # Zp PARAGRAPH SEPARATOR | ||
0x202F, # Zs NARROW NO-BREAK SPACE | ||
0x205F, # Zs MEDIUM MATHEMATICAL SPACE | ||
0x3000 # Zs IDEOGRAPHIC SPACE | ||
); | ||
#diag "Have @{[0+@spaces]} to test\n"; | ||
# Each code point yields six assertions: three split scenarios, each checked in list context (fields) and scalar context (count). | ||
foreach my $cp (@spaces) { | ||
my $msg = sprintf "Space: U+%04x", $cp; | ||
my $space = chr($cp); | ||
# Append U+FFFD and chop it off again; presumably this forces the string into Perl's internal UTF-8 form so that | ||
# code points below 0x100 (e.g. NEL, NO-BREAK SPACE) are matched with Unicode semantics -- TODO confirm. | ||
my $str="A:$space:B\x{FFFD}"; | ||
chop $str; | ||
|
||
# Scenario 1: a single whitespace character between the fields; /\s+/ must yield "A:" and ":B". | ||
my @res=split(/\s+/,$str); | ||
# Same split in scalar context; its result is compared with the list-context field count below. | ||
my $cnt=split(/\s+/,$str); | ||
ok(@res == 2 && join('-',@res) eq "A:-:B", "$msg - /\\s+/"); | ||
is($cnt, scalar(@res), "$msg - /\\s+/ (count)"); | ||
|
||
# Second input: doubled whitespace both at the start and in the middle of the string. | ||
my $s2 = "$space$space:A:$space$space:B\x{FFFD}"; | ||
chop $s2; | ||
|
||
# Scenario 2: split ' ' is expected to drop the leading whitespace, leaving two fields. | ||
my @r2 = split(' ',$s2); | ||
my $c2 = split(' ',$s2); | ||
ok(@r2 == 2 && join('-', @r2) eq ":A:-:B", "$msg - ' '"); | ||
is($c2, scalar(@r2), "$msg - ' ' (count)"); | ||
|
||
# Scenario 3: /\s+/ on the same input is expected to keep a leading empty field, giving three fields. | ||
my @r3 = split(/\s+/, $s2); | ||
my $c3 = split(/\s+/, $s2); | ||
ok(@r3 == 3 && join('-', @r3) eq "-:A:-:B", "$msg - /\\s+/ No.2"); | ||
is($c3, scalar(@r3), "$msg - /\\s+/ No.2 (count)"); | ||
} | ||
} |