Skip to content

Commit

Permalink
Tell mktables what Unicode version mk_invlists.pl handles
Browse files Browse the repository at this point in the history
A downside of supporting the Unicode break properties like \b{gcb},
\b{lb} is that these aren't very mature in the Standard, and so code
likely has to change when updating Perl to support a new version of the
Standard.

And the new rules may not be backwards compatible.  This commit creates
a mechanism to tell mktables the Unicode version that the rules are
written for.  If that is not the same version as being compiled, the
test file marks any failing boundary tests as TODO, and outputs a
warning if the compiled version is later than the code expects, to
alert you to the fact that the code needs to be updated.
  • Loading branch information
khwilliamson committed Jun 22, 2016
1 parent b6c0faa commit 6295dc1
Show file tree
Hide file tree
Showing 3 changed files with 50 additions and 10 deletions.
2 changes: 1 addition & 1 deletion charclass_invlists.h
Expand Up @@ -87887,7 +87887,7 @@ static const U8 WB_table[19][19] = {
* 1a0687fb9c6c4567e853913549df0944fe40821279a3e9cdaa6ab8679bc286fd lib/unicore/extracted/DLineBreak.txt
* 40bcfed3ca727c19e1331f6c33806231d5f7eeeabd2e6a9e06a3740c85d0c250 lib/unicore/extracted/DNumType.txt
* a18d502bad39d527ac5586d7bc93e29f565859e3bcc24ada627eff606d6f5fed lib/unicore/extracted/DNumValues.txt
* 2e9c8c898fd78231c21ff0da9facb8d231bf419bde94dc63075dff904be4f5f7 lib/unicore/mktables
* 45321b549a605b65ead1e83cdb90fdd9c5a6c8731a537197f335bab251b4e778 lib/unicore/mktables
* 462c9aaa608fb2014cd9649af1c5c009485c60b9c8b15b89401fdc10cf6161c6 lib/unicore/version
* 913d2f93f3cb6cdf1664db888bf840bc4eb074eef824e082fceda24a9445e60c regen/charset_translations.pl
* 12bd58cb9d5a99f631ca95e269f7f9c90dacaf81020efa5d95a995f3cdc19200 regen/mk_invlists.pl
Expand Down
56 changes: 48 additions & 8 deletions lib/unicore/mktables
Expand Up @@ -36,6 +36,17 @@ my $debugging_build = $Config{"ccflags"} =~ /-DDEBUGGING/;

# True when the native code point of "A" is not 65, i.e. the platform's
# character set is not ASCII-based.
sub NON_ASCII_PLATFORM {
    return ord("A") != 65;
}

# When a new version of Unicode is published, unfortunately the algorithms for
# dealing with various bounds, like \b{gcb}, \b{lb} may have to be updated
# manually. The changes may or may not be backward compatible with older
# releases. The code is in regen/mk_invlists.pl and regexec.c. Make the
# changes, then come back here and set the variable below to what version the
# code is expecting. If a newer version of Unicode is being compiled than
# expected, a warning will be generated. If an older version is being
# compiled, any bounds tests that fail in the generated test file (-maketest
# option) will be marked as TODO.
my $version_of_mk_invlist_bounds = v8.0.0;

##########################################################################
#
# mktables -- create the runtime Perl Unicode files (lib/unicore/.../*.pl),
Expand Down Expand Up @@ -18788,9 +18799,16 @@ sub make_property_test_script() {
$property->DESTROY();
}

# Make any test of the boundary (break) properties TODO if the code
# doesn't match the version being compiled
my $TODO_FAILING_BREAKS = ($version_of_mk_invlist_bounds ne $v_version)
? "\nsub TODO_FAILING_BREAKS { 1 }\n"
: "\nsub TODO_FAILING_BREAKS { 0 }\n";

&write($t_path,
0, # Not utf8;
[$HEADER,
$TODO_FAILING_BREAKS,
<DATA>,
@output,
(map {"Test_GCB('$_');\n"} @backslash_X_tests),
Expand Down Expand Up @@ -19721,6 +19739,13 @@ if ($verbosity >= $NORMAL_VERBOSITY && ! $debug_skip) {
}
print "\nAll done\n" if $verbosity >= $VERBOSE;
}

# Warn the maintainer when the Unicode version being compiled is later than
# the version the hand-maintained \b{} boundary rules were written for, so
# that those rules get reviewed.  The comparison is a string 'lt';
# $version_of_mk_invlist_bounds is a v-string, and $v_version is presumably
# one as well (it is set elsewhere in this file -- confirm).
if ($version_of_mk_invlist_bounds lt $v_version) {
    # Name the real script (regen/mk_invlists.pl) so the reader of the
    # warning can find the code that needs checking.
    Carp::my_carp("WARNING: \\b{} algorithms (regen/mk_invlists.pl) need"
                . " to be checked and possibly updated to Unicode"
                . " $string_version");
}

exit(0);

# TRAILING CODE IS USED BY make_property_test_script()
Expand Down Expand Up @@ -20015,12 +20040,21 @@ sub _test_break($$) {
my $pattern = "(?$modifier:$break_pattern)";

# Actually do the test
my $matched_text;
my $matched = $string =~ qr/$pattern/;
print "not " unless $matched;
if ($matched) {
$matched_text = "matched";
}
else {
$matched_text = "failed to match";
print "not ";

# Fancy display of test results
$matched = ($matched) ? "matched" : "failed to match";
print "ok ", ++$Tests, " - \"$display_string\" $matched /$pattern/$display_upgrade; line $line $display_locale$comment\n";
if (TODO_FAILING_BREAKS) {
$comment = " # $comment" unless $comment =~ / ^ \s* \# /x;
$comment =~ s/#/# TODO/;
}
}
print "ok ", ++$Tests, " - \"$display_string\" $matched_text /$pattern/$display_upgrade; line $line $display_locale$comment\n";

# Only print the comment on the first use of this line
$comment = "";
Expand All @@ -20031,8 +20065,10 @@ sub _test_break($$) {
my $B_pattern = "$1$2";
$matched = $string =~ qr/$B_pattern/;
print "not " unless $matched;
$matched = ($matched) ? "matched" : "failed to match";
print "ok ", ++$Tests, " - \"$display_string\" $matched /$B_pattern/$display_upgrade; line $line $display_locale\n";
$matched_text = ($matched) ? "matched" : "failed to match";
print "ok ", ++$Tests, " - \"$display_string\" $matched_text /$B_pattern/$display_upgrade; line $line $display_locale";
print " # TODO" if TODO_FAILING_BREAKS && ! $matched;
print "\n";
}
}

Expand All @@ -20057,7 +20093,9 @@ sub _test_break($$) {
} else {
$matches[$i] = join("", map { sprintf "\\x{%04X}", ord $_ }
split "", $matches[$i]);
print "not ok $Tests - In \"$display_string\" =~ /(\\X)/g, \\X #",
print "not ok $Tests -";
print " # TODO" if TODO_FAILING_BREAKS;
print " In \"$display_string\" =~ /(\\X)/g, \\X #",
$i + 1,
" should have matched $should_display[$i]",
" but instead matched $matches[$i]",
Expand All @@ -20071,7 +20109,9 @@ sub _test_break($$) {
if (@matches == @should_match) {
print "ok $Tests - Nothing was left over; line $line\n";
} else {
print "not ok $Tests - There were ", scalar @should_match, " \\X matches expected, but got ", scalar @matches, " instead; line $line\n";
print "not ok $Tests - There were ", scalar @should_match, " \\X matches expected, but got ", scalar @matches, " instead; line $line";
print " # TODO" if TODO_FAILING_BREAKS;
print "\n";
}
}

Expand Down
2 changes: 1 addition & 1 deletion regcharclass.h
Expand Up @@ -1895,7 +1895,7 @@
* 1a0687fb9c6c4567e853913549df0944fe40821279a3e9cdaa6ab8679bc286fd lib/unicore/extracted/DLineBreak.txt
* 40bcfed3ca727c19e1331f6c33806231d5f7eeeabd2e6a9e06a3740c85d0c250 lib/unicore/extracted/DNumType.txt
* a18d502bad39d527ac5586d7bc93e29f565859e3bcc24ada627eff606d6f5fed lib/unicore/extracted/DNumValues.txt
* 2e9c8c898fd78231c21ff0da9facb8d231bf419bde94dc63075dff904be4f5f7 lib/unicore/mktables
* 45321b549a605b65ead1e83cdb90fdd9c5a6c8731a537197f335bab251b4e778 lib/unicore/mktables
* 462c9aaa608fb2014cd9649af1c5c009485c60b9c8b15b89401fdc10cf6161c6 lib/unicore/version
* 913d2f93f3cb6cdf1664db888bf840bc4eb074eef824e082fceda24a9445e60c regen/charset_translations.pl
* d9c04ac46bdd81bb3e26519f2b8eb6242cb12337205add3f7cf092b0c58dccc4 regen/regcharclass.pl
Expand Down

0 comments on commit 6295dc1

Please sign in to comment.