Skip to content

Commit

Permalink
APItest/t/utf8.t: Simplify some tests
Browse files Browse the repository at this point in the history
The nested loops of tests that this commit replaces are more complicated
than they need to be.  To test is_utf8_invariant_string_loc, we just need
to put a single variant in each position of a string that spans a full
word (since we have full-word lookup now) and the partial words on either
side.  We set those partial words up to be one byte each less than a full
word.  The code needs to work on strings that don't start on a full word,
and don't end on one, and this commit continues to test that.

An assert is added to the XS code to verify that the string passed in
starts on a full-word boundary, so that applying the offset to it really
does move the start of the tested portion off such a boundary.
  • Loading branch information
khwilliamson committed Dec 5, 2017
1 parent 2f6e257 commit 62e2a96
Show file tree
Hide file tree
Showing 2 changed files with 50 additions and 23 deletions.
4 changes: 4 additions & 0 deletions ext/XS-APItest/APItest.xs
Expand Up @@ -6018,6 +6018,10 @@ test_is_utf8_invariant_string_loc(char *s, STRLEN offset, STRLEN len)
AV *av;
const U8 * ep = NULL;
CODE:
/* 'offset' is used to get the string to not necessarily start on a
* word boundary. But in order to work properly, the string passed
* must start on such a boundary */
assert((PTR2nat(s) & (UVSIZE -1)) == 0);
av = newAV();
av_push(av, newSViv(is_utf8_invariant_string_loc((U8 *) s + offset, len, &ep)));
av_push(av, newSViv(ep - ((U8 *) s + offset)));
Expand Down
69 changes: 46 additions & 23 deletions ext/XS-APItest/t/utf8.t
Expand Up @@ -14,29 +14,52 @@ BEGIN {
$|=1;

use XS::APItest;


my $s = "A" x 100 ;
my $ret_ref = test_is_utf8_invariant_string_loc($s, 0, length $s);
is($ret_ref->[0], 1, "is_utf8_invariant_string_loc returns TRUE for invariant");

my $above_word_length = 9;
for my $initial (0 .. $above_word_length) {
for my $offset (0 .. $above_word_length) {
for my $trailing (0 .. $above_word_length) {
if ($initial >= $offset) {
my $variant_pos = $initial - $offset;
$s = "A" x $initial . "\x80" . "A" x $trailing;
my $ret_ref = test_is_utf8_invariant_string_loc($s, $offset,
length $s);
is($ret_ref->[0], 0, "is_utf8_invariant_string_loc returns"
. " FALSE for variant at $variant_pos,"
. " first $offset ignored)");
is($ret_ref->[1], $variant_pos,
" And returns the correct position");
}
}
}
use Config;
my $word_length = $Config{uvsize};

# Below we test some byte-oriented functions that look for UTF-8 variant bytes
# and can work on full words at a time. Hence this is not black box testing.
# We know how long a word is, and we set things up so that we test a string
# that has a non-full word, then a full word, then a non-full word. This way
# we catch stragglers on either side of the full word. We rely on the XS test
# functions to make sure that the initial non-full-word doesn't start on a
# word boundary...

my $offset = 1; # ... rather, it should fill almost a word

# We choose an invariant and a variant that are at the edges. And, just in
# case the EBCDIC handling ever changes to work per-word, we choose arbitrarily
# an invariant that has most of its bits set natively, and a variant that has
# most unset. First create versions for display in the test names.
my $display_invariant = isASCII ? "7F" : sprintf "%02X", utf8::unicode_to_native(0x9F);
my $display_variant = isASCII ? "80" : sprintf "%02X", utf8::unicode_to_native(0xA0);
my $invariant = chr hex $display_invariant;
my $variant = chr hex $display_variant;

# We create a string with the correct number of bytes. The -1 is to make the
# final portion not fill a full word. (We use $offset to do the same in the
# XS code for the initial portion.)
my $string_length = 3 * $word_length - 1;
my $all_invariants = $invariant x $string_length;
my $display_all_invariants = $display_invariant x $string_length;

my $ret_ref = test_is_utf8_invariant_string_loc($all_invariants, $offset,
length $all_invariants);
is($ret_ref->[0], 1, "is_utf8_invariant_string_loc returns TRUE for "
. substr($display_all_invariants, 2 * $offset));

# Just create a string with a single variant, in all the possible positions.
for my $pos ($offset .. length($all_invariants) - 1) {
my $test_string = $all_invariants;
my $test_display = $display_all_invariants;

substr($test_string, $pos, 1) = $variant;
substr($test_display, $pos * 2, 2) = $display_variant;
my $ret_ref = test_is_utf8_invariant_string_loc($test_string, $offset,
length $test_string);
is($ret_ref->[0], 0, "is_utf8_invariant_string_loc returns FALSE for "
. substr($test_display, 2 * $offset));
is($ret_ref->[1], $pos - $offset, " And returns the correct position");
}

my $pound_sign = chr utf8::unicode_to_native(163);
Expand Down

0 comments on commit 62e2a96

Please sign in to comment.