Permalink
Browse files

Fix overflow in uniprop lookups. Closes issue #566

Use MVMint64 instead of MVMGrapheme32 so we don't get an overflow.

In ucd2c.pl: fix some syntax errors. I am not sure why they are
showing up now and not earlier. This was on Perl v5.24.1.
  • Loading branch information...
samcv committed Jul 7, 2017
1 parent f621f21 commit a3e986924ae26aae1e7f2352843b631a49cbb34e
Showing with 29 additions and 29 deletions.
  1. +5 −5 src/strings/ops.h
  2. +5 −5 src/strings/unicode_db.c
  3. +5 −5 src/strings/unicode_ops.c
  4. +14 −14 tools/ucd2c.pl
@@ -78,11 +78,11 @@ MVMObject * MVM_string_split(MVMThreadContext *tc, MVMString *separator, MVMStri
MVMString * MVM_string_join(MVMThreadContext *tc, MVMString *separator, MVMObject *input);
MVMint64 MVM_string_char_at_in_string(MVMThreadContext *tc, MVMString *a, MVMint64 offset, MVMString *b);
MVMint64 MVM_string_offset_has_unicode_property_value(MVMThreadContext *tc, MVMString *s, MVMint64 offset, MVMint64 property_code, MVMint64 property_value_code);
MVMint64 MVM_unicode_codepoint_has_property_value(MVMThreadContext *tc, MVMGrapheme32 grapheme, MVMint64 property_code, MVMint64 property_value_code);
MVMString * MVM_unicode_codepoint_get_property_str(MVMThreadContext *tc, MVMGrapheme32 grapheme, MVMint64 property_code);
const char * MVM_unicode_codepoint_get_property_cstr(MVMThreadContext *tc, MVMGrapheme32 grapheme, MVMint64 property_code);
MVMint64 MVM_unicode_codepoint_get_property_int(MVMThreadContext *tc, MVMGrapheme32 grapheme, MVMint64 property_code);
MVMint64 MVM_unicode_codepoint_get_property_bool(MVMThreadContext *tc, MVMGrapheme32 grapheme, MVMint64 property_code);
MVMint64 MVM_unicode_codepoint_has_property_value(MVMThreadContext *tc, MVMint64 grapheme, MVMint64 property_code, MVMint64 property_value_code);
MVMString * MVM_unicode_codepoint_get_property_str(MVMThreadContext *tc, MVMint64 grapheme, MVMint64 property_code);
const char * MVM_unicode_codepoint_get_property_cstr(MVMThreadContext *tc, MVMint64 grapheme, MVMint64 property_code);
MVMint64 MVM_unicode_codepoint_get_property_int(MVMThreadContext *tc, MVMint64 grapheme, MVMint64 property_code);
MVMint64 MVM_unicode_codepoint_get_property_bool(MVMThreadContext *tc, MVMint64 grapheme, MVMint64 property_code);
MVMString * MVM_unicode_get_name(MVMThreadContext *tc, MVMint64 grapheme);
MVMString * MVM_string_indexing_optimized(MVMThreadContext *tc, MVMString *s);
MVMString * MVM_string_escape(MVMThreadContext *tc, MVMString *s);
@@ -66837,11 +66837,11 @@ static char *NFG_QC_enums[3] = {
};
static MVMint32 MVM_codepoint_to_row_index(MVMThreadContext *tc, MVMint32 codepoint);
static MVMint32 MVM_codepoint_to_row_index(MVMThreadContext *tc, MVMint64 codepoint);
static const char *bogus = "<BOGUS>"; /* only for table too short; return null string for no mapping */
static const char* MVM_unicode_get_property_str(MVMThreadContext *tc, MVMint32 codepoint, MVMint64 property_code) {
static const char* MVM_unicode_get_property_str(MVMThreadContext *tc, MVMint64 codepoint, MVMint64 property_code) {
MVMuint32 switch_val = (MVMuint32)property_code;
MVMint32 result_val = 0; /* we'll never have negatives, but so */
MVMuint32 codepoint_row = MVM_codepoint_to_row_index(tc, codepoint);
@@ -66928,7 +66928,7 @@ static const char* MVM_unicode_get_property_str(MVMThreadContext *tc, MVMint32 c
}
}
static MVMint32 MVM_unicode_get_property_int(MVMThreadContext *tc, MVMint32 codepoint, MVMint64 property_code) {
static MVMint32 MVM_unicode_get_property_int(MVMThreadContext *tc, MVMint64 codepoint, MVMint64 property_code) {
MVMuint32 switch_val = (MVMuint32)property_code;
MVMuint32 codepoint_row = MVM_codepoint_to_row_index(tc, codepoint);
MVMuint16 bitfield_row;
@@ -67574,12 +67574,12 @@ MVMint32 MVM_unicode_is_in_block(MVMThreadContext *tc, MVMString *str, MVMint64
return in_block;
}
static MVMint32 MVM_codepoint_to_row_index(MVMThreadContext *tc, MVMint32 codepoint) {
static MVMint32 MVM_codepoint_to_row_index(MVMThreadContext *tc, MVMint64 codepoint) {
MVMint32 plane = codepoint >> 16;
if (codepoint < 0) {
MVM_exception_throw_adhoc(tc, "Error, MoarVM cannot get Unicode codepoint property for synthetic codepoint %i", codepoint);
MVM_exception_throw_adhoc(tc, "Error, MoarVM cannot get Unicode codepoint property for synthetic codepoint %"PRId64"", codepoint);
}
if (plane == 0) {
@@ -240,7 +240,7 @@ MVMString * MVM_unicode_get_name(MVMThreadContext *tc, MVMint64 codepoint) {
return MVM_string_ascii_decode(tc, tc->instance->VMString, name, strlen(name));
}
MVMString * MVM_unicode_codepoint_get_property_str(MVMThreadContext *tc, MVMGrapheme32 codepoint, MVMint64 property_code) {
MVMString * MVM_unicode_codepoint_get_property_str(MVMThreadContext *tc, MVMint64 codepoint, MVMint64 property_code) {
const char * const str = MVM_unicode_get_property_str(tc, codepoint, property_code);
if (!str)
@@ -249,23 +249,23 @@ MVMString * MVM_unicode_codepoint_get_property_str(MVMThreadContext *tc, MVMGrap
return MVM_string_ascii_decode(tc, tc->instance->VMString, str, strlen(str));
}
const char * MVM_unicode_codepoint_get_property_cstr(MVMThreadContext *tc, MVMGrapheme32 codepoint, MVMint64 property_code) {
const char * MVM_unicode_codepoint_get_property_cstr(MVMThreadContext *tc, MVMint64 codepoint, MVMint64 property_code) {
return MVM_unicode_get_property_str(tc, codepoint, property_code);
}
MVMint64 MVM_unicode_codepoint_get_property_int(MVMThreadContext *tc, MVMGrapheme32 codepoint, MVMint64 property_code) {
MVMint64 MVM_unicode_codepoint_get_property_int(MVMThreadContext *tc, MVMint64 codepoint, MVMint64 property_code) {
if (property_code == 0)
return 0;
return (MVMint64)MVM_unicode_get_property_int(tc, codepoint, property_code);
}
MVMint64 MVM_unicode_codepoint_get_property_bool(MVMThreadContext *tc, MVMGrapheme32 codepoint, MVMint64 property_code) {
MVMint64 MVM_unicode_codepoint_get_property_bool(MVMThreadContext *tc, MVMint64 codepoint, MVMint64 property_code) {
if (property_code == 0)
return 0;
return (MVMint64)MVM_unicode_get_property_int(tc, codepoint, property_code) != 0;
}
MVMint64 MVM_unicode_codepoint_has_property_value(MVMThreadContext *tc, MVMGrapheme32 codepoint, MVMint64 property_code, MVMint64 property_value_code) {
MVMint64 MVM_unicode_codepoint_has_property_value(MVMThreadContext *tc, MVMint64 codepoint, MVMint64 property_code, MVMint64 property_value_code) {
if (property_code == 0)
return 0;
return (MVMint64)MVM_unicode_get_property_int(tc,
@@ -17,7 +17,7 @@
my $DEBUG = $ENV{UCD2CDEBUG} // 0;
my @name_lines;
if $DEBUG {
if ($DEBUG) {
open(LOG, ">extents") or die "can't create extents: $!";
binmode LOG, ':encoding(UTF-8)';
}
@@ -124,16 +124,16 @@ sub main {
tweak_nfg_qc();
# Allocate all the things
progress "done.\nallocating bitfield...";
progress("done.\nallocating bitfield...");
my $allocated_properties = allocate_bitfield();
# Compute all the things
progress "done.\ncomputing all properties...";
progress("done.\ncomputing all properties...");
compute_properties($allocated_properties);
# Make the things less
progress "...done.\ncomputing collapsed properties table...";
progress("...done.\ncomputing collapsed properties table...");
compute_bitfield($first_point);
# Emit all the things
progress "...done.\nemitting unicode_db.c...";
progress("...done.\nemitting unicode_db.c...");
emit_bitfield($first_point);
$extents = emit_codepoints_and_planes($first_point);
emit_case_changes($first_point);
@@ -338,7 +338,7 @@ sub least_int_ge_lg2 {
sub each_line {
my ($fname, $fn, $force) = @_;
progress "done.\nprocessing $fname.txt...";
progress("done.\nprocessing $fname.txt...");
map {
chomp;
$fn->($_) unless !$force && /^(?:#|\s*$)/;
@@ -632,22 +632,22 @@ sub emit_codepoint_row_lookup {
}
$i++;
}
my $out = "static MVMint32 MVM_codepoint_to_row_index(MVMThreadContext *tc, MVMint32 codepoint) {\n
my $out = "static MVMint32 MVM_codepoint_to_row_index(MVMThreadContext *tc, MVMint64 codepoint) {\n
MVMint32 plane = codepoint >> 16;
if (codepoint < 0) {
MVM_exception_throw_adhoc(tc, \"Error, MoarVM cannot get Unicode codepoint property for synthetic codepoint %i", codepoint);
MVM_exception_throw_adhoc(tc, \"Error, MoarVM cannot get Unicode codepoint property for synthetic codepoint \%\"PRId64\"\", codepoint);
}
if (plane == 0) {"
.emit_binary_search_algorithm($extents, 0, 1, $SMP_start - 1, " ")."
. emit_binary_search_algorithm($extents, 0, 1, $SMP_start - 1, " ") . "
}
else {
if (plane < 0 || plane > 16 || codepoint > 0x10FFFD) {
return -1;
}
else {".emit_binary_search_algorithm($extents, $SMP_start,
int(($SMP_start + scalar(@$extents)-1)/2), scalar(@$extents) - 1, " ")."
else {" . emit_binary_search_algorithm($extents, $SMP_start,
int(($SMP_start + scalar(@$extents)-1)/2), scalar(@$extents) - 1, " ") . "
}
}
}";
@@ -724,7 +724,7 @@ sub emit_property_value_lookup {
my $enumtables = "\n\n";
our $hout = "typedef enum {\n";
my $out = "
static MVMint32 MVM_unicode_get_property_int(MVMThreadContext *tc, MVMint32 codepoint, MVMint64 property_code) {
static MVMint32 MVM_unicode_get_property_int(MVMThreadContext *tc, MVMint64 codepoint, MVMint64 property_code) {
MVMuint32 switch_val = (MVMuint32)property_code;
MVMuint32 codepoint_row = MVM_codepoint_to_row_index(tc, codepoint);
MVMuint16 bitfield_row;
@@ -738,11 +738,11 @@ sub emit_property_value_lookup {
case 0: return 0;";
my $eout = "
static MVMint32 MVM_codepoint_to_row_index(MVMThreadContext *tc, MVMint32 codepoint);
static MVMint32 MVM_codepoint_to_row_index(MVMThreadContext *tc, MVMint64 codepoint);
static const char *bogus = \"<BOGUS>\"; /* only for table too short; return null string for no mapping */
static const char* MVM_unicode_get_property_str(MVMThreadContext *tc, MVMint32 codepoint, MVMint64 property_code) {
static const char* MVM_unicode_get_property_str(MVMThreadContext *tc, MVMint64 codepoint, MVMint64 property_code) {
MVMuint32 switch_val = (MVMuint32)property_code;
MVMint32 result_val = 0; /* we'll never have negatives, but so */
MVMuint32 codepoint_row = MVM_codepoint_to_row_index(tc, codepoint);

0 comments on commit a3e9869

Please sign in to comment.