Skip to content

Commit

Permalink
Fix overflow in uniprop lookups. Closes issue #566
Browse files: browse the repository at this point in the history
Use MVMint64 instead of MVMGrapheme32 so we don't get an overflow.

In ucd2c.pl: fix some syntax errors. I am not sure why they are
showing up now and not earlier. This was on Perl v5.24.1.
  • Loading branch information
samcv committed Jul 7, 2017
1 parent f621f21 commit a3e9869
Show file tree
Hide file tree
Showing 4 changed files with 29 additions and 29 deletions.
10 changes: 5 additions & 5 deletions src/strings/ops.h
Expand Up @@ -78,11 +78,11 @@ MVMObject * MVM_string_split(MVMThreadContext *tc, MVMString *separator, MVMStri
MVMString * MVM_string_join(MVMThreadContext *tc, MVMString *separator, MVMObject *input);
MVMint64 MVM_string_char_at_in_string(MVMThreadContext *tc, MVMString *a, MVMint64 offset, MVMString *b);
MVMint64 MVM_string_offset_has_unicode_property_value(MVMThreadContext *tc, MVMString *s, MVMint64 offset, MVMint64 property_code, MVMint64 property_value_code);
MVMint64 MVM_unicode_codepoint_has_property_value(MVMThreadContext *tc, MVMGrapheme32 grapheme, MVMint64 property_code, MVMint64 property_value_code);
MVMString * MVM_unicode_codepoint_get_property_str(MVMThreadContext *tc, MVMGrapheme32 grapheme, MVMint64 property_code);
const char * MVM_unicode_codepoint_get_property_cstr(MVMThreadContext *tc, MVMGrapheme32 grapheme, MVMint64 property_code);
MVMint64 MVM_unicode_codepoint_get_property_int(MVMThreadContext *tc, MVMGrapheme32 grapheme, MVMint64 property_code);
MVMint64 MVM_unicode_codepoint_get_property_bool(MVMThreadContext *tc, MVMGrapheme32 grapheme, MVMint64 property_code);
MVMint64 MVM_unicode_codepoint_has_property_value(MVMThreadContext *tc, MVMint64 grapheme, MVMint64 property_code, MVMint64 property_value_code);
MVMString * MVM_unicode_codepoint_get_property_str(MVMThreadContext *tc, MVMint64 grapheme, MVMint64 property_code);
const char * MVM_unicode_codepoint_get_property_cstr(MVMThreadContext *tc, MVMint64 grapheme, MVMint64 property_code);
MVMint64 MVM_unicode_codepoint_get_property_int(MVMThreadContext *tc, MVMint64 grapheme, MVMint64 property_code);
MVMint64 MVM_unicode_codepoint_get_property_bool(MVMThreadContext *tc, MVMint64 grapheme, MVMint64 property_code);
MVMString * MVM_unicode_get_name(MVMThreadContext *tc, MVMint64 grapheme);
MVMString * MVM_string_indexing_optimized(MVMThreadContext *tc, MVMString *s);
MVMString * MVM_string_escape(MVMThreadContext *tc, MVMString *s);
Expand Down
10 changes: 5 additions & 5 deletions src/strings/unicode_db.c
Expand Up @@ -66837,11 +66837,11 @@ static char *NFG_QC_enums[3] = {
};


static MVMint32 MVM_codepoint_to_row_index(MVMThreadContext *tc, MVMint32 codepoint);
static MVMint32 MVM_codepoint_to_row_index(MVMThreadContext *tc, MVMint64 codepoint);

static const char *bogus = "<BOGUS>"; /* only for table too short; return null string for no mapping */

static const char* MVM_unicode_get_property_str(MVMThreadContext *tc, MVMint32 codepoint, MVMint64 property_code) {
static const char* MVM_unicode_get_property_str(MVMThreadContext *tc, MVMint64 codepoint, MVMint64 property_code) {
MVMuint32 switch_val = (MVMuint32)property_code;
MVMint32 result_val = 0; /* we'll never have negatives, but so */
MVMuint32 codepoint_row = MVM_codepoint_to_row_index(tc, codepoint);
Expand Down Expand Up @@ -66928,7 +66928,7 @@ static const char* MVM_unicode_get_property_str(MVMThreadContext *tc, MVMint32 c
}
}

static MVMint32 MVM_unicode_get_property_int(MVMThreadContext *tc, MVMint32 codepoint, MVMint64 property_code) {
static MVMint32 MVM_unicode_get_property_int(MVMThreadContext *tc, MVMint64 codepoint, MVMint64 property_code) {
MVMuint32 switch_val = (MVMuint32)property_code;
MVMuint32 codepoint_row = MVM_codepoint_to_row_index(tc, codepoint);
MVMuint16 bitfield_row;
Expand Down Expand Up @@ -67574,12 +67574,12 @@ MVMint32 MVM_unicode_is_in_block(MVMThreadContext *tc, MVMString *str, MVMint64

return in_block;
}
static MVMint32 MVM_codepoint_to_row_index(MVMThreadContext *tc, MVMint32 codepoint) {
static MVMint32 MVM_codepoint_to_row_index(MVMThreadContext *tc, MVMint64 codepoint) {

MVMint32 plane = codepoint >> 16;

if (codepoint < 0) {
MVM_exception_throw_adhoc(tc, "Error, MoarVM cannot get Unicode codepoint property for synthetic codepoint %i", codepoint);
MVM_exception_throw_adhoc(tc, "Error, MoarVM cannot get Unicode codepoint property for synthetic codepoint %"PRId64"", codepoint);
}

if (plane == 0) {
Expand Down
10 changes: 5 additions & 5 deletions src/strings/unicode_ops.c
Expand Up @@ -240,7 +240,7 @@ MVMString * MVM_unicode_get_name(MVMThreadContext *tc, MVMint64 codepoint) {
return MVM_string_ascii_decode(tc, tc->instance->VMString, name, strlen(name));
}

MVMString * MVM_unicode_codepoint_get_property_str(MVMThreadContext *tc, MVMGrapheme32 codepoint, MVMint64 property_code) {
MVMString * MVM_unicode_codepoint_get_property_str(MVMThreadContext *tc, MVMint64 codepoint, MVMint64 property_code) {
const char * const str = MVM_unicode_get_property_str(tc, codepoint, property_code);

if (!str)
Expand All @@ -249,23 +249,23 @@ MVMString * MVM_unicode_codepoint_get_property_str(MVMThreadContext *tc, MVMGrap
return MVM_string_ascii_decode(tc, tc->instance->VMString, str, strlen(str));
}

const char * MVM_unicode_codepoint_get_property_cstr(MVMThreadContext *tc, MVMGrapheme32 codepoint, MVMint64 property_code) {
const char * MVM_unicode_codepoint_get_property_cstr(MVMThreadContext *tc, MVMint64 codepoint, MVMint64 property_code) {
return MVM_unicode_get_property_str(tc, codepoint, property_code);
}

MVMint64 MVM_unicode_codepoint_get_property_int(MVMThreadContext *tc, MVMGrapheme32 codepoint, MVMint64 property_code) {
MVMint64 MVM_unicode_codepoint_get_property_int(MVMThreadContext *tc, MVMint64 codepoint, MVMint64 property_code) {
if (property_code == 0)
return 0;
return (MVMint64)MVM_unicode_get_property_int(tc, codepoint, property_code);
}

MVMint64 MVM_unicode_codepoint_get_property_bool(MVMThreadContext *tc, MVMGrapheme32 codepoint, MVMint64 property_code) {
MVMint64 MVM_unicode_codepoint_get_property_bool(MVMThreadContext *tc, MVMint64 codepoint, MVMint64 property_code) {
if (property_code == 0)
return 0;
return (MVMint64)MVM_unicode_get_property_int(tc, codepoint, property_code) != 0;
}

MVMint64 MVM_unicode_codepoint_has_property_value(MVMThreadContext *tc, MVMGrapheme32 codepoint, MVMint64 property_code, MVMint64 property_value_code) {
MVMint64 MVM_unicode_codepoint_has_property_value(MVMThreadContext *tc, MVMint64 codepoint, MVMint64 property_code, MVMint64 property_value_code) {
if (property_code == 0)
return 0;
return (MVMint64)MVM_unicode_get_property_int(tc,
Expand Down
28 changes: 14 additions & 14 deletions tools/ucd2c.pl
Expand Up @@ -17,7 +17,7 @@
my $DEBUG = $ENV{UCD2CDEBUG} // 0;

my @name_lines;
if $DEBUG {
if ($DEBUG) {
open(LOG, ">extents") or die "can't create extents: $!";
binmode LOG, ':encoding(UTF-8)';
}
Expand Down Expand Up @@ -124,16 +124,16 @@ sub main {
tweak_nfg_qc();

# Allocate all the things
progress "done.\nallocating bitfield...";
progress("done.\nallocating bitfield...");
my $allocated_properties = allocate_bitfield();
# Compute all the things
progress "done.\ncomputing all properties...";
progress("done.\ncomputing all properties...");
compute_properties($allocated_properties);
# Make the things less
progress "...done.\ncomputing collapsed properties table...";
progress("...done.\ncomputing collapsed properties table...");
compute_bitfield($first_point);
# Emit all the things
progress "...done.\nemitting unicode_db.c...";
progress("...done.\nemitting unicode_db.c...");
emit_bitfield($first_point);
$extents = emit_codepoints_and_planes($first_point);
emit_case_changes($first_point);
Expand Down Expand Up @@ -338,7 +338,7 @@ sub least_int_ge_lg2 {
sub each_line {
my ($fname, $fn, $force) = @_;
progress "done.\nprocessing $fname.txt...";
progress("done.\nprocessing $fname.txt...");
map {
chomp;
$fn->($_) unless !$force && /^(?:#|\s*$)/;
Expand Down Expand Up @@ -632,22 +632,22 @@ sub emit_codepoint_row_lookup {
}
$i++;
}
my $out = "static MVMint32 MVM_codepoint_to_row_index(MVMThreadContext *tc, MVMint32 codepoint) {\n
my $out = "static MVMint32 MVM_codepoint_to_row_index(MVMThreadContext *tc, MVMint64 codepoint) {\n
MVMint32 plane = codepoint >> 16;
if (codepoint < 0) {
MVM_exception_throw_adhoc(tc, \"Error, MoarVM cannot get Unicode codepoint property for synthetic codepoint %i", codepoint);
MVM_exception_throw_adhoc(tc, \"Error, MoarVM cannot get Unicode codepoint property for synthetic codepoint \%\"PRId64\"\", codepoint);
}
if (plane == 0) {"
.emit_binary_search_algorithm($extents, 0, 1, $SMP_start - 1, " ")."
. emit_binary_search_algorithm($extents, 0, 1, $SMP_start - 1, " ") . "
}
else {
if (plane < 0 || plane > 16 || codepoint > 0x10FFFD) {
return -1;
}
else {".emit_binary_search_algorithm($extents, $SMP_start,
int(($SMP_start + scalar(@$extents)-1)/2), scalar(@$extents) - 1, " ")."
else {" . emit_binary_search_algorithm($extents, $SMP_start,
int(($SMP_start + scalar(@$extents)-1)/2), scalar(@$extents) - 1, " ") . "
}
}
}";
Expand Down Expand Up @@ -724,7 +724,7 @@ sub emit_property_value_lookup {
my $enumtables = "\n\n";
our $hout = "typedef enum {\n";
my $out = "
static MVMint32 MVM_unicode_get_property_int(MVMThreadContext *tc, MVMint32 codepoint, MVMint64 property_code) {
static MVMint32 MVM_unicode_get_property_int(MVMThreadContext *tc, MVMint64 codepoint, MVMint64 property_code) {
MVMuint32 switch_val = (MVMuint32)property_code;
MVMuint32 codepoint_row = MVM_codepoint_to_row_index(tc, codepoint);
MVMuint16 bitfield_row;
Expand All @@ -738,11 +738,11 @@ sub emit_property_value_lookup {
case 0: return 0;";
my $eout = "
static MVMint32 MVM_codepoint_to_row_index(MVMThreadContext *tc, MVMint32 codepoint);
static MVMint32 MVM_codepoint_to_row_index(MVMThreadContext *tc, MVMint64 codepoint);
static const char *bogus = \"<BOGUS>\"; /* only for table too short; return null string for no mapping */
static const char* MVM_unicode_get_property_str(MVMThreadContext *tc, MVMint32 codepoint, MVMint64 property_code) {
static const char* MVM_unicode_get_property_str(MVMThreadContext *tc, MVMint64 codepoint, MVMint64 property_code) {
MVMuint32 switch_val = (MVMuint32)property_code;
MVMint32 result_val = 0; /* we'll never have negatives, but so */
MVMuint32 codepoint_row = MVM_codepoint_to_row_index(tc, codepoint);
Expand Down

0 comments on commit a3e9869

Please sign in to comment.