Skip to content

Commit

Permalink
Another big speed-up for reading long lines.
Browse files Browse the repository at this point in the history
Just skip over buffers that can't possibly contain the separator, so
we save on expensively scanning them. With this and the previous
commit, the heap profiler reaches its prompt within a second or so
when reading in a 25MB heap profile.
  • Loading branch information
jnthn committed Apr 1, 2016
1 parent 7d59b00 commit b9afd77
Show file tree
Hide file tree
Showing 2 changed files with 25 additions and 1 deletion.
25 changes: 24 additions & 1 deletion src/strings/decode_stream.c
Expand Up @@ -211,7 +211,9 @@ MVMString * MVM_string_decodestream_get_chars(MVMThreadContext *tc, MVMDecodeStr

/* Gets characters up until one of the specified separators is encountered. If
* we do not encounter it, returns 0. This may mean more input buffers are needed
* or that we reached the end of the stream. */
* or that we reached the end of the stream. Note that it assumes the separator
* will exist near the end of the buffer, if it occurs at all, due to decode
* streams looking for stoppers. */
static MVMint32 have_separator(MVMThreadContext *tc, MVMDecodeStreamChars *start_chars, MVMint32 start_pos,
MVMDecodeStreamSeparators *sep_spec, MVMint32 sep_idx, MVMint32 sep_graph_pos) {
MVMint32 sep_pos = 1;
Expand All @@ -236,6 +238,17 @@ static MVMint32 find_separator(MVMThreadContext *tc, const MVMDecodeStream *ds,
MVMDecodeStreamSeparators *sep_spec, MVMint32 *sep_length) {
MVMint32 sep_loc = 0;
MVMDecodeStreamChars *cur_chars = ds->chars_head;

/* First, skip over any buffers we need not consider. */
MVMint32 max_sep_chars = MVM_string_decode_stream_sep_max_chars(tc, sep_spec);
while (cur_chars && cur_chars->next) {
if (cur_chars->next->length < max_sep_chars)
break;
sep_loc += cur_chars->length;
cur_chars = cur_chars->next;
}

/* Now scan for the separator. */
while (cur_chars) {
MVMint32 start = cur_chars == ds->chars_head ? ds->chars_head_pos : 0;
MVMint32 i, j;
Expand Down Expand Up @@ -515,6 +528,16 @@ void MVM_string_decode_stream_sep_from_strings(MVMThreadContext *tc, MVMDecodeSt
}
}

/* Returns the length, in graphemes, of the longest separator held in the
 * given separator spec. The result is never smaller than 1, so a spec with
 * no separators (or only shorter ones) still yields 1. */
MVMint32 MVM_string_decode_stream_sep_max_chars(MVMThreadContext *tc, MVMDecodeStreamSeparators *sep_spec) {
    MVMint32 longest = 1;
    MVMint32 idx = 0;
    while (idx < sep_spec->num_seps) {
        /* Keep the largest separator length seen so far. */
        if (sep_spec->sep_lengths[idx] > longest) {
            longest = sep_spec->sep_lengths[idx];
        }
        idx++;
    }
    return longest;
}

/* Cleans up memory associated with a stream separator set. */
void MVM_string_decode_stream_sep_destroy(MVMThreadContext *tc, MVMDecodeStreamSeparators *sep_spec) {
MVM_free(sep_spec->sep_lengths);
Expand Down
1 change: 1 addition & 0 deletions src/strings/decode_stream.h
Expand Up @@ -85,4 +85,5 @@ MVMint32 MVM_string_decodestream_is_empty(MVMThreadContext *tc, MVMDecodeStream
void MVM_string_decodestream_destory(MVMThreadContext *tc, MVMDecodeStream *ds);
void MVM_string_decode_stream_sep_default(MVMThreadContext *tc, MVMDecodeStreamSeparators *sep_spec);
void MVM_string_decode_stream_sep_from_strings(MVMThreadContext *tc, MVMDecodeStreamSeparators *sep_spec, MVMString **seps, MVMint32 num_seps);
MVMint32 MVM_string_decode_stream_sep_max_chars(MVMThreadContext *tc, MVMDecodeStreamSeparators *sep_spec);
void MVM_string_decode_stream_sep_destroy(MVMThreadContext *tc, MVMDecodeStreamSeparators *sep_spec);

0 comments on commit b9afd77

Please sign in to comment.