Permalink
Browse files

Introduce a max final grapheme codepoint filter.

This means we can usually avoid the separator search loop entirely in
the common case where separators are control chars. In the Perl 6
million-line file benchmark, where each line is 60 chars long, this
saves 845 million cycles. (How much it helps depends on line
length; the longer the line, the more it helps.)
  • Loading branch information...
jnthn committed Jun 16, 2017
1 parent a8f2ac7 commit 8fa18578372819434cbc26901fe45e6f7e4c0f7c
Showing with 11 additions and 1 deletion.
  1. +4 −0 src/strings/decode_stream.c
  2. +7 −1 src/strings/decode_stream.h
@@ -543,6 +543,7 @@ void MVM_string_decodestream_destroy(MVMThreadContext *tc, MVMDecodeStream *ds)
* faster line reading. */
static void cache_sep_info(MVMThreadContext *tc, MVMDecodeStreamSeparators *sep_spec) {
MVMGrapheme32 *final_graphemes = MVM_malloc(sep_spec->num_seps * sizeof(MVMGrapheme32));
MVMint32 max_final_grapheme = -1;
MVMint32 max_sep_length = 1;
MVMint32 cur_sep_pos = 0;
MVMint32 i;
@@ -552,9 +553,12 @@ static void cache_sep_info(MVMThreadContext *tc, MVMDecodeStreamSeparators *sep_
max_sep_length = length;
cur_sep_pos += length;
final_graphemes[i] = sep_spec->sep_graphemes[cur_sep_pos - 1];
if (final_graphemes[i] > max_final_grapheme)
max_final_grapheme = final_graphemes[i];
}
sep_spec->max_sep_length = max_sep_length;
sep_spec->final_graphemes = final_graphemes;
sep_spec->max_final_grapheme = max_final_grapheme;
}
/* Sets a decode stream separator to its default value. */
@@ -61,14 +61,20 @@ struct MVMDecodeStreamSeparators {
/* Cached final graphemes, for quick lookups in stream_maybe_sep. */
MVMGrapheme32 *final_graphemes;
/* Since separators are most often control chars, we can quickly filter
* out many graphemes without a separator search by keeping around the
* maximum codepoint/synthetic index of any final grapheme and doing a
* quick comparison. */
MVMGrapheme32 max_final_grapheme;
};
/* Checks if we may have encountered one of the separators. This only looks to
* see if we hit the final grapheme of any of the separators; that is all we
* require the actual encodings themselves to work out (multi-grapheme
* separators are handled in the decode stream logic itself). */
MVM_STATIC_INLINE MVMint32 MVM_string_decode_stream_maybe_sep(MVMThreadContext *tc, MVMDecodeStreamSeparators *sep_spec, MVMGrapheme32 g) {
if (sep_spec) {
if (sep_spec && g <= sep_spec->max_final_grapheme) {
MVMint32 i;
for (i = 0; i < sep_spec->num_seps; i++)
if (sep_spec->final_graphemes[i] == g)

0 comments on commit 8fa1857

Please sign in to comment.