Permalink
Browse files

Replace all instances of ; in the rowparser loop with , ( ~15% speedup )

The functional diff is best examined via `... --color-words -w lib ...`

This is another microoptimization, which is nevertheless measurable on 1000+
row resultsets. Investigation into this was prompted by a curious comment made
by bulk88[1] in response to xdg[2]:

  Every ";" is an op that executes that adjusts current line number

Given that we already generate all this code, and it is not intended to be
read by a human, swapping ; with , is only logical. A (very very crude) check
with the included benchmarker indeed shows a repeatable difference on 5.16.2

[1] http://www.nntp.perl.org/group/perl.perl5.porters/2015/05/msg228074.html
[2] http://www.nntp.perl.org/group/perl.perl5.porters/2015/05/msg228068.html
  • Loading branch information...
ribasushi committed May 20, 2015
1 parent 164aab8 commit 05a5ca4b941f0741e91d0972334ab96b96e13f4d
@@ -0,0 +1,83 @@
### BEGIN LITERAL STRING EVAL
#
# NOTE(review): machine-generated DBIx::Class collapsing rowparser
# (comma-operator variant, i.e. "comma.src"). Every statement inside the
# while-loop body is joined with `,` instead of `;`, trading per-statement
# nextstate ops for speed — presumably ~15% on 1000+ row sets per the
# commit message; confirm with the bundled benchmarker.
# Calling convention (as visible below): $_[0] is the row array (reused
# in place for results), $_[1] an optional row-fetching coderef for lazy
# collapse, $_[2] an out-queue for a pre-read row belonging to the next
# result.
#
my $rows_pos = 0;
my ($result_pos, @collapse_idx, $cur_row_data, %cur_row_ids );
# this loop is a bit arcane - the rationale is that the passed in
# $_[0] will either have only one row (->next) or will have all
# rows already pulled in (->all and/or unordered). Given that the
# result can be rather large - we reuse the same already allocated
# array, since the collapsed prefetch is smaller by definition.
# At the end we cut the leftovers away and move on.
while ($cur_row_data = (
(
$rows_pos >= 0
and
(
$_[0][$rows_pos++]
or
# It may be tempting to drop the -1 and undef $rows_pos instead
# thus saving the >= comparison above as well
# However NULL-handlers and underdefined root markers both use
# $rows_pos as a last-resort-uniqueness marker (it either is
# monotonically increasing while we parse ->all, or is set at
# a steady -1 when we are dealing with a single root node). For
# the time being the complication of changing all callsites seems
# overkill, for what is going to be a very modest saving of ops
( ($rows_pos = -1), undef )
)
)
or
( $_[1] and $_[1]->() )
) ) {
# the undef checks may or may not be there
# depending on whether we prune or not
#
# due to left joins some of the ids may be NULL/undef, and
# won't play well when used as hash lookups
# we also need to differentiate NULLs on per-row/per-col basis
# (otherwise folding of optional 1:1s will be greatly confused)
( @cur_row_ids{( 0, 1, 5, 6, 8, 10 )} = (
@{$cur_row_data}[( 0, 1, 5, 6, 8, 10 )]
) ),
# in the case of an underdefined root - calculate the virtual id (otherwise no code at all)
# if we were supplied a coderef - we are collapsing lazily (the set
# is ordered properly)
# as long as we have a result already and the next result is new we
# return the pre-read data and bail
( $_[1] and $result_pos and ! $collapse_idx[0]{ $cur_row_ids{1} } and (unshift @{$_[2]}, $cur_row_data) and last ),
# the rel assemblers
# (each one //= -vivifies its node exactly once per distinct id set,
# so repeated rows of the same parent fold into one result structure)
( $collapse_idx[0]{ $cur_row_ids{1} } //= $_[0][$result_pos++] = [ { "genreid" => $cur_row_data->[4], "latest_cd" => $cur_row_data->[7], "year" => $cur_row_data->[3] } ] ),
( $collapse_idx[0]{ $cur_row_ids{1} }[1]{"existing_single_track"} //= $collapse_idx[1]{ $cur_row_ids{1} } = [ ] ),
( $collapse_idx[1]{ $cur_row_ids{1} }[1]{"cd"} //= $collapse_idx[2]{ $cur_row_ids{1} } = [ ] ),
( $collapse_idx[2]{ $cur_row_ids{1} }[1]{"artist"} //= $collapse_idx[3]{ $cur_row_ids{1} } = [ { "artistid" => $cur_row_data->[1] } ] ),
# multi-rels: an undef id column means the LEFT JOIN matched nothing,
# so prune the branch to an empty list instead of collapsing into it
( ( ! defined $cur_row_data->[6] )
? $collapse_idx[3]{ $cur_row_ids{1} }[1]{"cds"} = []
: do {
( (! $collapse_idx[4]{ $cur_row_ids{1} }{ $cur_row_ids{6} }) and push @{$collapse_idx[3]{ $cur_row_ids{1} }[1]{"cds"}}, $collapse_idx[4]{ $cur_row_ids{1} }{ $cur_row_ids{6} } = [ { "cdid" => $cur_row_data->[6], "genreid" => $cur_row_data->[9], "year" => $cur_row_data->[2] } ] ),
( ( ! defined $cur_row_data->[8] )
? $collapse_idx[4]{ $cur_row_ids{1} }{ $cur_row_ids{6} }[1]{"tracks"} = []
: do {
( (! $collapse_idx[5]{ $cur_row_ids{1} }{ $cur_row_ids{6} }{ $cur_row_ids{8} }) and push @{$collapse_idx[4]{ $cur_row_ids{1} }{ $cur_row_ids{6} }[1]{"tracks"}}, $collapse_idx[5]{ $cur_row_ids{1} }{ $cur_row_ids{6} }{ $cur_row_ids{8} } = [ { "title" => $cur_row_data->[8] } ] ),
} ),
} ),
( ( ! defined $cur_row_data->[5] )
? $collapse_idx[0]{ $cur_row_ids{1} }[1]{"tracks"} = []
: do {
( (! $collapse_idx[6]{ $cur_row_ids{1} }{ $cur_row_ids{5} }) and push @{$collapse_idx[0]{ $cur_row_ids{1} }[1]{"tracks"}}, $collapse_idx[6]{ $cur_row_ids{1} }{ $cur_row_ids{5} } = [ { "title" => $cur_row_data->[5] } ] ),
( ( ! defined $cur_row_data->[10] )
? $collapse_idx[6]{ $cur_row_ids{1} }{ $cur_row_ids{5} }[1]{"lyrics"} = []
: do {
( $collapse_idx[6]{ $cur_row_ids{1} }{ $cur_row_ids{5} }[1]{"lyrics"} //= $collapse_idx[7]{ $cur_row_ids{1} }{ $cur_row_ids{5} }{ $cur_row_ids{10} } = [ ] ),
( (! $collapse_idx[8]{ $cur_row_ids{0} }{ $cur_row_ids{1} }{ $cur_row_ids{5} }{ $cur_row_ids{10} }) and push @{$collapse_idx[7]{ $cur_row_ids{1} }{ $cur_row_ids{5} }{ $cur_row_ids{10} }[1]{"existing_lyric_versions"}}, $collapse_idx[8]{ $cur_row_ids{0} }{ $cur_row_ids{1} }{ $cur_row_ids{5} }{ $cur_row_ids{10} } = [ { "lyric_id" => $cur_row_data->[10], "text" => $cur_row_data->[0] } ] ),
} ),
} ),
}
$#{$_[0]} = $result_pos - 1; # truncate the passed in array to where we filled it with results
### END LITERAL STRING EVAL
@@ -0,0 +1,83 @@
### BEGIN LITERAL STRING EVAL
#
# NOTE(review): machine-generated DBIx::Class collapsing rowparser
# (semicolon variant, i.e. "semicol.src" — the pre-optimization baseline
# this commit replaces). Functionally identical to the comma variant:
# only the statement separators inside the while-loop differ.
# Calling convention (as visible below): $_[0] is the row array (reused
# in place for results), $_[1] an optional row-fetching coderef for lazy
# collapse, $_[2] an out-queue for a pre-read row belonging to the next
# result.
#
my $rows_pos = 0;
my ($result_pos, @collapse_idx, $cur_row_data, %cur_row_ids );
# this loop is a bit arcane - the rationale is that the passed in
# $_[0] will either have only one row (->next) or will have all
# rows already pulled in (->all and/or unordered). Given that the
# result can be rather large - we reuse the same already allocated
# array, since the collapsed prefetch is smaller by definition.
# At the end we cut the leftovers away and move on.
while ($cur_row_data = (
(
$rows_pos >= 0
and
(
$_[0][$rows_pos++]
or
# It may be tempting to drop the -1 and undef $rows_pos instead
# thus saving the >= comparison above as well
# However NULL-handlers and underdefined root markers both use
# $rows_pos as a last-resort-uniqueness marker (it either is
# monotonically increasing while we parse ->all, or is set at
# a steady -1 when we are dealing with a single root node). For
# the time being the complication of changing all callsites seems
# overkill, for what is going to be a very modest saving of ops
( ($rows_pos = -1), undef )
)
)
or
( $_[1] and $_[1]->() )
) ) {
# the undef checks may or may not be there
# depending on whether we prune or not
#
# due to left joins some of the ids may be NULL/undef, and
# won't play well when used as hash lookups
# we also need to differentiate NULLs on per-row/per-col basis
# (otherwise folding of optional 1:1s will be greatly confused)
@cur_row_ids{( 0, 1, 5, 6, 8, 10 )} = (
@{$cur_row_data}[( 0, 1, 5, 6, 8, 10 )]
);
# in the case of an underdefined root - calculate the virtual id (otherwise no code at all)
# if we were supplied a coderef - we are collapsing lazily (the set
# is ordered properly)
# as long as we have a result already and the next result is new we
# return the pre-read data and bail
$_[1] and $result_pos and ! $collapse_idx[0]{ $cur_row_ids{1} } and (unshift @{$_[2]}, $cur_row_data) and last;
# the rel assemblers
# (each one //= -vivifies its node exactly once per distinct id set,
# so repeated rows of the same parent fold into one result structure)
$collapse_idx[0]{ $cur_row_ids{1} } //= $_[0][$result_pos++] = [ { "genreid" => $cur_row_data->[4], "latest_cd" => $cur_row_data->[7], "year" => $cur_row_data->[3] } ];
$collapse_idx[0]{ $cur_row_ids{1} }[1]{"existing_single_track"} //= $collapse_idx[1]{ $cur_row_ids{1} } = [ ];
$collapse_idx[1]{ $cur_row_ids{1} }[1]{"cd"} //= $collapse_idx[2]{ $cur_row_ids{1} } = [ ];
$collapse_idx[2]{ $cur_row_ids{1} }[1]{"artist"} //= $collapse_idx[3]{ $cur_row_ids{1} } = [ { "artistid" => $cur_row_data->[1] } ];
# multi-rels: an undef id column means the LEFT JOIN matched nothing,
# so prune the branch to an empty list instead of collapsing into it
( ! defined $cur_row_data->[6] )
? $collapse_idx[3]{ $cur_row_ids{1} }[1]{"cds"} = []
: do {
(! $collapse_idx[4]{ $cur_row_ids{1} }{ $cur_row_ids{6} }) and push @{$collapse_idx[3]{ $cur_row_ids{1} }[1]{"cds"}}, $collapse_idx[4]{ $cur_row_ids{1} }{ $cur_row_ids{6} } = [ { "cdid" => $cur_row_data->[6], "genreid" => $cur_row_data->[9], "year" => $cur_row_data->[2] } ];
( ! defined $cur_row_data->[8] )
? $collapse_idx[4]{ $cur_row_ids{1} }{ $cur_row_ids{6} }[1]{"tracks"} = []
: do {
(! $collapse_idx[5]{ $cur_row_ids{1} }{ $cur_row_ids{6} }{ $cur_row_ids{8} }) and push @{$collapse_idx[4]{ $cur_row_ids{1} }{ $cur_row_ids{6} }[1]{"tracks"}}, $collapse_idx[5]{ $cur_row_ids{1} }{ $cur_row_ids{6} }{ $cur_row_ids{8} } = [ { "title" => $cur_row_data->[8] } ];
};
};
( ! defined $cur_row_data->[5] )
? $collapse_idx[0]{ $cur_row_ids{1} }[1]{"tracks"} = []
: do {
(! $collapse_idx[6]{ $cur_row_ids{1} }{ $cur_row_ids{5} }) and push @{$collapse_idx[0]{ $cur_row_ids{1} }[1]{"tracks"}}, $collapse_idx[6]{ $cur_row_ids{1} }{ $cur_row_ids{5} } = [ { "title" => $cur_row_data->[5] } ];
( ! defined $cur_row_data->[10] )
? $collapse_idx[6]{ $cur_row_ids{1} }{ $cur_row_ids{5} }[1]{"lyrics"} = []
: do {
$collapse_idx[6]{ $cur_row_ids{1} }{ $cur_row_ids{5} }[1]{"lyrics"} //= $collapse_idx[7]{ $cur_row_ids{1} }{ $cur_row_ids{5} }{ $cur_row_ids{10} } = [ ];
(! $collapse_idx[8]{ $cur_row_ids{0} }{ $cur_row_ids{1} }{ $cur_row_ids{5} }{ $cur_row_ids{10} }) and push @{$collapse_idx[7]{ $cur_row_ids{1} }{ $cur_row_ids{5} }{ $cur_row_ids{10} }[1]{"existing_lyric_versions"}}, $collapse_idx[8]{ $cur_row_ids{0} }{ $cur_row_ids{1} }{ $cur_row_ids{5} }{ $cur_row_ids{10} } = [ { "lyric_id" => $cur_row_data->[10], "text" => $cur_row_data->[0] } ];
};
};
}
$#{$_[0]} = $result_pos - 1; # truncate the passed in array to where we filled it with results
### END LITERAL STRING EVAL
@@ -0,0 +1,28 @@
use warnings;
use strict;
use Benchmark qw( cmpthese :hireswallclock);
use Sereal;
use Devel::Dwarn;

# Compile the two generated rowparser variants into coderefs.
# Each .src file is a bare sub body produced by the generator; slurp it
# and wrap it in a sub under full strictures (fatal uninitialized, so a
# behavioral divergence between the variants blows up loudly).
my @parsers = map {
    my $body = do { local (@ARGV, $/) = $_; <> };
    eval "sub { use strict; use warnings; use warnings FATAL => 'uninitialized'; $body }" or die $@;
} qw( semicol.src comma.src );

my ($semicol, $comma) = @parsers;

my $enc = Sereal::Encoder->new;
my $dec = Sereal::Decoder->new;

for my $iters ( 100, 10_000, 100_000 ) {

    # Build $iters fake rows of 11 identical id-columns each, then freeze
    # the set so every benchmark iteration can thaw a pristine copy
    # (the parsers mutate their argument in place).
    my @dataset;
    push @dataset, [ (scalar @dataset) x 11 ]
        while @dataset < $iters;

    my $ice = $enc->encode( \@dataset );

    print "\nTiming $iters 'rows'...\n";

    cmpthese( -10, {
        semicol => sub { $semicol->($dec->decode($ice)) },
        comma   => sub { $comma->($dec->decode($ice)) },
    })
}
@@ -133,7 +133,7 @@ sub assemble_collapsing_parser {
$top_node_key = "{ \$cur_row_ids{$virtual_column_idx} }";
$top_node_key_assembler = sprintf " \$cur_row_ids{%d} = ( %s ); ",
$top_node_key_assembler = sprintf "( \$cur_row_ids{%d} = (%s) ),",
$virtual_column_idx,
"\n" . join( "\n or\n", @path_parts, qq{"\0\$rows_pos\0"} )
;
@@ -155,16 +155,16 @@ sub assemble_collapsing_parser {
%{ $stats->{idcols_seen} },
} };
my $row_id_defs = sprintf "\@cur_row_ids{( %s )} = ( \n%s \n );",
my $row_id_defs = sprintf "( \@cur_row_ids{( %s )} = (\n%s\n ) ),",
join (', ', @row_ids ),
# in case we prune - we will never hit undefs/NULLs as pigeon-hole-criteria
( $args->{prune_null_branches}
? sprintf( '@{$cur_row_data}[( %s )]', join ', ', @row_ids )
: join (",\n", map {
my $quoted_null_val = qq( "\0NULL\xFF\${rows_pos}\xFF${_}\0" );
my $quoted_null_val = qq("\0NULL\xFF\${rows_pos}\xFF${_}\0");
HAS_DOR
? qq! ( \$cur_row_data->[$_] // $quoted_null_val ) !
: qq! ( defined(\$cur_row_data->[$_]) ? \$cur_row_data->[$_] : $quoted_null_val ) !
? qq!( \$cur_row_data->[$_] // $quoted_null_val )!
: qq!( defined(\$cur_row_data->[$_]) ? \$cur_row_data->[$_] : $quoted_null_val )!
} @row_ids)
)
;
@@ -218,7 +218,7 @@ sub assemble_collapsing_parser {
# is ordered properly)
# as long as we have a result already and the next result is new we
# return the pre-read data and bail
$_[1] and $result_pos and ! $collapse_idx[0]%3$s and (unshift @{$_[2]}, $cur_row_data) and last;
( $_[1] and $result_pos and ! $collapse_idx[0]%3$s and (unshift @{$_[2]}, $cur_row_data) and last ),
# the rel assemblers
%4$s
@@ -271,7 +271,7 @@ sub __visit_infmap_collapse {
my @src;
if ($cur_node_idx == 0) {
push @src, sprintf( '%s %s $_[0][$result_pos++] = %s;',
push @src, sprintf( '( %s %s $_[0][$result_pos++] = %s ),',
$node_idx_slot,
(HAS_DOR ? '//=' : '||='),
$me_struct || '{}',
@@ -285,15 +285,15 @@ sub __visit_infmap_collapse {
);
if ($args->{collapse_map}->{-is_single}) {
push @src, sprintf ( '%s %s %s%s;',
push @src, sprintf ( '( %s %s %s%s ),',
$parent_attach_slot,
(HAS_DOR ? '//=' : '||='),
$node_idx_slot,
$me_struct ? " = $me_struct" : '',
);
}
else {
push @src, sprintf('(! %s) and push @{%s}, %s%s;',
push @src, sprintf('( (! %s) and push @{%s}, %s%s ),',
$node_idx_slot,
$parent_attach_slot,
$node_idx_slot,
@@ -332,7 +332,7 @@ sub __visit_infmap_collapse {
if ($args->{prune_null_branches}) {
# start of wrap of the entire chain in a conditional
splice @src, $rel_src_pos, 0, sprintf "( ! defined %s )\n ? %s%s{%s} = %s\n : do {",
splice @src, $rel_src_pos, 0, sprintf "( ( ! defined %s )\n ? %s%s{%s} = %s\n : do {",
"\$cur_row_data->[$first_distinct_child_idcol]",
$node_idx_slot,
$args->{hri_style} ? '' : '[1]',
@@ -341,11 +341,11 @@ sub __visit_infmap_collapse {
;
# end of wrap
push @src, '};'
push @src, '} ),'
}
else {
splice @src, $rel_src_pos + 1, 0, sprintf ( '(defined %s) or bless (%s[1]{%s}, %s);',
splice @src, $rel_src_pos + 1, 0, sprintf ( '( (defined %s) or bless (%s[1]{%s}, %s) ),',
"\$cur_row_data->[$first_distinct_child_idcol]",
$node_idx_slot,
perlstring($rel),
Oops, something went wrong.

0 comments on commit 05a5ca4

Please sign in to comment.