Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
Make it 13x faster by making the code unreadable
Also, potentially up to 39x faster using the recommendation below.

Every single change in this commit actually had a positive impact on
the performance. It is sad that one has to use nqp for everything to
get things working reasonably fast.

Whatever you do, *do not use the .min method* in your user code if you want to
get every last drop of performance. At this point, everything you do
outside of the Sift4 module will have a much bigger impact on
performance.

For example, this is insanely slow:

  say @a.min({ sift4($_, $str, 5) });

And this is twice as fast:

  my $ans = ‘default value’;
  my $ans-min = 6;

  for @a {
      my $dist = sift4($_, $str, 5, 5);
      if $dist < $ans-min {
          $ans = $_;
          $ans-min = $dist;
      }
  }

One would think that in the code above 「sift4($_, $str, 5, 5)」 can be
changed to 「sift4($_, $str, 5, $ans-min)」, but no, for some reason it
does not work. Whether it is an issue in this implementation or in the
algorithm itself I don't know. Should be investigated I think.

Anyway, to make it easier for the users I think this module should
provide a function for processing an array (so that users are less
likely to screw it up).

Random testing suggests that these changes do not affect
functionality.
  • Loading branch information
AlexDaniel committed Jan 10, 2017
1 parent a079c5d commit 524f98c
Showing 1 changed file with 84 additions and 62 deletions.
146 changes: 84 additions & 62 deletions lib/Text/Diff/Sift4.pm6
Expand Up @@ -3,79 +3,101 @@ use nqp;

unit module Text::Diff::Sift4;

# sift4($s1, $s2, $maxOffset = 100, $maxDistance = 100 --> Int)
#
# Returns an Int distance between two strings. The final expression is
# max(length of $s1, length of $s2) - $lcss + $trans; a running estimate of
# the same form is returned early once it reaches a non-zero $maxDistance.
# $maxOffset caps how far the look-ahead scans after a mismatch.
#
# NOTE(review): this listing looks like a rendered diff with the +/- markers
# stripped. Two implementations of the same steps are interleaved (see the two
# signature lines directly below): "variant A" uses Raku-level constructs and
# an array of hashes, "variant B" uses nqp ops and a flat native-int list.
# As shown it is not expected to compile -- confirm against the real file.
sub sift4(Str() $s1, Str() $s2, Int $maxOffset = 100, Int $maxDistance = 100 --> Int) is export {
sub sift4(Str:D() $s1, Str:D() $s2, Int $maxOffset = 100, Int $maxDistance = 100 --> Int) is export {
# Character counts of both inputs, kept as native ints.
my int $l1 = nqp::chars($s1);
my int $l2 = nqp::chars($s2);

# Variant A empty-input shortcut: when $s1 is false or zero-length, return 0
# if $s2 is false too, otherwise $l2; when $s2 is false or zero-length,
# return $l1.
return !$s2 ?? 0 !! $l2 if !$s1 or !$l1;
return $l1 if !$s2 or !$l2;

# Variant A state: cursors into $s1/$s2 and three counters, all set to 0, plus
# an array that receives one {c1, c2, trans} hash per matching position.
my int ($c1, $c2, $lcss, $local_cs, $trans) = (0, 0, 0, 0, 0);
my @offset_arr;

# Variant A main loop: runs while both cursors are inside their strings.
while nqp::islt_i($c1, $l1) and nqp::islt_i($c2, $l2) {
# Match: the one-character substring of $s2 at $c2 occurs in $s1 at $c1.
if nqp::eqat($s1, nqp::substr($s2, $c2, 1), $c1) {
++$local_cs;
my Bool $isTrans = False;
my int $i = 0;
# Scan the recorded matches for one that the current cursors do not pass.
while nqp::islt_i($i, @offset_arr.elems) {
my %ofs := @offset_arr[$i];
if nqp::isle_i($c1, %ofs<c1>) or nqp::isle_i($c2, %ofs<c2>) {
# Flagged when the current cursor gap is at least as large as the recorded one.
$isTrans = ?nqp::isge_i(nqp::abs_i($c2 - $c1), nqp::abs_i(%ofs<c2> - %ofs<c1>));
if $isTrans {
++$trans;
} elsif !%ofs<trans> {
# Mark the recorded entry so it bumps $trans at most once via this branch.
%ofs<trans> = True;
++$trans;
}
last;
} else {
# Drop the entry when both cursors are beyond its (crossed) positions;
# $i is not advanced because the following entry shifts into slot $i.
if nqp::isgt_i($c1, %ofs<c2>) and nqp::isgt_i($c2, %ofs<c1>) {
@offset_arr.splice($i, 1);
} else {
++$i;
}
}
}
@offset_arr.push({c1 => $c1, c2 => $c2, trans => $isTrans});
# Variant B empty-input shortcut: an empty string yields the other's length.
return $l2 unless $l1;
return $l1 unless $l2;

# Variant B state: declared without initialisers (Raku native ints start at 0).
my int $c1;
my int $c2;
my int $lcss;
my int $local_cs;
my int $trans;

# Native-int copies of the Int parameters for use with the nqp integer ops.
my int $max_offset = $maxOffset;
my int $max_distance = $maxDistance;

# Flat native-int list; entries are stored as consecutive triples
# (trans flag, c1, c2) -- see the three push_i calls and the +0/+1/+2 reads.
my $offset_arr := nqp::list_i;

my int $isTrans;
my int $i;

# Variant B main loop: same bounds condition as variant A.
while nqp::islt_i($c1, $l1) && nqp::islt_i($c2, $l2) {
# Match: compares the code points at the two cursors.
if nqp::iseq_i(nqp::ordat($s1, $c1), nqp::ordat($s2, $c2)) {
nqp::stmts(
($isTrans = 0),
($i = 0),
($local_cs = nqp::add_i($local_cs, 1)),

# Scan the recorded triples; $i always points at the start of a triple.
nqp::while(nqp::islt_i($i, nqp::elems($offset_arr)),
nqp::stmts(
(my int $_trans = nqp::atpos_i($offset_arr, nqp::add_i($i, 0))),
(my int $_c1 = nqp::atpos_i($offset_arr, nqp::add_i($i, 1))),
(my int $_c2 = nqp::atpos_i($offset_arr, nqp::add_i($i, 2))),

nqp::if(nqp::isle_i($c1, $_c1) || nqp::isle_i($c2, $_c2),
nqp::stmts(
($isTrans = nqp::isge_i(nqp::abs_i(nqp::sub_i($c2, $c1)), nqp::abs_i(nqp::sub_i($_c2, $_c1)))),
nqp::if($isTrans,
($trans = nqp::add_i($trans, 1)),
nqp::unless($_trans,
# Sets the trans flag (slot $i + 0) of the recorded triple to 1.
nqp::stmts(nqp::bindpos_i($offset_arr, $i, 1),
($trans = nqp::add_i($trans, 1))))),
# Huge index makes the nqp::while condition fail: stands in for `last`.
($i = 2147483647)),

# Otherwise either remove the 3-element entry at $i (splice in an empty
# list) or step to the next triple.
nqp::if((nqp::isgt_i($c1, $_c2) && nqp::isgt_i($c2, $_c1)),
nqp::splice($offset_arr, nqp::list, $i, 3),
($i = nqp::add_i($i, 3)))))),
# Record the current match as a new (trans flag, c1, c2) triple.
nqp::push_i($offset_arr, $isTrans),
nqp::push_i($offset_arr, $c1),
nqp::push_i($offset_arr, $c2))
} else {
# Mismatch (variant A): bank the current run length into $lcss and reset it.
$lcss += $local_cs;
$local_cs = 0;

# Realign both cursors to the smaller of the two when they differ.
$c1 = $c2 = ($c1 min $c2) if nqp::isne_i($c1, $c2);

# Look ahead up to $maxOffset positions in either string for a matching
# character. The cursors are set one short of the match because both are
# incremented right after this branch.
loop (my int $i = 0; nqp::islt_i($i, $maxOffset) and (nqp::islt_i($c1 + $i, $l1) or nqp::islt_i($c2 + $i, $l2)); ++$i) {
if nqp::islt_i($c1 + $i, $l1) and nqp::eqat($s1, nqp::substr($s2, $c2, 1), $c1 + $i) {
$c1 += $i - 1;
--$c2;
last;
}
if nqp::islt_i($c2 + $i, $l2) and nqp::eqat($s1, nqp::substr($s2, $c2 + $i, 1), $c1) {
$c2 += $i - 1;
--$c1;
last;
}
}
}
# Mismatch (variant B): the same steps expressed with nqp ops.
nqp::stmts(
($lcss = nqp::add_i($lcss, $local_cs)),
($local_cs = 0),

# Variant A: advance both cursors.
++$c1;
++$c2;
# Variant B: realign both cursors to the smaller of the two when they differ.
nqp::if(nqp::isne_i($c1, $c2), ($c1 = $c2 = nqp::isle_i($c1, $c2) ?? $c1 !! $c2)),

# Variant A early exit: with a non-zero $maxDistance, return the running
# estimate max($c1, $c2) - $lcss + $trans once it reaches $maxDistance.
if $maxDistance {
my int $tempDistance = ($c1 max $c2) - $lcss + $trans;
return $tempDistance if nqp::isge_i($tempDistance, $maxDistance);
# Variant B look-ahead, bounded by $max_offset and by the string lengths.
($i = 0),
nqp::while(
nqp::islt_i($i, $max_offset) && (nqp::islt_i(nqp::add_i($c1, $i), $l1) ||
nqp::islt_i(nqp::add_i($c2, $i), $l2)),
nqp::stmts(
# $s1 at $c1 + $i matches $s2 at $c2: move $c1 there (minus one) and
# step $c2 back one.
nqp::if(nqp::islt_i(nqp::add_i($c1, $i), $l1) &&
nqp::iseq_i(nqp::ordat($s1, nqp::add_i($c1, $i)),
nqp::ordat($s2, $c2)),
nqp::stmts(
($c1 = nqp::sub_i(nqp::add_i($c1, $i), 1)),
($c2 = nqp::sub_i($c2, 1)),
# Ends the loop via its condition. The second nqp::if below is still
# evaluated, but its bounds check uses this huge $i.
($i = 2147483647))),
# Mirror case: $s2 at $c2 + $i matches $s1 at $c1.
nqp::if(nqp::islt_i(nqp::add_i($c2, $i), $l2) &&
nqp::iseq_i(nqp::ordat($s1, $c1), nqp::ordat($s2, nqp::add_i($c2, $i))),
nqp::stmts(
($c2 = nqp::sub_i(nqp::add_i($c2, $i), 1)),
($c1 = nqp::sub_i($c1, 1)),
($i = 2147483647))),
($i = nqp::add_i($i, 1)))))
}

# Variant A: once either cursor has reached the end of its string, bank the
# current run and realign both cursors to the smaller one.
if nqp::isge_i($c1, $l1) or nqp::isge_i($c2, $l2) {
$lcss += $local_cs;
$local_cs = 0;
$c1 = $c2 = ($c1 min $c2);
}
# Variant B: advance both cursors.
$c1 = nqp::add_i($c1, 1);
$c2 = nqp::add_i($c2, 1);

# Variant B early exit: the outer test compares the running estimate
# max($c1, $c2) - $lcss + $trans with $max_distance; the inner test only
# lets the return happen when $max_distance is non-zero.
nqp::if(nqp::isge_i( nqp::add_i(nqp::sub_i((nqp::isge_i($c1, $c2) ?? $c1 !! $c2), $lcss), $trans), $max_distance),
nqp::if($max_distance, return nqp::add_i(nqp::sub_i((nqp::isge_i($c1, $c2) ?? $c1 !! $c2), $lcss), $trans)));

# Variant B: end-of-string handling, same steps as the variant A block above.
nqp::if(nqp::isge_i($c1, $l1) || nqp::isge_i($c2, $l2),
nqp::stmts(
($lcss = nqp::add_i($lcss, $local_cs)),
($local_cs = 0),
($c1 = $c2 = (nqp::isle_i($c1, $c2) ?? $c1 !! $c2)))
);
}

# Bank whatever run was still open when the loop ended (both variants).
$lcss += $local_cs;
$lcss = nqp::add_i($lcss, $local_cs);

# Result (both variants): longer length minus $lcss plus $trans.
($l1 max $l2) - $lcss + $trans;
nqp::add_i(nqp::sub_i((nqp::isge_i($l1, $l2) ?? $l1 !! $l2), $lcss), $trans)
}

# vim: ft=perl6

0 comments on commit 524f98c

Please sign in to comment.