Skip to content

Commit

Permalink
Codex: Fine-tuned tag search algorithm
Browse files Browse the repository at this point in the history
By default, levenshtein is case sensitive, so certain matches don't
work quite like they should, particularly because additional automatic
tag substitution is being used (libdoom => Doom).

Now certain case-insensitive matches (substring, starts with, exact
match) are given a decreased cost.
  • Loading branch information
skyjake committed Jul 27, 2013
1 parent 4759e7c commit 0710c9c
Showing 1 changed file with 22 additions and 3 deletions.
25 changes: 22 additions & 3 deletions doomsday/build/scripts/codex.py
Expand Up @@ -13,6 +13,13 @@
# An optional first command-line argument overrides the output directory.
# Compare the argument *count*: `sys.argv > 1` compares a list with an int,
# which is always true on Python 2 (IndexError when no argument is given)
# and a TypeError on Python 3.
if len(sys.argv) > 1:
    OUT_DIR = sys.argv[1]

#aliases = {
#'Chex Quest': ['chex'],
#'Doom': ['libdoom'],
#'Heretic': ['libheretic'],
#'Hexen': ['libhexen']
#}

class Commit:
def __init__(self, subject, author, date, link, hash):
self.subject = subject
Expand Down Expand Up @@ -351,15 +358,27 @@ def print_date_sorted_commits(out, coms, tag, linkSuffix='', colorIdx=None):
$style = '';
}
$destination = "index.html";
$best = -1;
$best = -1.0;
if(strlen($input) > 0 && strlen($input) < 60) {
foreach($tags as $tag => $link) {
$lev = levenshtein($input, $tag);
$lev = (float) levenshtein($input, $tag); // case sensitive
if(stripos($tag, $input) !== FALSE || stripos($input, $tag) !== FALSE) {
// Found as a case insensitive substring, increase likelihood.
$lev = $lev/2.0;
}
if(stripos($tag, $input) === 0) {
// Increase likelihood further if the match is in the beginning.
$lev = $lev/2.0;
}
if(strcasecmp($tag, $input) == 0) {
    // Case insensitive direct match, increase likelihood.
    // strcasecmp() returns 0 when the strings are equal ignoring case.
    // The previous form `!strcasecmp(...) == 0` negated the result before
    // comparing, so the bonus was applied to every tag that did NOT match.
    $lev = $lev/2.0;
}
if($lev == 0) {
$destination = "tag_$style$link.html";
break;
}
if($lev < $best || $best < 0) {
if($best < 0 || $lev < $best) {
$destination = "tag_$style$link.html";
$best = $lev;
}
Expand Down

0 comments on commit 0710c9c

Please sign in to comment.