Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Update REPL.fuzzyscore to use string distance #50412

Merged
merged 1 commit into from
Aug 14, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
65 changes: 52 additions & 13 deletions stdlib/REPL/src/docview.jl
Original file line number Diff line number Diff line change
Expand Up @@ -624,22 +624,61 @@ bestmatch(needle, haystack) =
longer(matchinds(needle, haystack, acronym = true),
matchinds(needle, haystack))

# Return `(xs[end] - xs[1] - length(xs) + 1) / length(xs)`: the number of positions
# skipped between the first and last entries of `xs`, averaged over its length.
# An empty `xs` yields the integer `0`.
# NOTE(review): presumably `xs` is an ascending list of match indices — confirm with callers.
function avgdistance(xs)
    isempty(xs) && return 0
    n = length(xs)
    span = xs[end] - xs[1]
    return (span - n + 1) / n
end
"""
    string_distance(a::AbstractString, lena::Integer, b::AbstractString, lenb::Integer)

Return the optimal string alignment distance between `a` and `b`: the minimum number of
insertions, deletions, substitutions, or transpositions of two adjacent characters needed
to go from one string to the other.

`lena` and `lenb` must be the lengths of `a` and `b` counted in characters (`length(a)`
and `length(b)`), not in bytes; they are compared against a character count below.
"""
function string_distance(a::AbstractString, lena::Integer, b::AbstractString, lenb::Integer)
    # Keep `a` as the shorter string so the early return below covers "a is a prefix of b".
    if lena > lenb
        a, b = b, a
        lena, lenb = lenb, lena
    end

    # Count the characters of the common prefix. They cost nothing, so the dynamic
    # program below only has to look at what follows them. The comparison must be made
    # per character; comparing the whole strings would never skip anything.
    start = 0
    for (ca, cb) in zip(a, b)
        ca == cb || break
        start += 1
    end
    # `a` is entirely a prefix of `b`: only the remaining characters of `b` are inserted.
    start == lena && return lenb - start

    # prev_row[k] is the distance for the previous character of `a` against the first
    # `k` characters of `b` after the prefix; it starts as the cost of `k` insertions.
    prev_row = collect(1:(lenb - start))
    # diag_row[k] remembers the diagonal predecessor used while filling prev_row[k], so
    # that the next row can reach the cell two rows and two columns back (transposition).
    # It is read before being written only on the first row, where that value is unused.
    diag_row = similar(prev_row)
    prev_a, prev_b = first(a), first(b)
    current = 0
    for (i, ai) in enumerate(a)
        i > start || (prev_a = ai; continue)
        diag = i - start - 1    # cell diagonally up-left of the first column
        current = i - start     # cost of deleting the first `i - start` characters
        transpose_next = 0
        for (j, bj) in enumerate(b)
            j > start || (prev_b = bj; continue)
            k = j - start
            left = current                  # same row, previous column
            transposed = transpose_next     # two rows up, two columns back
            transpose_next = diag_row[k]
            diag_row[k] = diag
            up = prev_row[k]                # previous row, same column
            current = diag                  # characters match: no extra cost
            if ai != bj
                # Minimum between substitution, insertion and deletion
                current = min(diag + 1, left + 1, up + 1)
                # Swapping two adjacent characters counts as a single edit
                if i > start + 1 && j > start + 1 && ai == prev_b && prev_a == bj
                    current = min(current, transposed + 1)
                end
            end
            prev_row[k] = current
            diag = up
            prev_b = bj
        end
        prev_a = ai
    end
    return current
end

function fuzzyscore(needle, haystack)
score = 0.
is, acro = bestmatch(needle, haystack)
score += (acro ? 2 : 1)*length(is) # Matched characters
score -= 2(length(needle)-length(is)) # Missing characters
!acro && (score -= avgdistance(is)/10) # Contiguous
!isempty(is) && (score -= sum(is)/length(is)/100) # Closer to beginning
return score
"""
    fuzzyscore(needle::AbstractString, haystack::AbstractString)

Return a similarity score computed as
`1 - string_distance(needle, haystack) / max(length(needle), length(haystack))`,
so a distance of zero scores `1.0` and larger distances score lower.

Two empty strings are treated as identical and score `1.0`; without this guard the
division would be `0 / 0` and the result `NaN`.
"""
function fuzzyscore(needle::AbstractString, haystack::AbstractString)
    lena, lenb = length(needle), length(haystack)
    longest = max(lena, lenb)
    # Nothing to compare: avoid 0 / 0.
    longest == 0 && return 1.0
    return 1 - string_distance(needle, lena, haystack, lenb) / longest
end

# Return `candidates` reordered from the highest `fuzzyscore` against `search` to the
# lowest. The scores are sorted ascending and the result reversed, so candidates with
# equal scores come out in the reverse of their original order.
function fuzzysort(search::String, candidates::Vector{String})
    # Only the fuzzy score is used for ordering; a second score vector that was built
    # and then immediately overwritten has been removed.
    scores = map(cand -> fuzzyscore(search, cand), candidates)
    return candidates[sortperm(scores)] |> reverse
end

Expand Down Expand Up @@ -690,7 +729,7 @@ function printmatches(io::IO, word, matches; cols::Int = _displaysize(io)[2])
total = 0
for match in matches
total + length(match) + 1 > cols && break
fuzzyscore(word, match) < 0 && break
fuzzyscore(word, match) < 0.5 && break
print(io, " ")
printmatch(io, word, match)
total += length(match) + 1
Expand Down
9 changes: 9 additions & 0 deletions stdlib/REPL/test/docview.jl
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,15 @@ end
# https://github.com/JunoLab/FuzzyCompletions.jl/issues/7
# shouldn't throw when there is a space in a middle of query
@test (REPL.matchinds("a ", "a file.txt"); true)
@test isapprox(REPL.fuzzyscore("abcdef", ""), 0.0; atol=0.001)
@test 0.8 < REPL.fuzzyscore(
"supercalifragilisticexpialidocious",
"bupercalifragilisticexpialidocious"
) < 1.0

# Unicode
@test 1.0 > REPL.fuzzyscore("αkδψm", "αkδm") > 0.0
@test 1.0 > REPL.fuzzyscore("αkδψm", "α") > 0.0
end

@testset "Unicode doc lookup (#41589)" begin
Expand Down